In [119]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import wikipedia
import re

In [95]:
# Spotify API credentials must come from the environment (for example exported
# in the shell that launches Jupyter, or loaded from an untracked .env file);
# they must never be committed inside the notebook.
# SpotifyClientCredentials() is called without arguments further down, so it
# relies on SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET being present in os.environ.
# NOTE: the client id/secret pair that used to be hardcoded in this cell has
# been exposed and should be revoked and regenerated in the Spotify dashboard.
missing_spotify_vars = [
    var
    for var in ('SPOTIPY_CLIENT_ID', 'SPOTIPY_CLIENT_SECRET')
    if not os.environ.get(var)
]
if missing_spotify_vars:
    # Warn early instead of failing later inside the first Spotify call.
    print(
        "Missing Spotify credentials; set these environment variables:",
        ", ".join(missing_spotify_vars),
    )

In [96]:


def artist_top_tracks(artist_name: str) -> list[dict[str, str]]:
    """
    Get the top tracks of an artist from Spotify.

    The artist is resolved with a Spotify search and the first hit is used,
    so an ambiguous name may resolve to a different artist than intended.

    :param artist_name: The name of the artist.
    :return: A list of dicts, one per top track, each with the keys
        ``"track_name"`` and ``"spotify_link"``. Empty when the name is
        blank or the search returns no artist.
    """
    # A blank name would send the bare query "artist:" to the API.
    if not artist_name or not artist_name.strip():
        return []

    # Authenticate with Spotify (credentials are taken from the environment)
    client_credentials_manager = SpotifyClientCredentials()
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

    # Search for the artist and keep the first match
    results = sp.search(q='artist:' + artist_name, type='artist')
    matches = results['artists']['items']
    if not matches:
        return []

    top_tracks = sp.artist_top_tracks(matches[0]['id'])
    return [
        {"track_name": track['name'], "spotify_link": track['external_urls']['spotify']}
        for track in top_tracks['tracks']
    ]

In [None]:
def find_wikipedia_page(query):
    """
    Search Wikipedia for ``query`` and return the page object of the first hit.

    :param query: Free-text search string (e.g. a song title).
    :return: The page object for the first search result, or ``None`` when the
        query is blank, nothing is found, or the lookup raises. Failures are
        reported with ``print`` rather than raised, so callers must check for
        ``None``.
    """
    if not query or not str(query).strip():
        print("No Wikipedia page found for:", query)
        return None
    try:
        # Search for the query and get the first result
        results = wikipedia.search(query)
        if not results:
            print("No Wikipedia page found for:", query)
            return None
        # The title comes straight from the search results, so it is already
        # exact: disable auto_suggest, which could otherwise rewrite it and
        # load a different page.
        return wikipedia.page(results[0], auto_suggest=False)
    except Exception as e:
        # Deliberately best-effort: any lookup failure (disambiguation,
        # missing page, network error) is reported and turned into None.
        print("Error finding Wikipedia page:", e)
        return None

def song_check(wikipedia_page: "wikipedia.WikipediaPage") -> "str | bool":
    """
    Decide whether a Wikipedia page looks music-related and, if so, return its text.

    The page's summary, category names and full content are lower-cased and
    searched for a small set of keywords (plain substring match).

    :param wikipedia_page: Page object exposing ``summary``, ``categories`` and
        ``content``; ``None`` is accepted and treated as "not a song".
    :return: The page's ``content`` string when a keyword is found,
        otherwise ``False``.
    """
    if wikipedia_page is None:
        return False

    # Substring keywords; matched against every text source listed above.
    song_keywords = ("song", "discographies", "record label")

    def _texts():
        # Yield sources lazily so later attributes are only read when the
        # earlier ones did not already match.
        yield wikipedia_page.summary.lower()
        for category in wikipedia_page.categories:
            yield category.lower()
        yield wikipedia_page.content.lower()

    if any(keyword in text for text in _texts() for keyword in song_keywords):
        return wikipedia_page.content
    return False


# Matches one heading line of the plain-text page content, e.g.
# "== Personnel ==" or "=== Musicians ===": group 1 is the run of leading '='
# (its length is the heading level), group 2 is the heading title.
_HEADING_PATTERN = re.compile(r'^(={2,})[ \t]*(.*?)[ \t]*={2,}[ \t]*$', re.MULTILINE)

# Sentinel returned when no usable section exists (kept for existing callers).
_PERSONNEL_NOT_FOUND = "Personnel section not found."


def extract_personnel_section(page_content):
    """
    Extracts the 'Personnel' section from the Wikipedia page content.

    The first heading (of any level) whose title starts with "personnel"
    (case-insensitive) is selected. The returned text runs from just after
    that heading up to the next heading of the same or a higher level, so
    nested subsections are included together with their heading lines.

    :param page_content: Plain-text page content using '==' style headings.
        Non-string or empty values are treated as "no section".
    :return: The stripped section text, or the string
        "Personnel section not found." when there is no such section or it
        is empty.
    """
    if not isinstance(page_content, str) or not page_content:
        return _PERSONNEL_NOT_FOUND

    headings = list(_HEADING_PATTERN.finditer(page_content))
    for index, heading in enumerate(headings):
        if not heading.group(2).lower().startswith('personnel'):
            continue

        level = len(heading.group(1))
        section_end = len(page_content)
        # The section stops at the next heading that is not nested below it.
        for later in headings[index + 1:]:
            if len(later.group(1)) <= level:
                section_end = later.start()
                break

        body = page_content[heading.end():section_end].strip()
        return body or _PERSONNEL_NOT_FOUND

    return _PERSONNEL_NOT_FOUND


# A dash (en dash, em dash or hyphen) with whitespace on both sides: the
# usual "Name – role, role" separator, and safe for hyphenated names.
_SPACED_DASH_PATTERN = re.compile(r'\s+[–—-]\s+')


def _split_name_and_roles(line):
    """Split one credit line into (name, roles_text), or return None if it has no dash."""
    spaced = _SPACED_DASH_PATTERN.search(line)
    if spaced:
        return line[:spaced.start()], line[spaced.end():]
    # Fall back to an unspaced dash: en dash first, then em dash, then hyphen.
    for dash in ('–', '—', '-'):
        if dash in line:
            name, roles_text = line.split(dash, 1)
            return name, roles_text
    return None


def parse_personnel_section(section_text):
    """
    Parses a personnel/credits section into a list of (name, roles) tuples.

    Each line of the form "Name – role, role" yields one tuple whose second
    item is the list of comma-separated roles. Lines without a dash, heading
    lines (starting with '='), and lines with an empty name or no roles are
    skipped.

    :param section_text: Text of the section; ``None``, empty or non-string
        values give an empty list.
    :return: List of ``(name, [role, ...])`` tuples in line order.
    """
    if not isinstance(section_text, str) or not section_text:
        return []

    people = []
    for raw_line in section_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('='):
            continue  # blank line or (sub)section heading
        parts = _split_name_and_roles(line)
        if parts is None:
            continue  # Skip lines that don't match the pattern
        name = parts[0].strip()
        roles = [role.strip() for role in parts[1].split(',') if role.strip()]
        if name and roles:
            people.append((name, roles))
    return people

# Example usage: print the top Spotify tracks of each producer credited on a song

In [178]:
# Song title used as the Wikipedia search query by the example below.
song = "Save your tears"

def get_song_personnel(song):
    """
    Look up a song on Wikipedia and return its parsed personnel credits.

    Chains the helpers: find the page, check that it looks like a song page,
    extract the personnel section and parse it.

    :param song: Song title used as the Wikipedia search query.
    :return: Whatever ``parse_personnel_section`` returns for the extracted
        section; an empty list when no page is found or the page does not
        pass ``song_check``.
    """
    wikipedia_page = find_wikipedia_page(song)
    if wikipedia_page is None:
        # Lookup failed; find_wikipedia_page has already reported why.
        return []

    song_content = song_check(wikipedia_page)
    if not song_content:
        # song_check returns False for pages that do not look like a song.
        return []

    personnel_section = extract_personnel_section(song_content)
    return parse_personnel_section(personnel_section)

# For every credited person with at least one production role, print the
# person, their production role(s) and their top Spotify tracks.
for name, roles in get_song_personnel(song):
    production_roles = [role for role in roles if "production" in role.lower()]
    if not production_roles:
        continue

    print(f"\nName: {name}")
    for role in production_roles:
        print(f"  - {role}")

    # Query Spotify once per person, not once per matching role, so someone
    # credited with several production roles is not fetched and printed
    # repeatedly.
    tracks = artist_top_tracks(name)
    if tracks:
        print("  Top Tracks:")
        for idx, track in enumerate(tracks, 1):
            print(f"    {idx}. {track['track_name']} ({track['spotify_link']})")
    else:
        print("No top tracks found.")


Name: The Weeknd
  - production
  Top Tracks:
    1. Timeless (feat Playboi Carti) (https://open.spotify.com/track/0FIDCNYYjNvPVimz5icugS)
    2. One Of The Girls (with JENNIE, Lily Rose Depp) (https://open.spotify.com/track/7CyPwkp0oE8Ro9Dd5CUDjW)
    3. Starboy (https://open.spotify.com/track/7MXVkk9YMctZqd1Srtv4MB)
    4. Blinding Lights (https://open.spotify.com/track/0VjIjW4GlUZAMYd2vXMi3b)
    5. Die For You (https://open.spotify.com/track/2LBqCSwhJGcFQeTHMVGwy3)
    6. RATHER LIE (with The Weeknd) (https://open.spotify.com/track/68qeaZhtMZ6abrJCYt6nQn)
    7. São Paulo (feat. Anitta) (https://open.spotify.com/track/7DY756WOLyOz2Xnhw4EFiC)
    8. Cry For Me (https://open.spotify.com/track/3AWDeHLc88XogCaCnZQLVI)
    9. The Hills (https://open.spotify.com/track/7fBv7CLKzipRk6EC6TWHOB)
    10. Save Your Tears (https://open.spotify.com/track/5QO79kh1waicV47BqGRL3g)

Name: Max Martin
  - production
  Top Tracks:
    1. Tell Me What You Like (https://open.spotify.com/track/7hO5fHsdQB