In [8]:
import requests
import pandas as pd


In [30]:
BASE_URL = "https://api.deezer.com"

def fetch_data(endpoint, params=None, timeout=10):
    """Fetch one JSON document from the Deezer API.

    Args:
        endpoint: Path appended to ``BASE_URL`` (for example ``"/artist/27"``).
        params: Optional dict of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        body is not valid JSON, or the payload is an API error object.
    """
    try:
        response = requests.get(f"{BASE_URL}{endpoint}", params=params, timeout=timeout)
        response.raise_for_status()  # Raise HTTP errors
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    except ValueError as e:
        # Older requests releases raise a plain ValueError for non-JSON bodies,
        # which is not a RequestException subclass.
        print(f"Error fetching data: {e}")
        return None

    # Deezer reports failures such as an exceeded quota or an unknown ID as an
    # {"error": {...}} object in an otherwise successful response, so
    # raise_for_status() does not catch them.
    if isinstance(payload, dict) and "error" in payload:
        print(f"Error fetching data: {payload['error']}")
        return None
    return payload

In [31]:
def fetch_artists(artist_names):
    """Look up each name on Deezer and keep the first search hit.

    Names whose search fails or returns no results are left out of the
    returned list. Each entry is a dict with the keys ``artist_id``,
    ``name``, ``nb_fan`` and ``link``.
    """
    found = []
    for query in artist_names:
        payload = fetch_data("/search/artist", params={"q": query})
        if not payload or "data" not in payload:
            continue
        hits = payload["data"]
        if len(hits) == 0:
            continue
        best = hits[0]  # The first search result is taken as the match
        record = {
            "artist_id": best["id"],
            "name": best["name"],
            "nb_fan": best.get("nb_fan", 0),
            "link": best.get("link", "N/A"),
        }
        found.append(record)
    return found

In [32]:
def fetch_artist_details(artist_ids):
    """Fetch detailed data for a list of artist IDs.

    Args:
        artist_ids: Iterable of Deezer artist IDs; each one is requested from
            ``/artist/{id}``.

    Returns:
        A list of dicts with the keys ``artist_id``, ``name``, ``nb_fan``,
        ``nb_album``, ``radio`` and ``link``. IDs whose request fails, or
        whose response lacks ``id``/``name`` (for example an API error
        object), are skipped instead of aborting the whole batch.
    """
    detailed_artists = []
    for artist_id in artist_ids:
        data = fetch_data(f"/artist/{artist_id}")
        if not data:
            continue
        # A truthy payload is not necessarily an artist: an {"error": {...}}
        # body would raise KeyError on data["id"] below.
        if "id" not in data or "name" not in data:
            print(f"Skipping artist {artist_id}: unexpected response {data}")
            continue
        detailed_artists.append({
            "artist_id": data["id"],
            "name": data["name"],
            "nb_fan": data.get("nb_fan", 0),
            "nb_album": data.get("nb_album", 0),
            "radio": data.get("radio", False),
            "link": data.get("link", "N/A")
        })
    return detailed_artists

In [33]:
# Artists to look up on Deezer, in the order they are searched.
artist_names = [
    "Kip Moore", "Breaking Benjamin", "Dermot Kennedy", "Bad Omens",
    "Sultan + Shepard", "Taylor Swift", "Shawn Mendes", "Katy Perry",
    "Meghan Trainor", "Ariana Grande", "I Prevail", "Our Last Night",
    "Chappell Roan", "girl in red", "Maisie Peters", "Central Cee",
    "Drake", "Vince Staples", "Lil Yachty", "Lil Tecca",
    "Ed Sheeran", "Billie Eilish", "Post Malone", "Dua Lipa",
    "The Weeknd", "Imagine Dragons", "Adele", "Bruno Mars",
    "BTS", "Maroon 5", "Justin Bieber", "Cardi B",
    "Travis Scott", "Halsey", "Khalid", "Sam Smith",
    "Lizzo", "Harry Styles", "J Balvin", "Bad Bunny",
    "Doja Cat", "Olivia Rodrigo", "SZA", "Lil Nas X",
    "Megan Thee Stallion", "BLACKPINK", "Camila Cabello", "Jonas Brothers",
    "Nicki Minaj",
]

In [34]:
# Step 1: Resolve every name to its first Deezer search hit
artists = fetch_artists(artist_names)

# Step 2: Request the full artist record for each resolved ID
artist_ids = [found["artist_id"] for found in artists]
detailed_artists = fetch_artist_details(artist_ids)

# Tabulate the detailed records, one row per artist
artist_df = pd.DataFrame(data=detailed_artists)
artist_df

Unnamed: 0,artist_id,name,nb_fan,nb_album,radio,link
0,1436111,Kip Moore,40470,41,True,https://www.deezer.com/artist/1436111
1,5286,Breaking Benjamin,665596,22,True,https://www.deezer.com/artist/5286
2,12033224,Dermot Kennedy,150462,45,True,https://www.deezer.com/artist/12033224
3,9700940,Bad Omens,91287,19,True,https://www.deezer.com/artist/9700940
4,7584298,Sultan + Shepard,28256,14,True,https://www.deezer.com/artist/7584298
5,12246,Taylor Swift,11572071,103,True,https://www.deezer.com/artist/12246
6,5962948,Shawn Mendes,6546601,38,True,https://www.deezer.com/artist/5962948
7,144227,Katy Perry,9336825,75,True,https://www.deezer.com/artist/144227
8,1181430,Meghan Trainor,1654200,75,True,https://www.deezer.com/artist/1181430
9,1562681,Ariana Grande,12868390,55,True,https://www.deezer.com/artist/1562681


In [35]:
# Function to fetch an artist's top tracks
def fetch_artist_top_tracks(artist_ids, limit=None):
    """Fetch the top tracks for a single artist.

    Args:
        artist_ids: ID of ONE artist, interpolated into ``/artist/{id}/top``.
            The plural name is kept so existing keyword callers still work.
        limit: Optional maximum number of tracks to request, sent as the
            ``limit`` query parameter. When ``None`` no parameter is sent and
            the API decides how many tracks to return.

    Returns:
        A list of dicts with the keys ``track_id``, ``title``,
        ``album_name``, ``rank``, ``duration`` and ``explicit_lyrics``.
        The list is empty when the request fails or carries no ``data``.
        Missing optional fields are filled with ``"N/A"``.
    """
    endpoint = f"/artist/{artist_ids}/top"
    if limit is None:
        data = fetch_data(endpoint)
    else:
        data = fetch_data(endpoint, params={"limit": limit})

    tracks = []
    if not data or "data" not in data:
        return tracks

    for track in data["data"] or []:
        # "album" may be absent, null, or lack a title; none of those should
        # abort the whole artist.
        album = track.get("album") or {}
        tracks.append({
            "track_id": track["id"],
            "title": track["title"],
            "album_name": album.get("title", "N/A"),
            "rank": track.get("rank", "N/A"),
            "duration": track.get("duration", "N/A"),
            "explicit_lyrics": track.get("explicit_lyrics", "N/A")
        })
    return tracks

# Gather the top tracks of every artist listed in artist_df (one request per row)
all_top_tracks = []
for _, artist in artist_df.iterrows():
    print(f"Fetching top tracks for artist: {artist['name']} (ID: {artist['artist_id']})...")
    top_tracks = fetch_artist_top_tracks(artist["artist_id"])
    # Stamp each track with its artist's ID so the two tables can be joined
    for track in top_tracks:
        track.update({"artist_id": artist["artist_id"]})
    all_top_tracks += top_tracks

# One row per track across all artists
top_tracks_df = pd.DataFrame(data=all_top_tracks)
top_tracks_df

Fetching top tracks for artist: Kip Moore (ID: 1436111)...
Fetching top tracks for artist: Breaking Benjamin (ID: 5286)...
Fetching top tracks for artist: Dermot Kennedy (ID: 12033224)...
Fetching top tracks for artist: Bad Omens (ID: 9700940)...
Fetching top tracks for artist: Sultan + Shepard (ID: 7584298)...
Fetching top tracks for artist: Taylor Swift (ID: 12246)...
Fetching top tracks for artist: Shawn Mendes (ID: 5962948)...
Fetching top tracks for artist: Katy Perry (ID: 144227)...
Fetching top tracks for artist: Meghan Trainor (ID: 1181430)...
Fetching top tracks for artist: Ariana Grande (ID: 1562681)...
Fetching top tracks for artist: I Prevail (ID: 7185748)...
Fetching top tracks for artist: Our Last Night (ID: 68249)...
Fetching top tracks for artist: Chappell Roan (ID: 12945219)...
Fetching top tracks for artist: girl in red (ID: 14226477)...
Fetching top tracks for artist: Maisie Peters (ID: 13022633)...
Fetching top tracks for artist: Central Cee (ID: 12096038)...
Fetchi

Unnamed: 0,track_id,title,album_name,rank,duration,explicit_lyrics,artist_id
0,3123161771,Bad Spot,Bad Spot,548153,198,False,1436111
1,2958082621,Live Here To Work,Live Here To Work,501385,282,True,1436111
2,18228848,Somethin' 'Bout A Truck,Up All Night,326243,213,False,1436111
3,401443612,More Girls Like You,SLOWHEART,300150,154,False,1436111
4,18228855,Hey Pretty Girl,Up All Night,302931,215,False,1436111
...,...,...,...,...,...,...,...
240,453844402,MotorSport,Culture II,630886,308,True,382937
241,28852751,Starships,Pink Friday ... Roman Reloaded (Deluxe),937480,210,True,382937
242,412843352,Super Bass,Pink Friday (Deluxe Edition),712380,201,False,382937
243,796342592,Tusa,Tusa,888444,200,False,382937


In [36]:
# Human-readable column titles for the artist table
ARTIST_COLUMN_TITLES = {
    "artist_id": "Artist ID",
    "name": "Artist Name",
    "nb_fan": "Number of Fans",
    "nb_album": "Number of Albums",
    "radio": "Radio Available",
    "link": "Deezer Link",
}

# Rename, drop the radio flag, then derive fans-per-album in one chain
cleaned_artist_df = (
    artist_df
    .rename(columns=ARTIST_COLUMN_TITLES)
    .drop(columns=["Radio Available"])
    .assign(**{
        "Fan to Album Ratio": lambda frame: frame["Number of Fans"] / frame["Number of Albums"],
    })
)
cleaned_artist_df.head()

Unnamed: 0,Artist ID,Artist Name,Number of Fans,Number of Albums,Deezer Link,Fan to Album Ratio
0,1436111,Kip Moore,40470,41,https://www.deezer.com/artist/1436111,987.073171
1,5286,Breaking Benjamin,665596,22,https://www.deezer.com/artist/5286,30254.363636
2,12033224,Dermot Kennedy,150462,45,https://www.deezer.com/artist/12033224,3343.6
3,9700940,Bad Omens,91287,19,https://www.deezer.com/artist/9700940,4804.578947
4,7584298,Sultan + Shepard,28256,14,https://www.deezer.com/artist/7584298,2018.285714


In [37]:
def _format_duration(seconds):
    """Render a duration in seconds as ``m:ss``.

    Returns ``"N/A"`` for values that cannot be read as a whole number of
    seconds (for example the ``"N/A"`` placeholder, ``None`` or NaN).
    """
    try:
        total = int(seconds)
    except (TypeError, ValueError, OverflowError):
        return "N/A"
    return f"{total // 60}:{total % 60:02}"


cleaned_track_df = top_tracks_df.rename(columns={
        "track_id": "Track ID",
        "title": "Track Title",
        "album_name": "Album Name",
        "rank": "Popularity Rank",
        "duration": "Duration (seconds)",
        "explicit_lyrics": "Explicit Lyrics",
        "artist_id": "Artist ID"
})
# fetch_artist_top_tracks stores the string "N/A" when a track has no
# duration; integer arithmetic on that placeholder would raise TypeError, so
# formatting goes through a tolerant helper.
cleaned_track_df["Duration (min:sec)"] = cleaned_track_df["Duration (seconds)"].apply(_format_duration)
# `columns=` already names the axis, so no separate `axis` argument is needed.
cleaned_track_df = cleaned_track_df.drop(columns=["Explicit Lyrics"])
cleaned_track_df.head()

Unnamed: 0,Track ID,Track Title,Album Name,Popularity Rank,Duration (seconds),Artist ID,Duration (min:sec)
0,3123161771,Bad Spot,Bad Spot,548153,198,1436111,3:18
1,2958082621,Live Here To Work,Live Here To Work,501385,282,1436111,4:42
2,18228848,Somethin' 'Bout A Truck,Up All Night,326243,213,1436111,3:33
3,401443612,More Girls Like You,SLOWHEART,300150,154,1436111,2:34
4,18228855,Hey Pretty Girl,Up All Night,302931,215,1436111,3:35


In [24]:
# Persist both cleaned tables as CSV files without the pandas row index
for frame, csv_path in (
    (cleaned_artist_df, "cleaned_artist_data.csv"),
    (cleaned_track_df, "cleaned_tracks_data.csv"),
):
    frame.to_csv(csv_path, index=False)

In [9]:
# Reload the track table from the CSV on disk; this rebinds cleaned_track_df
# to the file's contents, replacing any frame previously held in memory.
cleaned_track_df = pd.read_csv('cleaned_tracks_data.csv')

In [10]:
# Remove leftover index-like columns when present; missing ones are ignored
cleaned_track_df = cleaned_track_df.drop(columns=['index', 'level_0'], errors='ignore')

In [11]:
# Add a 1-based surrogate key as the first column and write the table back.
# Any existing `entry_key` is dropped first so this cell can be re-run: the
# CSV it writes already contains the key, and adding a second column of the
# same name would corrupt the file.
cleaned_track_df = (
    cleaned_track_df
    .drop(columns=['entry_key'], errors='ignore')
    .reset_index(drop=True)
)
cleaned_track_df.insert(0, 'entry_key', range(1, len(cleaned_track_df) + 1))
cleaned_track_df.to_csv('cleaned_tracks_data.csv', index=False)