In [3]:
import requests
import pandas as pd
from rich.console import Console

In [4]:
# Shared rich Console; the fetch helpers below use it for colour-styled status messages.
console = Console()

In [5]:
BASE_URL = "https://api.deezer.com"

def fetch_data(endpoint, params=None, timeout=10):
    """Send a GET request to the Deezer API and return the decoded JSON body.

    Args:
        endpoint: Path appended to BASE_URL, e.g. "/search/artist".
        params: Optional dict of query-string parameters.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON payload, or None when the request fails, the body
        is not valid JSON, or the payload itself reports an API error.
        Every failure is reported on the shared console.
    """
    try:
        response = requests.get(f"{BASE_URL}{endpoint}", params=params, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        console.print(f"[bold red]Error fetching data:[/bold red] {e}", style="red")
        return None

    # Deezer signals many failures (quota exceeded, unknown id, ...) with an
    # HTTP 200 response whose body is {"error": {...}}; treat that as a failure
    # instead of handing the error object to callers as if it were data.
    if isinstance(payload, dict) and "error" in payload:
        console.print(f"[bold red]Error fetching data:[/bold red] {payload['error']}", style="red")
        return None

    console.print(f"Successfully fetched data for [bold]{endpoint}[/bold]", style="green")
    return payload

In [6]:
def fetch_artists(artist_names):
    """Look up each artist name with the Deezer artist search and keep the top hit.

    Args:
        artist_names: Iterable of artist names used as search queries.

    Returns:
        A list of dicts with the keys "artist_id", "name", "nb_fan" and
        "link", one per name whose search returned at least one result.
        Names without a result are reported on the console and skipped.
    """
    artists = []
    for name in artist_names:
        console.print(f"Fetching basic data for artist: [bold magenta]{name}[/bold magenta]...", style="cyan")
        data = fetch_data("/search/artist", params={"q": name})
        results = data.get("data") if isinstance(data, dict) else None
        if not results:
            # Make dropped names visible; otherwise the returned list is
            # silently shorter than the input.
            console.print(f"No search results for artist: [bold]{name}[/bold]; skipping.", style="yellow")
            continue
        artist = results[0]  # First search hit; not checked against the queried name
        artists.append({
            "artist_id": artist["id"],
            "name": artist["name"],
            "nb_fan": artist.get("nb_fan", 0),
            "link": artist.get("link", "N/A")
        })
    return artists

In [7]:
def fetch_artist_details(artist_ids):
    """Fetch the full artist record for each Deezer artist ID.

    Args:
        artist_ids: Iterable of Deezer artist IDs.

    Returns:
        A list of dicts with the keys "artist_id", "name", "nb_fan",
        "nb_album", "radio" and "link", one per ID for which an artist
        record was returned. IDs whose request failed, or whose response is
        not an artist record, are reported on the console and skipped.
    """
    detailed_artists = []
    for artist_id in artist_ids:
        console.print(f"Fetching detailed data for artist ID: [bold]{artist_id}[/bold]...", style="cyan")
        data = fetch_data(f"/artist/{artist_id}")
        # A truthy payload is not necessarily an artist: the API can answer
        # with an error object, which has no "id" and would raise KeyError.
        if not data or "id" not in data:
            console.print(f"No details available for artist ID: [bold]{artist_id}[/bold]; skipping.", style="yellow")
            continue
        detailed_artists.append({
            "artist_id": data["id"],
            "name": data["name"],
            "nb_fan": data.get("nb_fan", 0),
            "nb_album": data.get("nb_album", 0),
            "radio": data.get("radio", False),
            "link": data.get("link", "N/A")
        })
    return detailed_artists

In [8]:
# Artist names to look up. Each entry is sent as a free-text search query by
# fetch_artists, which keeps the first search hit for that query.
artist_names = [
    "Kip Moore",
    "Breaking Benjamin",
    "Dermot Kennedy",
    "Bad Omens",
    "Sultan + Shepard",
    "Taylor Swift",
    "Shawn Mendes",
    "Katy Perry",
    "Meghan Trainor",
    "Ariana Grande",
    "I Prevail",
    "Our Last Night",
    "Chappell Roan",
    "girl in red",
    "Maisie Peters",
    "Central Cee",
    "Drake",
    "Vince Staples",
    "Lil Yachty",
    "Lil Tecca",
    "Ed Sheeran",
    "Billie Eilish",
    "Post Malone",
    "Dua Lipa",
    "The Weeknd",
    "Imagine Dragons",
    "Adele",
    "Bruno Mars",
    "BTS",
    "Maroon 5",
    "Justin Bieber",
    "Cardi B",
    "Travis Scott",
    "Halsey",
    "Khalid",
    "Sam Smith",
    "Lizzo",
    "Harry Styles",
    "J Balvin",
    "Bad Bunny",
    "Doja Cat",
    "Olivia Rodrigo",
    "SZA",
    "Lil Nas X",
    "Megan Thee Stallion",
    "BLACKPINK",
    "Camila Cabello",
    "Jonas Brothers",
    "Nicki Minaj"
]

In [9]:
# Step 1: resolve every name in artist_names to a Deezer artist via search.
artists = fetch_artists(artist_names)
console.print(f"[bold green]Extracted data for {len(artists)} artists.[/bold green]")

# Step 2: fetch the full record for each resolved artist ID.
artist_ids = [artist["artist_id"] for artist in artists]
detailed_artists = fetch_artist_details(artist_ids)

# Step 3: one row per artist (built once; the duplicated construction was redundant).
artist_df = pd.DataFrame(detailed_artists)
console.print("[bold yellow]Artist DataFrame ready for further processing.[/bold yellow]")

In [10]:
# Function to fetch an artist's top tracks
def fetch_artist_top_tracks(artist_ids, limit=None):
    """Fetch the top tracks of one artist from the "/artist/{id}/top" endpoint.

    Args:
        artist_ids: A single Deezer artist ID. The plural name is kept only
            so existing keyword callers keep working; a list is not supported.
        limit: Optional maximum number of tracks to request. When None, no
            limit is sent and the API decides how many tracks to return.

    Returns:
        A list of dicts with the keys "track_id", "title", "album_name",
        "rank", "duration" and "explicit_lyrics". Fields missing from the
        response are filled with the string "N/A". The list is empty when
        the request fails or the response has no "data" entry.
    """
    endpoint = f"/artist/{artist_ids}/top"
    params = {"limit": limit} if limit is not None else None
    data = fetch_data(endpoint, params=params)
    tracks = []
    if data and "data" in data:
        for track in data["data"]:
            # "album" may be absent, null, or lack a title; fall back to
            # "N/A" in all three cases instead of raising.
            album = track.get("album") or {}
            tracks.append({
                "track_id": track["id"],
                "title": track["title"],
                "album_name": album.get("title", "N/A"),
                "rank": track.get("rank", "N/A"),
                "duration": track.get("duration", "N/A"),
                "explicit_lyrics": track.get("explicit_lyrics", "N/A")
            })
    return tracks

# Gather the top tracks of every artist in artist_df, tagging each track with
# the ID of the artist it was fetched for.
all_top_tracks = []
for artist in artist_df.itertuples(index=False):
    current_id = artist.artist_id
    print(f"Fetching top tracks for artist: {artist.name} (ID: {current_id})...")
    top_tracks = fetch_artist_top_tracks(current_id)
    for track in top_tracks:
        track["artist_id"] = current_id
    all_top_tracks += top_tracks


# One row per fetched track, across all artists.
top_tracks_df = pd.DataFrame(all_top_tracks)
top_tracks_df

Fetching top tracks for artist: Kip Moore (ID: 1436111)...


Fetching top tracks for artist: Breaking Benjamin (ID: 5286)...


Fetching top tracks for artist: Dermot Kennedy (ID: 12033224)...


Fetching top tracks for artist: Bad Omens (ID: 9700940)...


Fetching top tracks for artist: Sultan + Shepard (ID: 7584298)...


Fetching top tracks for artist: Taylor Swift (ID: 12246)...


Fetching top tracks for artist: Shawn Mendes (ID: 5962948)...


Fetching top tracks for artist: Katy Perry (ID: 144227)...


Fetching top tracks for artist: Meghan Trainor (ID: 1181430)...


Fetching top tracks for artist: Ariana Grande (ID: 1562681)...


Fetching top tracks for artist: I Prevail (ID: 7185748)...


Fetching top tracks for artist: Our Last Night (ID: 68249)...


Fetching top tracks for artist: Chappell Roan (ID: 12945219)...


Fetching top tracks for artist: girl in red (ID: 14226477)...


Fetching top tracks for artist: Maisie Peters (ID: 13022633)...


Fetching top tracks for artist: Central Cee (ID: 12096038)...


Fetching top tracks for artist: Drake (ID: 246791)...


Fetching top tracks for artist: Vince Staples (ID: 4037971)...


Fetching top tracks for artist: Lil Yachty (ID: 9776678)...


Fetching top tracks for artist: Lil Tecca (ID: 14492701)...


Fetching top tracks for artist: Ed Sheeran (ID: 384236)...


Fetching top tracks for artist: Billie Eilish (ID: 9635624)...


Fetching top tracks for artist: Post Malone (ID: 7543848)...


Fetching top tracks for artist: Dua Lipa (ID: 8706544)...


Fetching top tracks for artist: The Weeknd (ID: 4050205)...


Fetching top tracks for artist: Imagine Dragons (ID: 416239)...


Fetching top tracks for artist: Adele (ID: 75798)...


Fetching top tracks for artist: Bruno Mars (ID: 429675)...


Fetching top tracks for artist: BTS (ID: 6982223)...


Fetching top tracks for artist: Maroon 5 (ID: 1188)...


Fetching top tracks for artist: Justin Bieber (ID: 288166)...


Fetching top tracks for artist: Cardi B (ID: 9064930)...


Fetching top tracks for artist: Travis Scott (ID: 4495513)...


Fetching top tracks for artist: Halsey (ID: 5292512)...


Fetching top tracks for artist: Khalid (ID: 362326)...


Fetching top tracks for artist: Sam Smith (ID: 1097709)...


Fetching top tracks for artist: Lizzo (ID: 5200025)...


Fetching top tracks for artist: Harry Styles (ID: 5313805)...


Fetching top tracks for artist: J Balvin (ID: 4860761)...


Fetching top tracks for artist: Bad Bunny (ID: 10583405)...


Fetching top tracks for artist: Doja Cat (ID: 5578942)...


Fetching top tracks for artist: Olivia Rodrigo (ID: 11152580)...


Fetching top tracks for artist: SZA (ID: 5531258)...


Fetching top tracks for artist: Lil Nas X (ID: 15166511)...


Fetching top tracks for artist: Megan Thee Stallion (ID: 12816021)...


Fetching top tracks for artist: BLACKPINK (ID: 10803980)...


Fetching top tracks for artist: Camila Cabello (ID: 9236850)...


Fetching top tracks for artist: Jonas Brothers (ID: 15888)...


Fetching top tracks for artist: Nicki Minaj (ID: 382937)...


Unnamed: 0,track_id,title,album_name,rank,duration,explicit_lyrics,artist_id
0,2958082621,Live Here To Work,Live Here To Work,501385,282,True,1436111
1,18228848,Somethin' 'Bout A Truck,Up All Night,326243,213,False,1436111
2,3123161771,Bad Spot,Bad Spot,480370,198,False,1436111
3,18228855,Hey Pretty Girl,Up All Night,262652,215,False,1436111
4,401443652,Last Shot,SLOWHEART,314244,228,False,1436111
...,...,...,...,...,...,...,...
240,412843352,Super Bass,Pink Friday (Deluxe Edition),712380,201,False,382937
241,28852751,Starships,Pink Friday ... Roman Reloaded (Deluxe),937480,210,True,382937
242,81609384,Bang Bang,Bang Bang,800267,199,False,382937
243,796342592,Tusa,Tusa,888444,200,False,382937


In [11]:
# Give the artist columns human-readable names and drop the radio flag,
# which is not kept in the cleaned table.
cleaned_artist_df = (
    artist_df
    .rename(columns={
        "artist_id": "Artist ID",
        "name": "Artist Name",
        "nb_fan": "Number of Fans",
        "nb_album": "Number of Albums",
        "radio": "Radio Available",
        "link": "Deezer Link"
    })
    .drop(columns=["Radio Available"])
)

# nb_album falls back to 0 when the API omits it, and dividing by 0 would give
# an infinite ratio. Mask zero album counts so the ratio is NaN (missing) instead.
album_counts = cleaned_artist_df["Number of Albums"]
cleaned_artist_df["Fan to Album Ratio"] = (
    cleaned_artist_df["Number of Fans"] / album_counts.where(album_counts != 0)
)
cleaned_artist_df.head()

Unnamed: 0,Artist ID,Artist Name,Number of Fans,Number of Albums,Deezer Link,Fan to Album Ratio
0,1436111,Kip Moore,40493,41,https://www.deezer.com/artist/1436111,987.634146
1,5286,Breaking Benjamin,666132,22,https://www.deezer.com/artist/5286,30278.727273
2,12033224,Dermot Kennedy,150597,45,https://www.deezer.com/artist/12033224,3346.6
3,9700940,Bad Omens,91694,19,https://www.deezer.com/artist/9700940,4826.0
4,7584298,Sultan + Shepard,28271,14,https://www.deezer.com/artist/7584298,2019.357143


In [12]:
# Give the track columns human-readable names.
cleaned_track_df = top_tracks_df.rename(columns={
        "track_id": "Track ID",
        "title": "Track Title",
        "album_name": "Album Name",
        "rank": "Popularity Rank",
        "duration": "Duration (seconds)",
        "explicit_lyrics": "Explicit Lyrics",
        "artist_id": "Artist ID"
})

# The upstream fetch fills a missing duration with the string "N/A", on which
# integer arithmetic raises TypeError. Coerce to numbers first and render any
# missing duration as "N/A"; valid durations still format as m:ss.
duration_seconds = pd.to_numeric(cleaned_track_df["Duration (seconds)"], errors="coerce")
cleaned_track_df["Duration (min:sec)"] = [
    "N/A" if pd.isna(seconds) else "{}:{:02}".format(*divmod(int(seconds), 60))
    for seconds in duration_seconds
]

# The explicit-lyrics flag is not kept in the cleaned table.
cleaned_track_df = cleaned_track_df.drop(columns=["Explicit Lyrics"])
cleaned_track_df.head()

Unnamed: 0,Track ID,Track Title,Album Name,Popularity Rank,Duration (seconds),Artist ID,Duration (min:sec)
0,2958082621,Live Here To Work,Live Here To Work,501385,282,1436111,4:42
1,18228848,Somethin' 'Bout A Truck,Up All Night,326243,213,1436111,3:33
2,3123161771,Bad Spot,Bad Spot,480370,198,1436111,3:18
3,18228855,Hey Pretty Girl,Up All Night,262652,215,1436111,3:35
4,401443652,Last Shot,SLOWHEART,314244,228,1436111,3:48


In [13]:
# Persist both cleaned tables as CSV files, without the DataFrame index.
for frame, csv_path in (
    (cleaned_artist_df, "cleaned_artist_data.csv"),
    (cleaned_track_df, "cleaned_tracks_data.csv"),
):
    frame.to_csv(csv_path, index=False)

In [14]:
# Reload the track table from the CSV written above; this replaces the
# in-memory cleaned_track_df with the contents of the file.
cleaned_track_df = pd.read_csv('cleaned_tracks_data.csv')

In [15]:
# Remove leftover index columns ('index' / 'level_0'), if any are present.
stale_index_columns = [
    column for column in ('index', 'level_0') if column in cleaned_track_df.columns
]
if stale_index_columns:
    cleaned_track_df.drop(columns=stale_index_columns, inplace=True)

In [16]:
# Add a 1-based sequential entry_key as the first column and save the result.
# Any entry_key left over from a previous run of this cell is dropped first,
# so re-running the cell cannot create a second, duplicate entry_key column.
cleaned_track_df = (
    cleaned_track_df
    .drop(columns=["entry_key"], errors="ignore")
    .reset_index(drop=True)
)
cleaned_track_df.insert(0, "entry_key", range(1, len(cleaned_track_df) + 1))
cleaned_track_df.to_csv('cleaned_tracks_data.csv', index=False)

In [17]:
# Print the column names, non-null counts and dtypes of the track table.
cleaned_track_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   entry_key           245 non-null    int64 
 1   Track ID            245 non-null    int64 
 2   Track Title         245 non-null    object
 3   Album Name          245 non-null    object
 4   Popularity Rank     245 non-null    int64 
 5   Duration (seconds)  245 non-null    int64 
 6   Artist ID           245 non-null    int64 
 7   Duration (min:sec)  245 non-null    object
dtypes: int64(5), object(3)
memory usage: 15.4+ KB
