In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import os
from dotenv import load_dotenv
import musicbrainzngs
from api_calls import get_genre, get_track_id, get_artist_area
import time

In [2]:
env_path = r"C:\Users\marty\OneDrive\Pulpit\studia\sem6\hurtownie\spotify-dwh\.env"
load_dotenv(dotenv_path=env_path)

client_id = os.getenv('SPOTIFY_API_KEY')
client_secret = os.getenv('CLIENT_SECRET')

In [3]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

In [4]:
data = pd.read_csv(r'C:\Users\marty\OneDrive\Pulpit\studia\sem6\hurtownie\spotify-dwh\data\sample.csv')

In [5]:
data = data.copy()
data['spotify_track_id'] = None



In [25]:
data

Unnamed: 0,Date,Song,Artist,Rank,Last Week,Peak Position,Weeks in Charts,Image URL,track_id,genres
0,1958-08-06,Poor Little Fool,Ricky Nelson,1,1,1,2,#,5ayybTSXNwcarDtxQKqvWX,"[rockabilly, doo-wop]"
1,1958-08-06,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,2,54,2,2,https://charts-static.billboard.com/img/1958/0...,006Ndmw2hHxvnLbJsBFnPx,"[italian singer-songwriter, italo dance, canzo..."
2,1958-08-06,Patricia,Perez Prado And His Orchestra,3,2,2,2,#,4jmFSkpcqLOUN6scGU6BOO,"[mambo, cha cha cha, son cubano, latin jazz, e..."
3,1958-08-06,Splish Splash,Bobby Darin,4,3,3,2,https://charts-static.billboard.com/img/1958/0...,40fD7ct05FvQHLdQTgJelG,"[big band, adult standards, swing music, doo-wop]"
4,1958-08-06,When,Kalin Twins,5,5,5,2,#,3HZJ9BLBpDya4p71VfXSWp,[doo-wop]
...,...,...,...,...,...,...,...,...,...,...
62545,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62546,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62547,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62548,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]


In [6]:
for idx, row in data.iloc[0:50].iterrows():
    song = row['Song']
    artist = row['Artist']
    
    track_id = get_track_id(sp, song, artist)
    data.at[idx, 'spotify_track_id'] = track_id
    
    time.sleep(0.1)  

In [7]:
def chunk_list(lst, n):
    """Dzieli listę lst na kawałki po n elementów."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]


In [8]:
def get_genres_in_batches(sp, data, batch_size=50):
    track_ids = data['spotify_track_id'].tolist()
    results = []

    for chunk in chunk_list(track_ids, batch_size):
        df_chunk = get_genre(sp, chunk)
        results.append(df_chunk)

    return pd.concat(results, ignore_index=True)

# Przykład użycia:
df_genres = get_genres_in_batches(sp, data, 50)
print(df_genres.head())

                 track_id                                             genres
0  5ayybTSXNwcarDtxQKqvWX                              [rockabilly, doo-wop]
1  006Ndmw2hHxvnLbJsBFnPx  [italian singer-songwriter, italo dance, canzo...
2  4jmFSkpcqLOUN6scGU6BOO  [mambo, cha cha cha, son cubano, latin jazz, e...
3  40fD7ct05FvQHLdQTgJelG  [big band, adult standards, swing music, doo-wop]
4  3HZJ9BLBpDya4p71VfXSWp                                          [doo-wop]


In [9]:
data = pd.merge(data, df_genres, left_on='spotify_track_id', right_on='track_id', how='left')

In [10]:
data = data.drop(columns=['spotify_track_id'])

In [11]:
data

Unnamed: 0,Date,Song,Artist,Rank,Last Week,Peak Position,Weeks in Charts,Image URL,track_id,genres
0,1958-08-06,Poor Little Fool,Ricky Nelson,1,1,1,2,#,5ayybTSXNwcarDtxQKqvWX,"[rockabilly, doo-wop]"
1,1958-08-06,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,2,54,2,2,https://charts-static.billboard.com/img/1958/0...,006Ndmw2hHxvnLbJsBFnPx,"[italian singer-songwriter, italo dance, canzo..."
2,1958-08-06,Patricia,Perez Prado And His Orchestra,3,2,2,2,#,4jmFSkpcqLOUN6scGU6BOO,"[mambo, cha cha cha, son cubano, latin jazz, e..."
3,1958-08-06,Splish Splash,Bobby Darin,4,3,3,2,https://charts-static.billboard.com/img/1958/0...,40fD7ct05FvQHLdQTgJelG,"[big band, adult standards, swing music, doo-wop]"
4,1958-08-06,When,Kalin Twins,5,5,5,2,#,3HZJ9BLBpDya4p71VfXSWp,[doo-wop]
...,...,...,...,...,...,...,...,...,...,...
62545,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62546,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62547,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]
62548,1958-08-20,Treasure Of Your Love,Eileen Rodgers,100,100,1,-,#,,[]


In [12]:
data.iloc[0]['Artist']

'Ricky Nelson'

In [13]:
area = get_artist_area(data.iloc[0]['Artist'])

Nazwa:        Ricky Nelson
Kraj (area):  United States
MBID:         28d0c272-4d51-4c24-b31f-e20aac2ba7de


In [14]:
area

'United States'