# Spotify Songs'Decade Classification Dataset

**Import**

In [1]:
import warnings
warnings.filterwarnings("ignore")
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

*Spotify Credentials and Authentication*

In [2]:
CLIENT_ID= ''
SECRET_CLIENT_ID = ''

spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=CLIENT_ID,client_secret=SECRET_CLIENT_ID))


## Track Class

In [3]:
class Track:
    def __init__(self,uri,name,decade):
        self.uri = uri
        self.name = name
        self.decade = decade
    
    def __eq__(self, other):
        if not isinstance(other,Track):
            return False
        return (set(self.name) == set(other.name) and
                set(self.uri) == set(other.uri))
    
    def __hash__(self):
        return hash((self.name,self.uri))
    
    def __str__(self):
        return f'Song: {self.name}, Decade: {self.decade}'

## Functions to build dataset

### Find Spotify playlists by decade

In [4]:
def search_playlists_by_decade(decade,spotify):
    query = f'{decade} HITS'
    try:
        results = spotify.search(query, type='playlist', limit=50) #Only 5 playlists, can be increased
    except Exception as e:
        print(e)
    res = []
    playlists = results['playlists']['items']
    for playlist in playlists:
        playlist_uri = playlist['uri']
        playlist_name = playlist['name']
        playlist_id = playlist['id']
        print(playlist_name)
        res.append({'name':playlist_name,'id':playlist_id,'uri':playlist_uri})
    return(res)


In [5]:
decade = '2010s' #Cambiare qui per cercare altre decadi
playlists =search_playlists_by_decade(decade,spotify) #potrebbe dare errore 404 riprovate a stecca finchè non va

2010s HITS | TOP 100 SONGS 🔥
Party Hits 2010s
2010s Hits
The Party Hits of the 2010s
2010s Hits Workout
Summer Hits 2023
2010s Hits | Spotify
All Out 2010s
2010s Hits Clean
Party Hits
2010s Hits Essentials
Top Hits of 2010
POP LATINO : 90s, 2000s & 2010s Hits
I Love My '10s R&B
Pop Inglés 🌼 (2023 & 2020s & 2010s) Música En Inglés 2010s Hits 
Party Hits 2000s
2000's and 2010's Hits
I Love My 2010s Hip-Hop
best of 2010s hits
Summer Hits of the 00s
Mid & Late 2010's Hits
10s Ballads
Hindi 2010s Hits
10s Latino
Tamil 2010s Hits
Headliners
Remember 2010s Hits
The Party Hits of the 2010s
Gym Teacher Playlist (early 2010s  hits) 🕺🏻
Dance Hits 2010s
Musica latina vieja 2000s & 2010s Hits | Pop latino 2023 | Pop viejo 
Party Hits 2010's
2000s-2010s hits
Top Party Hits of 2022
Greatest Hits 2010-2019 | Best of 2010s Hits
10s Pop Rock
tiktok throwbacks 💽 80s 90s 00s 2010s hits
Latin Hits 2010
POP inglés 🎶🔥 (2020s & 2000s ) Músicas En Inglés 2010s Hits  
Good Feeling 10s
2000s-2010s hits that are 

### Get all the tracks from a list of Spotify playlists

In [6]:
def get_all_tracks_from_playlist_uri(playlist_id,decade,spotify,tracks_to_insert):
    response_items= spotify.playlist_tracks(playlist_id)['items']
    for item in response_items:
        track = item['track']
        tr = Track(track['uri'],track['name'],decade)
        tracks_to_insert.add(tr)


def get_all_playlists_tracks(playlists,decade,spotify):
    tracks_to_insert=set([])
    for pl in playlists:
        #print(pl)
        get_all_tracks_from_playlist_uri(pl['id'],decade,spotify,tracks_to_insert)
    return tracks_to_insert

In [7]:
tracks_to_insert = get_all_playlists_tracks(playlists,decade,spotify)

In [8]:
len(tracks_to_insert)

2852

In [9]:
print(*tracks_to_insert)

Song: Beam Me Up - Radio Edit, Decade: 2010s Song: Summertime Sadness, Decade: 2010s Song: I AM WOMAN, Decade: 2010s Song: COCO LOCO, Decade: 2010s Song: We Are Never Ever Getting Back Together (Taylor's Version), Decade: 2010s Song: Rush, Decade: 2010s Song: Revolution (feat. Faustix & Imanos and Kai), Decade: 2010s Song: Stamp On The Ground, Decade: 2010s Song: Party Rock Anthem, Decade: 2010s Song: Play That Song, Decade: 2010s Song: Worth It (feat. Kid Ink), Decade: 2010s Song: Price Tag, Decade: 2010s Song: Un Siglo Sin Ti, Decade: 2010s Song: Way down We Go, Decade: 2010s Song: Cuando nadie me ve, Decade: 2010s Song: Yo x Ti, Tu x Mi, Decade: 2010s Song: Astronaut, Decade: 2010s Song: Brave, Decade: 2010s Song: Quevedo: Bzrp Music Sessions, Vol. 52, Decade: 2010s Song: Char Baj Gaye (From "F.A.L.T.U") - Party Abhi Baaki Hai, Decade: 2010s Song: Can I Have This Dance, Decade: 2010s Song: Habits (Stay High), Decade: 2010s Song: Girls Like (feat. Zara Larsson), Decade: 2010s Song: D

 Song: Sugar, Decade: 2010s Song: Lord Pretty Flacko Jodye 2 (LPFJ2), Decade: 2010s Song: La Mordidita (feat. Yotuel), Decade: 2010s Song: Wine Pon You (feat. Konshens), Decade: 2010s Song: I Don't Care (with Justin Bieber), Decade: 2010s Song: Veera, Decade: 2010s Song: Who Told You (feat. Drake), Decade: 2010s Song: One More Time, Decade: 2010s Song: Thumbi Penne, Decade: 2010s Song: She Don't (feat. Ty Dolla $Ign), Decade: 2010s Song: Pierre, Decade: 2010s Song: Get up - French Edit, Decade: 2010s Song: Am I Wrong, Decade: 2010s Song: We R Who We R, Decade: 2010s Song: The Spectre, Decade: 2010s Song: Idhazhin Oram - The Innocence of Love, Decade: 2010s Song: Teenage Dirtbag, Decade: 2010s Song: Hotel Room Service, Decade: 2010s Song: Amante Bandido, Decade: 2010s Song: Bitch Better Have My Money, Decade: 2010s Song: Start A Riot, Decade: 2010s Song: Tanto la Queria, Decade: 2010s Song: Soulman - New English Version, Decade: 2010s Song: All I Do Is Win (feat. T-Pain, Ludacris, Snoop

### Get all the audio featurees from a list of Spotify tracks

In [10]:
def get_audio_features(track,spotify):
    try:
        audio_features = spotify.audio_features(track.uri)[0]
        #print(audio_features)
        if(audio_features is None):
            return None
        df_row ={
            'track_uri':track.uri,'track_name':track.name,'decade':track.decade,
            'danceability':audio_features['danceability'],
            'energy':audio_features['energy'],
            'key':audio_features['key'],
            'loudness':audio_features['loudness'],
            'mode':audio_features['mode'],
            'speechiness':audio_features['speechiness'],
            'acousticness':audio_features['acousticness'],
            'instrumentalness':audio_features['instrumentalness'],
            'liveness':audio_features['liveness'],
            'valence':audio_features['valence'],
            'tempo':audio_features['tempo']
            }
        return df_row
    except Exception as e:
        print(e)
       

def audio_features_all_tracks(tracks,spotify):
    return pd.DataFrame([x for x in list(map(lambda x: get_audio_features(x,spotify),tracks)) if x is not None])

In [11]:
tracks_to_insert= audio_features_all_tracks(tracks_to_insert,spotify)

### Concat new df with old one 

DECADI FATTE:\
    -- 00\
    -- 90\
    -- 80\
    -- 70\
    -- 60 

In [13]:
spotify_songs = pd.read_csv('spotify_songs_or.csv')

In [14]:
spotify_songs = pd.concat([spotify_songs,tracks_to_insert])

In [15]:
spotify_songs

Unnamed: 0,track_uri,track_name,decade,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,spotify:track:21qnJAMtzC6S5SESuqQLEK,Pump Up The Jam,90s,0.885,0.844,10,-9.225,0,0.0733,0.014700,0.000004,0.0494,0.715,124.602
1,spotify:track:0eVZhPIexAYqdwKNqSp3Qm,All Outta Angst,90s,0.637,0.969,0,-4.682,1,0.0432,0.004730,0.001480,0.0428,0.940,100.260
2,spotify:track:4Dm32oO01YpIubCHaAtKkN,My Life Would Suck Without You,90s,0.526,0.882,9,-4.006,1,0.0509,0.001400,0.000000,0.1440,0.424,144.982
3,spotify:track:0EQknhinIfbJ12hwMGjOY5,Walkin' On The Sun,90s,0.735,0.974,6,-4.636,1,0.0318,0.430000,0.000000,0.1450,0.967,123.290
4,spotify:track:5KuuCvS8wK8euQUmH0Zfox,54,90s,0.793,0.681,6,-6.562,0,0.0477,0.021500,0.000612,0.0863,0.792,111.011
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2847,spotify:track:2atYprr9JsQaLynGhrWhld,Heartbeat,2010s,0.458,0.821,9,-5.076,1,0.0334,0.000594,0.000095,0.1610,0.323,144.460
2848,spotify:track:1D066zixBwqFYqBhKgdPzp,Fergalicious,2010s,0.906,0.583,8,-7.721,0,0.3170,0.056900,0.000000,0.1260,0.829,129.056
2849,spotify:track:7aE1vrBiMH5CNQj7uvt3X3,Womanizer,2010s,0.717,0.690,11,-5.236,0,0.0704,0.066100,0.000000,0.1090,0.242,139.005
2850,spotify:track:4k6Uh1HXdhtusDW5y8Gbvy,Bad Habit,2010s,0.686,0.494,1,-7.093,1,0.0355,0.613000,0.000058,0.4020,0.700,168.946


In [16]:
spotify_songs.to_csv('spotify_songs_new.csv',header=True,index=False)

### Remove duplicate

In [17]:
spotify_songs = pd.read_csv('spotify_songs_new.csv')

In [18]:
df_mask  = spotify_songs['track_uri'].duplicated(keep=False)

In [19]:
spotify_songs_unique = spotify_songs[~df_mask]

In [20]:
spotify_songs_unique['decade'] = spotify_songs_unique['decade'].apply(lambda x : '10s' if(x=='2010s') else x )

In [21]:
spotify_songs_unique.to_csv('spotify_songs_new.csv',header=True,index=False)