In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [2]:
from config import SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET

In [3]:
#Set up Spotify credentials
client_credentials_manager = SpotifyClientCredentials(SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
#Set up Hot 100 csv
hot100 = pd.read_csv('charts.csv')
hot100.head()

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
0,2021-06-12,1,Butter,BTS,1.0,1,2
1,2021-06-12,2,Good 4 U,Olivia Rodrigo,2.0,1,3
2,2021-06-12,3,Levitating,Dua Lipa Featuring DaBaby,4.0,2,35
3,2021-06-12,4,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),5.0,1,13
4,2021-06-12,5,Save Your Tears,The Weeknd & Ariana Grande,7.0,1,25


In [5]:
#Clean Hot 100 dataframe
hot100.isnull().sum()

date                  0
rank                  0
song                  0
artist                0
last-week         31971
peak-rank             0
weeks-on-board        0
dtype: int64

In [6]:
hot100 = hot100.drop(columns = ['last-week'])
hot100.head()

Unnamed: 0,date,rank,song,artist,peak-rank,weeks-on-board
0,2021-06-12,1,Butter,BTS,1,2
1,2021-06-12,2,Good 4 U,Olivia Rodrigo,1,3
2,2021-06-12,3,Levitating,Dua Lipa Featuring DaBaby,2,35
3,2021-06-12,4,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),1,13
4,2021-06-12,5,Save Your Tears,The Weeknd & Ariana Grande,1,25


In [7]:
hot100.value_counts()

date        rank  song                                                              artist                     peak-rank  weeks-on-board
2021-06-12  100   Botella Tras Botella                                              Gera MX + Christian Nodal  60         4                 1
1979-08-04  55    Saturdaynight                                                     Herman Brood               55         4                 1
1979-07-21  37    The Devil Went Down To Georgia                                    The Charlie Daniels Band   37         5                 1
            38    Just When I Needed You Most                                       Randy VanWarmer            4          18                1
            39    If I Said You Have A Beautiful Body Would You Hold It Against Me  Bellamy Brothers           39         9                 1
                                                                                                                                           ..
2000-07-01 

In [8]:
hot100.dtypes

date              object
rank               int64
song              object
artist            object
peak-rank          int64
weeks-on-board     int64
dtype: object

In [9]:
hot100["date"] = pd.to_datetime(hot100["date"])
hot100.dtypes

date              datetime64[ns]
rank                       int64
song                      object
artist                    object
peak-rank                  int64
weeks-on-board             int64
dtype: object

In [10]:
hot100 = hot100.sort_values(by = 'date', ascending = False)
hot100.head()

Unnamed: 0,date,rank,song,artist,peak-rank,weeks-on-board
0,2021-06-12,1,Butter,BTS,1,2
64,2021-06-12,65,amari,J. Cole,5,3
75,2021-06-12,76,What's Next,Drake,1,13
73,2021-06-12,74,4 Da Gang,42 Dugg & Roddy Ricch,67,9
72,2021-06-12,73,Quicksand,Morray,65,17


In [11]:
selecthot100 = hot100.loc[(hot100['date'] >= '2020-01-01') & (hot100['date'] <='2020-12-31')]
selecthot100 = selecthot100.head(10)

In [12]:
def searchSongs(title, artist):
    cleaned_artist = artist.replace(" Featuring", "")
    results = sp.search(q=f'track:{title} artist:{cleaned_artist}')
    top_result = results['tracks']['items'][0]
    song_dict = {'song_title': top_result['name'],
                'song_artist': top_result['artists'][0]['name'],
                'song_id': top_result['id'],
                'popularity': top_result['popularity']}
    return song_dict

In [13]:
#define songs for search
song_artist = pd.DataFrame(data = selecthot100, columns = ['song', 'artist'])

In [14]:
#Create song dicts
hot_song_dicts = [searchSongs(item['song'], item['artist']) for idx, item in song_artist.iterrows()]
hot_song_dicts

[{'song_title': 'Another Day',
  'song_artist': 'Kid Cudi',
  'song_id': '6myUpr3GDR80Dg3zqNTmmG',
  'popularity': 64},
 {'song_title': 'Big, Big Plans',
  'song_artist': 'Chris Lane',
  'song_id': '7yNJCsUH3tXlpQiHSsAc5l',
  'popularity': 71},
 {'song_title': 'Starting Over',
  'song_artist': 'Chris Stapleton',
  'song_id': '3K07bGe8iljQ3mOKArHLDo',
  'popularity': 70},
 {'song_title': 'cowboy like me',
  'song_artist': 'Taylor Swift',
  'song_id': '1XjHRolIXL2M1EEOUsGGR4',
  'popularity': 64},
 {'song_title': 'I Should Probably Go To Bed',
  'song_artist': 'Dan + Shay',
  'song_id': '5ovVcYo2MvjVydFwFyaaqy',
  'popularity': 73},
 {'song_title': 'Here Comes Santa Claus (Right Down Santa Claus Lane) - 1947 Version',
  'song_artist': 'Gene Autry',
  'song_id': '25leEEaz1gIpp7o21Fqyjo',
  'popularity': 42},
 {'song_title': 'long story short',
  'song_artist': 'Taylor Swift',
  'song_id': '2o2sgVJIgFXk8GQjWTgI6U',
  'popularity': 66},
 {'song_title': 'Way Out (feat. Big Sean)',
  'song_ar

In [15]:
def getTrackFeatures(song_id):
    meta = sp.track(song_id)
    features = sp.audio_features(song_id)
    
    track= {
        # Meta data
        "name": meta['name'],
        "album": meta['album']['name'],
        "artist": meta['album']['artists'][0]['name'],
        "release_date": meta['album']['release_date'],
        "length": meta['duration_ms'],
        "popularity": meta['popularity'],

        # features
        "acousticness": features[0]['acousticness'],
        "danceability": features[0]['danceability'],
        "energy": features[0]['energy'],
        "instrumentalness": features[0]['instrumentalness'],
        "liveness": features[0]['liveness'],
        "loudness": features[0]['loudness'],
        "speechiness": features[0]['speechiness'],
        "tempo": features[0]['tempo'],
        "time_signature": features[0]['time_signature'],

    }
    
    return track

In [16]:
# Access song_id to get the rest of the features
hot_song_features = [getTrackFeatures(song_dict['song_id']) for song_dict in hot_song_dicts]
hot_song_features

[{'name': 'Another Day',
  'album': 'Man On The Moon III: The Chosen',
  'artist': 'Kid Cudi',
  'release_date': '2020-12-11',
  'length': 199786,
  'popularity': 64,
  'acousticness': 0.556,
  'danceability': 0.646,
  'energy': 0.758,
  'instrumentalness': 0.00316,
  'liveness': 0.335,
  'loudness': -7.75,
  'speechiness': 0.0708,
  'tempo': 172.995,
  'time_signature': 4},
 {'name': 'Big, Big Plans',
  'album': 'Big, Big Plans',
  'artist': 'Chris Lane',
  'release_date': '2019-06-28',
  'length': 187306,
  'popularity': 71,
  'acousticness': 0.076,
  'danceability': 0.574,
  'energy': 0.58,
  'instrumentalness': 0,
  'liveness': 0.12,
  'loudness': -6.091,
  'speechiness': 0.0278,
  'tempo': 149.974,
  'time_signature': 4},
 {'name': 'Starting Over',
  'album': 'Starting Over',
  'artist': 'Chris Stapleton',
  'release_date': '2020-11-13',
  'length': 240413,
  'popularity': 70,
  'acousticness': 0.452,
  'danceability': 0.638,
  'energy': 0.538,
  'instrumentalness': 0.000415,
  'l

In [17]:
cleaned_hot_songs = pd.DataFrame(data = hot_song_features)
cleaned_hot_songs

Unnamed: 0,name,album,artist,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Another Day,Man On The Moon III: The Chosen,Kid Cudi,2020-12-11,199786,64,0.556,0.646,0.758,0.00316,0.335,-7.75,0.0708,172.995,4
1,"Big, Big Plans","Big, Big Plans",Chris Lane,2019-06-28,187306,71,0.076,0.574,0.58,0.0,0.12,-6.091,0.0278,149.974,4
2,Starting Over,Starting Over,Chris Stapleton,2020-11-13,240413,70,0.452,0.638,0.538,0.000415,0.0695,-8.445,0.0315,89.124,4
3,cowboy like me,evermore,Taylor Swift,2020-12-11,275040,64,0.768,0.604,0.517,0.000155,0.123,-9.014,0.0347,127.967,4
4,I Should Probably Go To Bed,I Should Probably Go To Bed,Dan + Shay,2020-07-31,171826,73,0.634,0.721,0.277,0.0,0.112,-7.128,0.0365,131.935,4
5,Here Comes Santa Claus (Right Down Santa Claus...,Rudolph The Red Nosed Reindeer And Other Chris...,Gene Autry,1947,150266,42,0.799,0.834,0.371,0.0,0.275,-11.99,0.0358,96.628,4
6,long story short,evermore,Taylor Swift,2020-12-11,215920,66,0.66,0.546,0.73,0.179,0.0972,-7.704,0.0417,157.895,4
7,Way Out (feat. Big Sean),Way Out (feat. Big Sean),Jack Harlow,2020-12-09,168906,73,0.114,0.945,0.582,0.0,0.103,-8.357,0.0719,108.025,4
8,dorothea,evermore,Taylor Swift,2020-12-11,225880,64,0.696,0.605,0.488,0.0,0.129,-8.322,0.0264,119.966,4
9,Diamonds,Diamonds,Sam Smith,2020-09-17,213869,77,0.154,0.653,0.667,0.0,0.107,-6.764,0.0423,104.181,4


In [18]:
cleaned_hot_songs.to_csv(path_or_buf = r"Song Features/test.csv", index=False)