In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time
from sqlalchemy import create_engine

In [2]:
from config import SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET, user_info, connection_string

In [3]:
# Build the Spotify API client from the app credentials imported from config
client_credentials_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
# Load the Hot 100 chart data from the local CSV file
hot100 = pd.read_csv('charts.csv')

# Count missing values per column to see whether any cells are empty
hot100.isna().sum()

In [None]:
# Clean DataFrame: remove the 'last-week' and 'peak-rank' columns, which the
# later cells never read.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
hot100 = hot100.drop(columns=['last-week', 'peak-rank'], errors='ignore')
hot100.head()

In [None]:
# Show each column's dtype (the 'date' column is converted in the next cell)
hot100.dtypes

In [None]:
# Parse the 'date' column into pandas datetime values so it can be sorted
# and compared against date bounds
parsed_dates = pd.to_datetime(hot100["date"])
hot100["date"] = parsed_dates

# Display the dtypes again to confirm the conversion
hot100.dtypes

In [None]:
# Order the chart rows from most recent date to oldest
hot100 = hot100.sort_values('date', ascending=False)
hot100.head()

In [None]:
# Keep only rows dated from 2020-01-01 through 2020-12-31 (both bounds inclusive)
on_or_after_start = hot100['date'] >= '2020-01-01'
on_or_before_end = hot100['date'] <= '2020-12-31'

# Then limit the selection to the first 10 matching rows
selecthot100 = hot100.loc[on_or_after_start & on_or_before_end].head(10)

In [None]:
def getTrackFeatures(song_id):
    """Fetch metadata and audio features for a single Spotify track.

    Calls ``sp.track`` and ``sp.audio_features`` with ``song_id`` and
    flattens both responses into one dict.

    Parameters
    ----------
    song_id
        Identifier of the track, passed unchanged to both spotipy calls.

    Returns
    -------
    dict
        Keys, in order: ``song``, ``album``, ``artist``, ``release_date``,
        ``length`` (taken from ``duration_ms``), ``popularity``,
        ``acousticness``, ``danceability``, ``energy``, ``instrumentalness``,
        ``liveness``, ``loudness``, ``speechiness``, ``tempo``.
        The audio-feature values are ``None`` when Spotify returns no
        feature object for the track.
    """
    meta = sp.track(song_id)
    features = sp.audio_features(song_id)

    # Spotify answers with a null entry (i.e. [None]) when it has no audio
    # features for a track; use an empty mapping so those fields become None
    # instead of crashing on None['acousticness'].
    audio = (features[0] if features else None) or {}

    # Prefer the track's own performers: the album-level artist can be
    # "Various Artists" or the album owner on compilations and soundtracks.
    # Fall back to the album artists if the track lists none.
    performers = meta.get('artists') or meta['album']['artists']

    track = {
        # Meta data
        "song": meta['name'],
        "album": meta['album']['name'],
        "artist": performers[0]['name'],
        "release_date": meta['album']['release_date'],
        "length": meta['duration_ms'],
        "popularity": meta['popularity'],
    }

    # features
    feature_keys = (
        "acousticness",
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "tempo",
    )
    for key in feature_keys:
        track[key] = audio.get(key)

    return track

In [None]:
def searchSongs(title, artist):
    """Search Spotify for a song and return the features of the top match.

    The artist string has every " Featuring" occurrence removed before it
    is placed into the ``track:... artist:...`` search query. The first item
    of the search response is passed to ``getTrackFeatures``.

    Parameters
    ----------
    title : str
        Song title used in the ``track:`` part of the query.
    artist : str
        Artist credit used in the ``artist:`` part of the query.

    Returns
    -------
    dict
        Whatever ``getTrackFeatures`` returns for the top search result.

    Raises
    ------
    IndexError
        If the search returns no tracks. The message names the title and
        artist that could not be matched.
    """
    cleaned_artist = artist.replace(" Featuring", "")
    results = sp.search(q=f'track:{title} artist:{cleaned_artist}')
    matches = results['tracks']['items']

    # Same exception type as indexing an empty list, but the message says
    # which song failed, which matters when looping over many chart rows.
    if not matches:
        raise IndexError(
            f"No Spotify track found for title={title!r}, artist={cleaned_artist!r}"
        )

    return getTrackFeatures(matches[0]['id'])

In [None]:
def getSongData(row, delay=1):
    """Look up one chart row on Spotify and attach its chart fields.

    Parameters
    ----------
    row
        Mapping-like chart row providing ``'song'``, ``'artist'``,
        ``'date'``, ``'rank'`` and ``'weeks-on-board'``.
    delay : int or float, optional
        Seconds to pause before the lookup to stay under API rate limits.
        Defaults to 1; a falsy value (0 or None) skips the pause.

    Returns
    -------
    dict
        The dict returned by ``searchSongs`` with ``popular_date``,
        ``ranking`` and ``weeks_on_board`` added from the row.
    """
    # Sleep to manage API calls for rate limitations
    if delay:
        time.sleep(delay)

    song_details = searchSongs(row['song'], row['artist'])
    song_details.update({
        'popular_date': row['date'],
        'ranking': row['rank'],
        'weeks_on_board': row['weeks-on-board'],
    })
    return song_details

In [None]:
# Look up Spotify data for every selected chart row, in row order
hot_song_features = [
    getSongData(chart_row)
    for row_label, chart_row in selecthot100.iterrows()
]

In [None]:
# Assemble the per-song dicts into a single table: one row per song
cleaned_hot_songs = pd.DataFrame(hot_song_features)
cleaned_hot_songs

In [None]:
# Connect to Postgres
# The URL is assembled from two values imported from config.py:
#   user_info         - the part before '@' (presumably user:password; confirm in config.py)
#   connection_string - the part after '@' (presumably host:port/database; confirm in config.py)
db_string = f'postgresql://{user_info}@{connection_string}'
# SQLAlchemy engine used by the to_sql call in the next cell
engine = create_engine(db_string)

In [None]:
# Write the table to Postgres as 'popular_songs', replacing the table if it
# already exists; the DataFrame index is not stored
cleaned_hot_songs.to_sql(
    'popular_songs',
    con=engine,
    if_exists='replace',
    index=False,
    chunksize=5200,
)