In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util

import requests
import pandas as pd
import numpy as np
import json
import os
import dotenv
import sys
import tekore as tk
sys.tracebacklimit = 0 # turn off the error tracebacks

from colorthief import ColorThief
from urllib.request import urlopen
import io

In [2]:
dotenv.load_dotenv()


username = 'michael_vaden'

spot_id = os.getenv('spot_id')
spot_secret = os.getenv('spot_secret')
redirect_uri= 'https://www.virginia.edu/'


client_credentials_manager = SpotifyClientCredentials(client_id=spot_id, client_secret=spot_secret)


scope = "playlist-modify-public playlist-modify-private playlist-read-private playlist-read-collaborative user-library-modify"

token = util.prompt_for_user_token(username, scope, spot_id, spot_secret, redirect_uri, show_dialog=True)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, auth=token)

In [3]:
def get_tracks(user, playlist):
    myjson = sp.user_playlist_tracks(user, playlist)

    tracks = myjson['items']

    while myjson['next']:
        myjson = sp.next(myjson)
        tracks.extend(myjson['items'])

    return pd.json_normalize(tracks)

In [4]:
def get_tracks_from_url(username, url):
    playlist_URI = url.split("/")[-1].split("?")[0]

    return get_tracks(username, playlist_URI)

In [5]:
def add_song_features(df):
    song_features = pd.DataFrame()
    for track in df['track.uri']:
        song_features = pd.concat([song_features, pd.json_normalize(sp.audio_features(track))])

    return df.merge(song_features, left_on='track.uri', right_on='uri')


In [6]:
rainy1 = get_tracks_from_url('spotify', "https://open.spotify.com/playlist/37i9dQZF1DXbvABJXBIyiY?si=e2da83ce834b40c5") # rainy day
rainy2 = get_tracks_from_url('Circles Records', "https://open.spotify.com/playlist/3r82Jvzw3SSGKKiKf3dXMM?si=01242f93e6524a23").loc[:210,] # rainy days music
rainy3 = get_tracks_from_url('tiarafernando', "https://open.spotify.com/playlist/47S4MBG0EEXwA0GdJUA4Ur?si=0e19386208f944c3") # a playlist for rainy days

In [7]:
sunny1 = get_tracks_from_url('spotify', "https://open.spotify.com/playlist/37i9dQZF1DX1BzILRveYHb?si=ac80c28e8a104400") # sunny day
sunny2 = get_tracks_from_url('spotify', "https://open.spotify.com/playlist/37i9dQZF1DXd1MXcE8WTXq?si=8662accbcb644c0e") # summer throwbacks
sunny3 = get_tracks_from_url('spotify', "https://open.spotify.com/playlist/37i9dQZF1DX5Ozry5U6G0d?si=953f1b53a9584ce7") # summer party
sunny4 = get_tracks_from_url('spotify', 'https://open.spotify.com/playlist/37i9dQZF1DX9fZ7amiNVu6?si=bf527d1910ff4453') # feel good summer

In [8]:
rainy = pd.concat([rainy1, rainy2, rainy3]).drop_duplicates('track.uri')
#rainy

In [9]:
sunny = pd.concat([sunny1, sunny2, sunny3, sunny4]).drop_duplicates('track.uri')
#sunny

In [10]:
#rainy.describe()
rainy = rainy[['track.uri', 'track.id', 'track.name', 'track.popularity']].dropna()

In [11]:
#sunny.describe()
sunny = sunny[['track.uri', 'track.id', 'track.name', 'track.popularity']].dropna()

In [16]:
sunny_features = add_song_features(sunny).drop(['id', 'uri', 'track_href', 'analysis_url', 'type'], axis=1)
rainy_features = add_song_features(rainy).drop(['id', 'uri', 'track_href', 'analysis_url', 'type'], axis=1)

In [17]:
sunny_features['weather_type'] = 'sun'
rainy_features['weather_type'] = 'rain'
weather_songs = pd.concat([sunny_features, rainy_features])
weather_songs

Unnamed: 0,track.uri,track.id,track.name,track.popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,weather_type
0,spotify:track:3ZpQiJ78LKINrW9SQTgbXd,3ZpQiJ78LKINrW9SQTgbXd,All I Wanna Do,74.0,0.820,0.528,9,-11.179,1,0.0321,0.1110,0.018600,0.2570,0.9310,120.091,272107,4,sun
1,spotify:track:3XKIUb7HzIF1Vu9usunMzc,3XKIUb7HzIF1Vu9usunMzc,Maria Maria (feat. The Product G&B),83.0,0.777,0.601,2,-5.931,1,0.1260,0.0406,0.002010,0.0348,0.6800,97.911,261973,4,sun
2,spotify:track:0ofHAoxe9vBkTCp2UQIavz,0ofHAoxe9vBkTCp2UQIavz,Dreams - 2004 Remaster,87.0,0.828,0.492,0,-9.744,1,0.0276,0.0644,0.004280,0.1280,0.7890,120.151,257800,4,sun
3,spotify:track:0bRXwKfigvpKZUurwqAlEh,0bRXwKfigvpKZUurwqAlEh,Lovely Day,81.0,0.692,0.651,9,-8.267,1,0.0324,0.2920,0.002410,0.1050,0.7060,97.923,254560,4,sun
4,spotify:track:1YLJVmuzeM2YSUkCCaTNUB,1YLJVmuzeM2YSUkCCaTNUB,Dog Days Are Over,79.0,0.492,0.810,7,-5.315,1,0.0847,0.0416,0.003790,0.1170,0.2450,149.954,251840,4,sun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,spotify:track:39sDitIeCMrVX2QyXHY46t,39sDitIeCMrVX2QyXHY46t,Blue Hair,86.0,0.751,0.720,4,-6.376,1,0.0303,0.5540,0.049700,0.2580,0.8840,135.730,217000,4,rain
387,spotify:track:3Q4zjvPKaxkGUo0MTdrbir,3Q4zjvPKaxkGUo0MTdrbir,Stereotypes,34.0,0.677,0.475,11,-6.113,1,0.0308,0.5320,0.000000,0.2030,0.6120,117.822,168424,4,rain
388,spotify:track:1MHb9ROmrgyGvPXx68i04u,1MHb9ROmrgyGvPXx68i04u,Vulnerable,44.0,0.598,0.183,9,-13.487,1,0.0304,0.5350,0.000062,0.1290,0.0796,137.035,155146,4,rain
389,spotify:track:1jCcfSHdXosT54e64GFRE1,1jCcfSHdXosT54e64GFRE1,Deceive Me So Easy,3.0,0.400,0.457,9,-10.543,0,0.0404,0.3680,0.006790,0.3150,0.1390,77.182,238213,4,rain


In [30]:
weather_for_ML = weather_songs.iloc[:,3:].reset_index()
weather_for_ML['weather_type'] = weather_for_ML['weather_type'].map({'rain':0, 'sun':1})
weather_for_ML

Unnamed: 0,index,track.popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,weather_type
0,0,74.0,0.820,0.528,9,-11.179,1,0.0321,0.1110,0.018600,0.2570,0.9310,120.091,272107,4,1
1,1,83.0,0.777,0.601,2,-5.931,1,0.1260,0.0406,0.002010,0.0348,0.6800,97.911,261973,4,1
2,2,87.0,0.828,0.492,0,-9.744,1,0.0276,0.0644,0.004280,0.1280,0.7890,120.151,257800,4,1
3,3,81.0,0.692,0.651,9,-8.267,1,0.0324,0.2920,0.002410,0.1050,0.7060,97.923,254560,4,1
4,4,79.0,0.492,0.810,7,-5.315,1,0.0847,0.0416,0.003790,0.1170,0.2450,149.954,251840,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,386,86.0,0.751,0.720,4,-6.376,1,0.0303,0.5540,0.049700,0.2580,0.8840,135.730,217000,4,0
697,387,34.0,0.677,0.475,11,-6.113,1,0.0308,0.5320,0.000000,0.2030,0.6120,117.822,168424,4,0
698,388,44.0,0.598,0.183,9,-13.487,1,0.0304,0.5350,0.000062,0.1290,0.0796,137.035,155146,4,0
699,389,3.0,0.400,0.457,9,-10.543,0,0.0404,0.3680,0.006790,0.3150,0.1390,77.182,238213,4,0


In [31]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(weather_for_ML, test_size=0.2, random_state=42)

test_set, validation_set = train_test_split(test_set, test_size = 0.5, random_state=42)

X_train = train_set.drop("weather_type", axis=1)
y_train = train_set["weather_type"].copy()

X_test = test_set.drop("weather_type", axis=1)
y_test = test_set["weather_type"].copy()

X_validation = validation_set.drop("weather_type", axis=1)
y_validation = validation_set["weather_type"].copy()

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(560, 15)
(70, 15)
(560,)
(70,)


In [32]:
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(X_train, y_train)
y_predict_DT = tree_reg.predict(X_test)
tree_mse = mean_squared_error(y_test, y_predict_DT)
tree_rmse = np.sqrt(tree_mse)
tree_rmse

0.4140393356054125

In [36]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_predict_DT, y_test)

array([[30,  7],
       [ 5, 28]])

In [37]:
from sklearn.metrics import classification_report
 
print(classification_report(y_test, y_predict_DT))

              precision    recall  f1-score   support

           0       0.81      0.86      0.83        35
           1       0.85      0.80      0.82        35

    accuracy                           0.83        70
   macro avg       0.83      0.83      0.83        70
weighted avg       0.83      0.83      0.83        70

