In [18]:
pip install spotipy

Note: you may need to restart the kernel to use updated packages.


In [19]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [20]:
import warnings
# Suppress warnings: with no category or message filter, "ignore" applies to
# every warning raised after this cell runs.
# NOTE(review): this also hides deprecation and data-validation warnings from
# the libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [21]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in (or shared with) the notebook. Set SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel; a missing variable raises
# KeyError here rather than failing later on the first API call.
# NOTE(review): the key pair that was previously committed in this cell should
# be rotated in the Spotify developer dashboard.
import os

cid = os.environ["SPOTIPY_CLIENT_ID"]
secret = os.environ["SPOTIPY_CLIENT_SECRET"]
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [22]:
# Mood label -> Spotify playlist URL. Every track of a playlist is labelled
# with the mood it is listed under.
playlists = {
    "Happy":     "https://open.spotify.com/playlist/37i9dQZF1DXdPec7aLTmlC",
    "Sad":       "https://open.spotify.com/playlist/37i9dQZF1DWVV27DiNWxkR",
    "Energetic": "https://open.spotify.com/playlist/37i9dQZF1DWVY4eLfA3XFQ",
    "Relaxing":  "https://open.spotify.com/playlist/37i9dQZF1DX4sWSpwq3LiO",
}

In [23]:
# Column-oriented accumulators filled by extract_playlist_data(): one list per
# column, each list created separately so the columns never share storage.
_TRACK_DATA_COLUMNS = (
    "track_uri", "track_name", "artist_uri", "artist_info",
    "artist_name", "artist_pop", "artist_genres", "album",
    "track_pop", "mood",
)
_AUDIO_FEATURE_COLUMNS = (
    "danceability", "energy", "key", "loudness", "mode",
    "speechiness", "acousticness", "instrumentalness", "liveness",
    "valence", "tempo", "id", "uri", "track_href",
    "analysis_url", "duration_ms", "time_signature",
)

# Track / artist / album metadata plus the mood label.
data = {column: [] for column in _TRACK_DATA_COLUMNS}
# Fields copied from Spotify's audio-features response.
track_features = {column: [] for column in _AUDIO_FEATURE_COLUMNS}

In [24]:
def extract_playlist_data(playlist_link, mood_label):
    """Append every track of a Spotify playlist to the module-level accumulators.

    For each track, the track/artist/album metadata and ``mood_label`` are
    appended to ``data``; when Spotify returns audio features for the track,
    they are appended to ``track_features``. A track without audio features
    gets a row in ``data`` only.

    Args:
        playlist_link: Playlist URL. The playlist ID is taken from the last
            path segment, with any ``?...`` query string stripped.
        mood_label: Label stored in ``data["mood"]`` for every track.

    Returns:
        None. Results are appended in place to ``data`` and ``track_features``.
    """
    playlist_URI = playlist_link.split("/")[-1].split("?")[0]

    # playlist_tracks() returns one page of results; follow the paging links
    # so playlists longer than a single page are read completely.
    page = sp.playlist_tracks(playlist_URI)
    while page:
        for item in page["items"]:
            track = item.get("track")
            # Entries that are no longer available carry no track object.
            if not track:
                continue

            # URI and label
            track_uri = track["uri"]
            data["track_uri"].append(track_uri)
            data["mood"].append(mood_label)

            # Track info
            data["track_name"].append(track["name"])

            # Artist info (first listed artist only; one API call per track)
            primary_artist = track["artists"][0]
            artist_uri = primary_artist["uri"]
            data["artist_uri"].append(artist_uri)
            artist_info = sp.artist(artist_uri)
            data["artist_info"].append(artist_info)

            data["artist_name"].append(primary_artist["name"])
            data["artist_pop"].append(artist_info["popularity"])
            data["artist_genres"].append(artist_info["genres"])

            # Album and track popularity
            data["album"].append(track["album"]["name"])
            data["track_pop"].append(track["popularity"])

            # Audio features (the entry is falsy when none are available)
            features = sp.audio_features(track_uri)[0]
            if features:
                for key in track_features.keys():
                    track_features[key].append(features[key])

        # sp.next() returns None once there is no further page.
        page = sp.next(page)

In [25]:
# Start from empty accumulators so that re-running this cell does not append a
# second copy of every track.
for column in (*data.values(), *track_features.values()):
    column.clear()

# Label every track with the mood of the playlist it came from.
for mood, link in playlists.items():
    extract_playlist_data(link, mood)

In [26]:
df_track_data = pd.DataFrame.from_dict(data)
# A track that appears more than once (e.g. in two playlists) has its audio
# features recorded once per appearance. Keep a single feature row per URI so
# the join below yields one row per collected track instead of multiplying
# duplicate keys on both sides.
df_track_features = pd.DataFrame.from_dict(track_features).drop_duplicates(subset="uri")
# Inner join: tracks without audio features are dropped. validate= makes
# pandas raise if the feature side ever contains duplicate URIs again.
df_merged = pd.merge(
    df_track_data,
    df_track_features,
    how="inner",
    left_on="track_uri",
    right_on="uri",
    validate="many_to_one",
)

In [27]:
# Reduce each Spotify URI column to the trailing run of word characters
# (e.g. "spotify:track:<id>" -> "<id>").
for uri_column in ("track_uri", "artist_uri", "uri"):
    df_merged[uri_column] = df_merged[uri_column].apply(
        lambda value: re.findall(r"\w+$", value)[0]
    )

In [28]:
# Audio-feature columns used as model inputs.
features = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
    'time_signature',
]
# Design matrix and target label.
X = df_merged[features]
y = df_merged['mood']

In [29]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [31]:
# Random forest with default hyper-parameters; the fixed seed keeps training
# reproducible. It is fitted on the scaled training features.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

In [32]:
# Evaluate on the held-out split: overall accuracy plus per-class metrics.
y_pred = model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print(per_class_report)

Accuracy: 0.9714285714285714
              precision    recall  f1-score   support

   Energetic       1.00      1.00      1.00        21
       Happy       0.92      1.00      0.96        24
    Relaxing       1.00      1.00      1.00        17
         Sad       1.00      0.75      0.86         8

    accuracy                           0.97        70
   macro avg       0.98      0.94      0.95        70
weighted avg       0.97      0.97      0.97        70



In [33]:
# Persist the fitted classifier and the scaler it was trained with; both files
# are loaded again by the inference cell below.
joblib.dump(model, 'trained_mood_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [34]:
# %%
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# Load the trained model and scaler
# (joblib.load unpickles the files — only load artifacts you produced yourself.)
model = joblib.load('trained_mood_model.pkl')
scaler = joblib.load('scaler.pkl')

# Spotify API credentials are read from the environment so that secrets are
# never stored in the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel; a missing variable raises KeyError here.
# NOTE(review): the key pair that was previously committed in this cell should
# be rotated in the Spotify developer dashboard.
import os

cid = os.environ["SPOTIPY_CLIENT_ID"]
secret = os.environ["SPOTIPY_CLIENT_SECRET"]
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# %%
def get_audio_features(track_uri):
    """Fetch the model's input features for one track.

    Args:
        track_uri: Spotify URI of the track to look up.

    Returns:
        A list of the audio-feature values in the order listed below, or
        ``None`` (after printing a notice) when Spotify returns no features
        for the track.
    """
    features = sp.audio_features(track_uri)[0]
    if not features:
        print("No features available for this track.")
        return None

    # Order of the values in the returned list.
    ordered_names = (
        'danceability', 'energy', 'key',
        'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness',
        'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature',
    )
    return [features[name] for name in ordered_names]

# %%
def predict_mood(track_uri):
    """Predict the mood of a song given its URI.

    Args:
        track_uri: Spotify URI of the track to classify.

    Returns:
        The mood label predicted by ``model``, or the string
        ``"Could not retrieve features for prediction."`` when no audio
        features could be fetched for the track.
    """
    # Get audio features
    features = get_audio_features(track_uri)
    if not features:
        return "Could not retrieve features for prediction."

    # A scaler fitted on a DataFrame remembers its column names. Passing a
    # bare list then makes scikit-learn warn that X has no valid feature
    # names, so rebuild a one-row frame with those names when they exist.
    column_names = getattr(scaler, "feature_names_in_", None)
    if column_names is not None:
        sample = pd.DataFrame([features], columns=column_names)
    else:
        sample = [features]

    # Scale the features
    features_scaled = scaler.transform(sample)
    # Predict mood
    predicted_mood = model.predict(features_scaled)[0]
    return predicted_mood

# Example: classify a single track by its Spotify URI and print the result.
track_uri = "spotify:track:5BYyEjrCDVqtppYcxsXqZs"  
mood = predict_mood(track_uri)
print(f"The predicted mood for the track is: {mood}")


The predicted mood for the track is: Sad
