In [None]:
from google.colab import drive

# Directory in this runtime's filesystem where Google Drive gets mounted.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import zipfile
import os

# Dataset archive on Drive and the folder its members are unpacked into.
zip_path = "/content/drive/MyDrive/Moodify/Moodify.zip"
extract_path = "/content/drive/MyDrive/Moodify/278k_dataset"

# Unpack every member; the context manager closes the archive afterwards.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# Report what is now present in the target folder.
extracted_files = os.listdir(extract_path)
print("Files in extracted folder:", extracted_files)


Files in extracted folder: ['278k_labelled_uri.csv', '278k_song_labelled.csv']


In [None]:
import pandas as pd
import os

extract_path = "/content/drive/MyDrive/Moodify/278k_dataset"

# Read both CSVs from the extracted folder in one pass:
# first the URI-labelled table, then the song-feature table.
uri_df, song_df = (
    pd.read_csv(os.path.join(extract_path, csv_name))
    for csv_name in ("278k_labelled_uri.csv", "278k_song_labelled.csv")
)


In [None]:
# Show the column index of each loaded frame, URI frame first.
for heading, frame in (("URI DF columns:", uri_df), ("Song DF columns:", song_df)):
    print(heading, frame.columns)

URI DF columns: Index(['Unnamed: 0.1', 'Unnamed: 0', 'duration (ms)', 'danceability', 'energy',
       'loudness', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'spec_rate', 'labels', 'uri'],
      dtype='object')
Song DF columns: Index(['Unnamed: 0', 'duration (ms)', 'danceability', 'energy', 'loudness',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'spec_rate', 'labels'],
      dtype='object')


In [None]:
# (rows, columns) of each frame, one per line.
print(
    f"URI DF shape: {uri_df.shape}",
    f"Song DF shape: {song_df.shape}",
    sep="\n",
)

URI DF shape: (277938, 15)
Song DF shape: (277938, 13)


In [None]:
# Per-class row counts of the `labels` column for the URI frame, then the song frame.
for heading, frame in (
    ("URI DF label counts:", uri_df),
    ("\nSong DF label counts:", song_df),
):
    print(heading)
    print(frame['labels'].value_counts())



URI DF label counts:
labels
1    106429
0     82058
2     47065
3     42386
Name: count, dtype: int64

Song DF label counts:
labels
1    106429
0     82058
2     47065
3     42386
Name: count, dtype: int64


In [None]:
# Mood name for each integer class id 0-3, listed in id order.
# Id 2 was noted as "energetic" and is exposed as "angry" here (energetic → angry).
moodify_label_map = dict(enumerate(("sad", "happy", "angry", "calm")))

In [None]:
# Translate each integer label into its mood name and store it as a new column.
mood_names = uri_df['labels'].map(moodify_label_map)
uri_df['mood'] = mood_names

# Row count per mood after the mapping.
mood_counts = uri_df['mood'].value_counts()
print("\nMood distribution:\n", mood_counts)


Mood distribution:
 mood
happy    106429
sad       82058
angry     47065
calm      42386
Name: count, dtype: int64


In [None]:
# Null count per column, followed by the number of fully duplicated rows.
null_counts = uri_df.isnull().sum()
duplicate_rows = uri_df.duplicated().sum()
print("\nMissing values:\n", null_counts)
print("Duplicates before drop:", duplicate_rows)


Missing values:
 Unnamed: 0.1        0
Unnamed: 0          0
duration (ms)       0
danceability        0
energy              0
loudness            0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
spec_rate           0
labels              0
uri                 0
mood                0
dtype: int64
Duplicates before drop: 0


In [None]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.25.1-py3-none-any.whl.metadata (5.1 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-6.4.0-py3-none-any.whl.metadata (10 kB)
Downloading spotipy-2.25.1-py3-none-any.whl (31 kB)
Downloading redis-6.4.0-py3-none-any.whl (279 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m279.8/279.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis, spotipy
Successfully installed redis-6.4.0 spotipy-2.25.1


In [None]:
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import time  # optional, to avoid hitting rate limits

In [None]:
import os
from getpass import getpass

# Spotify API credentials must not be stored in the notebook itself.
# They are read from the environment (SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET);
# if a variable is unset or empty, the value is requested interactively without echo.
# NOTE(review): the client ID/secret previously hardcoded in this cell were
# exposed in the notebook history and should be rotated in the Spotify dashboard.
client_id = os.environ.get("SPOTIPY_CLIENT_ID") or getpass("Spotify client ID: ")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET") or getpass("Spotify client secret: ")


In [None]:
# Build the client-credentials auth manager from the configured ID/secret,
# then create the Spotify API client that uses it.
spotify_credentials = {"client_id": client_id, "client_secret": client_secret}
auth_manager = SpotifyClientCredentials(**spotify_credentials)
sp = spotipy.Spotify(auth_manager=auth_manager)


In [None]:
def get_song_info(uri):
    """Look up a track through the module-level `sp` client and summarise it.

    Args:
        uri: Track identifier passed straight to `sp.track`.

    Returns:
        A dict with the keys 'name', 'artist', 'album' and 'url', holding the
        track name, the first listed artist's name, the album name and the
        track's Spotify URL. If the lookup or any field access raises, the
        same four keys are returned with every value set to None.
    """
    try:
        track = sp.track(uri)
        info = dict(
            name=track['name'],
            artist=track['artists'][0]['name'],
            album=track['album']['name'],
            url=track['external_urls']['spotify'],
        )
    except Exception:
        # Best-effort lookup: any failure yields an all-None record.
        info = dict.fromkeys(('name', 'artist', 'album', 'url'))
    return info


In [None]:
# Lightweight export: keep only the track URI and its mood, written without the index.
final_df = uri_df.loc[:, ['uri', 'mood']]
final_df.to_csv("moodify_light.csv", index=False)

# On-demand metadata lookup for a single song at prediction time.
def fetch_song_details(uri):
    """Fetch display metadata for one track via the module-level `sp` client.

    Args:
        uri: Track identifier passed straight to `sp.track`.

    Returns:
        A dict with 'name', 'artist' (first listed artist), 'album' and 'url'
        (the track's Spotify link), or None if the lookup or any field access
        raises.
    """
    try:
        track = sp.track(uri)
        first_artist = track['artists'][0]
        details = {
            'name': track['name'],
            'artist': first_artist['name'],
            'album': track['album']['name'],
            'url': track['external_urls']['spotify'],
        }
    except Exception:
        # Best-effort: signal failure to the caller with None.
        return None
    else:
        return details

# Example: look up details for the first URI in the lightweight frame.
sample_uri = final_df['uri'].iat[0]
song_info = fetch_song_details(sample_uri)
print(song_info)


{'name': 'Way Up (feat. Ava Re)', 'artist': 'Floduxe', 'album': 'Way Up (feat. Ava Re)', 'url': 'https://open.spotify.com/track/3v6sBj3swihU8pXQQHhDZo'}


In [None]:
# Second example: the 6th song in the dataset (positional index 5).
sample_uri2 = final_df['uri'].iat[5]
song_info2 = fetch_song_details(sample_uri2)
print("Example 2:", song_info2)

Example 2: {'name': 'The Safety Dance - Video Version', 'artist': 'Men Without Hats', 'album': 'Rhythm of Youth', 'url': 'https://open.spotify.com/track/41MOCUNOgWtaYBFUsGnpZ5'}
