### 0. Imports

In [13]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [14]:
# Relative path to the songs CSV (resolved against the notebook's working directory).
path = "data/spotify_songs.csv"
# Read the whole CSV into the working DataFrame used by the cells below.
df = pd.read_csv(path)

# Last expression of the cell: displays the first rows as a quick sanity check.
df.head()

Unnamed: 0,track_id,track_name,track_artist,lyrics,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,language
0,0017A6SJgTbfQVU2EtsPNo,Pangarap,Barbie's Cradle,Minsan pa Nang ako'y napalingon Hindi ko alam ...,41,1srJQ0njEQgd8w4XSqI4JQ,Trip,2001-01-01,Pinoy Classic Rock,37i9dQZF1DWYDQ8wBxd7xt,...,-10.068,1,0.0236,0.279,0.0117,0.0887,0.566,97.091,235440,tl
1,004s3t0ONYlzxII9PLgU6z,I Feel Alive,Steady Rollin,"The trees, are singing in the wind The sky blu...",28,3z04Lb9Dsilqw68SHt6jLB,Love & Loss,2017-11-21,Hard Rock Workout,3YouF0u7waJnolytf9JCXf,...,-4.739,1,0.0442,0.0117,0.00994,0.347,0.404,135.225,373512,en
2,00chLpzhgVjxs1zKC9UScL,Poison,Bell Biv DeVoe,"NA Yeah, Spyderman and Freeze in full effect U...",0,6oZ6brjB8x3GoeSYdwJdPc,Gold,2005-01-01,"Back in the day - R&B, New Jack Swing, Swingbe...",3a9y4eeCJRmG9p4YKfqYIx,...,-7.504,0,0.216,0.00432,0.00723,0.489,0.65,111.904,262467,en
3,00cqd6ZsSkLZqGMlQCR0Zo,Baby It's Cold Outside (feat. Christina Aguilera),CeeLo Green,I really can't stay Baby it's cold outside I'v...,41,3ssspRe42CXkhPxdc12xcp,CeeLo's Magic Moment,2012-10-29,Christmas Soul,6FZYc2BvF7tColxO8PBShV,...,-5.819,0,0.0341,0.689,0.0,0.0664,0.405,118.593,243067,en
4,00emjlCv9azBN0fzuuyLqy,Dumb Litty,KARD,Get up out of my business You don't keep me fr...,65,7h5X3xhh3peIK9Y0qI5hbK,KARD 2nd Digital Single ‘Dumb Litty’,2019-09-22,K-Party Dance Mix,37i9dQZF1DX4RDXswvP6Mj,...,-1.993,1,0.0409,0.037,0.0,0.138,0.24,130.018,193160,en


### 1. Sentiment analyser

In [15]:
# Single VADER analyzer instance; get_sentiment_score() reads this module-level name.
analyzer = SentimentIntensityAnalyzer()

In [16]:
# Function to extract sentiment scores from lyrics
def get_sentiment_score(text):
    """Return the VADER compound sentiment score for one song's lyrics.

    Parameters
    ----------
    text : str or missing value
        Raw lyrics for a single song. Missing values (None / NaN), blank or
        whitespace-only strings, and any other non-string cell are treated as
        "no lyrics".

    Returns
    -------
    float
        The analyzer's compound score (-1 to 1), or 0.0 (neutral) when there
        are no lyrics to score.
    """
    # isinstance() covers None/NaN as well as stray non-text cells, so nothing
    # that is not real text is ever handed to the analyzer.
    if not isinstance(text, str) or not text.strip():
        return 0.0  # Neutral if no lyrics
    return analyzer.polarity_scores(text)["compound"]

In [17]:
# Score every song's lyrics element-wise and store the result as a new column
df["sentiment_score"] = df["lyrics"].map(get_sentiment_score)

In [None]:
# Select relevant numerical features (excluding non-numeric ones)
# numeric_features = df.select_dtypes(include=[np.number]).copy()

In [None]:
# Add sentiment scores to features
# numeric_features["sentiment_score"] = df["sentiment_score"]

In [None]:
# Normalize features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(numeric_features)

In [21]:
# Build the scaled feature matrix here: the cells that used to define `X_scaled`
# are commented out, so on a fresh kernel this cell would fail with a NameError.
# `sentiment_score` is already a numeric column of `df`, so select_dtypes picks it
# up. A previous `cluster` column is dropped first so that re-running this cell
# does not feed old cluster labels back in as a feature.
numeric_features = (
    df.drop(columns=["cluster"], errors="ignore")
      .select_dtypes(include=[np.number])
)

# Normalize features so no single column dominates the distance metric
scaler = StandardScaler()
X_scaled = scaler.fit_transform(numeric_features)

# Apply K-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
df["cluster"] = kmeans.fit_predict(X_scaled)

In [22]:
# Rank clusters by their mean sentiment score (ascending) and name them in that order
cluster_means = (
    df.groupby("cluster")["sentiment_score"]
      .mean()
      .sort_values()
)
ordered_labels = ("angry", "relaxed", "happy")  # most negative -> most positive
sentiment_map = {
    cluster_means.index[rank]: label
    for rank, label in enumerate(ordered_labels)
}

In [30]:
# Translate each row's cluster id into its sentiment label
predicted_labels = df["cluster"].map(sentiment_map)
df["predicted_sentiment"] = predicted_labels

# Persist the labelled table (row index is not written)
output_csv = "results/songs_with_sentiments.csv"
df.to_csv(output_csv, index=False)

# Preview lyrics next to their sentiment scores
df.loc[:, ["lyrics", "sentiment_score"]].head(20)

Unnamed: 0,lyrics,sentiment_score
0,Minsan pa Nang ako'y napalingon Hindi ko alam ...,0.0
1,"The trees, are singing in the wind The sky blu...",0.9838
2,"NA Yeah, Spyderman and Freeze in full effect U...",-0.9984
3,I really can't stay Baby it's cold outside I'v...,0.8654
4,Get up out of my business You don't keep me fr...,-0.9993
5,"Hold your breath, don't look down, keep trying...",0.8234
6,All I want is somebody who's gonna love me for...,0.9983
7,Feels good Everybody Tender lover Tender love ...,0.9986
8,"Don't run away, it's getting colder Our hearts...",0.9936
9,Ho una cosa da dirti da tempo Ma non ho mai t...,-0.5267


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Load dataset (replace with your actual CSV file)
# NOTE: this rebinds `df` to a fresh read of the CSV, so any columns added to a
# previous `df` in this kernel are not carried over.
df = pd.read_csv("data/spotify_songs.csv")

# Initialize sentiment analyzer (read as a global by get_sentiment_score below)
analyzer = SentimentIntensityAnalyzer()

# Function to extract sentiment scores from lyrics
def get_sentiment_score(text):
    """Score one song's lyrics with the module-level VADER ``analyzer``.

    Parameters
    ----------
    text : str or missing value
        Lyrics of a single song. None / NaN, empty or whitespace-only strings,
        and non-string cells (e.g. a stray number) count as "no lyrics".

    Returns
    -------
    float
        The ``"compound"`` entry of ``analyzer.polarity_scores(text)``, or 0.0
        (neutral) when there are no lyrics.
    """
    # Check the type before calling .strip(): a non-null, non-string cell would
    # otherwise raise AttributeError and abort the whole column computation.
    if not isinstance(text, str) or not text.strip():
        return 0.0  # Neutral if no lyrics
    return analyzer.polarity_scores(text)["compound"]

# Score each song's lyrics and keep the result as a column
df["sentiment_score"] = df["lyrics"].map(get_sentiment_score)

# Turn the lyrics into a TF-IDF matrix; missing lyrics become empty documents
lyrics_text = df["lyrics"].fillna("")
vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
X_tfidf = vectorizer.fit_transform(lyrics_text)

# Cluster the TF-IDF vectors into three groups
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_ids = kmeans.fit_predict(X_tfidf)
df["cluster"] = cluster_ids

# Order the clusters from lowest to highest mean sentiment score, then pair
# them with labels running from most negative to most positive.
mean_by_cluster = df.groupby("cluster")["sentiment_score"].mean()
cluster_means = mean_by_cluster.sort_values()
sentiment_map = {
    cluster_means.index[position]: name
    for position, name in enumerate(["angry", "relaxed", "happy"])
}

# Look up the sentiment label for every row's cluster id
cluster_column = df["cluster"]
df["predicted_sentiment"] = cluster_column.map(sentiment_map)

# Write the labelled table to disk without the row index
df.to_csv("songs_with_sentiments.csv", index=False)

# Show a sample of lyrics alongside their predicted label
preview_columns = ["lyrics", "predicted_sentiment"]
df[preview_columns].head()


                                              lyrics predicted_sentiment
0  Minsan pa Nang ako'y napalingon Hindi ko alam ...             relaxed
1  The trees, are singing in the wind The sky blu...             relaxed
2  NA Yeah, Spyderman and Freeze in full effect U...             relaxed
3  I really can't stay Baby it's cold outside I'v...             relaxed
4  Get up out of my business You don't keep me fr...             relaxed


In [34]:
# Display lyrics next to their predicted label for up to the first 20 rows.
df[["lyrics", "predicted_sentiment"]].head(20)


Unnamed: 0,lyrics,predicted_sentiment
0,Minsan pa Nang ako'y napalingon Hindi ko alam ...,relaxed
1,"The trees, are singing in the wind The sky blu...",relaxed
2,"NA Yeah, Spyderman and Freeze in full effect U...",relaxed
3,I really can't stay Baby it's cold outside I'v...,relaxed
4,Get up out of my business You don't keep me fr...,relaxed
5,"Hold your breath, don't look down, keep trying...",relaxed
6,All I want is somebody who's gonna love me for...,happy
7,Feels good Everybody Tender lover Tender love ...,happy
8,"Don't run away, it's getting colder Our hearts...",relaxed
9,Ho una cosa da dirti da tempo Ma non ho mai t...,relaxed
