<a href="https://colab.research.google.com/github/shrutiso05/Clustering_Music_Genres_using_k-means-/blob/main/Clustering_Music_Genres_With_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [61]:
import pandas as pd
import numpy as np
from sklearn import cluster
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px




# Data preprocessing


In [62]:
# Load the Spotify track table from a CSV file expected in the working directory.
data = pd.read_csv("Spotify-2000.csv")

# Preview the first rows. A bare last expression uses the notebook's rich
# DataFrame display instead of the line-wrapped plain text that print() emits.
data.head()

   Index                   Title             Artist            Top Genre  \
0      1                 Sunrise        Norah Jones      adult standards   
1      2             Black Night        Deep Purple           album rock   
2      3          Clint Eastwood           Gorillaz  alternative hip hop   
3      4           The Pretender       Foo Fighters    alternative metal   
4      5  Waitin' On A Sunny Day  Bruce Springsteen         classic rock   

   Year  Beats Per Minute (BPM)  Energy  Danceability  Loudness (dB)  \
0  2004                     157      30            53            -14   
1  2000                     135      79            50            -11   
2  2001                     168      69            66             -9   
3  2007                     173      96            43             -4   
4  2002                     106      82            58             -5   

   Liveness  Valence Length (Duration)  Acousticness  Speechiness  Popularity  
0        11       68          

## Drop unnecessary columns


In [63]:
# Remove the "Index" column from the table.
# errors="ignore" keeps this cell safe to re-run: without it, a second
# execution raises KeyError because the column is already gone from `data`.
data = data.drop(columns="Index", errors="ignore")



# Extract relevant features for clustering

In [64]:
# Columns passed to the clustering step, listed one per line for easy editing.
features = [
    "Beats Per Minute (BPM)",
    "Loudness (dB)",
    "Liveness",
    "Valence",
    "Acousticness",
    "Speechiness",
]

# All rows of the table, restricted to the selected feature columns.
data2 = data.loc[:, features]

## Handle missing values (if any)

In [65]:
# Discard every row that has a missing value in any of the feature columns.
data2 = data2.dropna(axis="index", how="any")

## Scale the features

In [66]:
# Rescale each feature column to the [0, 1] interval so that no single
# feature dominates the distance computation because of its units.
scaler = MinMaxScaler(feature_range=(0, 1))
data2_scaled = scaler.fit(data2).transform(data2)

# Perform K-means clustering

In [67]:
# Partition the scaled feature rows into 10 clusters.
# n_init is set explicitly: its default changed between scikit-learn
# releases (with a FutureWarning in between), so leaving it implicit makes
# the cluster assignment depend on the installed version even with a fixed
# random_state.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)

# One integer cluster label per row of `data2_scaled`.
clusters = kmeans.fit_predict(data2_scaled)

# Add the cluster labels to the original dataset

In [68]:
# `clusters` holds one label per row of `data2`, i.e. per row that survived
# dropna(). Wrapping it in a Series indexed like `data2` lets pandas align the
# labels with the matching rows of `data`; assigning the bare array would fail
# with a length mismatch as soon as any row had been dropped. Rows without a
# label (dropped before clustering) are left as NaN.
data["Music Segments"] = pd.Series(clusters, index=data2.index)

# Optional: Map cluster numbers to cluster names for readability

In [69]:
# Readable, 1-based names for the integer labels: 0 -> "Cluster 1", 1 -> "Cluster 2", ...
# Built from the fitted model so the mapping always covers every cluster.
segment_names = {label: f"Cluster {label + 1}" for label in range(kmeans.n_clusters)}

# Values that are not integer labels (already-renamed strings, or NaN for
# unlabelled rows) pass through unchanged. This keeps the cell safe to re-run;
# mapping through the dict directly would turn every existing "Cluster N"
# string into NaN on a second execution.
data["Music Segments"] = data["Music Segments"].map(
    lambda value: segment_names.get(value, value)
)

# Visualize the clusters with a 3D scatter plot

In [71]:
# 3D scatter of the tracks, one trace (and legend entry) per music segment.
# NOTE(review): Energy and Danceability are plotted here but are not in the
# `features` list used for clustering; only BPM is shared between the two.
PLOT = go.Figure()

# groupby(sort=False) yields the segments in order of first appearance,
# filters each segment's rows once instead of three times, and skips rows
# that have no segment label. The loop variable is named `segment` so it does
# not overwrite the `cluster` module imported from sklearn.
for segment, segment_rows in data.groupby("Music Segments", sort=False):
    PLOT.add_trace(go.Scatter3d(
        x=segment_rows['Beats Per Minute (BPM)'],
        y=segment_rows['Energy'],
        z=segment_rows['Danceability'],
        mode='markers',
        marker=dict(size=6, line=dict(width=1)),
        name=str(segment)
    ))

PLOT.update_traces(hovertemplate='BPM: %{x}<br>Energy: %{y}<br>Danceability: %{z}')

# Axis titles use the nested title=dict(text=..., font=...) form; the older
# `titlefont_*` shorthand is deprecated and rejected by newer Plotly releases.
PLOT.update_layout(
    width=800, height=800, autosize=True, showlegend=True,
    scene=dict(
        xaxis=dict(title=dict(text='Beats Per Minute (BPM)', font=dict(color='black'))),
        yaxis=dict(title=dict(text='Energy', font=dict(color='black'))),
        zaxis=dict(title=dict(text='Danceability', font=dict(color='black')))
    ),
    font=dict(family="Arial", color='black', size=12)
)

PLOT.show()