In [3]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load your dataset
df = pd.read_csv('dataset/Spotify-2000.csv')  # Adjust this to your dataset path

# Assuming 'Title' and 'Top Genre' are columns with the song titles and genres respectively
df_numeric = df.select_dtypes(include=['number'])  # Only select numeric columns for scaling

# Standardize the data
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_numeric)
df_scaled = pd.DataFrame(df_scaled, columns=df_numeric.columns)

# Add the song titles and genres back into the scaled DataFrame
df_scaled[['Title', 'Top Genre']] = df[['Title', 'Top Genre']]  # Make sure 'Title' and 'Top Genre' are the correct column names

# Initialize the Dash app
app = dash.Dash(__name__)

# Define the layout of the app
app.layout = html.Div([
    html.H1("Interactive K-Means Clustering with Song Playlists"),
    html.Div([
        dcc.Dropdown(
            id='x-axis',
            options=[{'label': i, 'value': i} for i in df_scaled.columns if i not in ['Title', 'Top Genre']],
            value='Energy',
            style={'width': '30%'}
        ),
        dcc.Dropdown(
            id='y-axis',
            options=[{'label': i, 'value': i} for i in df_scaled.columns if i not in ['Title', 'Top Genre']],
            value='Danceability',
            style={'width': '30%'}
        ),
        dcc.Dropdown(
            id='z-axis',
            options=[{'label': i, 'value': i} for i in df_scaled.columns if i not in ['Title', 'Top Genre']],
            value='Valence',
            style={'width': '30%'}
        ),
        dcc.Input(
            id='num-clusters',
            type='number',
            value=3,
            min=1,
            max=10,
            step=1,
            style={'width': '10%'}
        )
    ], style={'display': 'flex', 'justifyContent': 'space-around'}),
    dcc.Graph(id='cluster-graph'),
    html.Div(id='playlists-container')
])

# Callback to update graph and display playlists and radar charts
@app.callback(
    [Output('cluster-graph', 'figure'),
     Output('playlists-container', 'children')],
    [Input('x-axis', 'value'),
     Input('y-axis', 'value'),
     Input('z-axis', 'value'),
     Input('num-clusters', 'value')]
)
def update_graph_and_playlists(x_col, y_col, z_col, n_clusters):
    # Perform K-means clustering
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    clusters = kmeans.fit_predict(df_scaled[[x_col, y_col, z_col]])
    df_scaled['cluster'] = clusters

    # Create a 3D scatter plot
    fig = px.scatter_3d(df_scaled, x=x_col, y=y_col, z=z_col, color='cluster',
                        title='K-means Clustering Visualization',
                        labels={x_col: x_col, y_col: y_col, z_col: z_col})

    # Define colors for playlists
    colors = ['#FF6347', '#4682B4', '#32CD32', '#FFD700', '#FF69B4', '#6A5ACD', '#20B2AA', '#FF4500', '#DA70D6', '#BDB76B']
    colors = colors[:n_clusters]  # Limit the list of colors to the number of clusters

    # Prepare playlists and radar charts
    playlist_divs = []
    for i in range(n_clusters):
        cluster_data = df_scaled[df_scaled['cluster'] == i]
        mean_values = cluster_data[df_numeric.columns].mean().tolist()
        radar_chart = go.Figure(data=go.Scatterpolar(
            r=mean_values,
            theta=df_numeric.columns,
            fill='toself'
        ))
        radar_chart.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[-3, 3]
                )
            ),
            showlegend=False
        )

        cluster_songs = cluster_data[['Title', 'Top Genre']].sample(n=5, random_state=1)
        songs_list = html.Ul([
            html.Li(f"{row['Title']} ({row['Top Genre']})", style={'color': colors[i]}) for idx, row in cluster_songs.iterrows()
        ])
        radar_html = dcc.Graph(figure=radar_chart, style={'width': '400px', 'height': '400px'})

        # Style the div
        playlist_div = html.Div([
            html.Div([html.H3(f'Playlist for Cluster {i + 1}', style={'color': colors[i]}), songs_list], style={'width': '50%', 'display': 'inline-block'}),
            html.Div([radar_html], style={'width': '50%', 'display': 'inline-block'})
        ], style={'background': '#f9f9f9', 'margin': '10px', 'padding': '10px', 'border-radius': '10px', 'box-shadow': '2px 2px 10px #aaa'})

        playlist_divs.append(playlist_div)

    return fig, playlist_divs

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)


---------------------------------------------------------------------------
InvalidParameterError                     Traceback (most recent call last)
Cell In[3], line 75, in update_graph_and_playlists(
    x_col='Index',
    y_col='Acousticness',
    z_col='Danceability',
    n_clusters=None
)
     64 @app.callback(
     65     [Output('cluster-graph', 'figure'),
     66      Output('playlists-container', 'children')],
   (...)
     72 def update_graph_and_playlists(x_col, y_col, z_col, n_clusters):
     73     # Perform K-means clustering
     74     kmeans = KMeans(n_clusters=n_clusters, n_init=10)
---> 75     clusters = kmeans.fit_predict(df_scaled[[x_col, y_col, z_col]])
        kmeans = KMeans(n_clusters=None, n_init=10)
        df_scaled =          Index      Year  Beats Per Minute (BPM)    Energy  Danceability  \
0    -1.731182  0.683156                1.312739 -1.340009     -0.015521   
1    -1.729445  0.434894                0.527616  0.872304     -0.210991   
2    -1.727708