In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from mpl_toolkits.mplot3d import Axes3D
import plotly.express as px
import plotly.graph_objects as go
# Load the cleaned music dataset (path is relative to the notebook's working directory).
df = pd.read_csv('./data/musicdata_cleaned.csv')

# Convert 'Length' to a duration in seconds. Prefixing '0:' builds an 'h:mm:ss'-style
# string before parsing (assumes the CSV stores 'm:ss' text -- TODO confirm against the data).
# Values that cannot be parsed become NaN because of errors='coerce'.
df['Length'] = '0:' + df['Length'].astype(str)
df['Length'] = pd.to_timedelta(df['Length'], errors='coerce').dt.total_seconds()
# NOTE: 'Length' now holds numeric seconds. It must not be passed through pd.to_timedelta
# again: bare numbers are interpreted as nanoseconds, which shrinks every value by 1e9.

# Parse release dates; unparseable entries become NaT and those rows are dropped.
df['Release'] = pd.to_datetime(df['Release'], errors='coerce')
df.dropna(subset=['Release'], inplace=True)


## Sunburst Chart – Energy and Valence Hierarchy

In [8]:
import plotly.express as px
import plotly.io as pio

# Send Plotly figures to the system web browser instead of rendering them inline.
pio.renderers.default = 'browser'

# The same four buckets are applied to both features
# (assumes 'Valence' and 'Energy' are 0-100 scores -- TODO confirm against the data).
level_bins = [0, 25, 50, 75, 100]
level_labels = ['Low', 'Med-Low', 'Med-High', 'High']

# include_lowest=True closes the first interval on the left so an exact 0 lands in 'Low'.
# With the default half-open (0, 25] interval a 0 becomes NaN and drops out of the hierarchy.
df['ValenceLevel'] = pd.cut(df['Valence'], bins=level_bins, labels=level_labels, include_lowest=True)
df['EnergyLevel'] = pd.cut(df['Energy'], bins=level_bins, labels=level_labels, include_lowest=True)

# Outer ring: valence level nested inside each energy level; wedge size is the summed 'Pop.' column.
fig = px.sunburst(df, path=['EnergyLevel', 'ValenceLevel'], values='Pop.', title='Energy-Valence Sunburst')
fig.show()

## Radar Chart – Audio Profiles of Songs

In [9]:
# Draw one filled radar trace per song for a reproducible random sample of tracks.
# The sample size is capped at the number of available rows because df.sample raises
# ValueError when asked for more rows than the frame holds (sampling is without replacement).
sample = df.sample(min(25, len(df)), random_state=42)
categories = ['BPM', 'Energy', 'Dance', 'Loud', 'Valence', 'Acoustic']

# NOTE(review): the six features share one radial axis without any rescaling here;
# confirm their ranges are comparable, otherwise the larger-valued ones dominate the plot.
fig = go.Figure()
for _label, row in sample.iterrows():
    fig.add_trace(
        go.Scatterpolar(
            r=row[categories].values,
            theta=categories,
            fill='toself',
            name=row['Title'],  # legend entry is the song title
        )
    )
fig.update_layout(title='Radar Chart of Audio Profiles', polar=dict(radialaxis=dict(visible=True)), showlegend=True)
fig.show()

## 3D Scatter – BPM vs Dance vs Energy

In [5]:
# Interactive 3D scatter with the 'BPM', 'Dance' and 'Energy' columns on the three axes.
fig = px.scatter_3d(
    df,
    x='BPM',
    y='Dance',
    z='Energy',
    color='Pop.',          # marker colour follows the 'Pop.' column
    size='Valence',        # marker size follows the 'Valence' column
    hover_data=['Title'],  # show the song title in the hover tooltip
)
fig.update_layout(title='3D Scatter of BPM, Dance, Energy')
fig.show()