In [24]:
import pandas as pd
import altair as alt

Enable large dataset support

In [31]:
# Use the "vegafusion" data transformer (enabled here for large-dataset support).
alt.data_transformers.enable(name="vegafusion")
# Render charts with Altair's "default" renderer.
alt.renderers.enable(name="default")

Load the dataset and reshape it to long format

In [44]:
# Read the raw table from the CSV file next to the notebook.
df = pd.read_csv("dataset.csv")

# Reshape wide -> long: "popularity" is kept as the identifier column, while
# the "danceability" and "energy" columns are stacked into a "Feature" (name)
# column and a "Value" (number) column.
df_melted = pd.melt(
    df,
    id_vars=["popularity"],
    value_vars=["danceability", "energy"],
    var_name="Feature",
    value_name="Value",
)

Sample data for clarity

In [45]:
# Draw a reproducible random subset (fixed seed) of at most 2000 long-format rows.
# The size is clamped to the number of available rows because DataFrame.sample
# raises ValueError when asked for more rows than exist (without replacement).
# NOTE: this rebinds df_melted, so re-running the cell samples from the
# already-sampled frame rather than from the full melted data.
df_melted = df_melted.sample(n=min(2000, len(df_melted)), random_state=42)

Base scatter plot

In [46]:
# Scatter layer: one small, translucent circle per row of df_melted,
# positioned by feature value (x) against popularity (y).
base = alt.Chart(df_melted).mark_circle(size=40, opacity=0.4).encode(
    alt.X("Value", title="Feature Value"),
    alt.Y("popularity", title="Popularity"),
    # Colour by feature, with the legend turned off for this layer.
    alt.Color("Feature", legend=None),
    # Hovering a point shows these three fields.
    tooltip=["Feature", "Value", "popularity"],
)

Trend lines

In [47]:
# Trend layer: regression of popularity on Value, fitted separately for each
# Feature (groupby) and drawn as a thick line.
trend = (
    alt.Chart(df_melted)
    .transform_regression("Value", "popularity", groupby=["Feature"])
    .mark_line(size=3)
    .encode(
        x="Value",
        y="popularity",
        # Suppress the legend here as well: the scatter layer already sets
        # legend=None, but a layered chart still draws a colour legend if any
        # one layer leaves it enabled.
        color=alt.Color("Feature", legend=None),
    )
)

Combine and facet for clarity

In [48]:
# Overlay the scatter and trend layers, split the result into one column per
# Feature, add a title, and make the chart interactive.
final_chart = (
    alt.layer(base, trend)
    .facet(column="Feature")
    .properties(
        title="Danceability and Energy vs Popularity (sampled, with trend lines)"
    )
    .interactive()
)

In [49]:
# Bare last expression: the notebook renders the chart as this cell's output.
final_chart