In [None]:
import altair as alt
import pandas as pd
import warnings

In [None]:
import altair as alt

# Read the song table from the CSV that sits next to the notebook.
songs = pd.read_csv("songs.csv")

# duration_ms is in milliseconds; keep a copy of it expressed in minutes.
songs["duration_mins"] = songs["duration_ms"].div(60000)

# Year-based subsets: everything from 2000 onwards, and everything from 2010 onwards.
# NOTE(review): df_2010s has no upper bound, so any rows with year >= 2020 are
# included as well - confirm that this is intended.
df_all2k = songs.loc[songs["year"] >= 2000]
df_2010s = songs.loc[songs["year"] >= 2010]

# Distinct artist names appearing in df_2010s, sorted in place.
artists = df_2010s["artist"].unique()
artists.sort()

# Rows of df_all2k that belong to one of the three named artists.
df_big3 = df_all2k.loc[
    df_all2k["artist"].isin(["Kendrick Lamar", "Drake", "J. Cole"])
]
# The following cells preview df_2010s and df_big3.


In [None]:
# Preview the first five rows of the subset of songs with year >= 2010.
df_2010s.head()

Unnamed: 0,artist,song,duration_ms,explicit,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre,duration_mins
10,Gigi D'Agostino,L'Amour Toujours,238759,False,2011,1,0.617,0.728,7,-7.932,1,0.0292,0.0328,0.0482,0.36,0.808,139.066,pop,3.979317
29,Chicane,Don't Give Up,210786,False,2016,47,0.644,0.72,10,-9.635,0,0.0419,0.00145,0.504,0.0839,0.53,132.017,Dance/Electronic,3.5131
46,Samantha Mumba,Gotta Tell You,201946,False,2018,43,0.729,0.632,0,-8.75,0,0.0279,0.191,0.0,0.166,0.774,109.981,pop,3.365767
60,DJ Ötzi,Hey Baby (Radio Mix),219240,False,2010,58,0.666,0.968,10,-3.196,1,0.046,0.123,0.0,0.347,0.834,135.099,"pop, easy listening, Dance/Electronic",3.654
62,Mariah Carey,Against All Odds (Take A Look at Me Now) (feat...,199480,False,2011,0,0.471,0.514,1,-5.599,1,0.0315,0.584,0.0,0.103,0.373,117.338,"pop, R&B",3.324667


In [None]:
# Preview the first five rows of the Kendrick Lamar / Drake / J. Cole subset (year >= 2000).
df_big3.head()

Unnamed: 0,artist,song,duration_ms,explicit,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre,duration_mins
954,Drake,Forever,357706,True,2009,73,0.457,0.906,5,-2.278,0,0.342,0.249,0.0,0.182,0.54,104.02,"hip hop, pop, R&B",5.961767
986,Drake,Best I Ever Had,258760,True,2010,54,0.431,0.894,5,-2.673,0,0.33,0.0951,0.0,0.188,0.605,162.161,"hip hop, pop, R&B",4.312667
1071,Drake,Over,233560,True,2010,57,0.325,0.848,7,-5.611,1,0.279,0.0109,0.0,0.124,0.433,100.093,"hip hop, pop, R&B",3.892667
1081,Drake,Find Your Love,208946,False,2010,56,0.625,0.613,6,-6.005,0,0.173,0.0209,0.0,0.0286,0.738,96.033,"hip hop, pop, R&B",3.482433
1172,Drake,Headlines,235986,True,2011,74,0.636,0.566,6,-7.16,0,0.106,0.365,0.000353,0.0917,0.425,151.894,"hip hop, pop, R&B",3.9331


In [None]:
# Work on a copy so the comma-separated `genre` strings in `songs` stay as they are.
songs2 = songs.copy()
# Split strings such as "pop, R&B" into lists of individual genre labels.
songs2['genre'] = songs2['genre'].str.split(', ')

# First listed genre of every song. The literal 'set()' (presumably what the
# dataset contains when no genre is listed - confirm) is relabelled 'other'.
songs['MainGenre'] = songs2['genre'].str[0].replace('set()', 'other')

# One row per (song, genre) pair. The relabelling is restricted to the `genre`
# column: a frame-wide replace would also rewrite any other column that happens
# to contain the string 'set()' (e.g. a song title).
genres = songs2.explode('genre').replace({'genre': {'set()': 'other'}})

# Mean popularity of each genre, attached to every exploded row as `popularity_avg`.
genre_popularity = (
    genres.groupby('genre')['popularity']
    .mean()
    .rename('popularity_avg')
    .reset_index()
)
genres = genres.merge(genre_popularity, on='genre', how='left')


In [None]:
# Hide FutureWarnings attributed to altair.utils.core so they do not clutter the cell output.
warnings.filterwarnings("ignore", category=FutureWarning, module="altair.utils.core")

# One drag brush shared by both bar charts. It is limited to the x channel:
# the y channel of each bar chart is an aggregate count, which is not a field
# the scatterplot below could be filtered on.
brush_interval = alt.selection_interval(encodings=["x"])

# Bar chart: number of songs per year (brushable).
barChart_years = alt.Chart(data=songs, title='Songs per Year').mark_bar().encode(
    x=alt.X("year:O", title="Year", axis=alt.Axis(labelFontSize=12, titleFontSize=12)),
    y=alt.Y("count(song)", title="Number of Songs", scale=alt.Scale(zero=False), axis=alt.Axis(labelFontSize=12, titleFontSize=14))
).properties(
    width=500,
    height=250
).add_params(brush_interval)

# Scatterplot of danceability against valence, restricted to the brushed rows.
# Pan/zoom is enabled on this chart only; enabling it on the whole dashboard
# would also bind drag gestures on the bar charts, where dragging is already
# used for the brush.
# NOTE(review): this chart reads `songs`, whose `genre` column holds the full
# comma-separated string, while the genre bar chart is built from the exploded
# `genres` frame. A genre brush therefore probably only keeps songs whose whole
# genre string equals one of the brushed labels - confirm in the rendered chart.
intervalChart = alt.Chart(songs, title='Danceability and Valence').mark_circle().encode(
    x=alt.X('danceability', title='Danceability', axis=alt.Axis(labelFontSize=12, titleFontSize=14)),
    y=alt.Y('valence', title='Valence', axis=alt.Axis(labelFontSize=12, titleFontSize=14)),
    color=alt.condition(brush_interval, "MainGenre:N", alt.value("lightgray")),
    tooltip=["artist", "song", "genre"]
).transform_filter(brush_interval).properties(width=1000).interactive()

# Bar chart: number of songs per genre, coloured by the genre's average popularity (brushable).
barChart_genre = alt.Chart(genres, title='Songs per Genre').mark_bar().encode(
    x=alt.X("genre:O", title="Genre", axis=alt.Axis(labelFontSize=12, titleFontSize=12)),
    y=alt.Y("count(song)", title="Number of Songs", scale=alt.Scale(zero=False), axis=alt.Axis(labelFontSize=12, titleFontSize=14)),
    color=alt.Color('popularity_avg:Q', scale=alt.Scale(scheme='viridis'), title="Average Popularity")
).properties(
    width=500,
    height=250
).add_params(brush_interval)

# Dashboard layout: genre and year bar charts side by side, scatterplot underneath.
viz = ((barChart_genre | barChart_years) & intervalChart).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_legend(
    labelFontSize=12,
    titleFontSize=10,
    orient='top'
).configure_title(
    fontSize=16
)

viz

This visual maps songs by their danceability and valence and demonstrates filtering through brushing and linking in Altair.
The bar chart on the left shows the number of songs per genre and allows filtering by genre; the bar chart on the right shows the number of songs per year and allows filtering by year.

- valence: A measure from 0.0 to 1.0 describing the musical sentiment conveyed by a track. Tracks with high valence sound more positive, while tracks with low valence sound more negative.
- danceability: How suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable.

Tools/features
- Filter by year (click and drag over an area) with brushing/linking by highlighting years in the bar chart on the right
- Filter by genre (click and drag over an area) with brushing/linking by highlighting genres in the bar chart on the left
- Tooltip (hover over a point) on the scatterplot showing artist, song, and genres
- Interactive scatterplot (click and drag to pan, scroll wheel to zoom)

Note: This was pulled out of a larger body of work with other visualizations and transformation requirements. Some transformations or definitions may be inapplicable here.