In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Read the master dataset from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='data_master.csv')

In [3]:
# Derive the release year from release_date.
# The column is parsed to datetime64[ns] and the year is read through the
# vectorised .dt accessor instead of calling a Python lambda once per row.
# NOTE(review): the df.columns output lists 'year' ahead of 'primary_artist',
# which suggests the CSV already contains a year column that this assignment
# overwrites — confirm that is intended.
df['year'] = df['release_date'].astype('datetime64[ns]').dt.year

In [5]:
# Show the column names available in df
df.columns

Index(['acousticness', 'artists', 'danceability', 'duration_ms', 'energy',
       'explicit', 'id', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mode', 'name', 'popularity', 'release_date', 'speechiness', 'tempo',
       'valence', 'year', 'primary_artist', 'collaborators', 'is_collab',
       'primary_genre'],
      dtype='object')

In [12]:
# Per-genre means of is_collab and popularity, ordered from the most to the
# least popular genre. reset_index() turns primary_genre back into a column.
genre_summary_cols = ['primary_genre', 'is_collab', 'popularity']
collabs = (
    df[genre_summary_cols]
    .groupby(['primary_genre'])
    .mean()
    .reset_index()
    .sort_values(by='popularity', ascending=False)
)

# Preview the top of the sorted summary
collabs.head()

Unnamed: 0,primary_genre,is_collab,popularity
16,k-pop,0.152047,57.087719
21,pop,0.227953,56.318913
15,indie pop,0.063091,55.990536
23,pop rap,0.301639,54.019672
19,modern rock,0.03453,53.966851


In [46]:
# Base charts over the per-genre summary: a titled one for the scatter plot and
# an untitled one shared by both bar charts.
chart_scatter = alt.Chart(collabs, title='Collaboration Rate and Mean Popularity by Genre')
chart_bar = alt.Chart(collabs)

# Rectangular (interval) brush over both scatter axes; it filters the bar charts.
brush = alt.selection_interval(encodings=['x', 'y'])

# Interactive scatter plot.
# Points are filled with a fixed mark size of 100 (a mark property, not an
# encoding); fields are mapped to x, y, color and tooltip.
# NOTE(review): Altair 5 deprecates add_selection in favour of add_params; it is
# kept here so the cell still runs on Altair 4 — confirm the installed version.
scatter = chart_scatter.mark_point(filled=True, size=100).encode(
    x=alt.X('is_collab', title='Collaboration rate'),
    y=alt.Y('popularity', title='Mean popularity'),
    color=alt.Color('primary_genre', legend=None),
    tooltip=['is_collab', 'popularity', 'primary_genre']
).add_selection(
    brush
)


def _linked_bar(field, title):
    """Build a horizontal bar chart of `field` per genre, filtered by the brush.

    Parameters
    ----------
    field : str
        Name of the quantitative column in `collabs` to plot on the x axis.
    title : str
        Human-readable x-axis title.

    Returns
    -------
    alt.Chart
        Bar chart with genres on y (sorted by descending x value), coloured by
        genre, showing only the rows inside the current `brush` selection.
    """
    return chart_bar.mark_bar().encode(
        y=alt.Y('primary_genre:N', sort='-x', title='Genre'),
        x=alt.X(f'{field}:Q', title=title),
        color='primary_genre'
    ).transform_filter(brush)


# Linked bar chart for popularity
bar_pop = _linked_bar('popularity', 'Mean popularity')

# Linked bar chart for collab rate
bar_collab = _linked_bar('is_collab', 'Collaboration rate')

# Scatter on the left, the two bar charts stacked vertically on the right.
# Parentheses spell out the grouping that `&` binding tighter than `|` already gave.
scatter | (bar_pop & bar_collab)