In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Load the master dataset; the path is relative to the notebook's working directory
DATA_PATH = 'data_master.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Create year column: parse release_date as datetime64[ns] and take the calendar year.
# The vectorised .dt accessor replaces a per-row Python lambda; rows that parse to NaT
# still come out as NaN in 'year'.
df['year'] = df['release_date'].astype('datetime64[ns]').dt.year

In [4]:
# Setup data
# Mean of is_collab for every (year, primary_genre) pair, flattened back to columns.
collabs = (
    df[['year', 'primary_genre', 'is_collab']]
    .groupby(['year', 'primary_genre'])
    .mean()
    .reset_index()
)
# All genres present in the aggregate, captured BEFORE the filtering below.
genres = collabs['primary_genre'].unique()

# Genres at positions 2..13 of the frequency ranking (value_counts sorts descending),
# minus a hand-picked exclusion list. Index.drop raises KeyError if any excluded
# label is not present in that slice.
top_g = set(
    df['primary_genre'].value_counts().index[2:14]
    .drop(['country rock', 'tango', 'latin', 'funk', 'soft rock'])
)

# Combine both conditions into ONE mask computed on the same frame and index once.
# Chaining collabs[mask_a][mask_b] applied a full-length mask to an already-filtered
# frame, which made pandas reindex the mask and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
# between() is inclusive on both ends, matching the original >= 1970 and <= 2020.
keep_rows = collabs['primary_genre'].isin(top_g) & collabs['year'].between(1970, 2020)
collabs = collabs[keep_rows]

  collabs = collabs[collabs['primary_genre'].isin(top_g)][(collabs['year'] >= 1970) & (collabs['year'] <= 2020)]


In [31]:
# Line graph showing which genres have the highest/lowest collaboration rate over the years
#   x-axis : time (year)
#   y-axis : is_collab value for that year and genre (the collaboration rate)
#   color  : genre

# Base chart bound to the prepared data
chart = alt.Chart(
    collabs,
    title='Collaboration rate per year by Genre from 1970 to 2020',
).properties(width=800, height=500)

# Interaction pieces.
# NOTE: input_checkboxes and color are only assigned here; nothing in this cell uses them.
input_checkboxes = alt.binding_radio(options=genres)
selection = alt.selection_multi(fields=['primary_genre'], bind='legend')
color = alt.condition(selection, alt.Color('primary_genre:N'), alt.value('lightgray'))

# Selected genres are drawn fully opaque, everything else is faded
opacity_by_selection = alt.condition(selection, alt.value(1), alt.value(0.2))

# Enlarged legend so its entries are easy to click
legend_style = dict(
    labelFontSize=20,
    symbolSize=900,
    symbolStrokeWidth=20,
    titleFontSize=20,
)

# Plot accordingly; the properties() call here overrides the base chart's width
line_chart = (
    chart.mark_line(size=3)
    .encode(
        x='year:O',
        y='is_collab:Q',
        color='primary_genre',
        opacity=opacity_by_selection,
        tooltip=['is_collab', 'year', 'primary_genre'],
    )
    .add_selection(selection)
    .properties(width=1100, height=500)
    .configure_legend(**legend_style)
)

line_chart  # For usage, shift-click different genres in the legend