In [1]:
#Import libraries
import altair as alt
import pandas as pd

In [5]:
# Load the master CSV into a DataFrame, keeping only the two columns this
# analysis needs. `usecols` stops pandas from parsing the unused columns;
# the trailing `[columns]` selection pins the column order to
# (year, is_collab), because `usecols` alone returns columns in file order.
columns = ['year', 'is_collab']
df = pd.read_csv('data_master.csv', usecols=columns)[columns]
df

Unnamed: 0,year,is_collab
0,1920,False
1,1920,False
2,1920,False
3,1920,False
4,1920,False
...,...,...
174384,2020,True
174385,2021,False
174386,2020,False
174387,2021,False


In [6]:
# Average the boolean `is_collab` flag within each year (the mean of a
# True/False column is the share of True rows), then turn the group key back
# into a regular column and give both columns chart-friendly labels.
data = (
    df.groupby('year')
    .mean()
    .reset_index()
    .rename(
        columns={
            'year': 'Year',
            'is_collab': 'Proportion of Collaborations',
        }
    )
)
data

Unnamed: 0,Year,Proportion of Collaborations
0,1920,0.183381
1,1921,0.237179
2,1922,0.000000
3,1923,0.043243
4,1924,0.211864
...,...,...
97,2017,0.305659
98,2018,0.293294
99,2019,0.304852
100,2020,0.300419


In [4]:
# Selection that tracks the data point whose Year is nearest to the mouse
# cursor. empty='none' means nothing is selected (and so nothing is
# highlighted) until the cursor is over the chart.
# NOTE(review): alt.selection(type='single') / add_selection are the Altair 4
# spelling; Altair 5 offers selection_point / add_params instead. Kept as-is
# because the installed Altair version is not visible here -- confirm before
# migrating.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['Year'], empty='none')

# Shared x encoding for every layer.
#  - ':Q' + format='d' prints years as plain integers; without it the default
#    quantitative number format renders tick labels such as "1,920".
#  - zero=False keeps the axis on the observed year range rather than letting
#    a layer pull a zero baseline into the shared scale.
year_x = alt.X('Year:Q',
               axis=alt.Axis(format='d'),
               scale=alt.Scale(zero=False))

# Base line chart: year vs. proportion of collaborations
line = alt.Chart(data).mark_line(
).encode(
    x=year_x,
    y='Proportion of Collaborations:Q',
)

# Transparent selectors across the chart. This is what tells us the x-value of the cursor
selectors = alt.Chart(data).mark_point().encode(
    x=year_x,
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Draw points on the line; only the selected (nearest) point is visible
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Draw a text label next to the selected point. The value is rounded to three
# decimals so the label stays short; unselected points get a blank label.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(
        nearest,
        alt.Text('Proportion of Collaborations:Q', format='.3f'),
        alt.value(' '),
    )
)

# Draw a vertical rule at the selected year
rules = alt.Chart(data).mark_rule(color='gray').encode(
    x=year_x,
).transform_filter(
    nearest
)

# Put the five layers into a chart; the title lets the figure stand alone
alt.layer(
    line, selectors, points, rules, text
).properties(
    width=600, height=300,
    title='Proportion of Collaborations by Year'
)