In [37]:
import pandas as pd
import numpy as np
import altair as alt

In [38]:
# Load the goals dataset from the CSV file sitting next to the notebook.
goals = pd.read_csv(filepath_or_buffer='goals.csv')

In [39]:
# Preview the first five rows to check the columns that were loaded.
goals.head(n=5)

Unnamed: 0,date,home_team,away_team,scorer_team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals,winner,winner_team,penalty_point,home_penalties,away_penalties
0,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4,Uruguay,Away,0,0,0
1,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4,Uruguay,Away,0,0,0
2,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4,Uruguay,Away,0,0,0
3,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4,Uruguay,Away,0,0,0
4,1916-07-06,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,1,Argentina,Home,0,2,0


In [40]:
# Custom Altair theme shared by the charts in this notebook.
def custom_theme():
    """Return the theme definition as a plain dict.

    The dict has a single top-level ``'config'`` key holding the default view
    size (600 x 600), the axis / title / legend font sizes and the default
    mark fill colour.
    """
    view = dict(height=600, width=600)
    axis = dict(labelFontSize=12, titleFontSize=13)
    title = dict(fontSize=14)
    legend = dict(labelFontSize=12, titleFontSize=13)
    mark = dict(fill='#005391')

    return {
        'config': dict(view=view, axis=axis, title=title, legend=legend, mark=mark)
    }

# Register the theme function with Altair under the name 'custom_theme'.
alt.themes.register('custom_theme', custom_theme)

# Enable the newly registered theme. As the last expression of the cell, the
# value returned by enable() is what the notebook shows as the cell output.
alt.themes.enable('custom_theme')

ThemeRegistry.enable('custom_theme')

### Top Players of all time

In [41]:
# Parse the match date and derive the calendar year of every goal row.
goals['date'] = pd.to_datetime(goals['date'])
goals['year'] = goals['date'].dt.year

# One row per (year, scorer, scorer_team) holding the summed 'point_earned',
# ordered from the highest total to the lowest.
total_goals_by_player = (
    goals
    .groupby(['year', 'scorer', 'scorer_team'])['point_earned']
    .sum()
    .reset_index(name='total_goals')
    .sort_values(by='total_goals', ascending=False)
)

In [42]:
# Keep only the 2021 rows, ordered from most to fewest goals.
total_goals_by_player2021 = (
    total_goals_by_player
    .loc[total_goals_by_player['year'] == 2021]
    .sort_values(by='total_goals', ascending=False)
)

In [43]:
# Collapse the yearly totals into one total per (scorer, scorer_team),
# ordered from the highest total to the lowest.
total_goals_by_player_alltime = (
    total_goals_by_player
    .groupby(['scorer', 'scorer_team'])['total_goals']
    .sum()
    .reset_index()
    .sort_values(by='total_goals', ascending=False)
)

In [44]:
# Shared data and encodings for the first ten rows of the all-time table:
# goals on x, player on y (sorted by descending x), team name as the text channel.
base = alt.Chart(total_goals_by_player_alltime.head(10)).mark_bar().encode(
    x=alt.X('total_goals:Q', title="Total lifetime goals", scale=alt.Scale(domain=[0, 125])),
    y=alt.Y('scorer:N', title='Player', sort='-x'),
    text='scorer_team:N',
).properties(title='Top Ten Scorers of All Time')

# Bars plus a left-aligned text layer nudged 2 px right of each bar end.
topplayers = alt.layer(
    base.mark_bar(),
    base.mark_text(align='left', dx=2),
)

# titleAngle=0 draws the axis titles horizontally (this is what un-rotates the
# Y-axis title). The configured copy is only displayed; `topplayers` itself
# is left unconfigured.
topplayers.configure_axis(titleAngle=0)

### Explore Cristiano Ronaldo

In [45]:
# Yearly goal totals for Cristiano Ronaldo only.
ronaldo = total_goals_by_player.loc[
    total_goals_by_player['scorer'].eq('Cristiano Ronaldo')
]

In [46]:
# Mean of the per-player yearly totals, one row per year.
average_scorer_by_year = (
    total_goals_by_player
    .groupby('year')['total_goals']
    .mean()
    .reset_index(name='avg_goals_per_player')
)

In [47]:
# Attach the per-year player average to each of Ronaldo's yearly rows.
# A left join keeps exactly the years in which Ronaldo has a row.
ronaldo = pd.merge(ronaldo, average_scorer_by_year, how='left', on='year')

In [48]:
# Reshape to long form: one row per (year, series), with the series name in
# 'type' and its value in 'goal count'.
ronaldo = ronaldo.melt(
    id_vars=['year', 'scorer'],
    value_vars=['total_goals', 'avg_goals_per_player'],
    var_name='type',
    value_name='goal count',
)

# Swap the column-derived series names for the labels shown on the chart.
ronaldo['type'] = ronaldo['type'].replace({
    'total_goals': 'Cristiano Ronaldo',
    'avg_goals_per_player': 'Average Player',
})

In [49]:
# Points per year for both series, told apart by colour and shape.
# Legends are switched off because the series are labelled directly below.
chart = alt.Chart(ronaldo).mark_point().encode(
    x=alt.X('year:O', title="Year", axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('goal count:Q', title='Total Goals Per Season'),
    color=alt.Color('type:N', legend=None),
    shape=alt.Shape('type:N', legend=None),
).properties(title='Cristiano Ronaldo: How does he compare to his peers?')

# Same x / y / colour encoding restricted to the 2013 rows; it only serves as
# the anchor for the text labels.
c2 = (
    alt.Chart(ronaldo)
    .mark_point()
    .transform_filter(alt.datum.year == 2013)
    .encode(
        x=alt.X('year:O', title="Year", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y('goal count:Q', title='Total Goals Per Season'),
        color=alt.Color('type:N', legend=None),
    )
)

# Series names as text, shifted 10 px above the 2013 points.
c3 = c2.mark_text(dy=-10).encode(text='type:N')

ronaldogoals = alt.layer(chart, c3)
ronaldogoals

In [50]:
# Per-scorer summary: total goals, number of distinct years in which the
# scorer appears in `goals`, and goals per such year.
#
# The summary is built on a fresh frame. Assigning `goalyearratio = goals`
# would only alias the shared table, so the helper columns would be written
# into `goals` itself and leak into every later cell that uses it.
goalyearratio = (
    goals[['scorer']]
    .assign(
        # transform() returns one value per original row, aligned on the index.
        scorer_goals=goals.groupby('scorer')['point_earned'].transform('sum'),
        years_played=goals.groupby('scorer')['year'].transform('nunique'),
    )
    .assign(goals_to_year_ratio=lambda df: df['scorer_goals'] / df['years_played'])
    # Every column depends on the scorer alone, so this leaves one row per scorer.
    .drop_duplicates()
)


In [51]:
# Use Altair's JSON data transformer for the charts that follow.
# NOTE(review): presumably enabled because the per-scorer table is too large
# for Altair's default inline-data row limit — confirm.
alt.data_transformers.enable("json")

# One point per scorer: years played against total goals, Ronaldo in red.
ratio = alt.Chart(goalyearratio).mark_point().encode(
    x=alt.X('years_played:Q', title='Years Played'),
    y=alt.Y('scorer_goals:Q', title='Total Goals Scored'),
    color=alt.condition(
        alt.datum.scorer == 'Cristiano Ronaldo',
        alt.value('red'),
        alt.value('blue'),
    ),
).properties(title='Goals scored to years played ratio')

# Text layer that prints the scorer name for Ronaldo's point only (every other
# point gets an empty string). dx=-105 shifts the label 105 px to the left.
text = ratio.mark_text(align='left', baseline='middle', dx=-105, fontSize=12).encode(
    x='years_played:Q',
    y='scorer_goals:Q',
    text=alt.condition(
        alt.datum.scorer == 'Cristiano Ronaldo',
        'scorer',
        alt.value(''),
    ),
    color=alt.value('black'),
)

goalstoyears = alt.layer(ratio, text)
goalstoyears

### Does Ronaldo score early in the game?

In [52]:
# Label every goal as Ronaldo's or someone else's. The string labels are used
# as the category names on the box plot's x-axis.
# (A boolean version of this column used to be assigned first and was
# overwritten straight away; that dead store is removed.)
goals['ronaldo'] = np.where(
    goals['scorer'] == 'Cristiano Ronaldo',
    'Cristiano Ronaldo',
    'All other players',
)

In [53]:
# Use Altair's JSON data transformer (idempotent; harmless if already enabled).
alt.data_transformers.enable("json")

# Box plot of the minute in which goals were scored, Ronaldo vs. everyone else.
# Only the three columns the chart reads (filter field, x and y) are handed to
# Altair, so the remaining columns of `goals` are not serialised.
# NOTE(review): only goals from 2004 are plotted although the title is not
# year-specific — confirm this is intended.
alt.Chart(goals[['year', 'ronaldo', 'minute']]).mark_boxplot(size=40).transform_filter(
    alt.datum.year == 2004
).encode(
    alt.X('ronaldo:N', axis=alt.Axis(labelAngle=0), title=None),
    # clamp=True keeps marks for minutes beyond 90 inside the 0-90 axis range.
    alt.Y('minute:Q', title='Minute Goal Scored', scale=alt.Scale(domain=[0, 90], clamp=True))
).properties(
    title='Does Ronaldo score early in games?'
)