In [1]:
# imports
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# raw data
# Load the goal-scorer records from the CSV file in the working directory.
goals = pd.read_csv(filepath_or_buffer='goalscorers.csv')

In [3]:
# Preview the first five rows of the raw frame.
goals.head(5)

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False


In [4]:
# check if point was earned on goal
# Points each goal row adds to `team`'s score.
#
# `own_goal` must be tested as a boolean: read_csv parses True/False into bools,
# and comparing such a column with the string 'False' never matches, which
# silently turns every row into 1 regardless of the flag.
# Normalising through str makes the test work for real booleans as well as for
# 'True'/'False' text; missing values are treated as "not an own goal".
is_own_goal = goals['own_goal'].astype(str).str.strip().str.lower().eq('true')

# Points credited to `team` for an own-goal row. 1 keeps the match totals this
# notebook has been producing so far (every row counts as one goal for `team`).
# NOTE(review): presumably `team` is the side credited with the goal, which
# makes 1 correct; if `team` is instead the own-goal scorer's side, set this
# to 0 -- confirm against the dataset's data dictionary.
OWN_GOAL_POINTS = 1

goals['point_earned'] = np.where(is_own_goal, OWN_GOAL_POINTS, 1).astype(int)
goals.head()

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1


In [5]:
# total goals for home/away
# A match is identified by (date, home_team, away_team). Grouping on the date
# plus a single team would add together two fixtures that a team plays on the
# same day, so the full match identity is used as the key.
match_key = ['date', 'home_team', 'away_team']

# Only rows scored by the respective side are summed. The result is aligned on
# the index when assigned, so rows scored by the other side are left as NaN.
scored_by_home = goals['team'] == goals['home_team']
scored_by_away = goals['team'] == goals['away_team']
goals['home_goals'] = goals[scored_by_home].groupby(match_key)['point_earned'].transform('sum')
goals['away_goals'] = goals[scored_by_away].groupby(match_key)['point_earned'].transform('sum')
goals.head()

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,,4.0
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,,4.0
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,,4.0
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,,4.0
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6.0,


In [6]:
# fill in nulls
# Rows scored by the other side carry no total yet (NaN); store 0 there so both
# columns can be held as integers.
for side in ('home_goals', 'away_goals'):
    goals[side] = goals[side].fillna(0).astype(int)
goals.iloc[:20]

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,0
5,7/6/1916,Argentina,Chile,Chile,Telésforo Báez,44.0,False,False,1,0,1
6,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,60.0,False,True,1,6,0
7,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,62.0,False,True,1,6,0
8,7/6/1916,Argentina,Chile,Argentina,Alberto Marcovecchio,67.0,False,False,1,6,0
9,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,75.0,False,False,1,6,0


In [7]:
# Spread each side's match total to every row of the same match. Rows scored by
# the other side hold 0 after the null fill, so the per-match maximum is the
# side's real total.
# Group on the full match identity (date, home_team, away_team): keying on the
# date plus one team would mix up two fixtures a team plays on the same day.
match_key = ['date', 'home_team', 'away_team']
for side in ('home_goals', 'away_goals'):
    goals[side] = goals.groupby(match_key)[side].transform('max')
goals.iloc[0:20, :]

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,1
5,7/6/1916,Argentina,Chile,Chile,Telésforo Báez,44.0,False,False,1,6,1
6,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,60.0,False,True,1,6,1
7,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,62.0,False,True,1,6,1
8,7/6/1916,Argentina,Chile,Argentina,Alberto Marcovecchio,67.0,False,False,1,6,1
9,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,75.0,False,False,1,6,1


In [8]:
# check for game winner (home or away)
# Resolve from the inside out: first decide between the home team and 'tie',
# then let an away lead override that result.
home_score = goals['home_goals']
away_score = goals['away_goals']
home_or_tie = np.where(away_score < home_score, goals['home_team'], 'tie')
goals['winner'] = np.where(away_score > home_score, goals['away_team'], home_or_tie)
goals.iloc[:20]

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals,winner
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4,Uruguay
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4,Uruguay
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4,Uruguay
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4,Uruguay
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,1,Argentina
5,7/6/1916,Argentina,Chile,Chile,Telésforo Báez,44.0,False,False,1,6,1,Argentina
6,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,60.0,False,True,1,6,1,Argentina
7,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,62.0,False,True,1,6,1,Argentina
8,7/6/1916,Argentina,Chile,Argentina,Alberto Marcovecchio,67.0,False,False,1,6,1,Argentina
9,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,75.0,False,False,1,6,1,Argentina


In [9]:
# check for game winner team
# Label the winning side by matching the winner's name against each team
# column; a value that matches neither column is labelled 'tie'.
home_won = goals['winner'] == goals['home_team']
away_won = goals['winner'] == goals['away_team']
goals['winner_team'] = np.where(home_won, 'Home', np.where(away_won, 'Away', 'tie'))
goals.iloc[:20]

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals,winner,winner_team
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4,Uruguay,Away
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4,Uruguay,Away
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4,Uruguay,Away
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4,Uruguay,Away
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,1,Argentina,Home
5,7/6/1916,Argentina,Chile,Chile,Telésforo Báez,44.0,False,False,1,6,1,Argentina,Home
6,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,60.0,False,True,1,6,1,Argentina,Home
7,7/6/1916,Argentina,Chile,Argentina,Juan Domingo Brown,62.0,False,True,1,6,1,Argentina,Home
8,7/6/1916,Argentina,Chile,Argentina,Alberto Marcovecchio,67.0,False,False,1,6,1,Argentina,Home
9,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,75.0,False,False,1,6,1,Argentina,Home


In [10]:
# rename team for scorer
# Only the `team` column is renamed (to `scorer_team`); every other column
# keeps its name.
goals = goals.rename({'team': 'scorer_team'}, axis='columns')

In [11]:
# save to csv
# Write the enriched frame next to the notebook, leaving out the row index.
goals.to_csv(path_or_buf='goals_final.csv', index=False)

AttributeError: module 'pandas' has no attribute 'to_csv'