In [1]:
# imports
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Load the raw per-goal records from disk and preview the first rows.
raw_goals_path = 'goals_raw.csv'
goals = pd.read_csv(raw_goals_path)
goals.head()

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty
0,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False
1,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False
2,7/2/1916,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False
3,7/2/1916,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False
4,7/6/1916,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False


In [3]:
# Points each goal row contributes to a side's match total.
#
# The earlier version compared `own_goal` with the *string* 'False'. The saved
# output of this notebook shows rows with own_goal False receiving 1 point,
# which means that comparison never matched: every row got 1, after a needless
# str -> int round trip. That outcome is kept, but is now produced explicitly.
#
# NOTE(review): this notebook does not show whether `team` on an own-goal row
# is the side credited with the goal or the side of the player who scored it.
# Counting every row (the behaviour so far) assumes the former - TODO confirm
# against the data source. Set EXCLUDE_OWN_GOALS = True to give own goals 0.
EXCLUDE_OWN_GOALS = False

# .eq(True) yields a plain boolean mask even when the column has missing values.
is_own_goal = goals['own_goal'].eq(True)
goals['point_earned'] = np.where(is_own_goal & EXCLUDE_OWN_GOALS, 0, 1)

In [4]:
# Per-side goal totals, written only onto the rows scored by that side.
# Rows scored by the other side are not part of the grouped subset, so index
# alignment on assignment leaves them as NaN.
# NOTE(review): the grouping key is (date, side) without the opponent, so two
# fixtures of one side on the same date would be pooled - confirm this cannot occur.
for team_col, total_col in (('home_team', 'home_goals'), ('away_team', 'away_goals')):
    scored_by_side = goals['team'] == goals[team_col]
    side_rows = goals.loc[scored_by_side]
    goals[total_col] = side_rows.groupby(['date', team_col])['point_earned'].transform('sum')

In [5]:
# Give every row a total for both sides.
# Missing totals (NaN) become 0 and the column is cast to int; taking the
# maximum within each (date, team) group then copies the real total onto the
# rows that were just filled with 0.
for team_col, total_col in (('home_team', 'home_goals'), ('away_team', 'away_goals')):
    goals[total_col] = goals[total_col].fillna(0).astype(int)
    goals[total_col] = goals.groupby(['date', team_col])[total_col].transform('max')

In [6]:
# Name of the side with more goals on each row, or 'tie' when the totals are equal.
away_ahead = goals['away_goals'] > goals['home_goals']
home_ahead = goals['away_goals'] < goals['home_goals']
goals['winner'] = np.select(
    [away_ahead, home_ahead],
    [goals['away_team'], goals['home_team']],
    default='tie',
)

In [7]:
# Label the winner as 'Home' or 'Away'; rows whose winner matches neither side get 'tie'.
home_won = goals['winner'] == goals['home_team']
away_won = goals['winner'] == goals['away_team']
goals['winner_team'] = np.select([home_won, away_won], ['Home', 'Away'], default='tie')

In [8]:
# Rename `team` to `scorer_team` so it reads unambiguously next to home_team / away_team.
goals = goals.rename({'team': 'scorer_team'}, axis='columns')

In [9]:
# Parse `date` from text into datetime values.
# The format is pinned to month/day/year: the saved output of this notebook shows
# '7/2/1916' becoming 1916-07-02. Without an explicit format the day/month order is
# left to inference, which can misread ambiguous dates depending on the pandas version.
# NOTE(review): assumes every row uses M/D/YYYY - a row in another layout now raises
# ValueError instead of being guessed at; confirm against the raw file.
goals['date'] = pd.to_datetime(goals['date'], format='%m/%d/%Y')

In [10]:
# Preview the first five rows of the cleaned frame, including the derived columns.
goals.head(n=5)

Unnamed: 0,date,home_team,away_team,scorer_team,scorer,minute,own_goal,penalty,point_earned,home_goals,away_goals,winner,winner_team
0,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False,1,0,4,Uruguay,Away
1,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False,1,0,4,Uruguay,Away
2,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False,1,0,4,Uruguay,Away
3,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False,1,0,4,Uruguay,Away
4,1916-07-06,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False,1,6,1,Argentina,Home


In [11]:
# Write the cleaned frame to disk; index=False keeps the row index out of the file.
cleaned_goals_path = 'goals.csv'
goals.to_csv(cleaned_goals_path, index=False)