In [1]:
import pandas as pd
from collections import defaultdict

In [2]:
data = pd.read_csv('clean_data.csv', sep='\t', index_col=0, infer_datetime_format=True)
data['Date'] =  pd.to_datetime(data['Date'])
data.drop('index', axis=1, inplace=True)

In [3]:
# Create a new dataframe to store the games with the points of both home and away teams
# coming into the game.
cols = list(data.columns)
cols.append('HP')
cols.append('AP')
data_with_points = pd.DataFrame(columns=cols)

In [4]:
# Iterate over all the seasons
for year in range(2000, 2018):
    
    # Store the current points of each team in the season
    table = defaultdict(int)
    
    # Get the games from the season
    season = data[data['Date'] >= pd.to_datetime(f'{year}-08-01')]
    season = season[season['Date'] <= pd.to_datetime(f'{year+1}-07-01')]
    
    # Create a copy to be iterated over
    season_c = season.copy()
    
    # Fill the home points and away points with -1 placeholders
    season['HP'] = pd.Series(data=[-1] * len(season), index=season.index)
    season['AP'] = pd.Series(data=[-1] * len(season), index=season.index)
    
    # Go over each game in the season, updating the score of each of the teams
    # who played
    for i, row in season_c.iterrows():
        home_team = row['HomeTeam']
        away_team = row['AwayTeam']
        
        if row['FTR'] == 'H':
            table[home_team] += 3
        elif row['FTR'] == 'D':
            table[home_team] += 1
            table[away_team] += 1
        else:
            table[away_team] += 3
        
        # Get the next  games in the season as both teams should
        # play again in this time
        try:
            future = season_c.loc[i+1:i+41]
        except IndexError:
            print(f'Got to the end of the {year} season')
            
        # Find the next game containing the home team and put in their
        # current points total
        try:
            next_for_home = future.index[(future['HomeTeam'] == home_team) | (future['AwayTeam'] == home_team)][0]
            if season.loc[next_for_home]['HomeTeam'] == home_team:
                season.loc[next_for_home,'HP'] = table[home_team]
            else:
                season.loc[next_for_home,'AP'] = table[home_team]
        except IndexError:
            print(f'season {year}')
            print('could not find next home team game')
            print(f'index {i}')
            print(f"{home_team}")
            
        try:
            next_for_away = future.index[(future['HomeTeam'] == away_team) | (future['AwayTeam'] == away_team)][0]
            if season.loc[next_for_away]['HomeTeam'] == away_team:
                season.loc[next_for_away,'HP'] = table[away_team]
            else:
                season.loc[next_for_away,'AP'] = table[away_team]
        except IndexError:
            print(f'season {year}')
            print('could not find next away team game')
            print(f'index {i}')
            print(f"{away_team}")
    data_with_points  = data_with_points.append(season)

season 2000
could not find next home team game
index 350
Charlton
season 2000
could not find next away team game
index 350
Liverpool
season 2000
could not find next home team game
index 351
Coventry
season 2000
could not find next away team game
index 351
Bradford
season 2000
could not find next home team game
index 352
Derby
season 2000
could not find next away team game
index 352
Ipswich
season 2000
could not find next home team game
index 353
Everton
season 2000
could not find next away team game
index 353
Sunderland
season 2000
could not find next home team game
index 354
Leeds
season 2000
could not find next away team game
index 354
Leicester
season 2000
could not find next home team game
index 355
Man City
season 2000
could not find next away team game
index 355
Chelsea
season 2000
could not find next home team game
index 356
Middlesbrough
season 2000
could not find next away team game
index 356
West Ham
season 2000
could not find next home team game
index 357
Newcastle
season 20

season 2007
could not find next home team game
index 2597
Birmingham
season 2007
could not find next away team game
index 2597
Blackburn
season 2007
could not find next home team game
index 2598
Chelsea
season 2007
could not find next away team game
index 2598
Bolton
season 2007
could not find next home team game
index 2599
Derby
season 2007
could not find next away team game
index 2599
Reading
season 2007
could not find next home team game
index 2600
Everton
season 2007
could not find next away team game
index 2600
Newcastle
season 2007
could not find next home team game
index 2601
Middlesbrough
season 2007
could not find next away team game
index 2601
Man City
season 2007
could not find next home team game
index 2602
Portsmouth
season 2007
could not find next away team game
index 2602
Fulham
season 2007
could not find next home team game
index 2603
Sunderland
season 2007
could not find next away team game
index 2603
Arsenal
season 2007
could not find next home team game
index 2604
To

season 2014
could not find next home team game
index 5136
Arsenal
season 2014
could not find next away team game
index 5136
West Brom
season 2014
could not find next home team game
index 5137
Aston Villa
season 2014
could not find next away team game
index 5137
Burnley
season 2014
could not find next home team game
index 5138
Chelsea
season 2014
could not find next away team game
index 5138
Sunderland
season 2014
could not find next home team game
index 5139
Crystal Palace
season 2014
could not find next away team game
index 5139
Swansea
season 2014
could not find next home team game
index 5140
Everton
season 2014
could not find next away team game
index 5140
Tottenham
season 2014
could not find next home team game
index 5141
Hull
season 2014
could not find next away team game
index 5141
Man United
season 2014
could not find next home team game
index 5142
Leicester
season 2014
could not find next away team game
index 5142
QPR
season 2014
could not find next home team game
index 5143
Ma

In [8]:
data_with_points = data_with_points[data_with_points['HP'] != -1]
data_with_points = data_with_points[data_with_points['AP'] != -1]


In [11]:
data_with_points.to_csv(path_or_buf='data_with_points.csv', sep=',', header=True)