# Get points of team before date

The idea of this notebook is to create a table whose rows are dates covering every day of the season and whose columns are the teams. Each value is the number of league points that team has on that date.

In [1]:
# import packages
import pandas as pd
import numpy as np
from datetime import datetime
# Display option: None removes the column limit, so wide DataFrames are shown in full
pd.set_option('display.max_columns', None)

In [2]:
# Load the fixture list and parse kickoff times as timezone-aware (UTC) timestamps
fixtures = (
    pd.read_csv('../data/csv/fixtures.csv')
    .assign(kickoff_time=lambda raw: pd.to_datetime(raw['kickoff_time'], utc=True))
)
fixtures.head()

Unnamed: 0,event,kickoff_time,team_h,team_a,team_h_score,team_a_score,team_h_points,team_a_points
0,1,2018-08-12 15:00:00+00:00,0,12,0,2,0,3
1,1,2018-08-11 14:00:00+00:00,1,4,2,0,3,0
2,1,2018-08-11 14:00:00+00:00,8,6,0,2,0,3
3,1,2018-08-11 14:00:00+00:00,9,5,0,3,0,3
4,1,2018-08-12 12:30:00+00:00,11,18,4,0,3,0


In [3]:
# Load the team table (one row per team) and preview the first rows
teams = pd.read_csv(filepath_or_buffer='../data/csv/teams.csv')
teams.head(n=5)

Unnamed: 0,name,short_name,strength,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,strength_overall_home,strength_overall_away
0,Arsenal,ARS,4,1240,1270,1310,1340,1260,1320
1,Bournemouth,BOU,3,1040,1100,1120,1130,1030,1130
2,Brighton,BHA,2,1040,1140,1010,1070,1030,1050
3,Burnley,BUR,3,990,1030,1000,1040,1070,1100
4,Cardiff,CAR,2,1030,1060,1020,1090,1030,1080


In [4]:
# Season boundaries: earliest and latest kickoff, each truncated to midnight
kickoff_times = fixtures['kickoff_time']
first_day = kickoff_times.min().replace(hour=0, minute=0, second=0)
last_day = kickoff_times.max().replace(hour=0, minute=0, second=0)

# Whole days separating the two boundary dates
season_length = (last_day - first_day).days

print(f"First day of the season:       {first_day.strftime('%Y-%m-%d')}")
print(f"Last day of the season:        {last_day.strftime('%Y-%m-%d')}")
print(f"Days between first and last:   {season_length}")


First day of the season:       2018-08-10
Last day of the season:        2019-05-12
Days between first and last:   275


In [5]:
# Create a table with one row per calendar day of the season (both end dates included).
# An explicit daily frequency is used instead of passing a period count positionally:
# the spacing is then guaranteed to be exactly one day and no longer depends on
# `season_length` being in sync with `first_day` / `last_day`.
date_table = pd.DataFrame(pd.date_range(start=first_day, end=last_day, freq='D'), columns=['date'])
# dict() over a DataFrame maps each column name to its Series, i.e. {'date': <Series of dates>}
date_dict = dict(date_table)
date_table.head()

Unnamed: 0,date
0,2018-08-10 00:00:00+00:00
1,2018-08-11 00:00:00+00:00
2,2018-08-12 00:00:00+00:00
3,2018-08-13 00:00:00+00:00
4,2018-08-14 00:00:00+00:00


In total, there will be three tables which represent
1. Total number of points
2. Total number of HOME points
3. Total number of AWAY points

as a function of date.

In [28]:
# Get number of teams
n_teams = len(teams)

# Get number of fixtures
n_fixtures = len(fixtures)

# Running window size: how many *previous* games are averaged for the form column
wsz = 3

# Loop through teams.
# NOTE: only team 0 is processed here (as in the displayed output below);
# switch to range(n_teams) to cover every team.
for i in range(0, 1):

    # All fixtures involving team i. They are sorted chronologically so the cumulative
    # columns really are "points so far" at each kickoff, independent of CSV row order.
    team_fixtures = (
        fixtures[(fixtures['team_h'] == i) | (fixtures['team_a'] == i)]
        .sort_values('kickoff_time', kind='mergesort')
        .reset_index(drop=True)
    )

    # True where team i is the home side of the fixture
    is_home = team_fixtures['team_h'] == i

    # Points team i earned in each game (home column when at home, away column otherwise)
    team_points_per_game = team_fixtures['team_h_points'].where(is_home, team_fixtures['team_a_points'])

    # Running totals: home-only, away-only and overall (each includes the current game)
    team_h_points_running = team_points_per_game.where(is_home, 0).cumsum()
    team_a_points_running = team_points_per_game.where(~is_home, 0).cumsum()
    team_points_running = team_h_points_running + team_a_points_running

    # Mean points over the previous `wsz` games, EXCLUDING the current one
    # (shift(1) drops the current game from the window). Early in the season fewer
    # games are available, so the mean is taken over whatever exists (min_periods=1);
    # the first game has no history and is reported as 0.
    team_points_mean_last_n = (
        team_points_per_game
        .shift(1)
        .rolling(wsz, min_periods=1)
        .mean()
        .fillna(0)
    )

    # Attach the derived columns once (not once per fixture) and keep only what is needed
    team_fixtures = team_fixtures.assign(
        team_points_per_game=team_points_per_game,
        team_points_running=team_points_running,
        team_h_points_running=team_h_points_running,
        team_a_points_running=team_a_points_running,
        team_points_mean_last_n=team_points_mean_last_n,
    )[['kickoff_time', 'team_points_per_game', 'team_points_running',
       'team_h_points_running', 'team_a_points_running', 'team_points_mean_last_n']]

    # Anything above here is still inside the team loop

team_fixtures.head(5)


Unnamed: 0,kickoff_time,team_points_per_game,team_points_running,team_h_points_running,team_a_points_running,team_points_mean_last_n
0,2018-08-12 15:00:00+00:00,0,0,0,0,0.0
1,2018-08-18 16:30:00+00:00,0,0,0,0,0.0
2,2018-08-25 14:00:00+00:00,3,3,3,0,0.0
3,2018-09-02 12:30:00+00:00,3,6,3,3,1.0
4,2018-09-15 14:00:00+00:00,3,9,3,6,2.0
