# Running Team Points Total

This script will look at the fixtures, assess the results and award teams the appropriate points. This will also calculate home and away results.

#### Import Packages

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import scipy.stats

#### Read files

In [2]:
# Fixture list: one row per match, with kickoff time, home/away team ids and scores
fixtures_csv = '../../data/csv/fixtures.csv'
fixtures = pd.read_csv(fixtures_csv)
fixtures.head()

Unnamed: 0,id,event,kickoff_time,team_h,team_a,team_h_score,team_a_score
0,0,1,2018-08-12 15:00:00+00:00,0,12,0,2
1,1,1,2018-08-11 14:00:00+00:00,1,4,2,0
2,2,1,2018-08-11 14:00:00+00:00,8,6,0,2
3,3,1,2018-08-11 14:00:00+00:00,9,5,0,3
4,4,1,2018-08-12 12:30:00+00:00,11,18,4,0


In [3]:
# Team reference table; the row position is the team id used in the fixture list
teams_csv = '../../data/csv/teams.csv'
teams = pd.read_csv(teams_csv)
teams.head()

Unnamed: 0,name,short_name,strength,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,strength_overall_home,strength_overall_away
0,Arsenal,ARS,4,1240,1270,1310,1340,1260,1320
1,Bournemouth,BOU,3,1040,1100,1120,1130,1030,1130
2,Brighton,BHA,2,1040,1140,1010,1070,1030,1050
3,Burnley,BUR,3,990,1030,1000,1040,1070,1100
4,Cardiff,CAR,2,1030,1060,1020,1090,1030,1080


#### Add new columns giving home and away teams points

In [4]:
# number of fixtures
nfix = len(fixtures)

home_goals = fixtures['team_h_score']
away_goals = fixtures['team_a_score']

# A fixture with a missing score is treated as having no result yet. Without
# this check a NaN score compares False both ways and would be scored as a draw.
has_result = (home_goals.notnull() & away_goals.notnull()).values
home_win = (home_goals > away_goals).values
away_win = (home_goals < away_goals).values

# 3 points for a win, 1 each for a draw, 0 for a defeat or a fixture without a result.
# np.select takes the first matching condition, so the "no result" case wins.
team_h_points = np.select([~has_result, home_win, away_win], [0, 3, 0], default=1)
team_a_points = np.select([~has_result, away_win, home_win], [0, 3, 0], default=1)

# Arrays are in fixture row order, so they can be attached positionally
fixtures['team_h_points'] = team_h_points
fixtures['team_a_points'] = team_a_points
fixtures.head(10)

Unnamed: 0,id,event,kickoff_time,team_h,team_a,team_h_score,team_a_score,team_h_points,team_a_points
0,0,1,2018-08-12 15:00:00+00:00,0,12,0,2,0,3
1,1,1,2018-08-11 14:00:00+00:00,1,4,2,0,3,0
2,2,1,2018-08-11 14:00:00+00:00,8,6,0,2,0,3
3,3,1,2018-08-11 14:00:00+00:00,9,5,0,3,0,3
4,4,1,2018-08-12 12:30:00+00:00,11,18,4,0,3,0
5,5,1,2018-08-10 19:00:00+00:00,13,10,2,1,3,0
6,6,1,2018-08-11 11:30:00+00:00,14,16,1,2,0,3
7,7,1,2018-08-12 12:30:00+00:00,15,3,0,0,1,1
8,8,1,2018-08-11 14:00:00+00:00,17,2,2,0,3,0
9,9,1,2018-08-11 16:30:00+00:00,19,7,2,2,1,1


In [5]:
# Persist the fixture table with its new points columns; the row index is not written
fixtures.to_csv(
    r'../../data/csv/fixtures_with_points.csv',
    index=False,
    index_label=False,
)

#### Find running point totals (home and away)

In [6]:
# Per-team columns, collected first and turned into data frames once at the end
short_names = []
fixture_datetime_columns = {}
points_columns = {}
points_home_columns = {}
points_away_columns = {}

# Loop through teams; the team count comes from the teams table rather than a fixed 20
for i in range(len(teams)):

    short_name = teams.loc[i, 'short_name']
    short_names.append(short_name)

    # Isolate one team's fixtures, in kickoff order
    team_all_fixtures_index = (fixtures['team_h'] == i) | (fixtures['team_a'] == i)
    team_fixtures = (
        fixtures.loc[team_all_fixtures_index, :]
        .sort_values(by=['kickoff_time'])
        .reset_index(drop=True)
    )

    # Points earned in each fixture, split by venue. The array length follows the
    # number of fixtures found, so a team with fewer than 38 fixtures no longer fails.
    at_home = (team_fixtures['team_h'] == i).values
    team_points_home = np.where(at_home, team_fixtures['team_h_points'].values, 0)
    team_points_away = np.where(at_home, 0, team_fixtures['team_a_points'].values)
    team_points = team_points_home + team_points_away

    # Store this team's kickoff times and cumulative totals under its short name
    fixture_datetime_columns[short_name] = team_fixtures['kickoff_time']
    points_columns[short_name] = pd.Series(np.cumsum(team_points))
    points_home_columns[short_name] = pd.Series(np.cumsum(team_points_home))
    points_away_columns[short_name] = pd.Series(np.cumsum(team_points_away))

# One column per team, in teams-table order; one row per fixture in kickoff order
team_fixture_datetime = pd.DataFrame(fixture_datetime_columns, columns=short_names)
running_league_points = pd.DataFrame(points_columns, columns=short_names)
running_league_points_home = pd.DataFrame(points_home_columns, columns=short_names)
running_league_points_away = pd.DataFrame(points_away_columns, columns=short_names)

Save new dataframes to csv files

In [7]:
# Write each per-team table to its own CSV; the row index is written as the first column
csv_outputs = [
    (team_fixture_datetime, r'../../data/csv/fixture_datetimes.csv'),
    (running_league_points, r'../../data/csv/running_league_points.csv'),
    (running_league_points_home, r'../../data/csv/running_league_points_home.csv'),
    (running_league_points_away, r'../../data/csv/running_league_points_away.csv'),
]
for table, csv_path in csv_outputs:
    table.to_csv(csv_path)

Create a function to return the number of league points a team had on a certain day, prior to any games on that day.

In [8]:
def teamLeaguePointsOnDate(teamid, fixture_date):
    """Return the league points each team held just before a given kickoff.

    For every (team, kickoff) pair, the team's fixtures with a strictly earlier
    kickoff are counted and the running total after that many fixtures is read
    from running_league_points.csv. A fixture kicking off at exactly the given
    time is therefore not included.

    Parameters
    ----------
    teamid : int or array-like of int
        Row label(s) of the team(s) in teams.csv.
    fixture_date : str, datetime or array-like
        Kickoff time(s), paired element-wise with ``teamid`` and parsed as UTC.

    Returns
    -------
    numpy.ndarray of int
        One entry per pair; 0 when the team has no earlier fixture.

    Raises
    ------
    ValueError
        If ``teamid`` and ``fixture_date`` have different lengths.
    """
    datetime_format = '%Y-%m-%d %H:%M:%S'

    # Work positionally so scalars, lists and Series with any index are accepted
    team_ids = np.atleast_1d(np.asarray(teamid))
    if np.ndim(fixture_date) == 0:
        fixture_date = [fixture_date]
    query_times = pd.DatetimeIndex(
        pd.to_datetime(fixture_date, format=datetime_format, utc=True)
    ).tz_convert(None).values
    if len(team_ids) != len(query_times):
        raise ValueError('teamid and fixture_date must have the same length')

    # Read files. index_col=0 keeps the saved row index out of the team columns.
    teams = pd.read_csv('../../data/csv/teams.csv')
    team_fixture_datetime = pd.read_csv('../../data/csv/fixture_datetimes.csv', index_col=0)
    running_league_points = pd.read_csv('../../data/csv/running_league_points.csv', index_col=0)

    answer = np.zeros(len(team_ids), dtype='int')

    # Resolve all rows belonging to one team together instead of row by row
    for team in np.unique(team_ids):
        team_name = teams.loc[team, 'short_name']
        rows = np.flatnonzero(team_ids == team)

        # This team's kickoff times as naive UTC datetimes; missing times are ignored
        team_times = pd.DatetimeIndex(
            pd.to_datetime(team_fixture_datetime[team_name], format=datetime_format, utc=True)
        ).tz_convert(None).values
        team_times = team_times[~np.isnat(team_times)]

        # Count of the team's fixtures that kicked off strictly before each query time
        asked = query_times[rows]
        n_earlier = (asked[:, None] > team_times[None, :]).sum(axis=1)
        n_earlier[np.isnat(asked)] = 0

        # Running total after the last earlier fixture; rows with none stay at 0
        totals = running_league_points[team_name].values
        has_earlier = n_earlier > 0
        answer[rows[has_earlier]] = totals[n_earlier[has_earlier] - 1]

    return answer

In [9]:
def teamLeagueHomePointsOnDate(teamid, fixture_date):
    """Return the home league points each team held just before a given kickoff.

    For every (team, kickoff) pair, the team's fixtures (home and away) with a
    strictly earlier kickoff are counted and the running home-points total after
    that many fixtures is read from running_league_points_home.csv. A fixture
    kicking off at exactly the given time is therefore not included.

    Parameters
    ----------
    teamid : int or array-like of int
        Row label(s) of the team(s) in teams.csv.
    fixture_date : str, datetime or array-like
        Kickoff time(s), paired element-wise with ``teamid`` and parsed as UTC.

    Returns
    -------
    numpy.ndarray of int
        One entry per pair; 0 when the team has no earlier fixture.

    Raises
    ------
    ValueError
        If ``teamid`` and ``fixture_date`` have different lengths.
    """
    datetime_format = '%Y-%m-%d %H:%M:%S'

    # Work positionally so scalars, lists and Series with any index are accepted
    team_ids = np.atleast_1d(np.asarray(teamid))
    if np.ndim(fixture_date) == 0:
        fixture_date = [fixture_date]
    query_times = pd.DatetimeIndex(
        pd.to_datetime(fixture_date, format=datetime_format, utc=True)
    ).tz_convert(None).values
    if len(team_ids) != len(query_times):
        raise ValueError('teamid and fixture_date must have the same length')

    # Read files. index_col=0 keeps the saved row index out of the team columns.
    teams = pd.read_csv('../../data/csv/teams.csv')
    team_fixture_datetime = pd.read_csv('../../data/csv/fixture_datetimes.csv', index_col=0)
    running_league_points = pd.read_csv('../../data/csv/running_league_points_home.csv', index_col=0)

    answer = np.zeros(len(team_ids), dtype='int')

    # Resolve all rows belonging to one team together instead of row by row
    for team in np.unique(team_ids):
        team_name = teams.loc[team, 'short_name']
        rows = np.flatnonzero(team_ids == team)

        # This team's kickoff times as naive UTC datetimes; missing times are ignored
        team_times = pd.DatetimeIndex(
            pd.to_datetime(team_fixture_datetime[team_name], format=datetime_format, utc=True)
        ).tz_convert(None).values
        team_times = team_times[~np.isnat(team_times)]

        # Count of the team's fixtures that kicked off strictly before each query time
        asked = query_times[rows]
        n_earlier = (asked[:, None] > team_times[None, :]).sum(axis=1)
        n_earlier[np.isnat(asked)] = 0

        # Running home total after the last earlier fixture; rows with none stay at 0
        totals = running_league_points[team_name].values
        has_earlier = n_earlier > 0
        answer[rows[has_earlier]] = totals[n_earlier[has_earlier] - 1]

    return answer

In [10]:
def teamLeagueAwayPointsOnDate(teamid, fixture_date):
    """Return the away league points each team held just before a given kickoff.

    For every (team, kickoff) pair, the team's fixtures (home and away) with a
    strictly earlier kickoff are counted and the running away-points total after
    that many fixtures is read from running_league_points_away.csv. A fixture
    kicking off at exactly the given time is therefore not included.

    Parameters
    ----------
    teamid : int or array-like of int
        Row label(s) of the team(s) in teams.csv.
    fixture_date : str, datetime or array-like
        Kickoff time(s), paired element-wise with ``teamid`` and parsed as UTC.

    Returns
    -------
    numpy.ndarray of int
        One entry per pair; 0 when the team has no earlier fixture.

    Raises
    ------
    ValueError
        If ``teamid`` and ``fixture_date`` have different lengths.
    """
    datetime_format = '%Y-%m-%d %H:%M:%S'

    # Work positionally so scalars, lists and Series with any index are accepted
    team_ids = np.atleast_1d(np.asarray(teamid))
    if np.ndim(fixture_date) == 0:
        fixture_date = [fixture_date]
    query_times = pd.DatetimeIndex(
        pd.to_datetime(fixture_date, format=datetime_format, utc=True)
    ).tz_convert(None).values
    if len(team_ids) != len(query_times):
        raise ValueError('teamid and fixture_date must have the same length')

    # Read files. index_col=0 keeps the saved row index out of the team columns.
    teams = pd.read_csv('../../data/csv/teams.csv')
    team_fixture_datetime = pd.read_csv('../../data/csv/fixture_datetimes.csv', index_col=0)
    running_league_points = pd.read_csv('../../data/csv/running_league_points_away.csv', index_col=0)

    answer = np.zeros(len(team_ids), dtype='int')

    # Resolve all rows belonging to one team together instead of row by row
    for team in np.unique(team_ids):
        team_name = teams.loc[team, 'short_name']
        rows = np.flatnonzero(team_ids == team)

        # This team's kickoff times as naive UTC datetimes; missing times are ignored
        team_times = pd.DatetimeIndex(
            pd.to_datetime(team_fixture_datetime[team_name], format=datetime_format, utc=True)
        ).tz_convert(None).values
        team_times = team_times[~np.isnat(team_times)]

        # Count of the team's fixtures that kicked off strictly before each query time
        asked = query_times[rows]
        n_earlier = (asked[:, None] > team_times[None, :]).sum(axis=1)
        n_earlier[np.isnat(asked)] = 0

        # Running away total after the last earlier fixture; rows with none stay at 0
        totals = running_league_points[team_name].values
        has_earlier = n_earlier > 0
        answer[rows[has_earlier]] = totals[n_earlier[has_earlier] - 1]

    return answer

#### Add new column to element-summary of team running points

In [11]:
# Player-by-fixture table that the running team points are joined onto
data01_csv = '../../data/csv/data01.csv'
data01 = pd.read_csv(data01_csv)
data01.head()

Unnamed: 0,element_name,element,element_type,fixture,kickoff_time,was_home,team,opponent_team,minutes,total_points
0,Cech,0,0,0,2018-08-12T15:00:00Z,True,0,12,90,3
1,Cech,0,0,13,2018-08-18T16:30:00Z,False,0,5,90,3
2,Cech,0,0,20,2018-08-25T14:00:00Z,True,0,18,90,3
3,Cech,0,0,32,2018-09-02T12:30:00Z,False,0,4,90,1
4,Cech,0,0,45,2018-09-15T14:00:00Z,False,0,14,90,2


Loop through rows and apply functions for home/away/overall team league points

In [12]:
# Number of rows in data table
nrows = len(data01)

# empty array for running points, home points, away points
running_team_points = teamLeaguePointsOnDate(data01.loc[:,'team'], data01.loc[:,'kickoff_time'])
running_team_points_home = teamLeagueHomePointsOnDate(data01.loc[:,'team'], data01.loc[:,'kickoff_time'])
running_team_points_away = teamLeagueAwayPointsOnDate(data01.loc[:,'team'], data01.loc[:,'kickoff_time'])

running_opponent_team_points = teamLeaguePointsOnDate(data01.loc[:,'opponent_team'], data01.loc[:,'kickoff_time'])
running_opponent_team_points_home = teamLeagueHomePointsOnDate(data01.loc[:,'opponent_team'], data01.loc[:,'kickoff_time'])
running_opponent_team_points_away = teamLeagueAwayPointsOnDate(data01.loc[:,'opponent_team'], data01.loc[:,'kickoff_time'])

data01['running_team_points'] = pd.Series(running_team_points)
data01['running_team_points_home'] = pd.Series(running_team_points_home)
data01['running_team_points_away'] = pd.Series(running_team_points_away)

data01['running_opponent_team_points'] = pd.Series(running_opponent_team_points)
data01['running_opponent_team_points_home'] = pd.Series(running_opponent_team_points_home)
data01['running_opponent_team_points_away'] = pd.Series(running_opponent_team_points_away)


In [24]:
# Spot check: show the 20 rows labelled 9520 to 9539
a = np.arange(9520, 9540)
data01.loc[a, :]

Unnamed: 0,element_name,element,element_type,fixture,kickoff_time,was_home,team,opponent_team,minutes,total_points,running_team_points,running_team_points_home,running_team_points_away,running_opponent_team_points,running_opponent_team_points_home,running_opponent_team_points_away,total_points_difference,relative_points_difference
9520,Mane,250,2,206,2019-01-03T20:00:00Z,False,11,12,76,2,54,28,26,47,27,20,7,-1
9521,Mane,250,2,210,2019-01-12T15:00:00Z,False,11,2,89,3,54,28,26,26,18,8,28,8
9522,Mane,250,2,224,2019-01-19T15:00:00Z,True,11,6,90,7,57,28,29,22,9,13,35,15
9523,Mane,250,2,236,2019-01-30T20:00:00Z,True,11,10,90,9,60,31,29,31,14,17,29,14
9524,Mane,250,2,249,2019-02-04T20:00:00Z,False,11,18,90,8,61,32,29,31,17,14,30,12
9525,Mane,250,2,254,2019-02-09T15:00:00Z,True,11,1,86,8,62,32,30,33,24,9,29,23
9526,Mane,250,2,267,2019-02-24T14:05:00Z,False,11,13,90,3,65,35,30,51,25,26,14,5
9527,Mane,250,2,276,2019-02-27T20:00:00Z,True,11,17,77,13,66,35,31,40,20,20,26,15
9528,Mane,250,2,283,2019-03-03T16:15:00Z,False,11,7,83,3,69,38,31,36,21,15,33,10
9529,Mane,250,2,296,2019-03-10T12:00:00Z,True,11,3,90,14,70,38,32,30,17,13,40,25


Find the difference in league points between the two teams.

In [14]:
# League-points gap: the player's team total minus the opponent's total
team_total = data01['running_team_points']
opponent_total = data01['running_opponent_team_points']
data01['total_points_difference'] = team_total.sub(opponent_total)

If a team is playing at home, find the difference between its home points and the opponent's away points. If the team is playing away, find the difference between its away points and the opponent's home points. Call this the relative points difference.

In [23]:
# Gap when the player's team is at home: its home points minus the opponent's away points
home_gap = data01['running_team_points_home'] - data01['running_opponent_team_points_away']
# Gap when the player's team is away: its away points minus the opponent's home points
away_gap = data01['running_team_points_away'] - data01['running_opponent_team_points_home']

# Pick the gap matching each row's venue in one vectorized step. This replaces the
# per-row loop and no longer needs the row count computed in an earlier cell.
played_at_home = data01['was_home'].astype(bool).values
relative_points_difference = np.where(
    played_at_home, home_gap.values, away_gap.values
).astype('int')

# Values are in row order, so they are attached positionally
data01['relative_points_difference'] = relative_points_difference

Save most recent table

In [53]:
# Save the enriched player table as the next stage file; the row index is not written
data01.to_csv(
    r'../../data/csv/data02.csv',
    index=False,
    index_label=False,
)