<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Read-in-Data" data-toc-modified-id="Read-in-Data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Read in Data</a></span></li><li><span><a href="#Add-team-scored/conceeded-column" data-toc-modified-id="Add-team-scored/conceeded-column-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Add team scored/conceeded column</a></span></li><li><span><a href="#Pick-columns-to-calculate-form" data-toc-modified-id="Pick-columns-to-calculate-form-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Pick columns to calculate form</a></span></li><li><span><a href="#Calculate-response-function" data-toc-modified-id="Calculate-response-function-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Calculate response function</a></span></li><li><span><a href="#Loop-through-data-table" data-toc-modified-id="Loop-through-data-table-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Loop through data table</a></span></li></ul></div>

# Add Form

## Read in Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Load the player performance table produced by the previous step and
# preview its first rows.
pp = pd.read_csv('../../data/csv/player_performance_01.csv')
pp.head()

## Add team scored/conceeded column

In [None]:
# Goals scored / conceded by the player's own team in each fixture.
#
# "was_home" tells us which side of the score line belongs to the player's
# team: the home score when the player was at home, otherwise the away score.
# The flag is interpreted by truthiness, exactly as an `if` statement would.
is_home = pp["was_home"].astype(bool)

# Select per row in one vectorised step rather than looping over the index;
# this also avoids using index labels as array positions, which is only
# correct when the index is the default 0..n-1 range.
team_scored = (
    pp["team_h_score"].where(is_home, pp["team_a_score"]).astype('int').to_numpy()
)
team_conceded = (
    pp["team_a_score"].where(is_home, pp["team_h_score"]).astype('int').to_numpy()
)

# Assign positionally so the result does not depend on index alignment.
pp["team_scored"] = team_scored
pp["team_conceded"] = team_conceded

pp.head()

## Pick columns to calculate form

In [None]:
# List every column name so the form columns can be picked below.
print(pp.columns.tolist())

In [None]:
# Columns for which a form value is calculated in the loop further down.
form_on_columns = [
    "total_points", "minutes", "goals_scored", "assists",
    "clean_sheets", "goals_conceded", "own_goals",
    "penalties_saved", "penalties_missed",
    "yellow_cards", "red_cards", "saves", "bonus", "bps",
    "influence", "creativity", "threat", "ict_index", "ea_index",
    "open_play_crosses", "big_chances_created",
    "clearances_blocks_interceptions", "recoveries",
    "key_passes", "tackles", "winning_goals",
    "attempted_passes", "completed_passes",
    "penalties_conceded", "big_chances_missed",
    "errors_leading_to_goal", "errors_leading_to_goal_attempt",
    "tackled", "offside", "target_missed", "fouls",
    "dribbles", "value",
    "team_scored", "team_conceded", "transfers_balance",
]

# Number of columns that will have form calculated.
len(form_on_columns)

## Calculate response function

In [None]:
# normpdf function
import math
def normpdf(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the density is evaluated.
    mu : scalar
        Mean of the distribution.
    sigma : scalar
        Standard deviation of the distribution (only ``sigma**2`` is used).

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    variance = sigma**2
    scale = 1/np.sqrt(2*math.pi*variance)
    exponent = -((x-mu)**2)/(2*variance)
    return scale*np.exp(exponent)

# Define the form response: how strongly each of the previous games counts.
nGames = 3  # number of previous games to consider
sigma = 1  # fall-off from the most recent game; a very large sigma approaches a plain average

# Game offsets: 1 = most recent game. `games_fill` is a finer grid over the
# same range, used only to draw a smooth curve.
games = np.arange(1, nGames + 1)
games_fill = np.linspace(1, nGames, 41)

# Response: normal density centred on the most recent game.
response = normpdf(games, 1, sigma)
response_fill = normpdf(games_fill, 1, sigma)

# Normalise each curve by its own trapezoidal integral.
# (The fine-grid line previously referenced the misspelled, undefined name
# `reponse_fill`, which raised a NameError.)
norm_response = response/np.trapz(response, games)
norm_response_fill = response_fill/np.trapz(response_fill, games_fill)

# Plot the smooth curve and mark the weights at the integer game offsets.
plt.plot(games_fill, norm_response_fill);
plt.xticks(games);
plt.scatter(games, norm_response);
plt.ylim(0,1);
plt.xlabel('Number of Games Previous');
plt.ylabel('Relative Weighting');


def formResponseFunction(form, response):
    """Weighted average of recent values using the response curve as weights.

    NaN entries in ``form`` (games that have not happened yet) are ignored:
    they contribute neither to the weighted sum nor to the weight total.

    Parameters
    ----------
    form : array-like of float
        Values from previous games, most recent first; may contain NaN.
    response : array-like of float
        Weight for each position in ``form`` (same length).

    Returns
    -------
    float
        The weighted mean of the non-NaN values, or 0 when there is nothing
        to average (no non-NaN value carries any weight).
    """
    values = np.asarray(form, dtype=float)
    observed = ~np.isnan(values)

    # Total weight of the entries that are actually present.
    norm = np.sum(observed * response)

    # Guard on the weight total rather than on the sum of the values: values
    # that cancel out (e.g. +5 and -5) still have a well-defined weighted
    # mean, whereas a zero weight total would be a division by zero.
    if norm == 0:
        return 0.0

    # Pass the fill value by keyword; the second positional argument of
    # nan_to_num is `copy`, not the replacement for NaN.
    filled = np.nan_to_num(values, nan=0.0)
    return np.sum(filled * response / norm)

## Loop through data table

There are {{len(form_on_columns)}} columns for which form will be calculated. Form is calculated separately for home games and away games, and also as an overall form across all games. Additionally, form can be calculated using three different methods: mean, mode, and weighted mean; the loop below uses the weighted mean given by the response function above. This means there will be {{len(form_on_columns)*3}} new columns.

In [None]:
# Build three form columns (overall / home / away) for every entry in
# form_on_columns. The form on a given row is the weighted average of the
# player's previous `nGames` values, so it never includes the row itself.
# NOTE(review): this assumes each player's rows appear in chronological
# order in `pp` -- confirm against the upstream data preparation.

# Get player ids to consider
playerIDs = np.unique(pp["id"])

# Row positions belonging to each player, computed once and reused for every
# column instead of re-filtering the whole table per (column, player) pair.
# Positions (not index labels) are used so the arrays below are indexed
# correctly whatever the DataFrame index is.
player_rows = pp.groupby("id").indices

# Home/away flag per row, interpreted by truthiness; it does not change
# between iterations, so it is taken out of the loops.
is_home = pp["was_home"].astype(bool).to_numpy()

# loop through columns
for j in form_on_columns:

    column_values = pp[j].to_numpy()

    # empty columns to store form data
    tmp_form = np.zeros(len(pp), dtype='float')
    tmp_home_form = np.zeros(len(pp), dtype='float')
    tmp_away_form = np.zeros(len(pp), dtype='float')

    for i in playerIDs:

        # Most-recent-first stores of the last nGames values; NaN marks a
        # slot with no game yet (np.nan is the canonical NumPy spelling).
        form_store = [np.nan] * nGames
        form_home_store = [np.nan] * nGames
        form_away_store = [np.nan] * nGames

        # loop through all rows of player i
        for k in player_rows.get(i, ()):

            # calculate form from the games *before* this row
            tmp_form[k] = formResponseFunction(form_store, response)
            tmp_home_form[k] = formResponseFunction(form_home_store, response)
            tmp_away_form[k] = formResponseFunction(form_away_store, response)

            value = column_values[k]

            # push the new value to the front and drop the oldest one
            form_store.insert(0, value)
            form_store.pop()

            # only the store matching this game's venue is advanced
            venue_store = form_home_store if is_home[k] else form_away_store
            venue_store.insert(0, value)
            venue_store.pop()

    # Create new columns (assigned positionally)
    pp[j+"_overall_form"] = tmp_form
    pp[j+"_home_form"] = tmp_home_form
    pp[j+"_away_form"] = tmp_away_form

In [None]:
# Save the table, now including the form columns, without writing the index.
pp.to_csv(r'../../data/csv/player_performance_02.csv', index=False, index_label=False)

In [None]:
# Columns dropped from the reduced table written out in the next cell.
columns_to_remove = [
    "minutes", "goals_scored", "assists", "clean_sheets",
    "goals_conceded", "own_goals",
    "penalties_saved", "penalties_missed",
    "yellow_cards", "red_cards", "saves", "bonus", "bps",
    "influence", "creativity", "threat", "ict_index", "ea_index",
    "open_play_crosses", "big_chances_created",
    "clearances_blocks_interceptions", "recoveries",
    "key_passes", "tackles", "winning_goals",
    "attempted_passes", "completed_passes",
    "penalties_conceded", "big_chances_missed",
    "errors_leading_to_goal", "errors_leading_to_goal_attempt",
    "tackled", "offside", "target_missed", "fouls",
    "dribbles", "value",
    "team_a_score", "team_h_score",
    "transfers_balance", "selected", "transfers_in", "transfers_out",
    "loaned_in", "loaned_out", "kickoff_time",
]

# drop() returns a new frame, so `pp` itself is left untouched.
pp02 = pp.drop(columns=columns_to_remove)

In [None]:
# Save the reduced table without writing the index.
pp02.to_csv(r'../../data/csv/player_performance_03.csv', index=False, index_label=False)

In [None]:
# Show the reduced table as the cell output.
pp02