# Imports

In [1]:
import pandas as pd
import time
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt

import utilities.densmore_v3 as dns

In [2]:
df = pd.read_csv('../data/api_football_data/05_merged_data/elo_merged_data_names_short.csv')

In [3]:
df.head()

Unnamed: 0,result,game_id,season,game_date,h_name,h_id,h_goals,h_possession,h_pass_acc,a_name,a_id,a_goals,a_possession,a_pass_acc
0,away,148307,2016,2016-03-06,nyrb,1602,0,0.63,0.72,tfc,1601,2,0.37,0.58
1,away,148308,2016,2016-03-06,chi,1607,3,0.39,0.66,nyc,1604,4,0.61,0.78
2,draw,148309,2016,2016-03-06,orl,1598,2,0.52,0.8,rsl,1606,2,0.48,0.76
3,home,148310,2016,2016-03-06,sje,1596,1,0.41,0.71,cor,1610,0,0.59,0.8
4,draw,148311,2016,2016-03-06,hou,1600,3,0.52,0.75,ner,1609,3,0.48,0.73


# SETUP

### ELO Model Equation

**Where:**  

|           Ratings                  | Stats | Coefficients|
|:-----------------------------------|:------------------------------|:------------|
| $R_{old}$ = Team's previous rating | $GF - GA$ = Goal differential | $K$ = Impact of all terms on team's new rating |
| $R_{new}$ = Team's new rating      | $R_{opp} - R_{old}$ = Matchup difficulty | $C$ = Spread of all ratings |
| $R_{opp}$ = Opponent's rating      |  |    |
|                                    |  |    |

**FIRST GAME OF SEASON**
$$\large R_{new} = R_{start} + K \begin{pmatrix} GF - GA + \frac{R_{opp} - R_{old}}{C} \end{pmatrix} $$

**SUBSEQUENT GAMES**

$$\large R_{new} = R_{old} + K \begin{pmatrix} GF - GA + \frac{R_{opp} - R_{old}}{C} \end{pmatrix} $$

```python
    new_h_rating = h_rating + K * (h_goals - a_goals + ((a_rating - h_rating) / C))
    new_a_rating = a_rating + K * (a_goals - h_goals + ((h_rating - a_rating) / C))
````

### Reference Variables

In [4]:
teams = ['atl', 'chi', 'cin', 'cor', 'crw', 'dcu', 
         'fcd', 'hou', 'lafc', 'lag', 'min', 'mtl', 
         'ner', 'nyc', 'nyrb', 'orl', 'phi', 'por', 
         'rsl', 'sea', 'sje', 'skc', 'tfc', 'van']

years = [2016, 2017, 2018, 2019]

### Add Game Numbers by Season

#### Splitt Data by Season

In [5]:
df_2016 = df[df['season'] == 2016].copy()
df_2017 = df[df['season'] == 2017].copy()
df_2018 = df[df['season'] == 2018].copy()
df_2019 = df[df['season'] == 2019].copy()

In [6]:
# df_2016
# df_2017
# df_2018
# df_2019

#### Add Game Numbers

In [7]:
def add_game_nums(season_df):

    game_counts = {team: 1 for team in teams}
    game_nums_by_id = [] 

    for row, data in season_df.iterrows():

        for team, game_num in game_counts.items():
            if team == data['h_name']: 
                game_counts[team] += 1
                data['h_game_num'] = game_num
            elif team == data['a_name']:
                game_counts[team] += 1
                data['a_game_num'] = game_num
            else:
                continue

        game_nums_by_id.append({'game_id': data['game_id'], 
                                'h_game_num': data['h_game_num'], 
                                'a_game_num': data['a_game_num']})

    #     print(f"{data['h_name']}: {data['h_game_num']}", f"{data['a_name']}: {data['a_game_num']}")

    game_nums_df = pd.DataFrame(game_nums_by_id, index=range(len(game_nums_by_id)))
    
    new_df = pd.merge(left=season_df, right=game_nums_df, on='game_id')
    
    return new_df

In [8]:
df_2016 = add_game_nums(df_2016)
df_2017 = add_game_nums(df_2017)
df_2018 = add_game_nums(df_2018)
df_2019 = add_game_nums(df_2019)

In [9]:
df_2016.index = df_2016.index + 1
df_2017.index = df_2017.index + 1
df_2018.index = df_2018.index + 1
df_2019.index = df_2019.index + 1

#### Re-join Tables

In [10]:
df = pd.concat([df_2016, df_2017, df_2018, df_2019], axis=0)

In [11]:
df.drop(columns=['h_possession', 'h_pass_acc', 'a_possession', 'a_pass_acc'], inplace=True)

In [12]:
df.columns

Index(['result', 'game_id', 'season', 'game_date', 'h_name', 'h_id', 'h_goals',
       'a_name', 'a_id', 'a_goals', 'h_game_num', 'a_game_num'],
      dtype='object')

In [13]:
df = df[['result', 'game_id', 'season', 'game_date', 
        'h_name', 'h_id', 'h_goals', 'h_game_num',
        'a_name', 'a_id', 'a_goals', 'a_game_num']]

In [14]:
df.to_csv('../data/api_football_data/05_merged_data/elo_merged_data_ready.csv')

### Setting Up ELO Table

#### Method 1 - As dictionary first, then convert to df

In [15]:
# elo_dict = {'game_num' : 0, 
#             'season' : 0, 
#             'new_false_preds': 0, 
#             'total_false_preds': 0}

# for team in teams:
#     elo_dict[team] = [0]

# elo_table = pd.DataFrame(elo_dict)

# for i in range(1,136):
#     elo_table = elo_table.append(pd.Series(name=i, dtype=int)).fillna(0).astype(int)

# elo_table.index = elo_table.index + 1

In [16]:
# elo_table.head()

#### Method 2 (better) - As Dataframe

In [17]:
elo_table = pd.DataFrame(columns=['new_false_preds', 'total_false_preds'] + [team for team in teams])
for i in range(1,137):
    elo_table = elo_table.append(pd.Series(name=i, dtype=int)).fillna(0).astype(int)
indeces = list(zip([2016]*34 + [2017]*34 + [2018]*34 + [2019]*34, list(range(1,35)) * 4))
elo_table.index = pd.MultiIndex.from_tuples(indeces, names=['year', 'game'])

In [18]:
elo_table.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,new_false_preds,total_false_preds,atl,chi,cin,cor,crw,dcu,fcd,hou,...,nyrb,orl,phi,por,rsl,sea,sje,skc,tfc,van
year,game,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2016,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2016,2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2016,3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2016,4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2016,5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Rating Update Function

In [19]:
def update_ratings(h_rating, h_goals, a_rating, a_goals, K=20, C=200):
    
    if h_rating > a_rating:
        expected_result = 'home'
    elif h_rating < a_rating:
        expected_result = 'away'
    else:
        expected_result = np.random.Generator.choice(['home', 'away'])
            
    if h_goals > a_goals:
        actual_result = 'home'
    elif h_goals < a_goals:
        actual_result = 'away'
    else:
        actual_result = 'draw'
                
    new_h_rating = h_rating + K * (h_goals - a_goals + ((a_rating - h_rating) / C))
    new_a_rating = a_rating + K * (a_goals - h_goals + ((h_rating - a_rating) / C))
    
#     new_h_rating = h_rating + K * (h_goals - a_goals + ((a_rating - h_rating) / C) + D * (h_ball_poss + h_pass_acc))
#     new_a_rating = a_rating + K * (a_goals - h_goals + ((h_rating - a_rating) / C) + D * (a_ball_poss + a_pass_acc))
    
    if expected_result != actual_result:
        false_pred = 1
    else:
        false_pred = 0
        
    return new_h_rating, new_a_rating, false_pred, expected_result, actual_result

In [20]:
def create_elo_table(games=df, K=30, C=300):
    
    log = []
    
    # establish list of team name abbreviations
    teams = ['atl', 'chi', 'cin', 'cor', 'crw', 'dcu', 
             'fcd', 'hou', 'lafc', 'lag', 'min', 'mtl', 
             'ner', 'nyc', 'nyrb', 'orl', 'phi', 'por', 
             'rsl', 'sea', 'sje', 'skc', 'tfc', 'van']

    # create elo table with necessary columns
    elo_table = pd.DataFrame(columns=['new_false_preds', 'total_false_preds'] + [team for team in teams])

    # fill 136 rows with 0s 
    for i in range(1,137):
        elo_table = elo_table.append(pd.Series(name=i, dtype=int)).fillna(0).astype(int) 

    # set up MultiIndex - will allow me to update specific values precisely
    indeces = list(zip([2016]*34 + [2017]*34 + [2018]*34 + [2019]*34, list(range(1,35)) * 4))
    elo_table.index = pd.MultiIndex.from_tuples(indeces, names=['year', 'game'])
    
    # start a running total of false predictions
    false_preds_count = 0
    
    # loop through match data and updating 
    for row, data in games.iterrows():
        # set variables for each game - for easier referencing in code below
        h_team, a_team = data['h_name'], data['a_name']
        h_game_num, a_game_num = data['h_game_num'], data['a_game_num']
        h_goals, a_goals = data['h_goals'], data['a_goals']
        season = data['season']       
        
                        
        # collect each team's previous rating
        try:
            h_prev_rating = elo_table.at[(season, h_game_num-1), h_team]
        except:
            h_prev_rating = 500
        try:    
            a_prev_rating = elo_table.at[(season, a_game_num-1), a_team]
        except: 
            a_prev_rating = 500

        # running 'update_rating' function and saving results
        new_h_rating, new_a_rating, false_pred, expected_result, actual_result = \
        update_rating(h_prev_rating, h_goals, a_prev_rating, a_goals, K, C)

        # update running count of total false predictions
        false_preds_count += false_pred

        # adding results to elo_table if game number is the same for both teams
        if h_game_num == a_game_num:
            game_num = h_game_num
            elo_table.at[(season, game_num), h_team] = new_h_rating
            elo_table.at[(season, game_num), a_team] = new_a_rating
            elo_table.at[(season, game_num), 'new_false_preds'] += false_pred
            elo_table.at[(season, game_num), 'total_false_preds'] = false_preds_count

        # adding results to elo_table if game number is different for each team
        else:
            # home
            elo_table.at[(season, h_game_num), h_team] = new_h_rating
            elo_table.at[(season, h_game_num), 'new_false_preds'] += false_pred
            elo_table.at[(season, h_game_num), 'total_false_preds'] = false_preds_count
            # away
            elo_table.at[(season, a_game_num), a_team] = new_a_rating
            elo_table.at[(season, a_game_num), 'new_false_preds'] += false_pred
            elo_table.at[(season, a_game_num), 'total_false_preds'] = false_preds_count
        
        log.append({'match_id' : data['game_id'],
                    'season' : season,
                    'home_team' : h_team,
                    'home_game_num': h_game_num,
                    'away_team' : a_team,
                    'away_game_num' : a_game_num,
                    'result_expected' : expected_result,
                    'result_actual' : actual_result,
                    'false_pred' : false_pred})
            
    return elo_table, pd.DataFrame(log)

In [21]:
elo_table_filled, elo_table_log = create_elo_table()

NameError: name 'update_rating' is not defined

In [None]:
elo_table_filled.to_csv('../data/api_football_data/05_merged_data/elo_table_filled.csv')
elo_table_log.to_csv('../data/api_football_data/05_merged_data/elo_table_log.csv')

In [None]:
elo_table_log['false_pred'].value_counts(normalize=True)

#### Code Testing

In [None]:
update_rating(723, 3, 438, 0, K=20, C=200)

In [None]:
update_rating(723, 0, 438, 3, K=20, C=200)

In [None]:
723-754.5, 723-634.5, 438-406.5, 438-526.5

## Saved Code

In [None]:
# Realized an easier way to handle first games of the season. Using try/except to set previous ratings.
# When doing it for game 1, 1-1 will be 0, which will raise a 'key' error. If an error occurs for one 
# or both of the teams their previous_rating variable will be set to 500. There shouldnt be errors
# otherwise.

# saving this code for safety

#         # first game - update each team's game 1 using predetermined ratings at the start of each season
#         if h_game_num == 1 or a_game_num == 1:
#             # running 'update_rating' function and saving results
#             new_h_rating, new_a_rating, false_pred, expected_result, actual_result = \
#             update_rating(500, h_goals, 500, a_goals, K, C)
            
            
#             # add results to elo_table if game number is the same for both teams
#             if h_game_num == a_game_num:
#                 game_num = h_game_num
#                 elo_table.at[(season, game_num), h_team] = new_h_rating
#                 elo_table.at[(season, game_num), a_team] = new_a_rating
#                 elo_table.at[(season, game_num), 'new_false_preds'] += false_pred
#                 elo_table.at[(season, game_num), 'total_false_preds'] = false_preds_count
                
#             # add results to elo_table if game numbers aren't the same (when number of teams is odd)
#             else:
#                 # home
#                 elo_table.at[(season, h_game_num), h_team] = new_h_rating
#                 elo_table.at[(season, h_game_num), 'new_false_preds'] += false_pred
#                 elo_table.at[(season, h_game_num), 'total_false_preds'] = false_preds_count
#                 # away
#                 elo_table.at[(season, a_game_num), a_team] = new_a_rating
#                 elo_table.at[(season, a_game_num), 'new_false_preds'] += false_pred
#                 elo_table.at[(season, a_game_num), 'total_false_preds'] = false_preds_count 

### ELO Model Equation

**Where:**  

|  Ratings | Stats | Coefficients|
|:---------|:------|:------------|
| $R_{start}$ = 500 | $Pass\%$ = Pass accuracy percentage      | $K$ = Impact of all terms on team's new rating                |
| $R_{old}$ = Team's previous rating    | $Pos.\%$ = Possession percentage         | $C$ = Spread of all ratings                             |
| $R_{new}$ = Team's new rating         | $GF - GA$ = Goal differential            | $D$ = Impact of misc stats on rating ($Pass\%$ and $Pos.\%$)   |
| $R_{opp}$ = Opponent's rating         | $R_{opp} - R_{old}$ = Matchup difficulty |    |

**FIRST GAME OF SEASON**
$$\large R_{new} = R_{start} + K \begin{pmatrix} GF - GA + \frac{R_{opp} - R_{old}}{C} + D( Pass\% + Pos.\%) \end{pmatrix} $$

**SUBSEQUENT GAMES**

$$\large R_{new} = R_{old} + K \begin{pmatrix} GF - GA + \frac{R_{opp} - R_{old}}{C} + D( Pass\% + Pos.\%) \end{pmatrix} $$