<a href="https://colab.research.google.com/github/nescoba/portafolio/blob/main/backfitting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Dependency: run `%pip install itscalledsoccer` once per environment (e.g. on a fresh Colab runtime).

Collecting itscalledsoccer
  Downloading itscalledsoccer-0.1.4-py3-none-any.whl (9.8 kB)
Collecting pandas==1.3.1
  Downloading pandas-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
[K     |████████████████████████████████| 11.5 MB 11.0 MB/s 
[?25hCollecting rapidfuzz==1.9.1
  Downloading rapidfuzz-1.9.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (893 kB)
[K     |████████████████████████████████| 893 kB 61.0 MB/s 
[?25hCollecting CacheControl==0.12.6
  Downloading CacheControl-0.12.6-py2.py3-none-any.whl (19 kB)
Collecting requests==2.25.1
  Downloading requests-2.25.1-py2.py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 7.9 MB/s 
Installing collected packages: requests, rapidfuzz, pandas, CacheControl, itscalledsoccer
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: pandas
    Fo

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline 
# Apply seaborn's default styling to subsequent plots.
sns.set()

In [None]:
import statsmodels.api as sm

  import pandas.util.testing as tm


In [None]:
from itscalledsoccer.client import AmericanSoccerAnalysis

# Module-level API client used by the data-loading functions in this notebook.
# Constructing it gathers the player/team/stadium/manager/referee lists
# (see the progress messages in the cell output).
asa_client = AmericanSoccerAnalysis() 

Gathering all players
Gathering all teams
Gathering all stadia
Gathering all managers
Gathering all referees
Finished initializing client


In [None]:
def construct_time_series_date(team_id, date, games=None):
    """Build a date-ordered, team-centric history of one team's games before `date`.

    Parameters
    ----------
    team_id
        Identifier matched against the `home_team_id` and `away_team_id` columns.
    date
        Cut-off: only games whose parsed `date_time_utc` is strictly earlier
        are kept. It has to be comparable with the parsed timestamps
        (pandas refuses to compare tz-aware with tz-naive values).
    games : pandas.DataFrame, optional
        Pre-fetched game table with the columns `date_time_utc`,
        `home_team_id`, `away_team_id`, `home_team_xgoals`,
        `away_team_xgoals`, `home_goals` and `away_goals`. When omitted the
        table is downloaded with `asa_client.get_game_xgoals(leagues='mls')`,
        which is the previous behaviour. Passing it avoids one download per
        call and the frame is never modified.

    Returns
    -------
    pandas.DataFrame
        One row per game with the columns `teamgoals`, `teamxgoals`,
        `oppoxgoals`, `oppogoals` and `date_formated`, sorted by
        `date_formated` (oldest first) with a fresh 0..n-1 index.
    """
    if games is None:
        games = asa_client.get_game_xgoals(leagues='mls')

    # `assign` returns a new frame, so neither the caller's table nor the
    # object handed back by the client is written to.
    games = games.assign(date_formated=pd.to_datetime(games['date_time_utc']))
    games = games[games['date_formated'] < date]

    kept_columns = ['teamgoals', 'teamxgoals', 'oppoxgoals', 'oppogoals', 'date_formated']

    # Re-label the columns from the team's point of view. `rename` creates new
    # frames instead of writing into filtered slices, which is what used to
    # trigger pandas' SettingWithCopyWarning.
    as_home = games[games['home_team_id'] == team_id].rename(columns={
        'home_goals': 'teamgoals',
        'home_team_xgoals': 'teamxgoals',
        'away_team_xgoals': 'oppoxgoals',
        'away_goals': 'oppogoals',
    })[kept_columns]
    as_away = games[games['away_team_id'] == team_id].rename(columns={
        'away_goals': 'teamgoals',
        'away_team_xgoals': 'teamxgoals',
        'home_team_xgoals': 'oppoxgoals',
        'home_goals': 'oppogoals',
    })[kept_columns]

    # The two frames are disjoint sets of rows with identical columns, so they
    # are stacked; an outer merge on every column is not needed for that.
    history = pd.concat([as_home, as_away], ignore_index=True)

    # A stable sort keeps the order of same-timestamp rows deterministic.
    history = history.sort_values(by='date_formated', kind='mergesort')
    return history.reset_index(drop=True)

In [None]:
def normal_with_nans(mean, sd):
    """Draw one sample from Normal(mean, sd), returning NaN instead of raising.

    Parameters
    ----------
    mean, sd
        Location and scale passed straight to `np.random.normal`.

    Returns
    -------
    float
        The sample, or `np.nan` when NumPy rejects the parameters (for
        example a negative `sd` or a value that cannot be converted to a
        number). NaN parameters are not an error for NumPy; they simply
        produce a NaN sample.
    """
    try:
        return np.random.normal(mean, sd)
    except (TypeError, ValueError):
        # Only parameter problems are turned into NaN; KeyboardInterrupt,
        # SystemExit and unrelated failures are no longer swallowed.
        return np.nan

def poisson_with_nans(mean):
    """Draw one Poisson(mean) count, returning 0 when the rate is unusable.

    Parameters
    ----------
    mean
        Rate passed straight to `np.random.poisson`.

    Returns
    -------
    int
        The sampled count, or 0 when NumPy rejects the rate (negative, NaN,
        too large, or not convertible to a number).
    """
    try:
        return np.random.poisson(mean)
    except (TypeError, ValueError):
        # Only parameter problems fall back to 0; KeyboardInterrupt,
        # SystemExit and unrelated failures are no longer swallowed.
        return 0

In [None]:
def probs_game(homeid, awayid, date, window=5, n_simulations=10000):
    """Estimate (home win, draw, away win) probabilities for a fixture by simulation.

    Each side's scoring rate is the average of two noisy estimates taken from
    the games played before `date`: the attacking team's recent xG for and the
    defending team's recent xG against. Each estimate is one draw from a
    normal distribution whose mean and standard deviation are the rolling
    statistics over the last `window` games. Goals are then simulated as
    independent Poisson counts.

    Parameters
    ----------
    homeid, awayid
        Team identifiers forwarded to `construct_time_series_date`.
    date
        Only games strictly before this moment are used.
    window : int, default 5
        Number of most recent games in the rolling mean / standard deviation.
    n_simulations : int, default 10000
        Number of simulated matches; must be positive.

    Returns
    -------
    tuple of float
        `(p_home, p_draw, p_away)`, the simulated outcome frequencies.

    Notes
    -----
    * A team with no earlier games contributes the fixed estimate 1.0.
    * A team with fewer than `window` earlier games has NaN rolling
      statistics, and a normal draw can make the averaged rate negative.
      NumPy rejects such rates, in which case that side scores 0 in every
      simulation (the same fallback `poisson_with_nans` applies).
    """
    if n_simulations <= 0:
        raise ValueError("n_simulations must be a positive integer")

    homedf = construct_time_series_date(homeid, date)
    awaydf = construct_time_series_date(awayid, date)

    def _sample_recent_form(values):
        """Draw once from Normal(rolling mean, rolling std) at the latest game."""
        rolling = values.rolling(window)
        try:
            return float(normal_with_nans(rolling.mean().iloc[-1], rolling.std().iloc[-1]))
        except IndexError:
            # Empty history: there is no "latest game" to read.
            return 1.0

    def _simulate_goals(rate):
        """Draw `n_simulations` Poisson counts, or all zeros for an unusable rate."""
        try:
            return np.random.poisson(rate, size=n_simulations)
        except ValueError:
            return np.zeros(n_simulations, dtype=int)

    # Home side's rate: its own attack blended with the away side's defence.
    home_attack = _sample_recent_form(homedf['teamxgoals'])
    away_defence = _sample_recent_form(awaydf['oppoxgoals'])
    hometoawaypredxgoals = np.mean([home_attack, away_defence])

    # Away side's rate: its own attack blended with the home side's defence.
    away_attack = _sample_recent_form(awaydf['teamxgoals'])
    home_defence = _sample_recent_form(homedf['oppoxgoals'])
    awaytohomepredxgoals = np.mean([away_attack, home_defence])

    home_scores = _simulate_goals(hometoawaypredxgoals)
    away_scores = _simulate_goals(awaytohomepredxgoals)

    n_home = int(np.count_nonzero(home_scores > away_scores))
    n_away = int(np.count_nonzero(home_scores < away_scores))
    n_draw = n_simulations - n_home - n_away

    return (n_home / n_simulations, n_draw / n_simulations, n_away / n_simulations)

In [None]:
def probs_game_from_id(matchid):
    """Return the simulated outcome probabilities for the game `matchid`.

    The game is looked up in `asa_client.get_games(leagues='mls')`; its home
    team, away team and kick-off time are passed on to `probs_game`.

    Parameters
    ----------
    matchid
        Value matched against the `game_id` column.

    Returns
    -------
    tuple of float
        Whatever `probs_game` returns: `(p_home, p_draw, p_away)`.

    Raises
    ------
    IndexError
        If no game has this `game_id`.
    """
    all_games = asa_client.get_games(leagues='mls')
    match = all_games.loc[all_games['game_id'] == matchid]
    if match.empty:
        # Same exception type the `.values[0]` lookup raises, with a usable message.
        raise IndexError(f"no MLS game found with game_id {matchid!r}")

    homeid = match['home_team_id'].values[0]
    awayid = match['away_team_id'].values[0]
    date = pd.to_datetime(match['date_time_utc'].values[0])

    # The result must be returned: without it callers receive None, and
    # np.random.choice(..., p=None) silently falls back to uniform sampling.
    return probs_game(homeid, awayid, date)


In [None]:
def choose_result_from_id(matchid):
    """Randomly pick 'home', 'draw' or 'away' for the game `matchid`.

    The value returned by `probs_game_from_id(matchid)` is handed to
    `np.random.choice` as its `p` argument, in the order home, draw, away.
    If that value is None, NumPy samples the three labels uniformly.
    """
    outcomes = ['home', 'draw', 'away']
    weights = probs_game_from_id(matchid)
    picked = np.random.choice(outcomes, p=weights)
    return picked

In [None]:
# Try the sampler on a single game id; the outcome is random, so re-running
# the cell can give a different label.
choose_result_from_id('9z5knLXjMA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


'draw'

In [None]:
# Element-wise wrapper around choose_result_from_id (a convenience loop, not a
# speed-up). `otypes` is given explicitly: without it np.vectorize calls the
# function one extra time on the first element just to infer the output dtype
# (a wasted simulation here) and it refuses empty input.
v_choose_result_from_id = np.vectorize(choose_result_from_id, otypes=[object])

all_games = asa_client.get_games(leagues='mls')

# One sampled label ('home' / 'draw' / 'away') per game.
all_games.loc[:, 'projected_result'] = v_choose_result_from_id(all_games['game_id'])

# Preview only; displaying the full table floods the notebook output.
all_games.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
def decide_result(game_id, games=None):
    """Return the actual outcome of a game as 'home', 'away' or 'draw'.

    Parameters
    ----------
    game_id
        Value matched against the `game_id` column.
    games : pandas.DataFrame, optional
        Pre-fetched table with the columns `game_id`, `home_goals` and
        `away_goals`. When omitted it is downloaded with
        `asa_client.get_games(leagues='mls')`, which is the previous
        behaviour. Passing it avoids one download per call.

    Returns
    -------
    str
        'home' if the home side scored more, 'away' if the away side scored
        more, otherwise 'draw'.

    Raises
    ------
    ValueError
        If `game_id` does not identify exactly one game.
    """
    if games is None:
        games = asa_client.get_games(leagues='mls')

    game = games[games['game_id'] == game_id]
    if len(game) != 1:
        raise ValueError(
            f"expected exactly one game with game_id {game_id!r}, found {len(game)}"
        )

    # Pull out scalars: using a pandas Series in an `if` raises
    # "The truth value of a Series is ambiguous".
    home_score = game['home_goals'].iloc[0]
    away_score = game['away_goals'].iloc[0]

    if home_score > away_score:
        return 'home'
    if away_score > home_score:
        return 'away'
    # NOTE(review): missing (NaN) scores compare False both ways and therefore
    # also end up here -- confirm unplayed games are filtered out upstream.
    return 'draw'

# Element-wise wrapper kept available under its existing name.
v_decide_result = np.vectorize(decide_result)

# Label every game in one pass from the scores already held in `all_games`,
# using the same rule as decide_result (home > away -> 'home',
# away > home -> 'away', anything else -> 'draw'). Calling decide_result once
# per row would download the full game list again for every single game.
all_games.loc[:, 'result'] = np.select(
    [
        all_games['home_goals'] > all_games['away_goals'],
        all_games['away_goals'] > all_games['home_goals'],
    ],
    ['home', 'away'],
    default='draw',
)

from sklearn.metrics import accuracy_score
# Share of games whose sampled projection equals the actual result.
accuracy_score(all_games['result'], all_games['projected_result'])

In [None]:
from sklearn.metrics import confusion_matrix
# Rows are the actual results and columns the projected ones (first argument
# is y_true, second is y_pred); with no `labels` given, scikit-learn orders
# the classes by sorted label.
confusion_matrix(all_games['result'], all_games['projected_result'])