<a href="https://colab.research.google.com/github/nescoba/portafolio/blob/main/alternative2_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline 

sns.set()

In [5]:
# pip install itscalledsoccer

In [6]:
from itscalledsoccer.client import AmericanSoccerAnalysis

asa_client = AmericanSoccerAnalysis()

Gathering all players
Gathering all teams
Gathering all stadia
Gathering all managers
Gathering all referees
Finished initializing client


In [7]:
import statsmodels.api as sm

  import pandas.util.testing as tm


In [8]:
def construct_time_series_date(team_id, date, seasons):

    seasons.loc[:, 'date_formated'] = pd.to_datetime(seasons['date_time_utc'])
    seasons = seasons[seasons['date_formated'] < date]

    seasons_team = seasons[(seasons['home_team_id']==team_id) | (seasons['away_team_id']==team_id)]
    team_home = seasons_team[seasons_team['home_team_id']==team_id]
    team_away = seasons_team[seasons_team['away_team_id']==team_id]

    team_home.loc[:,'teamxgoals'] = team_home['home_team_xgoals']
    team_away.loc[:,'teamxgoals'] = team_away['away_team_xgoals']

    team_home.loc[:,'teamgoals'] = team_home['home_goals']
    team_away.loc[:,'teamgoals'] = team_away['away_goals']

    team_home.loc[:,'oppoxgoals'] = team_home['away_team_xgoals']
    team_away.loc[:,'oppoxgoals'] = team_away['home_team_xgoals']

    team_home.loc[:,'oppogoals'] = team_home['away_goals']
    team_away.loc[:,'oppogoals'] = team_away['home_goals']

    team_home_red = team_home.loc[:, ['teamgoals', 'teamxgoals', 'oppoxgoals', 'oppogoals', 'date_formated']]
    team_away_red = team_away.loc[:, ['teamgoals', 'teamxgoals', 'oppoxgoals', 'oppogoals', 'date_formated']]

    team_red = pd.merge(team_home_red, team_away_red, how='outer')

    team_red = team_red.sort_values(by='date_formated')

    return team_red

In [9]:
def normal_with_nans(mean, sd):
    try: 
        value = np.random.normal(mean, sd)
        if pd.isna(value):
            return 1
        else:
            return value
    except:
        return 1

In [10]:
def probs_game(homeid, awayid, date, seasons):

    n_simulations = 1000
    n_home = 0
    n_draw = 0
    n_away = 0

    

    homedf = construct_time_series_date(homeid, date, seasons)
    awaydf = construct_time_series_date(awayid, date, seasons)



    homedf.loc[:, 'rollxgmean'] = homedf['teamxgoals'].rolling(5).mean()
    homedf.loc[:, 'rollxgstd'] = homedf['teamxgoals'].rolling(5).std()

    awaydf.loc[:, 'rolloppoxgmean'] = awaydf['oppoxgoals'].rolling(5).mean()
    awaydf.loc[:, 'rolloppoxgstd'] = awaydf['oppoxgoals'].rolling(5).std()

    awaydf.loc[:, 'rollxgmean'] = awaydf['teamxgoals'].rolling(5).mean()
    awaydf.loc[:, 'rollxgstd'] = awaydf['teamxgoals'].rolling(5).std()

    homedf.loc[:, 'rolloppoxgmean'] = homedf['oppoxgoals'].rolling(5).mean()
    homedf.loc[:, 'rolloppoxgstd'] = homedf['oppoxgoals'].rolling(5).std()


    for n in range(n_simulations):

        try:
            homepredxgoals = normal_with_nans(homedf['rollxgmean'].iloc[-1], homedf['rollxgstd'].iloc[-1])
        except:
            homepredxgoals = 1
        # print(homepredxgoals)

        try:
            # print(awaydf)
            # print(awaydf['rolloppoxgstd'])
            # print(awaydf['rolloppoxgmean'].iloc[-1])
            # print(awaydf['rolloppoxgstd'].iloc[-1])
            awaypredoppoxgoals = normal_with_nans(awaydf['rolloppoxgmean'].iloc[-1], awaydf['rolloppoxgstd'].iloc[-1])
        except:
            awaypredoppoxgoals=1

        # print(awaypredoppoxgoals)

        hometoawaypredxgoals = np.mean(np.array([float(homepredxgoals),float(awaypredoppoxgoals)]))
        # print(hometoawaypredxgoals)
        
        try:
            awaypredxgoals = normal_with_nans(awaydf['rollxgmean'].iloc[-1], awaydf['rollxgstd'].iloc[-1])
        except:
            awaypredxgoals = 1
                   

        try:
            homepredoppoxgoals = normal_with_nans(homedf['rolloppoxgmean'].iloc[-1], homedf['rolloppoxgstd'].iloc[-1])
        except:
            homepredoppoxgoals = 1

        awaytohomepredxgoals = np.mean(np.array([float(awaypredxgoals), float(homepredoppoxgoals)]))
    
        home_score = int(hometoawaypredxgoals)
        away_score = int(awaytohomepredxgoals)
        if home_score > away_score:
            n_home += 1
        elif home_score < away_score:
            n_away += 1
        else:
            n_draw += 1 

    return (n_home / n_simulations, n_draw / n_simulations, n_away / n_simulations)

In [11]:
def probs_game_from_id(matchid, all_games):

    # print(all_games)
    # print(matchid)
   
    homeid = all_games.loc[all_games['game_id']==matchid, 'home_team_id'].values[0]

    awayid = all_games.loc[all_games['game_id']==matchid, 'away_team_id'].values[0]

    date = pd.to_datetime(all_games.loc[all_games['game_id']==matchid, 'date_time_utc'].values[0])

    print(date)

    return probs_game(homeid, awayid, date, all_games)

In [12]:
def choose_result_from_id(matchid, all_games):
    return np.random.choice(['home', 'draw', 'away'], p=probs_game_from_id(matchid, all_games))

In [13]:
v_choose_result_from_id = np.vectorize(choose_result_from_id, excluded=['all_games'])

In [14]:
v_probs_game_from_id = np.vectorize(probs_game_from_id, excluded=['all_games'])

In [24]:
def decide_result(game_id, games):
    game = games[games['game_id']==game_id]
    home_score = game['home_score'].values[0]
    away_score = game['away_score'].values[0]

    if home_score > away_score:
        return 'H'
    elif away_score > home_score:
        return 'A'
    else:
        return 'D'

In [32]:
seasons = asa_client.get_game_xgoals(leagues='mls')

all_games = asa_client.get_games(leagues='mls')
games2021 = all_games.iloc[:500, :]

games2021

Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,expanded_minutes,season_name,matchday,attendance,knockout_game,last_updated_utc,extra_time,penalties,home_penalties,away_penalties
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,99,2022,3,14848,False,2022-03-14 01:17:32 UTC,,,,
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,101,2022,3,43055,False,2022-03-13 23:08:07 UTC,,,,
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,98,2022,3,22795,False,2022-03-13 05:16:37 UTC,,,,
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,99,2022,3,12387,False,2022-03-13 12:25:12 UTC,,,,
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,104,2022,3,13448,False,2022-03-14 01:35:01 UTC,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,98,2021,2,4900,False,2021-04-28 00:29:37 UTC,,,,
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,102,2021,2,0,False,2021-04-27 15:01:07 UTC,,,,
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,100,2021,2,0,False,2021-04-27 06:01:05 UTC,,,,
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,100,2021,2,12164,False,2021-04-27 14:57:22 UTC,,,,


In [33]:
games2021.loc[:, 'Nprobs'] = games2021['game_id'].apply(probs_game_from_id, all_games=seasons)

games2021

2022-03-13 23:00:00+00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


2022-03-13 20:30:00+00:00
2022-03-13 03:00:00+00:00
2022-03-13 02:00:00+00:00
2022-03-13 01:30:00+00:00
2022-03-13 00:30:00+00:00
2022-03-13 00:30:00+00:00
2022-03-13 00:30:00+00:00
2022-03-13 00:30:00+00:00
2022-03-12 23:30:00+00:00
2022-03-12 20:30:00+00:00
2022-03-12 18:30:00+00:00
2022-03-12 18:30:00+00:00
2022-03-12 18:00:00+00:00
2022-03-07 03:00:00+00:00
2022-03-06 21:00:00+00:00
2022-03-06 00:30:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 23:00:00+00:00
2022-03-05 22:30:00+00:00
2022-03-05 21:00:00+00:00
2022-03-05 20:30:00+00:00
2022-03-05 19:00:00+00:00
2022-03-05 18:30:00+00:00
2022-02-28 01:00:00+00:00
2022-02-28 00:00:00+00:00
2022-02-27 22:00:00+00:00
2022-02-27 20:00:00+00:00
2022-02-27 18:00:00+00:00
2022-02-27 00:30:00+00:00
2022-02-26 23:00:00+00:00
2022-02-26 23:00:00+00:00
2022-02-26 23:00:00+00:00
2022-02-26 23:00:00+00:00
2022-02-26 22:30:00+00:00
2022-02-26 2

Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,...,season_name,matchday,attendance,knockout_game,last_updated_utc,extra_time,penalties,home_penalties,away_penalties,Nprobs
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,...,2022,3,14848,False,2022-03-14 01:17:32 UTC,,,,,"(0.428, 0.442, 0.13)"
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,...,2022,3,43055,False,2022-03-13 23:08:07 UTC,,,,,"(0.197, 0.478, 0.325)"
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,...,2022,3,22795,False,2022-03-13 05:16:37 UTC,,,,,"(0.517, 0.39, 0.093)"
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,...,2022,3,12387,False,2022-03-13 12:25:12 UTC,,,,,"(0.163, 0.489, 0.348)"
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,...,2022,3,13448,False,2022-03-14 01:35:01 UTC,,,,,"(0.395, 0.525, 0.08)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,...,2021,2,4900,False,2021-04-28 00:29:37 UTC,,,,,"(0.363, 0.581, 0.056)"
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,...,2021,2,0,False,2021-04-27 15:01:07 UTC,,,,,"(0.26, 0.53, 0.21)"
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,...,2021,2,0,False,2021-04-27 06:01:05 UTC,,,,,"(0.108, 0.462, 0.43)"
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,...,2021,2,12164,False,2021-04-27 14:57:22 UTC,,,,,"(0.471, 0.449, 0.08)"


In [34]:
games2021.loc[:, 'NH'] = games2021['Nprobs'].apply(lambda x: x[0])
games2021.loc[:, 'ND'] = games2021['Nprobs'].apply(lambda x: x[1])
games2021.loc[:, 'NA'] = games2021['Nprobs'].apply(lambda x: x[2])

games2021

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,...,knockout_game,last_updated_utc,extra_time,penalties,home_penalties,away_penalties,Nprobs,NH,ND,NA
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,...,False,2022-03-14 01:17:32 UTC,,,,,"(0.428, 0.442, 0.13)",0.428,0.442,0.130
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,...,False,2022-03-13 23:08:07 UTC,,,,,"(0.197, 0.478, 0.325)",0.197,0.478,0.325
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,...,False,2022-03-13 05:16:37 UTC,,,,,"(0.517, 0.39, 0.093)",0.517,0.390,0.093
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,...,False,2022-03-13 12:25:12 UTC,,,,,"(0.163, 0.489, 0.348)",0.163,0.489,0.348
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,...,False,2022-03-14 01:35:01 UTC,,,,,"(0.395, 0.525, 0.08)",0.395,0.525,0.080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,...,False,2021-04-28 00:29:37 UTC,,,,,"(0.363, 0.581, 0.056)",0.363,0.581,0.056
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,...,False,2021-04-27 15:01:07 UTC,,,,,"(0.26, 0.53, 0.21)",0.260,0.530,0.210
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,...,False,2021-04-27 06:01:05 UTC,,,,,"(0.108, 0.462, 0.43)",0.108,0.462,0.430
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,...,False,2021-04-27 14:57:22 UTC,,,,,"(0.471, 0.449, 0.08)",0.471,0.449,0.080


In [35]:
games2021.loc[:, 'result'] = games2021['game_id'].apply(decide_result, games=games2021)
games2021

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,...,last_updated_utc,extra_time,penalties,home_penalties,away_penalties,Nprobs,NH,ND,NA,result
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,...,2022-03-14 01:17:32 UTC,,,,,"(0.428, 0.442, 0.13)",0.428,0.442,0.130,A
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,...,2022-03-13 23:08:07 UTC,,,,,"(0.197, 0.478, 0.325)",0.197,0.478,0.325,H
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,...,2022-03-13 05:16:37 UTC,,,,,"(0.517, 0.39, 0.093)",0.517,0.390,0.093,H
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,...,2022-03-13 12:25:12 UTC,,,,,"(0.163, 0.489, 0.348)",0.163,0.489,0.348,H
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,...,2022-03-14 01:35:01 UTC,,,,,"(0.395, 0.525, 0.08)",0.395,0.525,0.080,H
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,...,2021-04-28 00:29:37 UTC,,,,,"(0.363, 0.581, 0.056)",0.363,0.581,0.056,D
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,...,2021-04-27 15:01:07 UTC,,,,,"(0.26, 0.53, 0.21)",0.260,0.530,0.210,H
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,...,2021-04-27 06:01:05 UTC,,,,,"(0.108, 0.462, 0.43)",0.108,0.462,0.430,D
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,...,2021-04-27 14:57:22 UTC,,,,,"(0.471, 0.449, 0.08)",0.471,0.449,0.080,D


In [46]:
games2021.loc[:,'NHCat'] = pd.cut(games2021['NH'], 5)
games2021

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,...,home_penalties,away_penalties,Nprobs,NH,ND,NA,result,NHCat,NACat,NDCat
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,...,,,"(0.428, 0.442, 0.13)",0.428,0.442,0.130,A,"(0.364, 0.545]","(0.12, 0.15]","(0.42, 0.443]"
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,...,,,"(0.197, 0.478, 0.325)",0.197,0.478,0.325,H,"(0.183, 0.364]","(0.298, 0.327]","(0.466, 0.49]"
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,...,,,"(0.517, 0.39, 0.093)",0.517,0.390,0.093,H,"(0.364, 0.545]","(0.0907, 0.12]","(0.373, 0.396]"
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,...,,,"(0.163, 0.489, 0.348)",0.163,0.489,0.348,H,"(0.0011, 0.183]","(0.327, 0.357]","(0.466, 0.49]"
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,...,,,"(0.395, 0.525, 0.08)",0.395,0.525,0.080,H,"(0.364, 0.545]","(0.0611, 0.0907]","(0.513, 0.537]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,...,,,"(0.363, 0.581, 0.056)",0.363,0.581,0.056,D,"(0.183, 0.364]","(0.0316, 0.0611]","(0.56, 0.584]"
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,...,,,"(0.26, 0.53, 0.21)",0.260,0.530,0.210,H,"(0.183, 0.364]","(0.209, 0.239]","(0.513, 0.537]"
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,...,,,"(0.108, 0.462, 0.43)",0.108,0.462,0.430,D,"(0.0011, 0.183]","(0.416, 0.446]","(0.443, 0.466]"
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,...,,,"(0.471, 0.449, 0.08)",0.471,0.449,0.080,D,"(0.364, 0.545]","(0.0611, 0.0907]","(0.443, 0.466]"


In [48]:
games2021.loc[:,'NACat'] = pd.cut(games2021['NA'], 5)
games2021.loc[:,'NDCat'] = pd.cut(games2021['ND'], 5)
games2021

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,...,home_penalties,away_penalties,Nprobs,NH,ND,NA,result,NHCat,NACat,NDCat
0,2lqRoy3JQr,2022-03-13 23:00:00 UTC,0,1,a2lqRX2Mr0,kRQand1MKZ,2lqRG1WQr0,NWMW84L5lz,9z5kKNb5A3,OlMlYvy5Lz,...,,,"(0.428, 0.442, 0.13)",0.428,0.442,0.130,A,"(0.364, 0.545]","(0.00111, 0.179]","(0.373, 0.513]"
1,9Yqd3nDL5v,2022-03-13 20:30:00 UTC,2,1,KAqBN0Vqbg,NPqxKXZ59d,wvq9vKlQWn,NWMWoaeMlz,7vQ7m6Y5D1,gjMNG4k5Kp,...,,,"(0.197, 0.478, 0.325)",0.197,0.478,0.325,H,"(0.183, 0.364]","(0.179, 0.357]","(0.373, 0.513]"
2,KXMeBo1r56,2022-03-13 03:00:00 UTC,1,0,WBLMvYAQxe,gpMOLwl5zy,odMX96a5YL,p6qbX06M0G,gpMOYv1qzy,0Oq6mNP56D,...,,,"(0.517, 0.39, 0.093)",0.517,0.390,0.093,H,"(0.364, 0.545]","(0.00111, 0.179]","(0.373, 0.513]"
3,EGMPOxJVQa,2022-03-13 02:00:00 UTC,2,0,pzeQZ6xQKw,Z2vQ1xlqrA,KPqjDeNM6v,9z5ka6gQA3,0Oq6VPgq6D,N6MmpyLQEG,...,,,"(0.163, 0.489, 0.348)",0.163,0.489,0.348,H,"(0.0011, 0.183]","(0.179, 0.357]","(0.373, 0.513]"
4,gjMNGxZ05K,2022-03-13 01:30:00 UTC,2,0,mKAqBBmqbg,vzqoOgNqap,4wM4voBqjB,eVq3alGMWO,KAqBwv7Qbg,gOMnDkAqwN,...,,,"(0.395, 0.525, 0.08)",0.395,0.525,0.080,H,"(0.364, 0.545]","(0.00111, 0.179]","(0.513, 0.654]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,NPqxXGajq9,2021-04-24 22:00:00 UTC,1,1,eVq3ya6MWO,jYQJ19EqGR,gjMNk4v5Kp,7vQ7xbOMD1,0Oq6zkzq6D,odMXxreMYL,...,,,"(0.363, 0.581, 0.056)",0.363,0.581,0.056,D,"(0.183, 0.364]","(0.00111, 0.179]","(0.513, 0.654]"
496,9vQ283Ar5K,2021-04-24 19:30:00 UTC,3,1,0KPqjA456v,mKAqBBmqbg,kRQa9k8QKZ,Vj58W84M8n,2vQ10jwqrA,eV5DNVJQKn,...,,,"(0.26, 0.53, 0.21)",0.260,0.530,0.210,H,"(0.183, 0.364]","(0.179, 0.357]","(0.513, 0.654]"
497,XVqK8XxaQ0,2021-04-24 19:00:00 UTC,2,2,kRQabn8MKZ,lgpMOvnQzy,9z5kJ7gMA3,vzqoJrj5ap,vzqo2dv5ap,0Oq6mxd56D,...,,,"(0.108, 0.462, 0.43)",0.108,0.462,0.430,D,"(0.0011, 0.183]","(0.357, 0.534]","(0.373, 0.513]"
498,4JMAVNAD5K,2021-04-24 18:00:00 UTC,2,2,vzqoOgNqap,APk5LGOMOW,zeQZlGK5Kw,KXMe8pxQ64,gOMnDkAqwN,KPqjO38Q6v,...,,,"(0.471, 0.449, 0.08)",0.471,0.449,0.080,D,"(0.364, 0.545]","(0.00111, 0.179]","(0.373, 0.513]"


In [49]:
for (interval,group) in games2021.groupby('NHCat'):
    print(interval.mid, len(group[group['result']=='H'])/len(group), len(group))

0.09204999999999999 0.4153846153846154 195
0.27349999999999997 0.5 184
0.4545 0.5543478260869565 92
0.6355 0.6086956521739131 23
0.8165 0.5 6


In [50]:
for (interval,group) in games2021.groupby('NDCat'):
    print(interval.mid, len(group[group['result']=='D'])/len(group), len(group))

0.16115000000000002 0.36363636363636365 11
0.3025 0.24615384615384617 65
0.443 0.2717391304347826 276
0.5835 0.23880597014925373 134
0.7245 0.35714285714285715 14


In [51]:
for (interval,group) in games2021.groupby('NACat'):
    print(interval.mid, len(group[group['result']=='A'])/len(group), len(group))

0.090055 0.17791411042944785 163
0.268 0.2651162790697674 215
0.4455 0.32 75
0.623 0.375 40
0.8005 0.2857142857142857 7
