In [1]:
import pandas as pd
import requests as req
from bs4 import BeautifulSoup
import time
import numpy as np
import lxml
import chardet
import logging as log

In [28]:
fbRefFileName = 'matches23-17-4.csv'

streakColumns = ["streak_win"              ,"streak_draw"              ,"streak_loss",\
                 "streak_win_home"         ,"streak_draw_home"         ,"streak_loss_home",\
                 "streak_win_away"         ,"streak_draw_away"         ,"streak_loss_away",\
                 "streak_win_vs"           ,"streak_draw_vs"           ,"streak_loss_vs",\
                 "streak_home_win_vs"      ,"streak_home_draw_vs"      ,"streak_home_loss_vs",\
                 "streak_away_win_against" ,"streak_away_draw_against" ,"streak_away_draw_against"\
                ]
    

In [None]:
def infoConfigLogging(filename):
    log.basicConfig(filename = filename,                               \
                    format='%(asctime)s - %(levelname)s: %(message)s', \
                    datefmt='%d-%b-%y %H:%M:%S',                       \
                    level = log.INFO,                                  \
                    filemode ='w')       
    
def debugConfigLogging(filename):
    log.basicConfig(filename = filename,                         \
                    format='%(asctime)s - %(levelname)s: %(message)s', \
                    datefmt='%d-%b-%y %H:%M:%S',                       \
                    level = log.DEBUG,                                 \
                    filemode ='w')       
    
def errorLoggingConfig(filename):
    log.basicConfig(filename = filename,                         \
                    format='%(asctime)s - %(levelname)s: %(message)s', \
                    datefmt='%d-%b-%y %H:%M:%S',                       \
                    level = log.ERROR,                                 \
                    filemode ='w')    

In [None]:
def loggingConfig(filename = 'scraping.log', loglevel = 'INFO'):
    if loglevel.upper() == 'DEBUG':
        debugConfigLogging(filename)
    elif loglevel.upper() == 'ERROR':
        loggingConfigAsError(filename)
    else :
        infoConfigLogging(filename)

In [3]:
df = pd.read_csv(fbRefFileName, encoding = 'latin-1')
df

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,match report,notes,sh,sot,dist,fk,pk,pkatt,team_name,season
0,3/12/2023,21:15,Liga MX,Clausura 2023 Regular Season,Sun,Home,D,1,1,Necaxa,...,Match Report,,13,3,13.5,0.0,0,0,FC Juarez,2023
1,3/12/2023,21:15,Liga MX,Clausura 2023 Regular Season,Sun,Away,D,1,1,FC Juárez,...,Match Report,,19,4,23.3,1.0,0,0,Necaxa,2023
2,3/12/2023,19:05,Liga MX,Clausura 2023 Regular Season,Sun,Away,W,2,1,Pachuca,...,Match Report,,13,5,21.3,1.0,0,0,Monterrey,2023
3,3/12/2023,19:05,Liga MX,Clausura 2023 Regular Season,Sun,Home,L,1,2,Monterrey,...,Match Report,,17,5,22.8,3.0,0,0,Pachuca,2023
4,3/12/2023,19:05,Liga MX,Clausura 2023 Regular Season,Sun,Home,W,3,2,Tijuana,...,Match Report,,11,7,12.4,0.0,0,0,Santos Laguna,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3785,7/22/2017,17:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,W,1,0,América,...,Match Report,,0,0,,,1,1,Queretaro,2018
3786,7/21/2017,19:00,Liga MX,Apertura 2017 Regular Season,Fri,Away,D,0,0,Morelia,...,Match Report,,0,0,,,0,0,Monterrey,2018
3787,7/21/2017,19:00,Liga MX,Apertura 2017 Regular Season,Fri,Home,D,0,0,Monterrey,...,Match Report,,0,0,,,0,0,Morelia,2018
3788,7/21/2017,19:00,Liga MX,Apertura 2017 Regular Season,Fri,Away,W,2,0,Tijuana,...,Match Report,,0,0,,,0,1,Cruz Azul,2018


In [5]:
# create a function to convert string values to numerical values
def result_to_numeric(result):
    if result.lower() in ["win","w"]:
        return 1
    elif result.lower() in ["loss","l"]:
        return -1
    elif result.lower() in ["draw","d"]:
        return 0
    


In [6]:
df["result_num"] = df["result"].apply(result_to_numeric)


In [None]:
"""
Two alternatives for iterative streak calculation (W,L,D and Unbeaten):

Both work almost the same way but assign streaks for only previous matches in different ways:

def calc_streaks: Makes index referencing so that first row gets a streak value of zero.
It also uses a variable prevResult to keep track of the previous game, so that
the next row only takes into account previous results

def cal_streaks1: Does not use indexes or prevResult variable. It calculates
streaks and at the end, it shifts the entire column one position to give
previous streaks to each match
"""



In [7]:

# create a function to calculate streaks
def calc_streaks(series, streakType):
    current_streak = 0
    streaks=[]
    
    if streakType.lower() == 'unbeaten':
        numMatchResult =[result_to_numeric('w'), result_to_numeric('d')]
    else:
        numMatchResult = [ result_to_numeric(streakType.lower()) ]
    #for idx in range(1, len(series) -1):
    indexes = series.index
    for idx, val in series.items():
    #for val in series:    
        
        if  idx == indexes[0] or prevResult not in numMatchResult:
            current_streak =  0
        else:
            current_streak += 1
            
        streaks.append(current_streak)
        prevResult = val
    #resultSeries = pd.Series(streaks, index = series.index)
    #return pd.Series(streaks, index = series.index)
    return streaks


In [8]:

# create a function to calculate streaks
def calc_streaks1(series, streakType):
    current_streak = 0
    streaks=[]
    
    if streakType.lower() == 'unbeaten':
        numMatchResult = [ result_to_numeric('w'), result_to_numeric('d') ]
    else:
        numMatchResult = [ result_to_numeric(streakType.lower()) ]
    
    for actualResult in series:
        if actualResult not in numMatchResult:
            current_streak =  0
        else:
            current_streak += 1

        streaks.append(current_streak)
    
    #Shifting list 1 position to the right
    streaks = [0] + streaks[:-1]
    return streaks

In [23]:

# create a function to calculate streaks
def get_streak_subset(dfTeam, homeOrAway = '', streak_is_against_rival = False):
    
    """
    Calculate
    -Streaks against any team                 * 
    -Streak while home                        *
    -Streak while away                        *
    -Streak against that opponent               
    -Streak while home against that opponent
    -Streak while away against that opponent
    
    """
    subset = dfTeam.copy(deep = False)
    if homeOrAway in ['home','away']:
        homeOrAway = homeOrAway.lower()
        subset = subset[ subset["venue"].str.lower() == homeOrAway ]
        

    #This sorting method also creates a copy of the dataframe (default inplace parameter False) and resets the indexes
    subset = subset.sort_values(by = ["date","time"], ascending = True, ignore_index=True)
    
    return subset      
    
    #and streak_is_against_rival :
    #.reset_index(drop=True)
'''2. for each team, make a copy of the original dataframe with the records. '''
#dfTeam#
    


'2. for each team, make a copy of the original dataframe with the records. '

In [None]:
def getStreakColumns(df, streakColumns):
    for streakCol in streakColumns:
        
    streakColumns = ["streak_win"          ,"streak_draw"         ,"streak_loss",\
                     "streak_win_home"     ,"streak_draw_home"    ,"streak_loss_home",\
                     "streak_win_away"     ,"streak_draw_away"    ,"streak_loss_away",\
                     "streak_win_vs"       ,"streak_draw_vs"      ,"streak_loss_vs",\
                     "streak_home_win_vs"  ,"streak_home_draw_vs" ,"streak_home_loss_vs",\
                     "streak_away_win_vs"  ,"streak_away_draw_vs" ,"streak_away_draw_vs"\
                    ]
    

In [3]:
def getAllStreaks(df):
    streaks = {}
    teams = df.team_name.unique()
    
    df = df.sort_values(by = ["date", "time"], ascending = True, ignore_index=True)

    for team in teams:
        #streaks[team] = {}
        streaks[team] = {"win":0, "draw": 0, "loss":0, "home_win": 0, "home_draw": 0, "home_loss": 0, "away_win": 0, "away_draw": 0, "away_loss": 0}
        
        for opponent in teams:
            #streaks[team][opponent] = {"win":0, "draw": 0, "loss":0,"home_win": 0, "home_draw": 0, "home_loss": 0, "away_win": 0, "away_draw": 0, "away_loss": 0}
            streaks[team][opponent] = {"win":0, "draw": 0, "loss":0,"home_win": 0, "home_draw": 0, "home_loss": 0, "away_win": 0, "away_draw": 0, "away_loss": 0}
            
            #streaks[team][opponent]["home"] = {"win":0, "draw": 0, "loss":0}
            #streaks[team][opponent]["away"] = {"win":0, "draw": 0, "loss":0}
    
    #log.info("Home and away dictionaries are set up")
    print("Home and away dictionaries are set up")
    
    for index, row in df.iterrows():
        venue = row["venue"]
        home_team = row["team_name"] if venue == 'Home' else row["opponent"]
        away_team = row["opponent"]  if venue == 'Away' else row["team_name"]
        #teamName = row["team_name"]
        #opponent = row["opponent"]
            
        result = row["result_num"]
        
        #Streaks for home team

        #Home team wins
        if (result == 1 and venue = 'Home') or (result == -1 and venue =='Away'):
            '''streaks[home_team]["home_win"] += 1
            streaks[away_team]["away_loss"] += 1
            streaks[home_team][away_team]["win"] += 1
            streaks[away_team][home_team]["loss"] += 1
            streaks[home_team][away_team]["home_win"] += 1
            streaks[away_team][home_team]["away_loss"] += 1
            streaks[home_team][away_team]["home"]["win"] += 1
            streaks[away_team][home_team]["away"]["loss"] += 1
            '''
            streaks[home_team]["win"] += 1
            streaks[home_team]["home_win"] += 1
            streaks[away_team]["loss"] += 1
            streaks[away_team]["away_loss"] += 1

            streaks[home_team][away_team]["win"] += 1
            streaks[home_team][away_team]["home_win"] += 1
            streaks[away_team][home_team]["loss"] += 1
            streaks[away_team][home_team]["away_loss"] += 1
            	
        #Home team losses
        elif (result == -1 and venue = 'Home') or (result == 1 and venue =='Away'):
            '''
            streaks[home_team]["home_loss"] += 1
            streaks[away_team]["away_win"] += 1
            streaks[home_team][away_team]["loss"] += 1
            streaks[away_team][home_team]["win"] += 1
            streaks[home_team][away_team]["home_loss"] += 1
            streaks[away_team][home_team]["away_win"] += 1
            streaks[home_team][away_team]["home"]["loss"] += 1
            streaks[away_team][home_team]["away"]["win"] += 1'''
            
            streaks[home_team]["loss"] += 1
            streaks[home_team]["home_loss"] += 1
            streaks[away_team]["win"] += 1
            streaks[away_team]["away_win"] += 1

            streaks[home_team][away_team]["loss"] += 1
            streaks[home_team][away_team]["home_loss"] += 1
            streaks[away_team][home_team]["win"] += 1
            streaks[away_team][home_team]["away_win"] += 1

        else:
            streaks[home_team]["draw"] += 1
            streaks[away_team]["draw"] += 1
            streaks[home_team]["home_draw"] += 1
            streaks[away_team]["away_draw"] += 1
            streaks[home_team][away_team]["draw"] += 1
            streaks[away_team][home_team]["draw"] += 1
            streaks[home_team][away_team]["home_draw"] += 1
            streaks[away_team][home_team]["away_draw"] += 1
            """streaks[home_team][away_team]["home"]["draw"] += 1
            streaks[away_team][home_team]["away"]["draw"] += 1
            '''
        
        
        
        
        '''
    streaks_df = pd.DataFrame(columns=["Team", "Win Streak", "Loss Streak", "Draw Streak"])
    
    for team in teams:
        win_streak = streaks[team]["win"]
        loss_streak = streaks[team]["loss"]
        draw_streak = streaks[team]["draw"]
        streaks_df = streaks_df.append({"Team": team, "Win Streak": win_streak, "Loss Streak": loss_streak, "Draw Streak": draw_streak}, ignore_index=True)
    '''
        
        
    

SyntaxError: invalid syntax (3488253914.py, line 5)

In [2]:
"""0. Order matches by date asc (Older matches first) """
df["date"] = pd.to_datetime(df["date"])
df["time"] = pd.to_datetime(df["time"])

#This sorting method also creates a copy of the dataframe (default inplace parameter False) and resets the indexes
df = df.sort_values(by = ["date","time"], ascending = True, ignore_index=True)
df

NameError: name 'pd' is not defined

In [None]:
df["streak_win"] = df.apply(lambda row: calc_streaks(df["result_num"], 'win') if df["team_name"].any() =='Guadalajara' else None, axis = 1)

In [26]:
dfTeam = df[ df["team_name"] == "Guadalajara"]
dfTeam1 = get_streak_subset(dfTeam, 'away')

In [27]:
dfTeam1

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,notes,sh,sot,dist,fk,pk,pkatt,team_name,season,result_num
0,2017-07-29,2023-03-28 17:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,D,1,1,Cruz Azul,...,,0,0,,,0,0,Guadalajara,2018,0
1,2017-08-12,2023-03-28 19:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,L,1,4,Monterrey,...,,0,0,,,0,0,Guadalajara,2018,-1
2,2017-08-23,2023-03-28 20:30:00,Liga MX,Apertura 2017 Regular Season,Wed,Away,D,1,1,Santos,...,,0,0,,,1,1,Guadalajara,2018,0
3,2017-09-09,2023-03-28 19:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,W,3,1,Pachuca,...,,0,0,,,0,0,Guadalajara,2018,1
4,2017-09-30,2023-03-28 19:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,L,0,1,UANL,...,,0,0,,,0,0,Guadalajara,2018,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,2023-01-28,2023-03-28 20:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,FC Juárez,...,,6,1,17.8,0.0,1,1,Guadalajara,2023,1
99,2023-02-11,2023-03-28 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,D,1,1,Pachuca,...,,17,9,20.9,0.0,0,0,Guadalajara,2023,0
100,2023-02-18,2023-03-28 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UNAM,...,,10,3,20.9,0.0,0,0,Guadalajara,2023,1
101,2023-02-25,2023-03-28 19:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UANL,...,,7,4,15.9,0.0,0,0,Guadalajara,2023,1


In [None]:
df["streak_win"].unique()

In [None]:
'''
Option 2

1. List of different teams in imported dataframe
2. for each team, make a copy of the original dataframe with the records. 
3. Reindex slice
4. Create streak columns
5. Add resulting dataframe in a list of dataframes
6. Concat all dataframes in a new one
7. return to csv

'''

In [27]:
"""1. List of different teams in imported dataframe"""

teams = df.team_name.unique()

'''3. Reindex slice: Indexes should be 0 -> n-1'''

dfTeam = df[df["team_name"]== "Guadalajara"].reset_index(drop=True)
'''2. for each team, make a copy of the original dataframe with the records. '''
dfTeam#.sort_values(by = pd.to_datetime(df["date"]))
#dfTeamReindex

#dfTeamG
#dfTeamReindex


Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,notes,sh,sot,dist,fk,pk,pkatt,team_name,season,result_num
0,2017-07-22,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,D,0,0,Toluca,...,,0,0,,,0,0,Guadalajara,2018,0
1,2017-07-29,2023-03-20 17:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,D,1,1,Cruz Azul,...,,0,0,,,0,0,Guadalajara,2018,0
2,2017-08-05,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,D,2,2,Necaxa,...,,0,0,,,0,0,Guadalajara,2018,0
3,2017-08-12,2023-03-20 19:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,L,1,4,Monterrey,...,,0,0,,,0,0,Guadalajara,2018,-1
4,2017-08-19,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,L,0,1,Puebla,...,,0,0,,,0,0,Guadalajara,2018,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2023-02-15,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Wed,Home,W,2,1,Tijuana,...,,11,0,25.1,0.0,2,2,Guadalajara,2023,1
199,2023-02-18,2023-03-20 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UNAM,...,,10,3,20.9,0.0,0,0,Guadalajara,2023,1
200,2023-02-25,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UANL,...,,7,4,15.9,0.0,0,0,Guadalajara,2023,1
201,2023-03-04,2023-03-20 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Home,W,2,0,Santos,...,,17,7,21.9,1.0,0,0,Guadalajara,2023,1


In [28]:
dfTeam["streak_win"] = calc_streaks1(dfTeam["result_num"], 'win')
dfTeam["streak_draw"] = calc_streaks1(dfTeam["result_num"], 'draw')
dfTeam["streak_loss"] = calc_streaks1(dfTeam["result_num"], 'loss')
dfTeam["streak_unbeaten"] = calc_streaks1(dfTeam["result_num"], 'unbeaten')

dfTeam
#for team in teams:
#    dfTeam = df.copy()[df["team_name"]== team]
    #get dfName here


#4. Create streak columns  -> This may be optional

#Apply calc_streak function 
#5. Add resulting dataframe in a list of dataframes



Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,team_name,season,result_num,streak_win,streak_draw,streak_loss,streak_unbeaten
0,2017-07-22,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,D,0,0,Toluca,...,,0,0,Guadalajara,2018,0,0,0,0,0
1,2017-07-29,2023-03-20 17:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,D,1,1,Cruz Azul,...,,0,0,Guadalajara,2018,0,0,1,0,1
2,2017-08-05,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,D,2,2,Necaxa,...,,0,0,Guadalajara,2018,0,0,2,0,2
3,2017-08-12,2023-03-20 19:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,L,1,4,Monterrey,...,,0,0,Guadalajara,2018,-1,0,3,0,3
4,2017-08-19,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,L,0,1,Puebla,...,,0,0,Guadalajara,2018,-1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2023-02-15,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Wed,Home,W,2,1,Tijuana,...,0.0,2,2,Guadalajara,2023,1,0,2,0,3
199,2023-02-18,2023-03-20 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UNAM,...,0.0,0,0,Guadalajara,2023,1,1,0,0,4
200,2023-02-25,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,W,2,1,UANL,...,0.0,0,0,Guadalajara,2023,1,2,0,0,5
201,2023-03-04,2023-03-20 21:05:00,Liga MX,Clausura 2023 Regular Season,Sat,Home,W,2,0,Santos,...,1.0,0,0,Guadalajara,2023,1,3,0,0,6


In [29]:
team_streaks = []

for team in teams:
    dfTeam = df[df.team_name == team].reset_index(drop=True)
    
    dfTeam["streak_win"]      = calc_streaks1(dfTeam["result_num"], 'win')
    dfTeam["streak_draw"]     = calc_streaks1(dfTeam["result_num"], 'draw')
    dfTeam["streak_loss"]     = calc_streaks1(dfTeam["result_num"], 'loss')
    dfTeam["streak_unbeaten"] = calc_streaks1(dfTeam["result_num"], 'unbeaten')
    
    team_streaks.append(dfTeam)

    

In [30]:
dfStreaks = pd.concat(team_streaks)

In [31]:
dfStreaks[dfStreaks.team_name == "Necaxa"]

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,team_name,season,result_num,streak_win,streak_draw,streak_loss,streak_unbeaten
0,2017-07-23,2023-03-20 18:49:00,Liga MX,Apertura 2017 Regular Season,Sun,Away,W,2,0,Veracruz,...,,0,0,Necaxa,2018,1,0,0,0,0
1,2017-07-29,2023-03-20 21:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,W,1,0,Tijuana,...,,0,0,Necaxa,2018,1,1,0,0,1
2,2017-08-05,2023-03-20 21:06:00,Liga MX,Apertura 2017 Regular Season,Sat,Away,D,2,2,Guadalajara,...,,0,0,Necaxa,2018,0,2,0,0,2
3,2017-08-12,2023-03-20 21:00:00,Liga MX,Apertura 2017 Regular Season,Sat,Home,L,0,3,León,...,,0,0,Necaxa,2018,-1,0,1,0,3
4,2017-08-20,2023-03-20 12:00:00,Liga MX,Apertura 2017 Regular Season,Sun,Away,D,0,0,Toluca,...,,0,0,Necaxa,2018,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,2023-02-15,2023-03-20 21:05:00,Liga MX,Clausura 2023 Regular Season,Wed,Home,W,3,1,UNAM,...,0.0,0,0,Necaxa,2023,1,0,0,1,0
197,2023-02-18,2023-03-20 17:06:00,Liga MX,Clausura 2023 Regular Season,Sat,Away,L,1,2,Monterrey,...,0.0,1,1,Necaxa,2023,-1,1,0,0,1
198,2023-02-24,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Fri,Home,D,1,1,Querétaro,...,1.0,0,0,Necaxa,2023,0,0,0,1,0
199,2023-03-03,2023-03-20 19:05:00,Liga MX,Clausura 2023 Regular Season,Fri,Home,L,0,1,UANL,...,1.0,0,0,Necaxa,2023,-1,0,1,0,1


In [34]:
dfStreaks = dfStreaks.sort_values(by = ["date","time"], ascending = False, ignore_index=True)
dfStreaks.to_csv("matchesStreaks.csv", encoding = 'latin-1', index=False)

In [None]:
dfChivas


In [None]:
team_results = df[df.team_name == "Guadalajara"] 

In [None]:
#Option 1: group by?

'''
Option 1
Use lambda function with a conditional statement
Create empty columns from the beginning?

'''


In [None]:
#Adding streak columns to dataframe
df = df.reindex(df.columns.tolist() + ['streak_win','streak_draw','streak_loss','streak_unbeaten'], axis=1) 
df


In [None]:
'''
Use lambda function with a conditional statement
Create empty columns from the beginning?
'''


In [None]:
#Streak calculation

resultsTest = pd.DataFrame({
    "result": ["win", "loss", "draw", "win", "win", "loss", "win", "draw", "win", "loss", "win", "win", "loss", "win", "draw", "loss", "loss", "draw", "win", "win", "win", "draw", "draw", "win", "win", "loss", "win", "win", "loss", "win"]
})



In [None]:
#resultsTest["result_num"]=resultsTest.result.apply(result_to_numeric) 
#resultsTest["win_streak"]=resultsTest.apply(result_to_numeric) 

resultsTest["win_streak1"] = resultsTest["result_num"].rolling(window = len(resultsTest), min_periods=1).apply(lambda x: calc_streaks1(x, "win")[-1], raw = False).astype(int)

#resultsTest["win_streak"] = streak = calc_streaks1(resultsTest["result_num"], "win")
#resultsTest = resultsTest.drop("resultsShifted", axis=1)
resultsTest
#len(streak)

In [None]:
resultsTest
#type(resultsTest["result_num"])

In [None]:
results = team_results.result
team_results

#esults1 = team_results["result"]
#esults1

In [None]:
#df["team_name"].values()
teams = df["team_name"].unique()
type(teams)

In [None]:
for team in teams:
    team_results = df[df.team_name == team] 

In [None]:

win_streak =calc_streaks(results["results"], "win")
draw_streak =calc_streaks(results["result_num"], "draw")
loss_streak =calc_streaks(results["result_num"], "loss")
unbeaten_streak = calc_streaks(results["result_num"], "unbeaten")

newDf = pd.DataFrame({"results":results.result, "streak_win":win_streak, "streak_draw":draw_streak, "streak_loss": loss_streak,"streak_unbeaten": unbeaten_streak})

#newDf.results = results.result
#newDf.streaks = win_streak
newDf

In [None]:
df


In [None]:
resultsTest["win_streak"] = calc_streaks(results["result_num"], "win")
resultsTest

In [None]:
print(dfChivas["result"].index[0])

In [None]:
for idx, val in dfChivas["result"].items():
    print(idx)

In [None]:
for val in dfChivas["result"].items(), enum in enumerate(dfChivas.index):
    print(enum, idx)

In [None]:
dfChivas.loc[:, ("win_streak")]      = calc_streaks(dfChivas["result"], "win")
#df["draw_streak"]     = calc_streaks(df["result"], "draw")
#df["loss_streak"]     = calc_streaks(df["result"], "loss")
#df["unbeaten_streak"] = calc_streaks(df["result"], "unbeaten")

#for idx, val in dfChivas["result"].items():
#    print(idx , val)
#dfChivas.index[0]
#dfChivas[["result","win_streak"]]

df

In [None]:


"""
results["win_streak"] = results["result"].rolling(window=len(results), min_periods=1).apply(lambda x: calc_streaks(x)["win"], raw=False)

# calculate loss streaks
results["loss_streak"] = results["result"].rolling(window=len(results), min_periods=1).apply(lambda x: calc_streaks(x)["loss"], raw=False)

# calculate draw streaks
results["draw_streak"] = results["result"].rolling(window=len(results),min_periods=1).apply(lambda x: calc_streaks(x)["draw"], raw=False)
"""

In [None]:
series = results["result_num"]
print(series[0])
print(series[1])
print(series[len(series)-1])

In [None]:
series.items()

In [None]:


# calculate win streaks
results["win_streak"] = results["result_num"].rolling(window = len(results), closed = "left", min_periods=1).apply(lambda x: calc_streaks(x[0]), raw=False)
#results["win_streak"] = 3
#thefinal= results["result_num"].rolling(window = len(results), closed = "left", min_periods=1).apply(calc_streaks, raw=False)

#results["win_streak"] 
#resultado
#len(results)

In [None]:

# create a function to calculate streaks
def calc_streaks(series):
    current_streak = 0
    streaks = []
    
    for result in series:
        if result == series.name:
            current_streak += 1
        else:
            current_streak = 0
        
        streaks.append(current_streak)
    
    return streaks


results["win_streak"] = results["result"].rolling(window=len(results), min_periods=1).apply(lambda x: calc_streaks(x)["win"], raw=False)

# calculate loss streaks
results["loss_streak"] = results["result"].rolling(window=len(results), min_periods=1).apply(lambda x: calc_streaks(x)["loss"], raw=False)

# calculate draw streaks
results["draw_streak"] = results["result"].rolling(window=len(results),min_periods=1).apply(lambda x: calc_streaks(x)["draw"], raw=False)

print(results)




In [None]:
import pandas as pd
import numpy as np

# create a simple DataFrame with random values
df = pd.DataFrame(np.random.randn(10, 2), columns=['A', 'B'])

# apply a rolling sum to column A with a window size of 3
rolling_sum = df['A'].rolling(window=3).sum()

# print the original DataFrame and the rolling sum
print(df)
print(rolling_sum)

In [None]:
my_list = [1, 2, 3, 4, 5]
print(my_list)
shift_by = 1

# Reverse the list
#my_list = my_list[::-1]
#print(my_list)
# Shift the elements to the right by 1 positions

my_list = [0]*shift_by + my_list[:-shift_by]
print(my_list)
# Reverse the list back to its original order
#my_list = my_list[::-1]

#print(my_list)