In [25]:
import pandas as pd
import numpy as np
import os

import requests
from bs4 import BeautifulSoup

import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)

In [76]:
# Downloads the updated csv for the 2023 season
import gdown

def update_df():
    '''
    Download most recent csv file, concat with rest of data, return the full df
    '''
    output = "lol_2023.csv" #What to save the downloaded file as
    id = "1XXk2LO0CsNADBB1LRGOV5rUpyZdEZ8s2" #The id from the google drive file
    gdown.download(id=id, output=output, quiet=False)
    
    df_2021 = pd.read_csv('lol_2021.csv')
    df_2022 = pd.read_csv('lol_2022.csv')
    df_2023 = pd.read_csv('lol_2023.csv')
    df = pd.concat([df_2021,df_2022,df_2023])
    return df

def get_wiki():
    '''
    Returns chart from wikipedia containing info for Tier 1 and Tier 2 leagues
    Returns tier1, tier2
    '''
    wiki = pd.read_html('https://en.wikipedia.org/wiki/List_of_League_of_Legends_leagues_and_tournaments')
    return wiki[1], wiki[3]

def add_opp_name(df): #tup = list of tuples
    # Create an 'opp_name' column for each row
    evens = range(0,df.shape[0],2)
    odds = range(1,df.shape[0],2)
    tup = [(a,b) for a,b in zip(evens,odds)] # list of tuples
    
    for t in tup: #iterate through list of tuples
        a,b= t #unpack each tuple into two values
        df.loc[a,'opp_name']=df.teamname.loc[b] #create new column w/opp_name
        df.loc[b,'opp_name']=df.teamname.loc[a]
    return df

def add_opp_elo(df): #tup = list of tuples
    # Create an 'opp_name' column for each row
    evens = range(0,df.shape[0],2)
    odds = range(1,df.shape[0],2)
    tup = [(a,b) for a,b in zip(evens,odds)] # list of tuples
    
    for t in tup: #iterate through list of tuples
        a,b= t #unpack each tuple into two values
        df.loc[a,'opp_name']=df.elo.loc[b] #create new column w/opp_name
        df.loc[b,'opp_name']=df.elo.loc[a]
    return df

def win_percent(elo_a,elo_b): #Calculate odds to win based off of elo rankings
    return 1/(1+10**((elo_b-elo_a)/400)) #elo_a is who you're calculating for, elo_b is opponent

def win_prob(x): # x is the American odds (-110,110,etc.) Calculates probability of winning
    if x < 0 :
        x = x*-1
        return x / (x + 100)
    else: 
        return 100 / (x + 100)

def gain_elo(elo,opp_elo,k=32): #Gain elo after a win, k=24, expected = 1
    return int(elo+k*(1-win_percent(elo,opp_elo)))

def lose_elo(elo,opp_elo,k=32): #Lose elo after a loss, k=24, expected = 0
    return int(elo+k*(0-win_percent(elo,opp_elo)))

def tie_elo(elo,opp_elo,k=32): #Lose elo after a loss, k=24, expected = .5
    return int(elo+k*(.5-win_percent(elo,opp_elo)))


def wrangle_df(df):
    leagues = ['LCK','LPL','LEC','LCS','PCS','VCS','CBLOL','LJL','LLA','UL','SL','LFL','LCO','CBLOLA'] # These are my 9 tier 1 leagues that I'll keep
    df = df[df.league.isin(leagues)] #Grab leagues of interest
    df = df[df.position=='team'] #Remove individual player stats
    
    df = df.reset_index(drop=True) #Reset index
    
    df.split = df.split.str.replace('Split 1','Spring').str.replace('Split 2','Summer') #Rename 'split' names
    df.split = df.split.str.replace('Opening','Spring').str.replace('Closing','Summer')

    cols = ['teamname','league','split','date', 'side', 'gamelength','game', 'result', 'teamkills', 
            'teamdeaths', 'firstblood', 'position', 'dragons', 'barons', 'opp_barons','towers', 'opp_towers', 
            'inhibitors', 'opp_inhibitors', 'damagetochampions', 'damagetakenperminute', 'wardsplaced', 'wardskilled', 
            'controlwardsbought', 'totalgold', 'gspd'] #Columns to keep

    df = df[cols] #Remove unwanted columns
    df = df.dropna() #Drop nan values
    
    df.date = pd.to_datetime(df.date,infer_datetime_format=True) #Change to datetime object
    del df['position'] # Delete 'position' column
    df = df.sort_values('date') #Sort by date
    df = df.reset_index(drop=True) #Reset index
    df.side = np.where(df.side=='Blue',1,0) #Add 'side' column for 'blue' or 'red'
    df.rename(columns={'side':'blue_side'},inplace = True) #Change 'side' to 'blue_side'
    
    df['old_elo']=np.NaN #create new elo column
    df['new_elo']=np.NaN
    df['opp_elo']=np.NaN
    df.loc[df[~df.teamname.duplicated()].index,'old_elo']=1200 #set elo for first game to 1200 for each team

    df = add_opp_name(df) #adds opponents' name
    
    for i in range(0,df.shape[0]):
        opp_name = df.loc[i,'opp_name']
        df.loc[i,'opp_elo'] = df[(df.teamname==opp_name)&(~df.old_elo.isna())]['old_elo'].iloc[-1]
        if df.loc[i,'result'] == 1:
            df.loc[i,'new_elo'] = gain_elo(df.loc[i,'old_elo'],df.loc[i,'opp_elo'])
        else:
            df.loc[i,'new_elo'] = lose_elo(df.loc[i,'old_elo'],df.loc[i,'opp_elo'])

        team_name = df.loc[i,'teamname']
        try:
            next_game_index = df[(df.teamname==team_name)&df.old_elo.isna()]['old_elo'].index[0]
            df.loc[next_game_index,'old_elo'] = df[(df.teamname==team_name)&(~df.new_elo.isna())]['new_elo'].iloc[-1]
        except:
            pass
    
    df.to_csv('final.csv') #Save to csv file
    
    return df

    

# Update and wrangle professional games for Spring and Summer splits

In [43]:
df = update_df()
df.to_csv('raw.csv')
df = wrangle_df(df)

Downloading...
From: https://drive.google.com/uc?id=1XXk2LO0CsNADBB1LRGOV5rUpyZdEZ8s2
To: /Users/thegootch/codeup-data-science/league/lol_2023.csv
100%|██████████████████████████████████████| 34.2M/34.2M [00:03<00:00, 10.8MB/s]
  df = update_df()


In [44]:
raw[(raw.league=='PRM')&(raw.position=='team')].sort_values('date',ascending=False).head(20)

Unnamed: 0,gameid,datacompleteness,url,league,year,split,playoffs,date,game,patch,participantid,side,position,playername,playerid,teamname,teamid,champion,ban1,ban2,ban3,ban4,ban5,gamelength,result,kills,deaths,assists,teamkills,teamdeaths,doublekills,triplekills,quadrakills,pentakills,firstblood,firstbloodkill,firstbloodassist,firstbloodvictim,team kpm,ckpm,firstdragon,dragons,opp_dragons,elementaldrakes,opp_elementaldrakes,infernals,mountains,clouds,oceans,chemtechs,hextechs,dragons (type unknown),elders,opp_elders,firstherald,heralds,opp_heralds,firstbaron,barons,opp_barons,firsttower,towers,opp_towers,firstmidtower,firsttothreetowers,turretplates,opp_turretplates,inhibitors,opp_inhibitors,damagetochampions,dpm,damageshare,damagetakenperminute,damagemitigatedperminute,wardsplaced,wpm,wardskilled,wcpm,controlwardsbought,visionscore,vspm,totalgold,earnedgold,earned gpm,earnedgoldshare,goldspent,gspd,total cs,minionkills,monsterkills,monsterkillsownjungle,monsterkillsenemyjungle,cspm,goldat10,xpat10,csat10,opp_goldat10,opp_xpat10,opp_csat10,golddiffat10,xpdiffat10,csdiffat10,killsat10,assistsat10,deathsat10,opp_killsat10,opp_assistsat10,opp_deathsat10,goldat15,xpat15,csat15,opp_goldat15,opp_xpat15,opp_csat15,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15
52799,ESPORTSTMNT04_2670516,complete,,PRM,2023,,1,2023-04-01 16:51:53,5.0,13.05,200,Red,team,,,SK Gaming Prime,oe:team:aa8c3b781d96ddb7a24ab3240918988,,LeBlanc,Jayce,Rakan,Fiora,Blitzcrank,1603,0,6,21,12,6,21,0.0,0.0,0.0,0.0,0.0,,,,0.2246,1.0106,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,10.0,0.0,0.0,0.0,6.0,0.0,2.0,45659.0,1709.0081,,3611.6032,2942.9944,96.0,3.5933,35.0,1.31,41.0,192.0,7.1865,42230,24501.0,917.068,,40125.0,-0.247817,,728.0,121.0,,,31.7779,14873.0,18221.0,322.0,16427.0,18923.0,350.0,-1554.0,-702.0,-28.0,1.0,1.0,2.0,2.0,4.0,1.0,23909.0,28171.0,487.0,27201.0,31280.0,558.0,-3292.0,-3109.0,-71.0,3.0,6.0,7.0,7.0,16.0,3.0
52798,ESPORTSTMNT04_2670516,complete,,PRM,2023,,1,2023-04-01 16:51:53,5.0,13.05,100,Blue,team,,,Unicorns of Love Sexy Edition,oe:team:6a060bd74c9041bf0aa89adf9d15d12,,Vi,Kalista,Nautilus,Wukong,Lee Sin,1603,1,21,7,48,21,7,3.0,0.0,0.0,0.0,1.0,,,,0.786,1.0106,1.0,4.0,0.0,4.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,,0.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,1.0,10.0,0.0,1.0,1.0,6.0,0.0,2.0,0.0,77413.0,2897.5546,,3053.5995,2872.8883,80.0,2.9944,42.0,1.5721,27.0,225.0,8.4217,57954,40225.0,1505.6145,,51475.0,0.247817,,780.0,213.0,,,37.1678,16427.0,18923.0,350.0,14873.0,18221.0,322.0,1554.0,702.0,28.0,2.0,4.0,1.0,1.0,1.0,2.0,27201.0,31280.0,558.0,23909.0,28171.0,487.0,3292.0,3109.0,71.0,7.0,16.0,3.0,3.0,6.0,7.0
52739,ESPORTSTMNT04_2669492,complete,,PRM,2023,,1,2023-04-01 15:54:27,4.0,13.05,200,Red,team,,,Unicorns of Love Sexy Edition,oe:team:6a060bd74c9041bf0aa89adf9d15d12,,Vi,Kalista,Wukong,Syndra,Sion,1763,0,6,15,16,6,15,0.0,0.0,0.0,0.0,0.0,,,,0.2042,0.7147,0.0,1.0,4.0,1.0,4.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,10.0,0.0,0.0,1.0,10.0,0.0,2.0,54422.0,1852.1384,,2988.0885,3127.249,81.0,2.7567,31.0,1.055,21.0,192.0,6.5343,48091,28730.0,977.7652,,46725.0,-0.08749,,832.0,159.0,,,33.7266,14610.0,17594.0,311.0,16695.0,18366.0,305.0,-2085.0,-772.0,6.0,0.0,0.0,3.0,3.0,8.0,0.0,22678.0,28961.0,503.0,27138.0,30622.0,499.0,-4460.0,-1661.0,4.0,1.0,3.0,6.0,6.0,15.0,1.0
52738,ESPORTSTMNT04_2669492,complete,,PRM,2023,,1,2023-04-01 15:54:27,4.0,13.05,100,Blue,team,,,SK Gaming Prime,oe:team:aa8c3b781d96ddb7a24ab3240918988,,LeBlanc,Jayce,Xayah,Fiora,Irelia,1763,1,15,6,38,15,6,1.0,0.0,0.0,0.0,1.0,,,,0.5105,0.7147,1.0,4.0,1.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,,0.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,1.0,10.0,2.0,1.0,1.0,10.0,1.0,2.0,0.0,65022.0,2212.8871,,2988.8032,2984.447,94.0,3.1991,37.0,1.2592,36.0,243.0,8.27,59701,40340.0,1372.8871,,51000.0,0.08749,,833.0,180.0,,,34.4753,16695.0,18366.0,305.0,14610.0,17594.0,311.0,2085.0,772.0,-6.0,3.0,8.0,0.0,0.0,0.0,3.0,27138.0,30622.0,499.0,22678.0,28961.0,503.0,4460.0,1661.0,-4.0,6.0,15.0,1.0,1.0,3.0,6.0
52691,ESPORTSTMNT04_2669485,complete,,PRM,2023,,1,2023-04-01 14:41:00,3.0,13.05,200,Red,team,,,SK Gaming Prime,oe:team:aa8c3b781d96ddb7a24ab3240918988,,LeBlanc,Jayce,Rakan,Fiora,Olaf,2416,0,20,23,54,20,23,1.0,0.0,0.0,0.0,1.0,,,,0.4967,1.0679,0.0,3.0,3.0,3.0,3.0,0.0,0.0,1.0,2.0,0.0,0.0,,0.0,0.0,1.0,2.0,0.0,0.0,2.0,1.0,1.0,8.0,9.0,1.0,1.0,4.0,3.0,3.0,1.0,111889.0,2778.7003,,3298.4603,3626.6722,152.0,3.7748,59.0,1.4652,77.0,371.0,9.2136,73179,47158.0,1171.1424,,70785.0,0.015447,,940.0,228.0,,,29.0066,15503.0,18863.0,339.0,15859.0,19290.0,351.0,-356.0,-427.0,-12.0,1.0,2.0,2.0,2.0,6.0,1.0,23834.0,29520.0,527.0,26324.0,32341.0,554.0,-2490.0,-2821.0,-27.0,1.0,2.0,6.0,6.0,14.0,1.0
52690,ESPORTSTMNT04_2669485,complete,,PRM,2023,,1,2023-04-01 14:41:00,3.0,13.05,100,Blue,team,,,Unicorns of Love Sexy Edition,oe:team:6a060bd74c9041bf0aa89adf9d15d12,,Wukong,Vi,Nautilus,Leona,Sion,2416,1,23,20,71,23,20,6.0,2.0,0.0,0.0,0.0,,,,0.5712,1.0679,1.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,2.0,0.0,1.0,,0.0,0.0,0.0,0.0,2.0,1.0,1.0,2.0,0.0,9.0,8.0,0.0,0.0,3.0,4.0,1.0,3.0,105725.0,2625.6209,,3818.4189,4474.1722,139.0,3.452,54.0,1.3411,46.0,306.0,7.5993,76755,50734.0,1259.9503,,69700.0,-0.015447,,1064.0,253.0,,,32.707,15859.0,19290.0,351.0,15503.0,18863.0,339.0,356.0,427.0,12.0,2.0,6.0,1.0,1.0,2.0,2.0,26324.0,32341.0,554.0,23834.0,29520.0,527.0,2490.0,2821.0,27.0,6.0,14.0,1.0,1.0,2.0,6.0
52667,ESPORTSTMNT04_2669481,complete,,PRM,2023,,1,2023-04-01 13:42:15,2.0,13.05,200,Red,team,,,Unicorns of Love Sexy Edition,oe:team:6a060bd74c9041bf0aa89adf9d15d12,,Vi,Wukong,Nautilus,Maokai,Jarvan IV,1786,0,7,23,16,7,23,0.0,0.0,0.0,0.0,1.0,,,,0.2352,1.0078,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,2.0,9.0,0.0,0.0,4.0,7.0,0.0,1.0,43374.0,1457.1333,,3105.084,3168.0739,98.0,3.2923,49.0,1.6461,30.0,230.0,7.7268,48578,28983.0,973.673,,46400.0,-0.205463,,817.0,150.0,,,32.486,15099.0,16821.0,294.0,17261.0,17021.0,314.0,-2162.0,-200.0,-20.0,2.0,4.0,6.0,6.0,12.0,2.0,24431.0,27845.0,487.0,28038.0,29196.0,531.0,-3607.0,-1351.0,-44.0,4.0,8.0,9.0,9.0,16.0,4.0
52666,ESPORTSTMNT04_2669481,complete,,PRM,2023,,1,2023-04-01 13:42:15,2.0,13.05,100,Blue,team,,,SK Gaming Prime,oe:team:aa8c3b781d96ddb7a24ab3240918988,,LeBlanc,Jayce,Xayah,Thresh,Ahri,1786,1,23,7,54,23,7,4.0,0.0,0.0,0.0,0.0,,,,0.7727,1.0078,1.0,4.0,0.0,4.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,1.0,9.0,2.0,1.0,1.0,7.0,4.0,1.0,0.0,75439.0,2534.3449,,2519.2609,3219.3393,120.0,4.0314,41.0,1.3774,43.0,294.0,9.8768,64189,44594.0,1498.1187,,57025.0,0.205463,,840.0,207.0,,,35.1736,17261.0,17021.0,314.0,15099.0,16821.0,294.0,2162.0,200.0,20.0,6.0,12.0,2.0,2.0,4.0,6.0,28038.0,29196.0,531.0,24431.0,27845.0,487.0,3607.0,1351.0,44.0,9.0,16.0,4.0,4.0,8.0,9.0
52607,ESPORTSTMNT04_2670495,complete,,PRM,2023,,1,2023-04-01 12:22:12,1.0,13.05,200,Red,team,,,SK Gaming Prime,oe:team:aa8c3b781d96ddb7a24ab3240918988,,LeBlanc,Graves,K'Sante,Olaf,Zeri,1951,0,3,14,11,3,14,0.0,0.0,0.0,0.0,0.0,,,,0.0923,0.5228,1.0,3.0,2.0,3.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,0.0,3.0,10.0,0.0,0.0,2.0,4.0,0.0,3.0,47147.0,1449.9334,,3128.6725,3913.3778,116.0,3.5674,68.0,2.0912,43.0,257.0,7.9036,51803,30525.0,938.7494,,49870.0,-0.158589,,951.0,196.0,,,35.2742,15100.0,19244.0,357.0,15391.0,19429.0,357.0,-291.0,-185.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23031.0,29955.0,563.0,24662.0,30193.0,548.0,-1631.0,-238.0,15.0,0.0,0.0,2.0,2.0,4.0,0.0
52606,ESPORTSTMNT04_2670495,complete,,PRM,2023,,1,2023-04-01 12:22:12,1.0,13.05,100,Blue,team,,,Unicorns of Love Sexy Edition,oe:team:6a060bd74c9041bf0aa89adf9d15d12,,Wukong,Vi,Nautilus,Syndra,Sion,1951,1,14,3,32,14,3,4.0,0.0,0.0,0.0,1.0,,,,0.4305,0.5228,0.0,2.0,3.0,2.0,3.0,0.0,0.0,1.0,0.0,0.0,1.0,,0.0,0.0,1.0,1.0,1.0,1.0,2.0,0.0,1.0,10.0,3.0,1.0,1.0,4.0,2.0,3.0,0.0,75699.0,2328.0062,,2558.1035,3402.0707,142.0,4.367,70.0,2.1527,29.0,346.0,10.6407,64306,43028.0,1323.2599,,58460.0,0.158589,,960.0,202.0,,,35.7355,15391.0,19429.0,357.0,15100.0,19244.0,357.0,291.0,185.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24662.0,30193.0,548.0,23031.0,29955.0,563.0,1631.0,238.0,-15.0,2.0,4.0,0.0,0.0,0.0,2.0


In [45]:
df[['teamname','league','date','result','opp_name']].tail(20)

Unnamed: 0,teamname,league,date,result,opp_name
19860,Maturalni Forsaken,UL,2023-05-30 15:02:41,0,Orbit Anonymo
19861,Orbit Anonymo,UL,2023-05-30 15:02:41,1,Maturalni Forsaken
19862,Grypciocraft Esports,UL,2023-05-30 16:03:30,0,exeed
19863,exeed,UL,2023-05-30 16:03:30,1,Grypciocraft Esports
19864,Finetwork KOI,SL,2023-05-30 16:04:18,1,Guasones
19865,Guasones,SL,2023-05-30 16:04:18,0,Finetwork KOI
19866,Rebels Gaming,SL,2023-05-30 17:04:32,0,BISONS ECLUB
19867,BISONS ECLUB,SL,2023-05-30 17:04:32,1,Rebels Gaming
19868,Komil & Friends,UL,2023-05-30 17:05:26,0,Illuminar Gaming
19869,Illuminar Gaming,UL,2023-05-30 17:05:26,1,Komil & Friends


# Sports Betting Section

In [46]:
def get_league(team): #Returns a league ("LCS,LPL,etc.") sorted by latest elo
    return current_elo[current_elo.league==team].sort_values('new_elo',ascending=False)

def get_team(df, team):
    return df[df.teamname==team].sort_values(by='date',ascending = False)

def single_game_odds(df, teams, opponents, bet_odds):
    """
    Returns a DataFrame with 'home' and 'away' teams with elos and implied odds.
    """
    team_list = []
    for team, opponent, odds in zip(teams, opponents, bet_odds):
        row1 = df[df.teamname==team][['teamname', 'new_elo']]
        row1['odds'] = odds[0]
        row2 = df[df.teamname==opponent][['teamname', 'new_elo']]
        row2['odds'] = odds[1]
        row3 = pd.concat([row1, row2])
        row3.columns = ['teamname', 'elo', 'odds']
        row3['opponent'] = [row2.teamname.iloc[0], row1.teamname.iloc[0]]
        row3['next_opp_elo'] = [row2.new_elo.iloc[0], row1.new_elo.iloc[0]]
        team_list.append(row3)
    temp = pd.concat(team_list)
    temp = temp[['teamname', 'elo', 'opponent', 'next_opp_elo', 'odds']]
    temp['implied_odds'] = temp.odds.apply(win_prob)
    temp['elo_odds'] = temp.apply(lambda row: win_percent(row['elo'], row['next_opp_elo']), axis=1)
    temp['odds_diff'] = temp.elo_odds - temp.implied_odds
    temp.sort_values('odds_diff', ascending=False, inplace=True)
    return temp.reset_index(drop=True)

import math

def series_3(probability):
    num_wins_required = 2
    num_games_required = (num_wins_required * 2) - 1

    # Calculate the probability of winning a single game
    p_win = probability

    # Calculate the probability of losing a single game
    p_loss = 1 - p_win

    # Calculate the odds of winning a best-of-3 series
    odds = 0

    for wins in range(num_wins_required, num_games_required + 1):
        # Calculate the number of combinations to achieve the current number of wins
        combinations = math.comb(num_games_required, wins)

        # Calculate the probability of achieving the current number of wins
        p_current_wins = p_win ** wins * p_loss ** (num_games_required - wins)

        odds += combinations * p_current_wins

    return odds


def series_5(probability):
    num_wins_required = 3
    num_games_required = (num_wins_required * 2) - 1

    # Calculate the probability of winning a single game
    p_win = probability

    # Calculate the probability of losing a single game
    p_loss = 1 - p_win

    # Calculate the odds of winning a best-of-5 series
    odds = 0

    for wins in range(num_wins_required, num_games_required + 1):
        # Calculate the number of combinations to achieve the current number of wins
        combinations = math.comb(num_games_required, wins)

        # Calculate the probability of achieving the current number of wins
        p_current_wins = p_win ** wins * p_loss ** (num_games_required - wins)

        odds += combinations * p_current_wins

    return odds


def best_of_3_odds(df, teams, opponents, bet_odds):
    """
    Returns a DataFrame with 'home' and 'away' teams with elos and implied odds.
    """
    team_list = []
    for team, opponent, odds in zip(teams, opponents, bet_odds):
        row1 = df[df.teamname==team][['teamname', 'new_elo']]
        row1['odds'] = odds[0]
        row2 = df[df.teamname==opponent][['teamname', 'new_elo']]
        row2['odds'] = odds[1]
        row3 = pd.concat([row1, row2])
        row3.columns = ['teamname', 'elo', 'odds']
        row3['opponent'] = [row2.teamname.iloc[0], row1.teamname.iloc[0]]
        row3['next_opp_elo'] = [row2.new_elo.iloc[0], row1.new_elo.iloc[0]]
        team_list.append(row3)
    temp = pd.concat(team_list)
    temp = temp[['teamname', 'elo', 'opponent', 'next_opp_elo', 'odds']]
    temp['implied_odds'] = temp.odds.apply(win_prob)
    temp['elo_odds'] = temp.apply(lambda row: win_percent(row['elo'], row['next_opp_elo']), axis=1)
    temp['series_odds'] = temp.apply(lambda row: series_3(row['elo_odds']), axis=1)
    temp['odds_diff'] = temp.series_odds - temp.implied_odds
    temp.sort_values('odds_diff', ascending=False, inplace=True)
    return temp.reset_index(drop=True)

In [47]:
#Read in df and create 'current_elo' df
df = pd.read_csv('final.csv',index_col=0) 
raw = pd.read_csv('raw.csv',index_col=0)

#'current_elo' contains every team and their latest elo
current_elo = df[~df.teamname.duplicated(keep='last')]\
[['teamname','league','opp_name','old_elo','opp_elo','new_elo']]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [48]:
get_team(df,"Orbit Anonymo")

Unnamed: 0,teamname,league,split,date,blue_side,gamelength,game,result,teamkills,teamdeaths,firstblood,dragons,barons,opp_barons,towers,opp_towers,inhibitors,opp_inhibitors,damagetochampions,damagetakenperminute,wardsplaced,wardskilled,controlwardsbought,totalgold,gspd,old_elo,new_elo,opp_elo,opp_name
19861,Orbit Anonymo,UL,Summer,2023-05-30 15:02:41,1,1710,1.0,1,27,10,0.0,2.0,1.0,1.0,10.0,3.0,2.0,0.0,84245.0,2984.3509,69.0,27.0,19.0,59652,0.18464,1172.0,1189.0,1195.0,Maturalni Forsaken
19845,Orbit Anonymo,UL,Summer,2023-05-29 18:19:06,0,1912,1.0,0,6,18,1.0,2.0,0.0,1.0,3.0,11.0,0.0,1.0,49138.0,2932.3431,80.0,31.0,19.0,48094,-0.124783,1189.0,1172.0,1168.0,Alior Bank Team
18189,Orbit Anonymo,UL,Spring,2023-03-08 18:07:55,0,1952,1.0,0,6,26,0.0,1.0,0.0,2.0,0.0,9.0,0.0,1.0,64265.0,3766.6291,86.0,40.0,29.0,49327,-0.195453,1200.0,1189.0,1323.0,Grypciocraft Esports


In [49]:
get_league("LFL")

Unnamed: 0,teamname,league,opp_name,old_elo,opp_elo,new_elo
18666,LDLC OL,LFL,Team BDS Academy,1338.0,1259.0,1350.0
18658,Aegis,LFL,Vitality.Bee,1248.0,1243.0,1263.0
18667,Team BDS Academy,LFL,LDLC OL,1259.0,1338.0,1246.0
18663,Team GO,LFL,Karmine Corp,1217.0,1161.0,1230.0
18659,Vitality.Bee,LFL,Aegis,1243.0,1248.0,1227.0
18654,GameWard,LFL,IZI Dream,1231.0,982.0,1205.0
18668,Solary,LFL,BK ROG Esports,1186.0,1208.0,1203.0
14417,Misfits Premier,LFL,Vitality.Bee,1203.0,1303.0,1191.0
18669,BK ROG Esports,LFL,Solary,1208.0,1186.0,1190.0
18662,Karmine Corp,LFL,Team GO,1161.0,1217.0,1147.0


In [50]:
#Input home and away teams with their odds and get back a df with the difference between betting odds and elo odds
home = ['Team GO','Vitality.Bee','Karmine Corp','Aegis','IZI Dream']
away = ['LDLC OL','BK ROG Esports','Team BDS Academy','Solary','GameWard']
odds = [(110,-150),(-275,200),(-188,133),(-125,-110),(150,-200)]
single = single_game_odds(current_elo,home,away,odds)
# series_odds = [(-250,175),(-163,120),(333,-500),(500,-900),(175,-250)]
# series = best_of_3_odds(current_elo,home,away,series_odds)

In [51]:
single

Unnamed: 0,teamname,elo,opponent,next_opp_elo,odds,implied_odds,elo_odds,odds_diff
0,Team BDS Academy,1246.0,Karmine Corp,1147.0,133,0.429185,0.638738,0.209553
1,BK ROG Esports,1190.0,Vitality.Bee,1227.0,200,0.333333,0.446953,0.11362
2,GameWard,1205.0,IZI Dream,1007.0,-200,0.666667,0.757639,0.090973
3,LDLC OL,1350.0,Team GO,1230.0,-150,0.6,0.666139,0.066139
4,Aegis,1263.0,Solary,1203.0,-125,0.555556,0.585499,0.029943
5,Solary,1203.0,Aegis,1263.0,-110,0.52381,0.414501,-0.109308
6,Team GO,1230.0,LDLC OL,1350.0,110,0.47619,0.333861,-0.14233
7,IZI Dream,1007.0,GameWard,1205.0,150,0.4,0.242361,-0.157639
8,Vitality.Bee,1227.0,BK ROG Esports,1190.0,-275,0.733333,0.553047,-0.180286
9,Karmine Corp,1147.0,Team BDS Academy,1246.0,-188,0.652778,0.361262,-0.291516


In [53]:
get_league('LCS')

Unnamed: 0,teamname,league,opp_name,old_elo,opp_elo,new_elo
19639,Cloud9,LCS,Golden Guardians,1364.0,1194.0,1372.0
19638,Golden Guardians,LCS,Cloud9,1194.0,1364.0,1185.0
19580,FlyQuest,LCS,Golden Guardians,1202.0,1171.0,1184.0
18725,Team Liquid,LCS,Evil Geniuses,1146.0,1183.0,1163.0
19125,100 Thieves,LCS,Golden Guardians,1168.0,1108.0,1149.0
19318,Evil Geniuses,LCS,Golden Guardians,1167.0,1144.0,1149.0
19192,Counter Logic Gaming,LCS,Evil Geniuses,1118.0,1195.0,1105.0
18723,TSM,LCS,FlyQuest,1093.0,1211.0,1082.0
18729,Immortals,LCS,100 Thieves,953.0,1213.0,947.0
18726,Dignitas,LCS,Golden Guardians,913.0,1093.0,936.0


In [55]:
df[df.teamname=='Rare Atom'][['teamname','opp_name','date','result']].tail(5)


Unnamed: 0,teamname,opp_name,date,result
18963,Rare Atom,Royal Never Give Up,2023-03-23 10:10:08,1
18968,Rare Atom,Royal Never Give Up,2023-03-23 10:56:42,0
19156,Rare Atom,Weibo Gaming,2023-03-26 11:46:40,1
19162,Rare Atom,Weibo Gaming,2023-03-26 12:38:48,0
19166,Rare Atom,Weibo Gaming,2023-03-26 13:32:04,0


# Open html file and parse with BeautifulSoup


In [56]:
import pandas as pd
from bs4 import BeautifulSoup

LEC - https://www.co.bet365.com/#/AC/B151/C20890316/D48/E1510001/F10/  
CBLOLA - https://www.co.bet365.com/#/AC/B151/C20890093/D48/E1510001/F10/  
CBLOL - https://www.co.bet365.com/#/AC/B151/C20889769/D48/E1510001/F10/  
LCK - https://www.co.bet365.com/#/AC/B151/C20889854/D48/E1510001/F10/  
LCO - https://www.co.bet365.com/#/AC/B151/C20890087/D48/E1510001/F10/  
LFL - https://www.co.bet365.com/#/AC/B151/C20890217/D48/E1510001/F10/  
LPL - https://www.co.bet365.com/#/AC/B151/C20889805/D48/E1510001/F10/  
SL - https://www.co.bet365.com/#/AC/B151/C20890120/D48/E1510001/F10/   
UL - https://www.co.bet365.com/?_h=stk6mbH5dnUh1pzKwtn2RQ%3D%3D#/AC/B151/C20889870/D48/E1510001/F10/


In [57]:
df.league.unique()

array(['LPL', 'LCK', 'LCS', 'CBLOL', 'SL', 'UL', 'LFL', 'CBLOLA', 'VCS',
       'LEC', 'LJL', 'LLA', 'PCS', 'LCO'], dtype=object)

In [79]:
#Parses html files for each league to pull updated odds from bet365
html_files =['lec.html','cblola.html','cblol.html','lck.html','lco.html','lfl.html','lpl.html','sl.html','ul.html']
league_name = ['lec','cblola','cblol','lck','lco','lfl','lpl','sl','ul']

temp = []
for html, name in zip(html_files,league_name):
    file_path = html  # Specify the path to the HTML file in the local directory

    # Read the HTML content from the file
    with open(file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html,'html.parser')
    
    # Get list of teams
    team_html = soup.select('div.ses-ParticipantFixtureDetailsHigherEsports_Team')
    team_list = [i.text for i in team_html]

    # Get list of odds for each team
    span_element = soup.find_all('span', class_='src-ParticipantOddsOnly50_Odds')
    odds_list = [int(i.text) for i in span_element]
    
    df = pd.DataFrame({'teamname':team_list,'odds':odds_list})
    df = add_opp_name(df)
    df['league'] = name
    temp.append(df)
    
pd.concat(temp)

Unnamed: 0,teamname,odds,opp_name,league
0,MAD Lions,-120,Team Vitality,lec
1,Team Vitality,-120,MAD Lions,lec
2,Team Heretics,100,SK Gaming,lec
3,SK Gaming,-138,Team Heretics,lec
4,KOI,-250,Excel,lec
...,...,...,...,...
5,Illuminar Gaming,-163,Forsaken,ul
6,Orbit Anonymo,100,Zero Tenacity,ul
7,Zero Tenacity,-138,Orbit Anonymo,ul
8,Komil&Friends,250,Alior Bank Team,ul


In [61]:
df = pd.DataFrame({'team':team_list,'odds':odds_list,'league':'lec'})
df

Unnamed: 0,team,odds,league
0,paiN Gaming,110,lec
1,LOUD,-150,lec
2,Liberty,125,lec
3,INTZ,-175,lec
4,KaBuM! e-Sports,162,lec
5,Los Grandes,-225,lec
6,Fluxo,-138,lec
7,Vivo Keyd Stars,100,lec
8,RED Canids,-250,lec
9,FURIA,175,lec


Unnamed: 0,team,odds
0,paiN Gaming,110
1,LOUD,-150
2,Liberty,125
3,INTZ,-175
4,KaBuM! e-Sports,162
5,Los Grandes,-225
6,Fluxo,-138
7,Vivo Keyd Stars,100
8,RED Canids,-250
9,FURIA,175


# Don't go past here yet

In [None]:
numerical = train.select_dtypes(['int','float']).columns

In [None]:
def create_target(groupby):
    groupby['target']=groupby['result'].shift(-1)
    return groupby

In [None]:
def add_target(df):
    df = df.groupby('teamname').apply(create_target)
    df.loc[pd.isnull(df.target),'target'] =2
    df.target = df.target.astype(int,errors='ignore')
    return df

In [None]:
df = add_target(df)

In [None]:
from sklearn.preprocessing import MinMaxScaler #scale all numerical columns

removed_columns = ['teamname','league','date','target','opp_name']
selected_columns = df.columns[~df.columns.isin(removed_columns)]

scaler = MinMaxScaler()
df[selected_columns] = scaler.fit_transform(df[selected_columns])


In [None]:
# Create rolling averages for columns, concat as new columns to df


def rolling(team):
    rolling = team.rolling(10).mean()
    return rolling

def add_rolling(df):
    cols = ['gamelength','teamkills','teamdeaths','firstblood','dragons','barons','opp_barons','towers','opp_towers',\
       'inhibitors','opp_inhibitors','damagetochampions','damagetakenperminute','wardsplaced','wardskilled',\
       'controlwardsbought','totalgold','gspd']

    df_rolling=df[list(cols)+['teamname']]
    
    
    df_rolling = df_rolling.groupby('teamname',group_keys=False)[cols].apply(rolling)

    rolling_cols = [f'{col}_rolling' for col in df_rolling.columns]
    df_rolling.columns = rolling_cols
    df = pd.concat([df,df_rolling],axis=1)
    return df.dropna()

In [None]:
def next_opp(team):
    team['next_opp'] = team['opp_name'].shift(-1)
    return team
def add_opp(df):
    df = df.groupby('teamname').apply(next_opp)
    df.loc[df.next_opp.isnull(),'next_opp'] = 2
    return df

In [None]:
add_opp(df)

In [None]:
def next_side(team):
    team['next_blue'] = team['blue_side'].shift(-1)
    return team

def add_next_side(df):
    df = df.groupby('teamname').apply(next_side)
    df.loc[df.next_blue.isnull(),'next_blue']=2
    df.next_blue = df.next_blue.astype(int,errors='ignore')
    return df

In [None]:
def next_date(team):
    team['next_date'] = team['date'].shift(-1)
    return team

def add_next_date(df):
    df = df.groupby('teamname').apply(next_date)
    df.loc[df.next_date.isnull(),'next_date']=2
    return df

In [None]:
full = df.merge(df[rolling_cols + ["next_opp", "next_date", "teamname"]], left_on=["teamname", "next_date"], \
                right_on=["next_opp", "next_date"])

In [None]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier

def create_objects():
    rr = RidgeClassifier(solver ='sag',normalize=False,)
    split = TimeSeriesSplit(n_splits=3)
    sfs = SequentialFeatureSelector(rr, n_features_to_select=14,direction='backward',cv=split,n_jobs=-1)

create_objects()

In [None]:
removed_columns = list(full.columns[full.dtypes=='object']) + removed_columns
selected_columns = full.columns[~full.columns.isin(removed_columns)]

In [None]:
sfs.fit(full[selected_columns],full['target'])

In [None]:
selectors = selected_columns[sfs.get_support()]

In [None]:
predictions = backtest(full,rr,selectors,'target')

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(predictions.actual,predictions.prediction)

.5665 'forward', rr

In [None]:
def near_split(x, num_bins): #Split my df into equal splits to perform backtesting
    quotient, remainder = divmod(x, num_bins)
    bins = [quotient + 1] * remainder + [quotient] * (num_bins - remainder)
    count = 0
    new_list = []
    for b in bins:
        count += b
        new_list.append(count)
    return new_list

splits = near_split(df.shape[0],5)
last_split = splits[4]-splits[3] #Difference between last two values for final 'test' set

In [None]:
def backtest(data,model,predictors,target):
    all_predictions= []
    
    for i in range(0,len(splits)-1):
        train = data.loc[:splits[i]]
        test = data.loc[splits[i]:splits[i]+last_split]
        
        model.fit(train[predictors],train[target])
        preds = model.predict(test[predictors])
        preds = pd.Series(preds,index=test.index)
        combined = pd.concat([test[target],preds],axis=1)
        combined.columns = ['actual','prediction']
        
        all_predictions.append(combined)
        
    return pd.concat(all_predictions)
        
        
        
        


In [None]:
predictions = backtest(full,rr,selectors,'target')

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(predictions.actual,predictions.prediction)

### optimize ridge regression

In [None]:
from sklearn import decomposition
from sklearn import linear_model
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score

In [None]:
X = full[selectors]
y = full['target']

In [None]:
pca = decomposition.PCA()
ridge = linear_model.Ridge()

In [None]:
pipe = Pipeline(steps=[("pca", pca),
                        ("ridge", ridge)])

In [None]:
n_components = list(range(1,X.shape[1]+1,1))
normalize = [True, False]
solver = ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
parameters = dict(pca__n_components=n_components,
                      ridge__normalize=normalize,
                      ridge__solver=solver)

In [None]:
clf_GS = GridSearchCV(pipe, parameters)
clf_GS.fit(X, y)

In [None]:
clf_GS.best_params_

In [None]:
print("Best Number Of Components:", clf_GS.best_estimator_.get_params()["pca__n_components"])
print(); print(clf_GS.best_estimator_.get_params()["ridge"])

