In [20]:
import pandas as pd
import numpy as np
import os

import requests
from bs4 import BeautifulSoup

import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)

In [21]:
# Downloads the updated csv for the 2023 season
import gdown

def update_df():
    '''
    Download most recent csv file, concat with rest of data, return the full df
    '''
    output = "lol_2023.csv" #What to save the downloaded file as
    id = "1XXk2LO0CsNADBB1LRGOV5rUpyZdEZ8s2" #The id from the google drive file
    gdown.download(id=id, output=output, quiet=False)
    
    df_2021 = pd.read_csv('lol_2021.csv')
    df_2022 = pd.read_csv('lol_2022.csv')
    df_2023 = pd.read_csv('lol_2023.csv')
    df = pd.concat([df_2021,df_2022,df_2023])
    return df

def get_wiki():
    '''
    Returns chart from wikipedia containing info for Tier 1 and Tier 2 leagues
    Returns tier1, tier2
    '''
    wiki = pd.read_html('https://en.wikipedia.org/wiki/List_of_League_of_Legends_leagues_and_tournaments')
    return wiki[1], wiki[3]

def add_opp_name(df): #tup = list of tuples
    # Create an 'opp_name' column for each row
    evens = range(0,df.shape[0],2)
    odds = range(1,df.shape[0],2)
    tup = [(a,b) for a,b in zip(evens,odds)] # list of tuples
    
    for t in tup: #iterate through list of tuples
        a,b= t #unpack each tuple into two values
        df.loc[a,'opp_name']=df.teamname.loc[b] #create new column w/opp_name
        df.loc[b,'opp_name']=df.teamname.loc[a]
    return df

def add_opp_elo(df): #tup = list of tuples
    # Create an 'opp_name' column for each row
    evens = range(0,df.shape[0],2)
    odds = range(1,df.shape[0],2)
    tup = [(a,b) for a,b in zip(evens,odds)] # list of tuples
    
    for t in tup: #iterate through list of tuples
        a,b= t #unpack each tuple into two values
        df.loc[a,'opp_name']=df.elo.loc[b] #create new column w/opp_name
        df.loc[b,'opp_name']=df.elo.loc[a]
    return df

def win_percent(elo_a,elo_b): #Calculate odds to win based off of elo rankings
    return 1/(1+10**((elo_b-elo_a)/400)) #elo_a is who you're calculating for, elo_b is opponent

def win_prob(x): # x is the American odds (-110,110,etc.) Calculates probability of winning
    if x < 0 :
        x = x*-1
        return x / (x + 100)
    else: 
        return 100 / (x + 100)

def gain_elo(elo,opp_elo,k=32): #Gain elo after a win, k=24, expected = 1
    return int(elo+k*(1-win_percent(elo,opp_elo)))

def lose_elo(elo,opp_elo,k=32): #Lose elo after a loss, k=24, expected = 0
    return int(elo+k*(0-win_percent(elo,opp_elo)))

def tie_elo(elo,opp_elo,k=32): #Lose elo after a loss, k=24, expected = .5
    return int(elo+k*(.5-win_percent(elo,opp_elo)))


def wrangle_df(df):
    leagues = ['LCK','LPL','LEC','LCS','PCS','VCS','CBLOL','LJL','LLA','UL','SL','LFL','LCO','CBLOLA'] # These are my 9 tier 1 leagues that I'll keep
    df = df[df.league.isin(leagues)] #Grab leagues of interest
    df = df[df.position=='team'] #Remove individual player stats
    
    mapping = {'Excel Esports':'Excel','EDward Gaming':'Edward Gaming','KaBuM! Esports':'KaBuM! e-Sports',
     'BISONS ECLUB':'BISONS Eclub','exeed':'Exeed','Grypciocraft Esports':'Grypciocraft',
     'Komil&amp;Friends':'Komil&Friends','IZI Dream':'Izi Dream','Team BDS Academy':'Team BDS.A',
     'FURIA Academy':'FURIA.A','Fluxo Academy':'Fluxo.A','INTZ Academy':'INTZ.A','KaBuM! Academy':'KaBuM! e-Sports.A',
     'LOUD Academy':'LOUD.A','Liberty Academy':'Liberty.A','Los Grandes Academy':'Los Grandes.A',
     'RED Academy':'RED Canids.A','Vivo Keyd Stars Academy':'Vivo Keyd Stars.A','paiN Gaming Academy':'paiN Gaming.A',
     'MAMMOTH':'Mammoth'}

    df.teamname.replace(mapping,inplace=True)
    
    df.split = df.split.str.replace('Split 1','Spring').str.replace('Split 2','Summer') #Rename 'split' names
    df.split = df.split.str.replace('Opening','Spring').str.replace('Closing','Summer')

    cols = ['teamname','league','split','date', 'side', 'gamelength','game', 'result', 'teamkills', 
            'teamdeaths', 'firstblood', 'position', 'dragons', 'barons', 'opp_barons','towers', 'opp_towers', 
            'inhibitors', 'opp_inhibitors', 'damagetochampions', 'damagetakenperminute', 'wardsplaced', 'wardskilled', 
            'controlwardsbought', 'totalgold', 'gspd'] #Columns to keep

    df = df[cols] #Remove unwanted columns
    df = df.dropna() #Drop nan values
    
    df.date = pd.to_datetime(df.date,infer_datetime_format=True) #Change to datetime object
    del df['position'] # Delete 'position' column
    df = df.sort_values('date') #Sort by date
    df = df.reset_index(drop=True) #Reset index
    df.side = np.where(df.side=='Blue',1,0) #Add 'side' column for 'blue' or 'red'
    df.rename(columns={'side':'blue_side'},inplace = True) #Change 'side' to 'blue_side'
    
    df['old_elo']=np.NaN #create new elo column
    df['new_elo']=np.NaN
    df['opp_elo']=np.NaN
    df.loc[df[~df.teamname.duplicated()].index,'old_elo']=1200 #set elo for first game to 1200 for each team

    df = add_opp_name(df) #adds opponents' name
    
    for i in range(0,df.shape[0]):
        opp_name = df.loc[i,'opp_name']
        df.loc[i,'opp_elo'] = df[(df.teamname==opp_name)&(~df.old_elo.isna())]['old_elo'].iloc[-1]
        if df.loc[i,'result'] == 1:
            df.loc[i,'new_elo'] = gain_elo(df.loc[i,'old_elo'],df.loc[i,'opp_elo'])
        else:
            df.loc[i,'new_elo'] = lose_elo(df.loc[i,'old_elo'],df.loc[i,'opp_elo'])

        team_name = df.loc[i,'teamname']
        try:
            next_game_index = df[(df.teamname==team_name)&df.old_elo.isna()]['old_elo'].index[0]
            df.loc[next_game_index,'old_elo'] = df[(df.teamname==team_name)&(~df.new_elo.isna())]['new_elo'].iloc[-1]
        except:
            pass
    
    df.to_csv('final.csv') #Save to csv file
    
    return df

    

# Update and wrangle professional games for Spring and Summer splits

In [None]:
df = update_df()
df.to_csv('raw.csv')
df = wrangle_df(df)

Downloading...
From: https://drive.google.com/uc?id=1XXk2LO0CsNADBB1LRGOV5rUpyZdEZ8s2
To: /Users/thegootch/codeup-data-science/league/lol_2023.csv
100%|██████████████████████████████████████| 42.8M/42.8M [00:03<00:00, 10.8MB/s]
  df = update_df()


In [None]:
df[['teamname','league','date','result','opp_name']].tail()

# Test Section


In [6]:
import requests

In [7]:
response = requests.get('https://www.oddsportal.com/esports/league-of-legends/league-of-legends-lcs/results/')

In [10]:
file_path = 'historical.html'
with open(file_path, 'r', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html,'html.parser')

In [18]:
test = soup.select('div.border-black-borders.flex.flex-col.border-b.d')
#SAvrNaoM > div.border-black-borders.flex.flex-col.border-b > div > a > div.flex.items-center.gap-1.my-1.align-center.w-\[100\%\] > div > div > a.flex.items-start.justify-start.min-w-0.gap-1.cursor-pointer.justify-content.next-m\:\!items-center.next-m\:\!justify-center.min-sm\:min-w-\[180px\].next-m\:\!gap-2 > div.relative.block.truncate.whitespace-nowrap.group-hover\:underline.next-m\:\!ml-auto.text-gray-dark

In [19]:
test

[]

In [11]:
soup

<body class="main-inner-section-bgcolor bg-no-repeat !bg-fixed">
<div class="flex justify-center max-w-[1350px] m-auto main-inner-section-bgcolor" data-v-app="" id="app"><div class="relative flex flex-col w-full max-w-[1350px] font-main"><header class="flex flex-col text-sm bg-gray-light text-white-main"><div class="hidden items-center h-0 justify-center bg-[#333b3b] banner-slot border-gray-dark border-b"><div class="adsenvelope adstextpad banx-left_menu_1_soccer" id="lsadvert-zid-3978" style="width: 970px; display: none;"><div><div class="adscontent" id="lsadvert-left_menu_1_soccer"><iframe frameborder="0" id="lsadvert-zid-3978-iframe" name="banx-left_menu_1_soccer" scrolling="no" style="height: 0px; visibility: hidden; width: 970px;" title="zid-3978"></iframe></div><p class="w-4 h-auto ml-auto mt-[-25px] mr-[-18px] text-[10px] leading-4 -rotate-90 text-gray-medium height-content opacity-50">advertisement</p><div class="ads__text ads__text--h"></div></div></div></div><div class="items

# Sports Betting Section

In [5]:
def find_differing_values(series1,series2): #Takes 2 pd Series with string values and returns values that aren't in both
    # Find values in series1 but not in series2
    values_in_series1 = series1[~series1.isin(series2)]

    # Find values in series2 but not in series1
    values_in_series2 = series2[~series2.isin(series1)]

    # Print the results
    print("Values in series1 but not in series2:")
    print(values_in_series1)

    print("Values in series2 but not in series1:")
    print(values_in_series2)

def get_league(df, league_name): #Returns a league ("LCS,LPL,etc.") sorted by latest elo
    '''
    pass in 2 parameters:
    df, league_name
    '''
    return df[df.league==league_name].sort_values('new_elo',ascending=False)

def get_team(df, team,how_many):
    '''
    pass in 3 parameters:
    df, teamname, how many results you want
    '''
    return df[df.teamname==team][['teamname','opp_name','date','result','old_elo','opp_elo','new_elo']].\
sort_values(by='date',ascending = False).head(how_many)

def single_game_odds(df, teams, opponents, bet_odds):
    """
    Returns a DataFrame with 'home' and 'away' teams with elos and implied odds.
    """
    team_list = []
    for team, opponent, odds in zip(teams, opponents, bet_odds):
        row1 = df[df.teamname==team][['teamname', 'new_elo']]
        row1['odds'] = odds[0]
        row2 = df[df.teamname==opponent][['teamname', 'new_elo']]
        row2['odds'] = odds[1]
        row3 = pd.concat([row1, row2])
        row3.columns = ['teamname', 'elo', 'odds']
        row3['opponent'] = [row2.teamname.iloc[0], row1.teamname.iloc[0]]
        row3['next_opp_elo'] = [row2.new_elo.iloc[0], row1.new_elo.iloc[0]]
        team_list.append(row3)
    temp = pd.concat(team_list)
    temp = temp[['teamname', 'elo', 'opponent', 'next_opp_elo', 'odds']]
    temp['implied_odds'] = temp.odds.apply(win_prob)
    temp['elo_odds'] = temp.apply(lambda row: win_percent(row['elo'], row['next_opp_elo']), axis=1)
    temp['odds_diff'] = temp.elo_odds - temp.implied_odds
    temp.sort_values('odds_diff', ascending=False, inplace=True)
    return temp.reset_index(drop=True)

import math

def series_3(probability):
    num_wins_required = 2
    num_games_required = (num_wins_required * 2) - 1

    # Calculate the probability of winning a single game
    p_win = probability

    # Calculate the probability of losing a single game
    p_loss = 1 - p_win

    # Calculate the odds of winning a best-of-3 series
    odds = 0

    for wins in range(num_wins_required, num_games_required + 1):
        # Calculate the number of combinations to achieve the current number of wins
        combinations = math.comb(num_games_required, wins)

        # Calculate the probability of achieving the current number of wins
        p_current_wins = p_win ** wins * p_loss ** (num_games_required - wins)

        odds += combinations * p_current_wins

    return odds


def series_5(probability):
    num_wins_required = 3
    num_games_required = (num_wins_required * 2) - 1

    # Calculate the probability of winning a single game
    p_win = probability

    # Calculate the probability of losing a single game
    p_loss = 1 - p_win

    # Calculate the odds of winning a best-of-5 series
    odds = 0

    for wins in range(num_wins_required, num_games_required + 1):
        # Calculate the number of combinations to achieve the current number of wins
        combinations = math.comb(num_games_required, wins)

        # Calculate the probability of achieving the current number of wins
        p_current_wins = p_win ** wins * p_loss ** (num_games_required - wins)

        odds += combinations * p_current_wins

    return odds


def best_of_3_odds(df, teams, opponents, bet_odds):
    """
    Returns a DataFrame with 'home' and 'away' teams with elos and implied odds.
    """
    team_list = []
    for team, opponent, odds in zip(teams, opponents, bet_odds):
        row1 = df[df.teamname==team][['teamname', 'new_elo']]
        row1['odds'] = odds[0]
        row2 = df[df.teamname==opponent][['teamname', 'new_elo']]
        row2['odds'] = odds[1]
        row3 = pd.concat([row1, row2])
        row3.columns = ['teamname', 'elo', 'odds']
        row3['opponent'] = [row2.teamname.iloc[0], row1.teamname.iloc[0]]
        row3['next_opp_elo'] = [row2.new_elo.iloc[0], row1.new_elo.iloc[0]]
        team_list.append(row3)
    temp = pd.concat(team_list)
    temp = temp[['teamname', 'elo', 'opponent', 'next_opp_elo', 'odds']]
    temp['implied_odds'] = temp.odds.apply(win_prob)
    temp['elo_odds'] = temp.apply(lambda row: win_percent(row['elo'], row['next_opp_elo']), axis=1)
    temp['series_odds'] = temp.apply(lambda row: series_3(row['elo_odds']), axis=1)
    temp['odds_diff'] = temp.series_odds - temp.implied_odds
    temp.sort_values('odds_diff', ascending=False, inplace=True)
    return temp.reset_index(drop=True)

In [6]:
def calc_odds_diff(df):
    df['implied_odds'] = df.odds.apply(win_prob)
    df['elo_odds'] = df.apply(lambda row: win_percent(row['new_elo'], row['opp_elo']), axis=1)
    df['series_odds_3'] = df.apply(lambda row: series_3(row['elo_odds']), axis=1)
    df['series_odds_5'] = df.apply(lambda row: series_5(row['elo_odds']), axis=1)
    df['odds_diff'] = df.elo_odds - df.implied_odds
    df['odds_diff_3'] = df.series_odds_3 - df.implied_odds
    df['odds_diff_5'] = df.series_odds_5 - df.implied_odds
    return df

In [7]:
#Read in df and create 'current_elo' df
df = pd.read_csv('final.csv',index_col=0) 
raw = pd.read_csv('raw.csv',index_col=0)

#'current_elo' contains every team and their latest elo
current_elo = df[~df.teamname.duplicated(keep='last')]\
[['teamname','league','opp_name','old_elo','opp_elo','new_elo']]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [8]:
get_league(current_elo,"LFL")

Unnamed: 0,teamname,league,opp_name,old_elo,opp_elo,new_elo
20778,Aegis,LFL,Izi Dream,1319.0,988.0,1291.0
20783,BK ROG Esports,LFL,Solary,1270.0,1219.0,1283.0
20794,LDLC OL,LFL,Team BDS.A,1267.0,1212.0,1280.0
20790,Team GO,LFL,Karmine Corp,1266.0,1202.0,1279.0
20782,Solary,LFL,BK ROG Esports,1219.0,1270.0,1205.0
20795,Team BDS.A,LFL,LDLC OL,1212.0,1267.0,1198.0
14416,Misfits Premier,LFL,Vitality.Bee,1205.0,1294.0,1193.0
20791,Karmine Corp,LFL,Team GO,1202.0,1266.0,1188.0
20786,Vitality.Bee,LFL,GameWard,1190.0,1139.0,1171.0
20787,GameWard,LFL,Vitality.Bee,1139.0,1190.0,1157.0


In [9]:
# #Input home and away teams with their odds and get back a df with the difference between betting odds and elo odds
# home = ['Team GO','Vitality.Bee','Karmine Corp','Aegis','IZI Dream']
# away = ['LDLC OL','BK ROG Esports','Team BDS Academy','Solary','GameWard']
# odds = [(110,-150),(-275,200),(-188,133),(-125,-110),(150,-200)]
# single = single_game_odds(current_elo,home,away,odds)
# # series_odds = [(-250,175),(-163,120),(333,-500),(500,-900),(175,-250)]
# # series = best_of_3_odds(current_elo,home,away,series_odds)

In [10]:
get_team(df,'Rare Atom',4)

Unnamed: 0,teamname,opp_name,date,result,old_elo,opp_elo,new_elo
20758,Rare Atom,Invictus Gaming,2023-06-22 08:07:45,1,1001.0,932.0,1013.0
20755,Rare Atom,Invictus Gaming,2023-06-22 07:15:44,1,988.0,932.0,1001.0
20655,Rare Atom,Ultra Prime,2023-06-20 10:48:26,0,1007.0,954.0,988.0
20651,Rare Atom,Ultra Prime,2023-06-20 10:03:52,1,994.0,935.0,1007.0


# Open html file and parse with BeautifulSoup


In [11]:
import pandas as pd
from bs4 import BeautifulSoup
df = pd.read_csv('final.csv')

CBLOL - https://www.co.bet365.com/#/AC/B151/C20889769/D48/E1510001/F10/  
CBLOLA - https://www.co.bet365.com/#/AC/B151/C20890093/D48/E1510001/F10/  
LCK - https://www.co.bet365.com/#/AC/B151/C20889854/D48/E1510001/F10/  
LCO - https://www.co.bet365.com/#/AC/B151/C20890087/D48/E1510001/F10/  
LEC - https://www.co.bet365.com/#/AC/B151/C20890316/D48/E1510001/F10/  
LFL - https://www.co.bet365.com/#/AC/B151/C20890217/D48/E1510001/F10/  
LPL - https://www.co.bet365.com/#/AC/B151/C20889805/D48/E1510001/F10/  
SL - https://www.co.bet365.com/#/AC/B151/C20890120/D48/E1510001/F10/   
UL - https://www.co.bet365.com/?_h=stk6mbH5dnUh1pzKwtn2RQ%3D%3D#/AC/B151/C20889870/D48/E1510001/F10/  
LCS - https://www.co.bet365.com/?_h=stk6mbH5dnUh1pzKwtn2RQ%3D%3D#/AC/B151/C20889513/D48/E1510001/F10/  
VCS, LJL, LLA, LCO  



In [12]:
#Parses html files for each league to pull updated odds from bet365
html_files =['lcs.html','lec.html','cblola.html','cblol.html','lck.html','lco.html','lfl.html','lpl.html','sl.html','ul.html']
league_name = ['lcs','lec','cblola','cblol','lck','lco','lfl','lpl','sl','ul']

temp = []
for html, name in zip(html_files,league_name):
    file_path = html  # Specify the path to the HTML file in the local directory

    # Read the HTML content from the file
    with open(file_path, 'r', encoding='utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html,'html.parser')
    
    # Get list of teams
    team_html = soup.select('div.ses-ParticipantFixtureDetailsHigherEsports_Team')
    team_list = [i.text for i in team_html]

    # Get list of odds for each team
    span_element = soup.find_all('span', class_='src-ParticipantOddsOnly50_Odds')
    odds_list = [int(i.text) for i in span_element]
    
    df_temp = pd.DataFrame({'teamname':team_list,'odds':odds_list})
    df_temp = add_opp_name(df_temp)
    df_temp['league'] = name
    temp.append(df_temp)
    
bet = pd.concat(temp)

In [13]:
current_elo.head()

Unnamed: 0,teamname,league,opp_name,old_elo,opp_elo,new_elo
2057,7more7 Pompa Team,UL,K1CK,1241.0,1192.0,1222.0
2688,Cruzeiro eSports,CBLOL,KaBuM! e-Sports,1116.0,1205.0,1136.0
2795,Cruzeiro Academy,CBLOLA,KaBuM! e-Sports.A,1101.0,1137.0,1086.0
3104,eStar,LPL,Suning,985.0,1268.0,979.0
5843,PDW,UL,Komil&Friends,1212.0,1047.0,1220.0


In [14]:
# Create df with implied odds, elo odds, and the difference for all upcoming games
temp = pd.merge(bet,current_elo[['teamname','new_elo']],on='teamname')
monies = pd.merge(temp,current_elo[['teamname','new_elo']],left_on='opp_name',right_on='teamname')
del monies['teamname_y']
monies.columns = ['teamname','odds','opp_name','league','new_elo','opp_elo']
monies = calc_odds_diff(monies)
monies1 =monies[['teamname','league','odds','opp_name','new_elo','opp_elo','odds_diff']].sort_values(['league','odds_diff'],ascending=False)
monies1 = monies1[monies1.odds_diff>.08]
monies3 =monies[['teamname','league','odds','opp_name','new_elo','opp_elo','odds_diff_3']].sort_values(['league','odds_diff_3'],ascending=False)
monies3 = monies3[monies3.odds_diff_3>.08]

In [15]:
get_team(df,"Ground Zero Gaming",10)

Unnamed: 0,teamname,opp_name,date,result,old_elo,opp_elo,new_elo
20647,Ground Zero Gaming,Team Bliss,2023-06-20 08:58:54,0,1147.0,1250.0,1135.0
20644,Ground Zero Gaming,Team Bliss,2023-06-20 08:07:56,0,1160.0,1239.0,1147.0
20365,Ground Zero Gaming,Pentanet.GG,2023-06-14 11:05:52,0,1174.0,1229.0,1160.0
20358,Ground Zero Gaming,Pentanet.GG,2023-06-14 10:06:11,1,1155.0,1234.0,1174.0
20014,Ground Zero Gaming,Kanga Esports,2023-06-06 10:53:03,1,1141.0,1107.0,1155.0
20010,Ground Zero Gaming,Kanga Esports,2023-06-06 10:08:08,0,1161.0,1089.0,1141.0
17749,Ground Zero Gaming,Kanga Esports,2023-03-01 08:52:06,0,1181.0,1108.0,1161.0
17745,Ground Zero Gaming,Kanga Esports,2023-03-01 07:58:35,0,1203.0,1088.0,1181.0
17743,Ground Zero Gaming,Kanga Esports,2023-03-01 07:06:45,1,1193.0,1067.0,1203.0
17694,Ground Zero Gaming,Chiefs Esports Club,2023-02-28 09:58:36,1,1167.0,1439.0,1193.0


In [16]:
monies1[~(monies1.league=='lck')&~(monies1.league=='lpl')]

Unnamed: 0,teamname,league,odds,opp_name,new_elo,opp_elo,odds_diff
170,Grypciocraft,ul,333,Alior Bank Team,1138.0,1164.0,0.231706
119,GameWard,lfl,175,Vitality.Bee,1157.0,1171.0,0.116227
121,Team GO,lfl,-120,Karmine Corp,1279.0,1188.0,0.08259
29,Team BDS,lec,-110,SK Gaming,1203.0,1082.0,0.143609
41,KOI,lec,150,Fnatic,1140.0,1121.0,0.127316
32,Team BDS,lec,-200,Excel,1203.0,979.0,0.117387
35,Team BDS,lec,120,Team Heretics,1203.0,1156.0,0.112683
15,Evil Geniuses,lcs,120,FlyQuest,1156.0,1127.0,0.087092
108,Vertex Esports Club,lco,450,Ground Zero Gaming,1140.0,1135.0,0.325377
112,Vertex Esports Club,lco,2500,Team Bliss,1140.0,1261.0,0.29412


In [17]:
monies3[(monies3.league=='lck')|(monies3.league=='lpl')]

Unnamed: 0,teamname,league,odds,opp_name,new_elo,opp_elo,odds_diff_3
148,Royal Never Give Up,lpl,-250,Ultra Prime,1147.0,940.0,0.148173
157,JD Gaming,lpl,-225,Top Esports,1380.0,1206.0,0.129985
136,FunPlus Phoenix,lpl,-120,Anyone's Legend,1012.0,934.0,0.117458
99,Gen.G,lck,-275,Dplus KIA,1370.0,1125.0,0.166306
90,Kwangdong Freecs,lck,-250,Nongshim RedForce,1060.0,895.0,0.095721


In [18]:
monies[monies.league=='lpl'].sort_values(['league','odds_diff_3'],ascending=False)

Unnamed: 0,teamname,odds,opp_name,league,new_elo,opp_elo,implied_odds,elo_odds,series_odds_3,series_odds_5,odds_diff,odds_diff_3,odds_diff_5
148,Royal Never Give Up,-250,Ultra Prime,lpl,1147.0,940.0,0.714286,0.767025,0.862458,0.91362,0.052739,0.148173,0.199334
157,JD Gaming,-225,Top Esports,lpl,1380.0,1206.0,0.692308,0.731378,0.822293,0.875877,0.03907,0.129985,0.18357
136,FunPlus Phoenix,-120,Anyone's Legend,lpl,1012.0,934.0,0.545455,0.610402,0.662912,0.700375,0.064948,0.117458,0.15492
161,Rare Atom,-120,ThunderTalk Gaming,lpl,1013.0,957.0,0.545455,0.5799,0.618829,0.647281,0.034445,0.073375,0.101827
133,FunPlus Phoenix,-138,LGD Gaming,lpl,1012.0,943.0,0.579832,0.598014,0.645137,0.679122,0.018182,0.065306,0.09929
146,Rare Atom,350,Edward Gaming,lpl,1013.0,1124.0,0.222222,0.34548,0.275599,0.228193,0.123258,0.053377,0.005971
160,Team WE,-334,ThunderTalk Gaming,lpl,1131.0,957.0,0.769585,0.731378,0.822293,0.875877,-0.038207,0.052708,0.106292
137,Royal Never Give Up,-450,Anyone's Legend,lpl,1147.0,934.0,0.818182,0.77314,0.868954,0.91937,-0.045042,0.050773,0.101189
158,Team WE,240,Top Esports,lpl,1131.0,1206.0,0.294118,0.393712,0.34297,0.306633,0.099595,0.048852,0.012515
138,FunPlus Phoenix,700,Weibo Gaming,lpl,1012.0,1211.0,0.125,0.241305,0.146583,0.094559,0.116305,0.021583,-0.030441


# Don't go past here yet

In [19]:
numerical = train.select_dtypes(['int','float']).columns

NameError: name 'train' is not defined

In [None]:
def create_target(groupby):
    groupby['target']=groupby['result'].shift(-1)
    return groupby

In [None]:
def add_target(df):
    df = df.groupby('teamname').apply(create_target)
    df.loc[pd.isnull(df.target),'target'] =2
    df.target = df.target.astype(int,errors='ignore')
    return df

In [None]:
df = add_target(df)

In [None]:
from sklearn.preprocessing import MinMaxScaler #scale all numerical columns

removed_columns = ['teamname','league','date','target','opp_name']
selected_columns = df.columns[~df.columns.isin(removed_columns)]

scaler = MinMaxScaler()
df[selected_columns] = scaler.fit_transform(df[selected_columns])


In [None]:
# Create rolling averages for columns, concat as new columns to df


def rolling(team):
    rolling = team.rolling(10).mean()
    return rolling

def add_rolling(df):
    cols = ['gamelength','teamkills','teamdeaths','firstblood','dragons','barons','opp_barons','towers','opp_towers',\
       'inhibitors','opp_inhibitors','damagetochampions','damagetakenperminute','wardsplaced','wardskilled',\
       'controlwardsbought','totalgold','gspd']

    df_rolling=df[list(cols)+['teamname']]
    
    
    df_rolling = df_rolling.groupby('teamname',group_keys=False)[cols].apply(rolling)

    rolling_cols = [f'{col}_rolling' for col in df_rolling.columns]
    df_rolling.columns = rolling_cols
    df = pd.concat([df,df_rolling],axis=1)
    return df.dropna()

In [None]:
def next_opp(team):
    team['next_opp'] = team['opp_name'].shift(-1)
    return team
def add_opp(df):
    df = df.groupby('teamname').apply(next_opp)
    df.loc[df.next_opp.isnull(),'next_opp'] = 2
    return df

In [None]:
add_opp(df)

In [None]:
def next_side(team):
    team['next_blue'] = team['blue_side'].shift(-1)
    return team

def add_next_side(df):
    df = df.groupby('teamname').apply(next_side)
    df.loc[df.next_blue.isnull(),'next_blue']=2
    df.next_blue = df.next_blue.astype(int,errors='ignore')
    return df

In [None]:
def next_date(team):
    team['next_date'] = team['date'].shift(-1)
    return team

def add_next_date(df):
    df = df.groupby('teamname').apply(next_date)
    df.loc[df.next_date.isnull(),'next_date']=2
    return df

In [None]:
full = df.merge(df[rolling_cols + ["next_opp", "next_date", "teamname"]], left_on=["teamname", "next_date"], \
                right_on=["next_opp", "next_date"])

In [None]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier

def create_objects():
    rr = RidgeClassifier(solver ='sag',normalize=False,)
    split = TimeSeriesSplit(n_splits=3)
    sfs = SequentialFeatureSelector(rr, n_features_to_select=14,direction='backward',cv=split,n_jobs=-1)

create_objects()

In [None]:
removed_columns = list(full.columns[full.dtypes=='object']) + removed_columns
selected_columns = full.columns[~full.columns.isin(removed_columns)]

In [None]:
sfs.fit(full[selected_columns],full['target'])

In [None]:
selectors = selected_columns[sfs.get_support()]

In [None]:
predictions = backtest(full,rr,selectors,'target')

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(predictions.actual,predictions.prediction)

.5665 'forward', rr

In [None]:
def near_split(x, num_bins): #Split my df into equal splits to perform backtesting
    quotient, remainder = divmod(x, num_bins)
    bins = [quotient + 1] * remainder + [quotient] * (num_bins - remainder)
    count = 0
    new_list = []
    for b in bins:
        count += b
        new_list.append(count)
    return new_list

splits = near_split(df.shape[0],5)
last_split = splits[4]-splits[3] #Difference between last two values for final 'test' set

In [None]:
def backtest(data,model,predictors,target):
    all_predictions= []
    
    for i in range(0,len(splits)-1):
        train = data.loc[:splits[i]]
        test = data.loc[splits[i]:splits[i]+last_split]
        
        model.fit(train[predictors],train[target])
        preds = model.predict(test[predictors])
        preds = pd.Series(preds,index=test.index)
        combined = pd.concat([test[target],preds],axis=1)
        combined.columns = ['actual','prediction']
        
        all_predictions.append(combined)
        
    return pd.concat(all_predictions)
        
        
        
        


In [None]:
predictions = backtest(full,rr,selectors,'target')

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(predictions.actual,predictions.prediction)

### optimize ridge regression

In [None]:
from sklearn import decomposition
from sklearn import linear_model
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score

In [None]:
X = full[selectors]
y = full['target']

In [None]:
pca = decomposition.PCA()
ridge = linear_model.Ridge()

In [None]:
pipe = Pipeline(steps=[("pca", pca),
                        ("ridge", ridge)])

In [None]:
n_components = list(range(1,X.shape[1]+1,1))
normalize = [True, False]
solver = ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
parameters = dict(pca__n_components=n_components,
                      ridge__normalize=normalize,
                      ridge__solver=solver)

In [None]:
clf_GS = GridSearchCV(pipe, parameters)
clf_GS.fit(X, y)

In [None]:
clf_GS.best_params_

In [None]:
print("Best Number Of Components:", clf_GS.best_estimator_.get_params()["pca__n_components"])
print(); print(clf_GS.best_estimator_.get_params()["ridge"])

