In [1]:
import bs4 as bs
import requests
import numpy as np
from tqdm import tqdm

In [2]:
# Path components that get substituted into the match-history URL template.

# League identifiers (first path segment of the URL).
REGIONS = [
    'LCS',
    'LEC',
    'LCK',
    'LPL',
]
# Split/stage identifiers (path segment after the season).
SERIES = [
    'Spring_Season',
    'Spring_Playoffs',
    'Summer_Season',
    'Summer_Playoffs',
]

In [3]:
# Build one match-history URL per (region, series) pair, region-major order.
# Resulting links look like:
# https://lol.gamepedia.com/LCS/2020_Season/Spring_Playoffs/Match_History

URL = [
    f"https://lol.gamepedia.com/{region}/2020_Season/{series}/Match_History"
    for region in REGIONS
    for series in SERIES
]
# The World Championship pages do not follow the region/series template,
# so they are appended explicitly.
URL.extend([
    'https://lol.gamepedia.com/2020_Season_World_Championship/Play-In/Match_History',
    'https://lol.gamepedia.com/2020_Season_World_Championship/Main_Event/Match_History',
])

print(URL)

['https://lol.gamepedia.com/LCS/2020_Season/Spring_Season/Match_History', 'https://lol.gamepedia.com/LCS/2020_Season/Spring_Playoffs/Match_History', 'https://lol.gamepedia.com/LCS/2020_Season/Summer_Season/Match_History', 'https://lol.gamepedia.com/LCS/2020_Season/Summer_Playoffs/Match_History', 'https://lol.gamepedia.com/LEC/2020_Season/Spring_Season/Match_History', 'https://lol.gamepedia.com/LEC/2020_Season/Spring_Playoffs/Match_History', 'https://lol.gamepedia.com/LEC/2020_Season/Summer_Season/Match_History', 'https://lol.gamepedia.com/LEC/2020_Season/Summer_Playoffs/Match_History', 'https://lol.gamepedia.com/LCK/2020_Season/Spring_Season/Match_History', 'https://lol.gamepedia.com/LCK/2020_Season/Spring_Playoffs/Match_History', 'https://lol.gamepedia.com/LCK/2020_Season/Summer_Season/Match_History', 'https://lol.gamepedia.com/LCK/2020_Season/Summer_Playoffs/Match_History', 'https://lol.gamepedia.com/LPL/2020_Season/Spring_Season/Match_History', 'https://lol.gamepedia.com/LPL/2020_Se

In [4]:
# Download every match-history page and collect, in page order:
#   champions - the `title` attribute of each champion sprite <span>
#   teams     - the `href` of every <a> found inside a 'mhgame-result' cell
# Later cells slice both flat lists into fixed-size groups, so a page that
# fails to load must not be parsed as if it were real data.

# Seconds to wait for the server; without a timeout requests.get can block forever.
REQUEST_TIMEOUT = 30

champions = []
teams = []
for url in URL:
    sauce = requests.get(url, timeout=REQUEST_TIMEOUT)
    if not sauce.ok:
        # Surface HTTP errors instead of silently scraping an error page.
        print(f"skipping {url}: HTTP {sauce.status_code}")
        continue

    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed (and warns), which can change the
    # parse tree between environments.
    soup = bs.BeautifulSoup(sauce.text, 'html.parser')

    for span in soup.find_all('span', class_='sprite champion-sprite'):
        champions.append(span.get('title'))

    for td in soup.find_all('td', class_='mhgame-result'):
        # find_all is the current name for the legacy findChildren alias.
        for link in td.find_all('a'):
            teams.append(link.get('href'))

In [5]:
# `champions` is a flat list laid out in consecutive groups of ten that
# alternate bans, picks, bans, picks, ... Only the pick groups are wanted,
# i.e. the groups at odd positions (1, 3, 5, ...). A trailing incomplete
# group is never selected.
picked_ = [
    champions[group * 10:(group + 1) * 10]
    for group in range(1, len(champions) // 10, 2)
]

print(picked_[:10])

[['Aatrox', 'Lee Sin', 'Malzahar', 'Aphelios', 'Braum', 'Sett', 'Sejuani', 'Zoe', 'Varus', 'Thresh'], ['Sett', 'Jarvan IV', 'Zoe', 'Varus', 'Braum', 'Renekton', 'Pantheon', 'Viktor', 'Ashe', 'Tahm Kench'], ['Sett', 'Sejuani', 'Syndra', 'Lucian', 'Braum', 'Aatrox', 'Pantheon', 'Orianna', 'Miss Fortune', 'Yuumi'], ['Aatrox', 'Pantheon', 'Orianna', 'Aphelios', 'Nautilus', 'Sett', 'Sejuani', 'Zoe', 'Miss Fortune', 'Leona'], ['Aatrox', 'Sejuani', 'Syndra', 'Aphelios', 'Tahm Kench', 'Camille', 'Pantheon', 'Ekko', 'Varus', 'Braum'], ['Aatrox', 'Elise', 'Ornn', 'Ashe', 'Braum', 'Renekton', 'Olaf', 'Rumble', 'Varus', 'Tahm Kench'], ['Sett', 'Pantheon', 'Ryze', 'Aphelios', 'Braum', 'Aatrox', 'Jarvan IV', 'Viktor', 'Varus', 'Rakan'], ['Ornn', 'Lee Sin', 'Viktor', 'Aphelios', 'Tahm Kench', 'Sett', 'Sejuani', 'Azir', 'Ezreal', 'Yuumi'], ['Ornn', "Rek'Sai", 'Zoe', 'Aphelios', 'Nautilus', 'Renekton', 'Trundle', 'Viktor', 'Miss Fortune', 'Alistar'], ['Ornn', 'Pantheon', 'Syndra', 'Ezreal', 'Tahm Kench

In [6]:
# Label per game: 0 for blue win, 1 for red win.
# `teams` is consumed four hrefs at a time; a game counts as a blue win when
# the fourth href of its group equals the second one. Leftover hrefs that do
# not fill a complete group of four are ignored.
winner = [
    0 if teams[game * 4 + 3] == teams[game * 4 + 1] else 1
    for game in range(len(teams) // 4)
]

print(winner[:10])

[0, 0, 0, 0, 0, 1, 0, 0, 0, 1]


In [7]:
# Encode champion names as integers.
# np.unique yields the distinct names across all games in sorted order, so
# each champion's index is its position in that sorted sequence.
champ_dict = {name: position for position, name in enumerate(np.unique(picked_))}
print(champ_dict)

# Same nesting as picked_ (one list per game), with names replaced by indices.
picked = [[champ_dict[name] for name in lineup] for lineup in picked_]

print(picked[:10])

{'Aatrox': 0, 'Ahri': 1, 'Akali': 2, 'Alistar': 3, 'Anivia': 4, 'Annie': 5, 'Aphelios': 6, 'Ashe': 7, 'Aurelion Sol': 8, 'Azir': 9, 'Bard': 10, 'Blitzcrank': 11, 'Brand': 12, 'Braum': 13, 'Caitlyn': 14, 'Camille': 15, 'Cassiopeia': 16, "Cho'Gath": 17, 'Corki': 18, 'Darius': 19, 'Diana': 20, 'Dr. Mundo': 21, 'Draven': 22, 'Ekko': 23, 'Elise': 24, 'Evelynn': 25, 'Ezreal': 26, 'Fiddlesticks': 27, 'Fiora': 28, 'Galio': 29, 'Gangplank': 30, 'Gnar': 31, 'Gragas': 32, 'Graves': 33, 'Hecarim': 34, 'Heimerdinger': 35, 'Illaoi': 36, 'Irelia': 37, 'Ivern': 38, 'Janna': 39, 'Jarvan IV': 40, 'Jax': 41, 'Jayce': 42, 'Jhin': 43, 'Jinx': 44, "Kai'Sa": 45, 'Kalista': 46, 'Karma': 47, 'Karthus': 48, 'Kassadin': 49, 'Kayle': 50, 'Kayn': 51, 'Kennen': 52, "Kha'Zix": 53, 'Kindred': 54, 'Kled': 55, "Kog'Maw": 56, 'LeBlanc': 57, 'Lee Sin': 58, 'Leona': 59, 'Lillia': 60, 'Lissandra': 61, 'Lucian': 62, 'Lulu': 63, 'Lux': 64, 'Malphite': 65, 'Malzahar': 66, 'Maokai': 67, 'Miss Fortune': 68, 'Mordekaiser': 69, '

In [8]:
# Build one feature vector per game: two multi-hot blocks of length
# n_champions, concatenated. The first block marks the champion indices in
# game[0:5], the second marks those in game[5:10] (presumably one block per
# team - confirm against the scraped page layout).

# Number of distinct champions; champ_dict has exactly one entry per unique
# name, so this equals len(np.unique(picked_)) without re-sorting the whole
# dataset twice for every game.
n_champions = len(champ_dict)

games_ar = []
for game in picked:
    first_five = np.zeros((n_champions,), dtype=int)
    first_five[game[0:5]] = 1   # set all five positions in one indexed assignment
    last_five = np.zeros((n_champions,), dtype=int)
    last_five[game[5:10]] = 1
    games_ar.append(np.concatenate((first_five, last_five)))

print(games_ar[:2])

[array([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0

In [9]:
# Convert the per-game feature vectors and the labels to NumPy arrays, then
# write each one to its own .npy file in the current working directory.
games_ar, winner = np.array(games_ar), np.array(winner)

for filename, data in (('games_ar.npy', games_ar), ('winner.npy', winner)):
    np.save(filename, data)