In [1]:
from collections import Counter

import pandas as pd
import numpy as np

from lol_fandom import SITE, set_default_delay
from lol_fandom import get_leagues, get_tournaments
from lol_fandom import get_scoreboard_games, get_scoreboard_players
from lol_fandom import from_response

pd.set_option('display.max_columns', None)
# set_default_delay(0.5)

In [2]:
TARGET_LEAGUES = [
    'LTC', 'LCK',
    'LPL',
    'EU LCS', 'LEC',
    'NA LCS', 'LCS',
    'GPL', 'LST',
    'LLA', 'CLS', 'LLN',
    'LCO', 'OPL',
    'LCL',
    'LJL',
    'LMS', 'PCS',
    'VCS',
    'CBLOL', 
    'TCL',
    
    'MSI',
    'WCS',
]

In [3]:
def get_new_id(ids):
    if 'team_id' in ids.columns:
        id_list = sorted(ids['team_id'].unique())
    else:
        id_list = sorted(ids['player_id'].unique())

    if len(id_list) == 0:
        return 1
    prev = id_list[0]
    for id in id_list[1:]:
        if prev + 1 != id:
            return prev + 1
        prev = id
    return prev + 1

## Teams

In [None]:
leagues = get_leagues(where='L.Level="Primary" and L.IsOfficial="Yes"')
leagues

In [None]:
lst = []
for league in TARGET_LEAGUES:
    if league not in leagues['League Short'].values:
        lst.append(league)
lst

In [None]:
teams = pd.read_csv('./csv/teams_id.csv')
names = []

# for year in range(2011, 2024):
for year in range(2023, 2024):
    print(year)
    for league in leagues['League Short']:
        print(f'\t{league}')
        tournaments = get_tournaments(
            where=f'L.League_Short="{league}" and T.Year={year}'
        )
        if tournaments.shape[0] == 0:
            continue
        tournaments = tournaments.sort_values(
            by=['Year', 'DateStart', 'Date']
        ).reset_index(drop=True)

        for page in tournaments['OverviewPage']:
            print(f'\t\t{page}')
            try:
                scoreboard_games = get_scoreboard_games(
                    where=f'T.OverviewPage="{page}"'
                )
                if scoreboard_games is None:
                    continue
            except Exception as e:
                print(e)
                continue
            scoreboard_games = scoreboard_games.sort_values(
                by='DateTime UTC'
            ).reset_index(drop=True)
            team_names = scoreboard_games[['Team1', 'Team2']].unstack().unique()
            names = []
            for name in team_names:
                if name not in teams['team'].values:
                    names.append(name)
            if len(names) > 0:
                names = sorted(names)
                break
        if len(names) > 0:
            break
    if len(names) > 0:
        print(scoreboard_games['OverviewPage'].unique()[0])
        print(names)
        break

In [None]:
teams.loc[teams['team'].str.contains(
    'rainbow',
    case=False
)]

In [None]:
if len(names) > 0:
    teams = pd.concat(
        [
            teams,
            pd.Series({'team': names.pop(0), 'team_id': get_new_id(teams)}).to_frame().T
            # pd.Series({'team': names.pop(0), 'team_id': 323}).to_frame().T
        ],
        ignore_index=True
    )
names

In [None]:
teams = teams.sort_values(by=['team_id', 'team']).reset_index(drop=True)
teams

In [None]:
teams.to_csv('./csv/teams_id.csv', index=False)

## Players

In [None]:
players = pd.read_csv('./csv/players_id.csv')
names = {}

# for year in range(2011, 2024):
for year in range(2016, 2024):
    print(year)
    for league in leagues['League Short']:
        print(f'\t{league}')
        tournaments = get_tournaments(
            where=f'L.League_Short="{league}" and T.Year={year}'
        )
        if tournaments.shape[0] == 0:
            continue
        tournaments = tournaments.sort_values(
            by=['Year', 'DateStart', 'Date']
        ).reset_index(drop=True)

        for page in tournaments['OverviewPage']:
            print(f'\t\t{page}')
            try:
                scoreboard_games = get_scoreboard_games(
                    where=f'T.OverviewPage="{page}"'
                )
                if scoreboard_games is None:
                    continue
            except Exception as e:
                print(e)
                continue
            lst_team = scoreboard_games[['Team1', 'Team2']].unstack().unique()
            player_names = []
            for team in lst_team:
                print(f'\t\t\t{team}')
                try:
                    scoreboard_players = get_scoreboard_players(
                        where=f'T.OverviewPage="{page}" and SP.Team="{team}"'
                    )
                    if scoreboard_players is None:
                        continue
                except Exception as e:
                    print(e)
                    continue
                player_names += list(set(
                    scoreboard_players[['Link', 'Team']].itertuples(index=False)
                ))
            names = {}
            for (name, team) in player_names:
                if name not in players['player'].values:
                    lst = names.get(team, [])
                    lst.append(name)
                    names[team] = lst
            if len(names) > 0:
                break
        if len(names) > 0:
            break
    if len(names) > 0:
        print(scoreboard_players['OverviewPage'].unique()[0])
        for team, player_lst in names.items():
            print(f'{team}\n  {player_lst}')
        break

In [None]:
print(scoreboard_players['OverviewPage'].unique()[0])
teams = list(names.keys())
print(teams)
print()
print(teams[0])
print(names[teams[0]])

In [None]:
players.loc[players['player'].str.contains(
    '14a',
    case=False
)]
# players.loc[players['player_id'] == 135]

In [None]:
if len(names) > 0:
    name = names[teams[0]].pop(0)
    players = pd.concat(
        [
            players,
            pd.Series({'player': name, 'player_id': get_new_id(players)}).to_frame().T
            # pd.Series({'player': name, 'player_id': 1613}).to_frame().T
        ],
        ignore_index=True
    )
    if len(names[teams[0]]) == 0:
        del names[teams[0]]
        del teams[0]
print(teams)
print()
if len(teams) > 0:
    print(teams[0])
    print(names[teams[0]])

In [None]:
players = players.sort_values(by=['player_id', 'player']).reset_index(drop=True)
players

In [None]:
while True:
    players['lower'] = players['player'].str.lower()
    lst = sorted(list(players[['lower', 'player_id']].itertuples(index=False)))

    is_complete = True
    prev = lst[0]
    for cur in lst[1:]:
        if prev[0] == cur[0] and prev[1] != cur[1]:
            # and cur[1] != 1869:
            is_complete = False
            break
        prev = cur

    if not is_complete:
        print('incomplete')
        candidate = players.loc[players['lower'] == prev[0]]
        print(candidate)
        df = players.loc[players['player_id'].isin(candidate['player_id'].values)]
        if candidate.shape[0] != df.shape[0]:
            print('improper id')
            print(df)
            break
        else:
            players.loc[players['lower'] == prev[0], 'player_id'] = min(players.loc[players['lower'] == prev[0], 'player_id'])
            print('after correction')
            print(players.loc[players['lower'] == prev[0]])
            print('\n')
    else:
        print('completed')
        break

In [None]:
is_complete = True

counter = Counter(players[['player', 'player_id']].itertuples(index=False))
lst = [key for key, value in counter.items() if value > 1]
if len(lst) > 0:
    is_complete = False
    print('incomplete')
    print(lst)
    for (name, id) in lst:
        idx = players.loc[players['player'] == name].index
        players.drop(idx[1:], inplace=True)
else:
    print('completed')

In [None]:
if is_complete:
    players[['player', 'player_id']].to_csv('./csv/players_id.csv', index=False)
    print('save complete')

## Tournaments

In [4]:
leagues = get_leagues(where='L.Level="Primary" and L.IsOfficial="Yes"')
leagues

Unnamed: 0,League,League Short,Region,Level,IsOfficial
0,2015 International Wildcard Tournament,2015 IWCT,International,Primary,Yes
1,2016 International Wildcard Qualifier,IWCQ,International,Primary,Yes
2,Circuit Brazilian League of Legends,CBLOL,Brazil,Primary,Yes
3,Copa Latinoamérica Sur,CLS,LAS,Primary,Yes
4,DreamHack,DH,International,Primary,Yes
5,Europe League Championship Series,EU LCS,Europe,Primary,Yes
6,Garena Premier League,GPL,SEA,Primary,Yes
7,International Wildcard Invitational,2015 IWCI,International,Primary,Yes
8,League of Legends Championship Series,LCS,North America,Primary,Yes
9,League of Legends SEA Tour,LST,SEA,Primary,Yes


In [5]:
# for year in range(2011, 2024):
for year in range(2023, 2024):
    print(f'{year}')
    tournaments = pd.DataFrame()
    for league in leagues['League Short']:
        t = get_tournaments(where=f'L.League_Short="{league}" and T.Year={year}')
        print(f'\t{league} - {t.shape[0]}')
        tournaments = pd.concat([tournaments, t])
    tournaments = tournaments.sort_values(
        by=['Year', 'DateStart', 'Date']
    ).reset_index(drop=True)

    tournaments.to_csv(f'./csv/tournaments/{year}_tournaments.csv', index=False)
    print(f'{year} - {tournaments.shape}')

2011
	2015 IWCT - 0
	IWCQ - 0
	CBLOL - 0
	CLS - 0
	DH - 0
	EU LCS - 0
	GPL - 0
	2015 IWCI - 0
	LCS - 0
	LST - 0
	LLA - 0
	LLN - 0
	LCK - 0
	LCO - 0
	LCL - 0
	LEC - 0
	LJL - 0
	LMS - 0
	LTC - 0
	MLG - 0
	MSC - 0
	MSI - 0
	NA LCS - 0
	OPL - 0
	PCS - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	Riot - 0
	LPL - 0
	TCL - 0
	VCS - 0
	WCS - 3
2011 - (3, 17)
2012
	2015 IWCT - 0
	IWCQ - 0
	CBLOL - 0
	CLS - 0
	DH - 1
	EU LCS - 0
	GPL - 0
	2015 IWCI - 0
	LCS - 0
	LST - 0
	LLA - 0
	LLN - 0
	LCK - 0
	LCO - 0
	LCL - 0
	LEC - 0
	LJL - 0
	LMS - 0
	LTC - 2
	MLG - 0
	MSC - 0
	MSI - 0
	NA LCS - 0
	OPL - 0
	PCS - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	Riot - 0
	LPL - 0
	TCL - 0
	VCS - 0
	WCS - 4
2012 - (7, 17)
2013
	2015 IWCT - 0
	IWCQ - 0
	CBLOL - 0
	CLS - 0
	DH - 0
	EU LCS - 5
	GPL - 0
	2015 IWCI - 0
	LCS - 0
	LST - 0
	LLA - 0
	LLN - 0
	LCK - 0
	LCO - 0
	LCL - 0
	LEC - 0
	LJL - 0
	LMS - 0
	LTC - 6
	MLG - 0
	MSC - 0
	MSI - 0
	NA LCS - 6
	OPL - 0
	PCS - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	RR - 0
	Riot - 

## Scoreboard games

In [6]:
# for year in range(2011, 2024):
for year in range(2023, 2024):
    tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
    print(f'{year} - tournament {tournaments.shape}')
    scoreboard_games = pd.DataFrame()
    for page in tournaments['OverviewPage']:
        sg = get_scoreboard_games(where=f'T.OverviewPage="{page}"')
        if sg is None:
            print(f'\t{page} - drop')
            tournaments.drop(
                tournaments.loc[tournaments['OverviewPage'] == page].index,
                inplace=True
            )
            continue
        league = tournaments.loc[tournaments['OverviewPage'] == page, 'League'].iloc[0]
        league = leagues.loc[leagues['League'] == league, 'League Short'].iloc[0]
        sg['League'] = league
        print(f'\t{page} - {sg.shape[0]}')
        scoreboard_games = pd.concat([scoreboard_games, sg])
    scoreboard_games = scoreboard_games.sort_values(
        by='DateTime UTC'
    ).reset_index(drop=True)
    scoreboard_games.to_csv(
        f'./csv/scoreboard_games/{year}_scoreboard_games.csv', index=False
    )
    print(f'{year} scoreboard_games {scoreboard_games.shape}')
    tournaments.to_csv(f'./csv/tournaments/{year}_tournaments.csv', index=False)
    print(f'{year} tournaments {tournaments.shape}')

2011 - tournament (3, 17)
	Season 1 World Championship/Qualifiers/Europe - drop
	Season 1 World Championship/Qualifiers/North America - drop
	Season 1 World Championship - 28
2011 scoreboard_games (28, 37)
2011 tournaments (1, 17)
2012 - tournament (7, 17)
	Champions/2012 Season/Spring - 44
	DreamHack Summer 2012 - 16
	Champions/2012 Season/Summer - 46
	Season 2/Regional Finals/China - drop
	Season 2/Regional Finals/Europe - 17
	Season 2/Regional Finals/Korea - 16
	Season 2 World Championship - 31
2012 scoreboard_games (170, 37)
2012 tournaments (6, 17)
2013 - tournament (27, 17)
	Champions/2013 Season/Winter Qualifiers - drop
	Champions/2013 Season/Winter - 112
	NA LCS/Season 3/Spring Qualifiers - drop
	LPL/2013 Season/Spring Qualifier - drop
	NA LCS/Season 3/Spring Season - 112
	EU LCS/Season 3/Spring Season - 112
	Champions/2013 Season/Spring Qualifiers - drop
	LPL/2013 Season/Spring Season - 112
	Champions/2013 Season/Spring - 87
	EU LCS/Season 3/Spring Playoffs - 15
	NA LCS/Season

## Scoreboard players

In [8]:
# for year in range(2011, 2024):
for year in range(2022, 2023):
    tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
    scoreboard_games = pd.read_csv(f'./csv/scoreboard_games/{year}_scoreboard_games.csv')
    print(f'{year} - tournament {tournaments.shape}')
    print(f'{year} - scoreboard games {scoreboard_games.shape}')
    scoreboard_players = pd.DataFrame()
    for page in tournaments['OverviewPage']:
        print(f'\t{page}', end='')
        teams = scoreboard_games.loc[
            scoreboard_games['OverviewPage'] == page, ['Team1', 'Team2']
        ].unstack().unique()
        len_sp = 0
        for i, team in enumerate(teams, start=1):
            print(f'\r\t{page} - ({i}/{len(teams)})', end='')
            sp = get_scoreboard_players(
                where=f'T.OverviewPage="{page}" and SP.Team="{team}"'
            )
            len_sp += sp.shape[0]
            scoreboard_players = pd.concat([scoreboard_players, sp])
        len_sg = scoreboard_games.loc[scoreboard_games['OverviewPage'] == page].shape[0]
        print(f'\n\t\tscoreboard games - {len_sg} | scoreboard players - {len_sp} | {len_sg * 10 == len_sp}')
    scoreboard_players = scoreboard_players.sort_values(
        by=['DateTime UTC', 'Team', 'Role Number']
    ).reset_index(drop=True)
    scoreboard_players.to_csv(
        f'./csv/scoreboard_players/{year}_scoreboard_players.csv',
        index=False
    )
    print(f'{year} scoreboard_players {scoreboard_players.shape}')

2011 - tournament (1, 17)
2011 - scoreboard games (28, 37)
	Season 1 World Championship - (8/8)
		scoreboard games - 28 | scoreboard players - 280
2011 scoreboard_players (280, 44)
2012 - tournament (6, 17)
2012 - scoreboard games (170, 37)
	Champions/2012 Season/Spring - (16/16)
		scoreboard games - 44 | scoreboard players - 440
	DreamHack Summer 2012 - (8/8)
		scoreboard games - 16 | scoreboard players - 160
	Champions/2012 Season/Summer - (17/17)
		scoreboard games - 46 | scoreboard players - 460
	Season 2/Regional Finals/Europe - (8/8)
		scoreboard games - 17 | scoreboard players - 170
	Season 2/Regional Finals/Korea - (5/5)
		scoreboard games - 16 | scoreboard players - 160
	Season 2 World Championship - (12/12)
		scoreboard games - 31 | scoreboard players - 310
2012 scoreboard_players (1700, 44)
2013 - tournament (20, 17)
2013 - scoreboard games (1162, 37)
	Champions/2013 Season/Winter - (13/13)
		scoreboard games - 112 | scoreboard players - 1120
	NA LCS/Season 3/Spring Season -