In [1]:
from collections import Counter
import webbrowser

import pandas as pd
import numpy as np

from lol_fandom import get_leagues, get_player_redirects

pd.set_option('display.max_columns', None)

In [2]:
def get_wiki_url(link):
    """Return the lol.fandom.com wiki URL for a page title.

    Spaces in ``link`` are replaced by underscores; no other escaping is done.
    """
    page_slug = link.replace(" ", "_")
    return 'https://lol.fandom.com/wiki/' + page_slug

def get_new_id(ids):
    """Return the lowest unused id that follows the smallest existing id.

    ``ids`` is a DataFrame holding either a ``team_id`` column (preferred when
    present) or a ``player_id`` column. The distinct ids are sorted and the
    first gap between two consecutive values is returned; when there is no
    gap the result is the largest id plus one. An empty column yields 1.
    """
    column = 'team_id' if 'team_id' in ids.columns else 'player_id'
    known = sorted(ids[column].unique())

    if not known:
        return 1
    # Walk neighbouring pairs; the first pair that is not consecutive marks a free id.
    for current, following in zip(known, known[1:]):
        if current + 1 != following:
            return current + 1
    return known[-1] + 1

def get_player_redirects_list(player_link):
    """Return every lower-cased alias recorded for the player behind ``player_link``.

    The link is first resolved to its ``OverviewPage`` through
    ``get_player_redirects``; all ``AllName`` values registered for that page
    are then fetched and lower-cased. An empty list is returned when the first
    lookup yields ``None`` (treated here as "no match").
    """
    matches = get_player_redirects(where=f'PR.AllName="{player_link}"')
    if matches is None:
        return []
    overview_page = matches['OverviewPage'].iloc[0]
    all_names = get_player_redirects(
        where=f'PR.OverviewPage="{overview_page}"'
    )['AllName'].values
    return [alias.lower() for alias in all_names]

def get_players_id(players, player_link):
    """Return the rows of ``players`` whose name is an alias of ``player_link``.

    Names in the ``player`` column are compared case-insensitively against the
    alias list produced by ``get_player_redirects_list``.
    """
    lowered_names = players['player'].str.lower()
    is_alias = lowered_names.isin(get_player_redirects_list(player_link))
    return players.loc[is_alias]

def concat_players(players, name, new_id=-1):
    """Return a new DataFrame equal to ``players`` plus one appended row.

    Parameters
    ----------
    players : DataFrame with ``player`` and ``player_id`` columns.
    name : player name to store in the ``player`` column.
    new_id : id to assign; the default ``-1`` is a sentinel meaning
        "allocate one with ``get_new_id(players)``".

    Returns
    -------
    DataFrame with a fresh 0..n-1 index; ``players`` itself is not modified.
    """
    if new_id == -1:
        new_id = get_new_id(players)
    # Build the row as a one-record DataFrame so each column keeps its own dtype.
    # A Series -> to_frame().T round trip produces object columns, which turned
    # the integer player_id column into object dtype after the concat.
    new_row = pd.DataFrame([{'player': name, 'player_id': new_id}])
    return pd.concat([players, new_row], ignore_index=True)

In [3]:
# Fetch the leagues flagged as primary-level and official. The filter has no
# placeholders, so it is a plain string rather than an f-string.
leagues = get_leagues(where='L.Level="Primary" and L.IsOfficial="Yes"')
leagues

Unnamed: 0,League,League Short,Region,Level,IsOfficial
0,2015 International Wildcard Tournament,2015 IWCT,International,Primary,Yes
1,2016 International Wildcard Qualifier,IWCQ,International,Primary,Yes
2,Circuit Brazilian League of Legends,CBLOL,Brazil,Primary,Yes
3,Copa Latinoamérica Sur,CLS,LAS,Primary,Yes
4,DreamHack,DH,International,Primary,Yes
5,Europe League Championship Series,EU LCS,Europe,Primary,Yes
6,Garena Premier League,GPL,SEA,Primary,Yes
7,International Wildcard Invitational,2015 IWCI,International,Primary,Yes
8,League of Legends Championship Series,LCS,North America,Primary,Yes
9,League of Legends SEA Tour,LST,SEA,Primary,Yes


## Teams

In [4]:
# Walk the tournaments year by year and stop at the first one that mentions a
# team name missing from teams_id.csv. The tournament page and one wiki page
# per unknown team are then opened in the browser for manual review.
teams = pd.read_csv('./csv/teams_id.csv')
names = []
known_teams = set(teams['team'])  # built once; membership is tested per name below

for year in range(2011, 2024):
    tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
    scoreboard_games = pd.read_csv(f'./csv/scoreboard_games/{year}_scoreboard_games.csv')
    tournament_rosters = pd.read_csv(
        f'./csv/tournament_rosters/{year}_tournament_rosters.csv'
    )
    print(f'{year} - tournament {tournaments.shape[0]}')
    print(f'{year} - scoreboard games {scoreboard_games.shape[0]}')
    print(f'{year} - tournament rosters {tournament_rosters.shape[0]}')
    for page in tournaments['OverviewPage']:
        print(f'\t{page}')
        sg = scoreboard_games.loc[scoreboard_games['OverviewPage'] == page]
        tr = tournament_rosters.loc[tournament_rosters['OverviewPage'] == page]
        # Team names come from both game scoreboards and rosters. Missing values
        # are dropped because a NaN would break the .lower() sort key below.
        team_names = (
            set(sg[['Team1', 'Team2']].unstack().dropna())
            | set(tr['Team'].dropna())
        )
        names = sorted(
            (name for name in team_names if name not in known_teams),
            key=lambda x: x.lower()
        )
        if len(names) > 0:
            break
    if len(names) > 0:
        # `page` is the tournament at which the inner loop stopped.
        url = get_wiki_url(page)
        webbrowser.open(url)
        print(url)
        print(f'{page}\n{names}')
        for link in names:
            webbrowser.open(get_wiki_url(link))
        break
if len(names) == 0:
    print('\nCompleted')

2011 - tournament 1
2011 - scoreboard games 28
2011 - tournament rosters 8
	Season 1 World Championship
2012 - tournament 6
2012 - scoreboard games 170
2012 - tournament rosters 65
	Champions/2012 Season/Spring
	DreamHack Summer 2012
	Champions/2012 Season/Summer
	Season 2/Regional Finals/Europe
	Season 2/Regional Finals/Korea
	Season 2 World Championship
2013 - tournament 20
2013 - scoreboard games 1162
2013 - tournament rosters 165
	Champions/2013 Season/Winter
	NA LCS/Season 3/Spring Season
	EU LCS/Season 3/Spring Season
	LPL/2013 Season/Spring Season
	Champions/2013 Season/Spring
	EU LCS/Season 3/Spring Playoffs
	NA LCS/Season 3/Spring Playoffs
	EU LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Season
	EU LCS/Season 3/Summer Season
	LPL/2013 Season/Spring Playoffs
	Champions/2013 Season/Summer
	LPL/2013 Season/Summer Season
	EU LCS/Season 3/Summer Playoffs
	NA LCS/Season 3/Summer Playoffs
	Season 3 Korea Regional Finals
	Season 3 China Regio

In [6]:
# Manual lookup aid: case-insensitive substring search over the known team names.
teams[teams['team'].str.contains('rainbow', case=False)]

Unnamed: 0,team,team_id
571,Rainbow7,467


In [None]:
if len(names) > 0:
    # Register the first pending team name under the lowest unused id.
    # To attach the name to an existing team instead, replace
    # get_new_id(teams) with that team's id (e.g. 467).
    # The row is built as a DataFrame so team_id keeps its integer dtype; a
    # Series -> to_frame().T row is object-typed and degrades the column.
    new_team = pd.DataFrame([{'team': names.pop(0), 'team_id': get_new_id(teams)}])
    teams = pd.concat([teams, new_team], ignore_index=True)
names

In [None]:
# Order by id, then name, and renumber the index from zero.
teams = teams.sort_values(by=['team_id', 'team'], ignore_index=True)
teams

In [None]:
# Normalise each team name (lower-case, spaces and hyphens removed) and check
# that no normalised name is attached to more than one team_id.
teams['lower'] = teams['team'].str.lower().str.replace(r'[ -]', '', regex=True)
lst = sorted(teams[['lower', 'team_id']].itertuples(index=False))

# After sorting, equal names are adjacent, so comparing neighbours is enough.
is_complete = True
prev = lst[0]
for cur in lst[1:]:
    if cur[0] == prev[0] and cur[1] != prev[1]:
        is_complete = False
        break
    prev = cur

if is_complete:
    print('completed')
else:
    print('incomplete')
    candidate = teams.loc[teams['lower'] == prev[0]]
    print(candidate)

In [None]:
# Persist only the two real columns; the helper 'lower' column is not written.
if is_complete:
    teams.to_csv('./csv/teams_id.csv', columns=['team', 'team_id'], index=False)
    print('Save complete')

## Players

In [5]:
# Automated pass over scoreboard player links. Each round:
#   1. find the first tournament with links missing from players_id.csv,
#   2. resolve each link through the wiki redirects (reuse an id or allocate one),
#   3. check case-insensitive name/id consistency,
#   4. drop exact duplicate rows and save.
# The pass stops (is_exception) as soon as something needs manual attention.
is_exception = False

while not is_exception:
    players = pd.read_csv('./csv/players_id.csv')
    names = []

    for year in range(2011, 2024):
        tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
        scoreboard_games = pd.read_csv(
            f'./csv/scoreboard_games/{year}_scoreboard_games.csv'
        )
        scoreboard_players = pd.read_csv(
            f'./csv/scoreboard_players/{year}_scoreboard_players.csv'
        )
        print(year)
        print(
            f'{year} - {tournaments.shape[0]} tournaments | ' +
            f'{scoreboard_games.shape[0]} scoreboard games | ' +
            f'{scoreboard_players.shape[0]} scoreboard players'
        )
        for page in tournaments['OverviewPage']:
            print(f'\t{page}')
            sp = scoreboard_players.loc[scoreboard_players['OverviewPage'] == page]
            player_names = sorted(sp['Link'].unique())
            for name in player_names:
                if name not in players['player'].values:
                    names.append(name)
            if len(names) > 0:
                break
        if len(names) > 0:
            print(f'{page}')
            print(f'{names}')
            break
    if len(names) == 0:
        print('\nCompleted')
        break

    # Resolve the unknown links one at a time.
    while not is_exception and len(names) > 0:
        name = names[0]
        df = get_players_id(players, name)
        print('\n---------------------------------------------')
        print(name)
        print(df)

        is_concatenated = False
        if df.shape[0] == 0:
            # No known alias: allocate a fresh id.
            players = concat_players(players, name)
            is_concatenated = True
        else:
            id_lst = df['player_id'].unique()
            if len(id_lst) == 1:
                # All known aliases agree on one id: reuse it.
                players = concat_players(players, name, id_lst[0])
                is_concatenated = True

        if is_concatenated:
            player_id = players.loc[players['player'] == name, 'player_id'].iloc[0]
            print(f'{name} - {player_id}')
            del names[0]
        else:
            # Aliases map to several ids; this needs a manual decision.
            print('There are several ids')
            print(name, '\n')
            print(df)
            is_exception = True
            break

    if is_exception:
        break

    players = players.sort_values(by=['player_id', 'player']).reset_index(drop=True)

    # Names that differ only by case must share one id.
    while True:
        players['lower'] = players['player'].str.lower()
        lst = sorted(list(players[['lower', 'player_id']].itertuples(index=False)))

        is_complete = True
        prev = lst[0]
        for cur in lst[1:]:
            if prev[0] == cur[0] and prev[1] != cur[1]:
                is_complete = False
                break
            prev = cur

        if not is_complete:
            print('incomplete')
            candidate = players.loc[players['lower'] == prev[0]]
            print(candidate)
            df = players.loc[players['player_id'].isin(candidate['player_id'].values)]
            if candidate.shape[0] != df.shape[0]:
                # The conflicting ids are also used by other names, so merging
                # them automatically is not safe.
                print('improper id')
                print(df)
                break
            else:
                players.loc[players['lower'] == prev[0], 'player_id'] = \
                    min(players.loc[players['lower'] == prev[0], 'player_id'])
                print('after correction')
                print(players.loc[players['lower'] == prev[0]])
                print('\n')
        else:
            print('completed')
            break

    # Bug fix: an 'improper id' exit used to fall through to the de-duplication
    # loop, which reset is_complete to True and saved the inconsistent table.
    # Stop here so nothing is written until the conflict is fixed by hand.
    if not is_complete:
        is_exception = True
        break

    # Remove rows that repeat an identical (player, player_id) pair.
    while True:
        is_complete = True

        counter = Counter(players[['player', 'player_id']].itertuples(index=False))
        lst = [key for key, value in counter.items() if value > 1]
        if len(lst) > 0:
            is_complete = False
            print('incomplete')
            print(lst)
            for (name, _) in lst:
                idx = players.loc[players['player'] == name].index
                players.drop(idx[1:], inplace=True)
        else:
            print('completed')
            break

    if is_complete:
        players[['player', 'player_id']].to_csv('./csv/players_id.csv', index=False)
        print('Save complete')

2011
2011 - 1 tournaments | 28 scoreboard games | 280 scoreboard players
	Season 1 World Championship
2012
2012 - 6 tournaments | 170 scoreboard games | 1700 scoreboard players
	Champions/2012 Season/Spring
	DreamHack Summer 2012
	Champions/2012 Season/Summer
	Season 2/Regional Finals/Europe
	Season 2/Regional Finals/Korea
	Season 2 World Championship
2013
2013 - 20 tournaments | 1162 scoreboard games | 11620 scoreboard players
	Champions/2013 Season/Winter
	NA LCS/Season 3/Spring Season
	EU LCS/Season 3/Spring Season
	LPL/2013 Season/Spring Season
	Champions/2013 Season/Spring
	EU LCS/Season 3/Spring Playoffs
	NA LCS/Season 3/Spring Playoffs
	EU LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Season
	EU LCS/Season 3/Summer Season
	LPL/2013 Season/Spring Playoffs
	Champions/2013 Season/Summer
	LPL/2013 Season/Summer Season
	EU LCS/Season 3/Summer Playoffs
	NA LCS/Season 3/Summer Playoffs
	Season 3 Korea Regional Finals
	Season 3 China Regional Fi

  scoreboard_players = pd.read_csv(


2018
2018 - 97 tournaments | 4287 scoreboard games | 42870 scoreboard players
	CLS/2018 Season/Opening Promotion
	CBLOL/2018 Season/Split 1 Promotion
	TCL/2018 Season/Winter Qualifiers
	NA LCS/2018 Season/Spring Promotion
	LJL/2018 Season/Spring Promotion
	EU LCS/2018 Season/Spring Promotion
	LLN/2018 Season/Opening Promotion
	OPL/2018 Season/Split 1 Promotion
	LCL/2018 Season/Spring Promotion
	LMS/2018 Season/Spring Promotion
	LCK/2018 Season/Spring Promotion
	LPL/2018 Season/Spring Season
	LCK/2018 Season/Spring Season
	VCS/2018 Season/Spring Season
	EU LCS/2018 Season/Spring Season
	LLN/2018 Season/Opening Season
	CBLOL/2018 Season/Split 1
	NA LCS/2018 Season/Spring Season
	OPL/2018 Season/Split 1
	CLS/2018 Season/Opening Season
	TCL/2018 Season/Winter Season
	LMS/2018 Season/Spring Season
	LJL/2018 Season/Spring Season
	LCL/2018 Season/Spring Season
	LCL/2018 Season/Spring Playoffs
	CBLOL/2018 Season/Split 1 Playoffs
	OPL/2018 Season/Split 1 Playoffs
	EU LCS/2018 Season/Spring Play

In [216]:
# Find the first tournament whose scoreboards contain player links missing from
# players_id.csv, and group those links by team in `names` ({team: [links]}).
players = pd.read_csv('./csv/players_id.csv')
names = {}

for year in range(2011, 2024):
    tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
    scoreboard_games = pd.read_csv(
        f'./csv/scoreboard_games/{year}_scoreboard_games.csv'
    )
    scoreboard_players = pd.read_csv(
        f'./csv/scoreboard_players/{year}_scoreboard_players.csv'
    )
    print(year)
    print(
        f'{year} - {tournaments.shape[0]} tournaments | '
        f'{scoreboard_games.shape[0]} scoreboard games | '
        f'{scoreboard_players.shape[0]} scoreboard players'
    )
    for page in tournaments['OverviewPage']:
        print(f'\t{page}')
        sp = scoreboard_players.loc[scoreboard_players['OverviewPage'] == page]
        # One (team, role number, link) tuple per distinct link, so that sorting
        # orders the players by team and then by role.
        player_rows = []
        for name in sp['Link'].unique():
            appearances = sp.loc[sp['Link'] == name]
            team = sorted(appearances['Team'].unique())[0]
            position = appearances['Role Number'].unique()[0]
            player_rows.append((team, position, name))
        player_names = sorted(player_rows)
        for team, _, name in player_names:
            if name not in players['player'].values:
                names.setdefault(team, []).append(name)
        if names:
            break
    if names:
        print(f'{page}')
        for team, player_lst in names.items():
            print(f'{team}\n  {player_lst}')
        break
if not names:
    print('\nCompleted')

2011
2011 - 1 tournaments | 28 scoreboard games | 280 scoreboard players
	Season 1 World Championship
2012
2012 - 6 tournaments | 170 scoreboard games | 1700 scoreboard players
	Champions/2012 Season/Spring
	DreamHack Summer 2012
	Champions/2012 Season/Summer
	Season 2/Regional Finals/Europe
	Season 2/Regional Finals/Korea
	Season 2 World Championship
2013
2013 - 20 tournaments | 1162 scoreboard games | 11620 scoreboard players
	Champions/2013 Season/Winter
	NA LCS/Season 3/Spring Season
	EU LCS/Season 3/Spring Season
	LPL/2013 Season/Spring Season
	Champions/2013 Season/Spring
	EU LCS/Season 3/Spring Playoffs
	NA LCS/Season 3/Spring Playoffs
	EU LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Season
	EU LCS/Season 3/Summer Season
	LPL/2013 Season/Spring Playoffs
	Champions/2013 Season/Summer
	LPL/2013 Season/Summer Season
	EU LCS/Season 3/Summer Playoffs
	NA LCS/Season 3/Summer Playoffs
	Season 3 Korea Regional Finals
	Season 3 China Regional Fi

  scoreboard_players = pd.read_csv(


2018
2018 - 97 tournaments | 4287 scoreboard games | 42870 scoreboard players
	CLS/2018 Season/Opening Promotion
	CBLOL/2018 Season/Split 1 Promotion
	TCL/2018 Season/Winter Qualifiers
	NA LCS/2018 Season/Spring Promotion
	LJL/2018 Season/Spring Promotion
	EU LCS/2018 Season/Spring Promotion
	LLN/2018 Season/Opening Promotion
	OPL/2018 Season/Split 1 Promotion
	LCL/2018 Season/Spring Promotion
	LMS/2018 Season/Spring Promotion
	LCK/2018 Season/Spring Promotion
	LPL/2018 Season/Spring Season
	LCK/2018 Season/Spring Season
	VCS/2018 Season/Spring Season
	EU LCS/2018 Season/Spring Season
	LLN/2018 Season/Opening Season
	CBLOL/2018 Season/Split 1
	NA LCS/2018 Season/Spring Season
	OPL/2018 Season/Split 1
	CLS/2018 Season/Opening Season
	TCL/2018 Season/Winter Season
	LMS/2018 Season/Spring Season
	LJL/2018 Season/Spring Season
	LCL/2018 Season/Spring Season
	LCL/2018 Season/Spring Playoffs
	CBLOL/2018 Season/Split 1 Playoffs
	OPL/2018 Season/Split 1 Playoffs
	EU LCS/2018 Season/Spring Play

In [217]:
# Show where the scan stopped and which team is handled next.
# NOTE: this rebinds the global `teams` (a DataFrame in the Teams section) to a
# plain list of team names; the next cell indexes it as a list.
if len(names) == 0:
    print('Completed')
else:
    url = get_wiki_url(page)
    # webbrowser.open(url)
    print(url)
    print(page)
    teams = list(names)
    print(teams)
    print()
    first_team = teams[0]
    print(first_team)
    print(names[first_team])
    # for link in names[first_team]:
        # webbrowser.open(get_wiki_url(link))

https://lol.fandom.com/wiki/LCK/2020_Season/Spring_Promotion
LCK/2020 Season/Spring Promotion
['APK Prince', 'Team Dynamics']

APK Prince
['Ikssu', 'Cover', 'Trigger (Kim Eui-joo)']


In [218]:
# Resolve pending links team by team until none are left or one is ambiguous.
while names:
    current_team = teams[0]
    name = names[current_team][0]
    df = get_players_id(players, name)
    print('\n--------------------------------------------------------')
    print(name)
    print(df)

    # Distinct ids already stored for any alias of this link.
    known_ids = df['player_id'].unique() if df.shape[0] > 0 else []
    if len(known_ids) > 1:
        # Aliases disagree on the id; leave the link pending for manual handling.
        print('There are several ids')
        print(name, '\n')
        print(df)
        break

    if len(known_ids) == 1:
        players = concat_players(players, name, known_ids[0])
    else:
        players = concat_players(players, name)
    assigned_id = players.loc[players["player"] == name, "player_id"].iloc[0]
    print(f'{name} - {assigned_id}')

    # Pop the handled link; move on to the next team once this one is empty.
    del names[current_team][0]
    if len(names[current_team]) == 0:
        del names[current_team]
        del teams[0]


--------------------------------------------------------
Ikssu
     player  player_id
1407  IkSSu       1043
1408  erssu       1043
1409  ikssu       1043
Ikssu - 1043

--------------------------------------------------------
Cover
Empty DataFrame
Columns: [player, player_id]
Index: []
Cover - 3082

--------------------------------------------------------
Trigger (Kim Eui-joo)
Empty DataFrame
Columns: [player, player_id]
Index: []
Trigger (Kim Eui-joo) - 3083

--------------------------------------------------------
Ganknam
Empty DataFrame
Columns: [player, player_id]
Index: []
Ganknam - 3084

--------------------------------------------------------
GgooNg
     player player_id
609  Ggoong       438
GgooNg - 438


In [219]:
# Order by id, then name, and renumber the index from zero.
players = players.sort_values(by=['player_id', 'player'], ignore_index=True)
players

Unnamed: 0,player,player_id
0,Chauster,1
1,SaintVicious,2
2,Saintvicious,2
3,HotshotGG,3
4,Bigfatlp,4
...,...,...
3885,Kagame,3080
3886,Potter,3081
3887,Cover,3082
3888,Trigger (Kim Eui-joo),3083


In [220]:
# Names that differ only by case must share one player_id. Each iteration
# repairs one conflicting name, until none remain or a conflict cannot be
# merged automatically.
while True:
    players['lower'] = players['player'].str.lower()
    lst = sorted(players[['lower', 'player_id']].itertuples(index=False))

    # After sorting, equal names are adjacent, so comparing neighbours is enough.
    is_complete = True
    prev = lst[0]
    for cur in lst[1:]:
        if cur[0] == prev[0] and cur[1] != prev[1]:
            is_complete = False
            break
        prev = cur

    if is_complete:
        print('completed')
        break

    print('incomplete')
    same_name = players['lower'] == prev[0]
    candidate = players.loc[same_name]
    print(candidate)
    df = players.loc[players['player_id'].isin(candidate['player_id'].values)]
    if candidate.shape[0] != df.shape[0]:
        # The ids involved are also used by other names; leave it for manual review.
        print('improper id')
        print(df)
        break

    # Only this name uses the conflicting ids: collapse them onto the smallest.
    players.loc[same_name, 'player_id'] = min(players.loc[same_name, 'player_id'])
    print('after correction')
    print(players.loc[same_name])
    print('\n')

completed


In [221]:
# Drop rows that repeat an identical (player, player_id) pair.
# NOTE(review): is_complete is reassigned here, so a False left by the
# consistency cell above (its 'improper id' exit) is overwritten.
counter = Counter(players[['player', 'player_id']].itertuples(index=False))
lst = [key for key, value in counter.items() if value > 1]
is_complete = len(lst) == 0

if is_complete:
    print('completed')
else:
    print('incomplete')
    print(lst)
    for (name, _) in lst:
        # Keep the first row carrying this name and drop the others.
        idx = players.loc[players['player'] == name].index
        players.drop(idx[1:], inplace=True)

completed


In [222]:
# Persist only the two real columns; the helper 'lower' column is not written.
if is_complete:
    players.to_csv('./csv/players_id.csv', columns=['player', 'player_id'], index=False)
    print('Save complete')

Save complete


## Tournament Rosters

In [9]:
# Automated pass over tournament rosters. It follows the same rounds as the
# scoreboard pass: find unknown player links, resolve them through the wiki
# redirects, check case-insensitive consistency, drop duplicates and save.
# Only roster entries holding one of the five playing roles count as players.
roles = ['top', 'jungle', 'mid', 'bot', 'support']

is_exception = False

while not is_exception:
    players = pd.read_csv('./csv/players_id.csv')
    names = []

    for year in range(2011, 2024):
        tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
        tournament_rosters = pd.read_csv(
            f'./csv/tournament_rosters/{year}_tournament_rosters.csv'
        )
        print(year)
        print(
            f'{year} - {tournaments.shape[0]} tournaments |' +
            f'{tournament_rosters.shape[0]}'
        )
        for page in tournaments['OverviewPage']:
            print(f'\t{page}')
            rosters = tournament_rosters.loc[tournament_rosters['OverviewPage'] == page]
            player_names = []
            for row in rosters.itertuples():
                if pd.isna(row.RosterLinks):
                    continue
                # RosterLinks and Roles are parallel ';;'-separated lists; one
                # Roles entry may itself list several comma-separated roles.
                roster = [link.strip() for link in row.RosterLinks.split(';;')]
                role = [entry.strip() for entry in row.Roles.split(';;')]
                for i in range(len(roster)):
                    positions = [pos.strip() for pos in role[i].split(',')]
                    if any(pos.lower() in roles for pos in positions):
                        player_names.append(roster[i])
            for name in set(player_names):
                if name not in players['player'].values:
                    names.append(name)
            if len(names) > 0:
                break
        if len(names) > 0:
            print(f'{page}')
            print(names)
            break
    if len(names) == 0:
        print('\nCompleted')
        break

    # Resolve the unknown links one at a time.
    while not is_exception and len(names) > 0:
        name = names[0]
        df = get_players_id(players, name)
        print('\n---------------------------------------------')
        print(name)
        print(df)

        is_concatenated = False
        if df.shape[0] == 0:
            # No known alias: allocate a fresh id.
            players = concat_players(players, name)
            is_concatenated = True
        else:
            id_lst = df['player_id'].unique()
            if len(id_lst) == 1:
                # All known aliases agree on one id: reuse it.
                players = concat_players(players, name, id_lst[0])
                is_concatenated = True

        if is_concatenated:
            player_id = players.loc[players['player'] == name, 'player_id'].iloc[0]
            print(f'{name} - {player_id}')
            del names[0]
        else:
            # Aliases map to several ids; this needs a manual decision.
            print('There are several ids')
            print(name, '\n')
            print(df)
            is_exception = True
            break

    if is_exception:
        break

    players = players.sort_values(by=['player_id', 'player']).reset_index(drop=True)

    # Names that differ only by case must share one id.
    while True:
        players['lower'] = players['player'].str.lower()
        lst = sorted(list(players[['lower', 'player_id']].itertuples(index=False)))

        is_complete = True
        prev = lst[0]
        for cur in lst[1:]:
            if prev[0] == cur[0] and prev[1] != cur[1]:
                is_complete = False
                break
            prev = cur

        if not is_complete:
            print('incomplete')
            candidate = players.loc[players['lower'] == prev[0]]
            print(candidate)
            df = players.loc[players['player_id'].isin(candidate['player_id'].values)]
            if candidate.shape[0] != df.shape[0]:
                # The conflicting ids are also used by other names, so merging
                # them automatically is not safe.
                print('improper id')
                print(df)
                break
            else:
                players.loc[players['lower'] == prev[0], 'player_id'] = \
                    min(players.loc[players['lower'] == prev[0], 'player_id'])
                print('after correction')
                print(players.loc[players['lower'] == prev[0]])
                print('\n')
        else:
            print('completed')
            break

    # Bug fix: an 'improper id' exit used to fall through to the de-duplication
    # loop, which reset is_complete to True and saved the inconsistent table.
    # Stop here so nothing is written until the conflict is fixed by hand.
    if not is_complete:
        is_exception = True
        break

    # Remove rows that repeat an identical (player, player_id) pair.
    while True:
        is_complete = True

        counter = Counter(players[['player', 'player_id']].itertuples(index=False))
        lst = [key for key, value in counter.items() if value > 1]
        if len(lst) > 0:
            is_complete = False
            print('incomplete')
            print(lst)
            for (name, _) in lst:
                idx = players.loc[players['player'] == name].index
                players.drop(idx[1:], inplace=True)
        else:
            print('completed')
            break

    if is_complete:
        players[['player', 'player_id']].to_csv('./csv/players_id.csv', index=False)
        print('Save complete')

2011
2011 - 1 tournaments |8
	Season 1 World Championship
2012
2012 - 6 tournaments |65
	Champions/2012 Season/Spring
	DreamHack Summer 2012
	Champions/2012 Season/Summer
	Season 2/Regional Finals/Europe
	Season 2/Regional Finals/Korea
	Season 2 World Championship
2013
2013 - 20 tournaments |165
	Champions/2013 Season/Winter
	NA LCS/Season 3/Spring Season
	EU LCS/Season 3/Spring Season
	LPL/2013 Season/Spring Season
	Champions/2013 Season/Spring
	EU LCS/Season 3/Spring Playoffs
	NA LCS/Season 3/Spring Playoffs
	EU LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Promotion
	NA LCS/Season 3/Summer Season
	EU LCS/Season 3/Summer Season
	LPL/2013 Season/Spring Playoffs
	Champions/2013 Season/Summer
	LPL/2013 Season/Summer Season
	EU LCS/Season 3/Summer Playoffs
	NA LCS/Season 3/Summer Playoffs
	Season 3 Korea Regional Finals
	Season 3 China Regional Finals
	Season 3 World Championship
	LPL/2013 Season/Summer Playoffs
2014
2014 - 33 tournaments |301
	Gamescom 2013/Spring Promotion Qual

In [145]:
# Find the first tournament whose rosters list player links missing from
# players_id.csv, and group those links by team in `names` ({team: [links]}).
players = pd.read_csv('./csv/players_id.csv')
roles = ['top', 'jungle', 'mid', 'bot', 'support']
names = {}

for year in range(2011, 2024):
    tournaments = pd.read_csv(f'./csv/tournaments/{year}_tournaments.csv')
    tournament_rosters = pd.read_csv(
        f'./csv/tournament_rosters/{year}_tournament_rosters.csv'
    )
    print(year)
    print(f'{year} - {tournaments.shape[0]} tournaments | {tournament_rosters.shape[0]}')
    for page in tournaments['OverviewPage']:
        print(f'\t{page}')
        rosters = tournament_rosters.loc[tournament_rosters['OverviewPage'] == page]
        player_names = []
        for row in rosters.itertuples():
            team = row.Team
            if pd.isna(row.RosterLinks):
                continue
            # RosterLinks and Roles are parallel ';;'-separated lists; one
            # Roles entry may itself list several comma-separated roles.
            roster = [link.strip() for link in row.RosterLinks.split(';;')]
            role = [entry.strip() for entry in row.Roles.split(';;')]
            for i in range(len(roster)):
                positions = [pos.strip() for pos in role[i].split(',')]
                # Keep only entries holding at least one playing role.
                if any(pos.lower() in roles for pos in positions):
                    player_names.append((team, roster[i]))
        for (team, name) in player_names:
            if name not in players['player'].values:
                names.setdefault(team, []).append(name)
        if names:
            break
    if names:
        print(f'{page}')
        for team, player_lst in names.items():
            print(f'{team}\n  {player_lst}')
            # Collapse repeated links for the team (the order is not preserved).
            unique_links = set(player_lst)
            if len(names[team]) != len(unique_links):
                names[team] = list(unique_links)
        break
if not names:
    print('\nCompleted')

2011
2011 - 1 tournaments | 8
	Season 1 World Championship
2012
2012 - 6 tournaments | 65
	Champions/2012 Season/Spring
Champions/2012 Season/Spring
DDoL
  ['Dong (Kim Hee-jae)']


In [None]:
# Open the tournament page and the wiki pages of the first team's unknown
# players, and show what is handled next.
# NOTE: this rebinds the global `teams` (a DataFrame in the Teams section) to a
# plain list of team names; the manual-add cell below indexes it as a list.
if len(names) == 0:
    print('Completed')
else:
    url = get_wiki_url(page)
    webbrowser.open(url)
    print(url)
    print(page)
    teams = list(names)
    print(teams)
    print()
    first_team = teams[0]
    print(first_team)
    print(names[first_team])
    for link in names[first_team]:
        webbrowser.open(get_wiki_url(link))

In [None]:
# Manual lookup aid: case-insensitive substring search over the known player names.
players[players['player'].str.contains('shoshin', case=False)]
# Alternative lookup by id:
# players.loc[players['player_id'] == 135]

In [None]:
# Manually register the next pending player of the current team, then show
# what is left. Defined up front so the second half of the cell can always
# read it; it used to be bound only inside the first `if`.
changed_team = False

if len(names) > 0:
    name = names[teams[0]].pop(0)
    # The id is chosen by hand for this player (2644 here). To allocate a
    # fresh id instead, use get_new_id(players) as the player_id value.
    # The row is built as a DataFrame so player_id keeps its integer dtype; a
    # Series -> to_frame().T row is object-typed and degrades the column.
    new_player = pd.DataFrame([{'player': name, 'player_id': 2644}])
    players = pd.concat([players, new_player], ignore_index=True)
    if len(names[teams[0]]) == 0:
        # Current team is finished: move on to the next one.
        del names[teams[0]]
        del teams[0]
        changed_team = True
print(teams)
print()
if len(teams) > 0:
    print(teams[0])
    print(names[teams[0]])
    if changed_team:
        # Open the wiki pages of the new team's pending players.
        for link in names[teams[0]]:
            webbrowser.open(get_wiki_url(link))

In [None]:
# Order by id, then name, and renumber the index from zero.
players = players.sort_values(by=['player_id', 'player'], ignore_index=True)
players

In [None]:
# Names that are equal after lower-casing and replacing underscores with
# spaces must share one player_id. Each iteration repairs one conflicting
# name, until none remain or a conflict cannot be merged automatically.
while True:
    players['lower'] = players['player'].str.lower().str.replace('_', ' ')
    lst = sorted(players[['lower', 'player_id']].itertuples(index=False))

    # After sorting, equal names are adjacent, so comparing neighbours is enough.
    is_complete = True
    prev = lst[0]
    for cur in lst[1:]:
        if cur[0] == prev[0] and cur[1] != prev[1]:
            is_complete = False
            break
        prev = cur

    if is_complete:
        print('completed')
        break

    print('incomplete')
    same_name = players['lower'] == prev[0]
    candidate = players.loc[same_name]
    print(candidate)
    df = players.loc[players['player_id'].isin(candidate['player_id'].values)]
    if candidate.shape[0] != df.shape[0]:
        # The ids involved are also used by other names; leave it for manual review.
        print('improper id')
        print(df)
        break

    # Only this name uses the conflicting ids: collapse them onto the smallest.
    players.loc[same_name, 'player_id'] = min(players.loc[same_name, 'player_id'])
    print('after correction')
    print(players.loc[same_name])
    print('\n')

In [None]:
# Drop rows that repeat an identical (player, player_id) pair.
# NOTE(review): is_complete is reassigned here, so a False left by the
# consistency cell above (its 'improper id' exit) is overwritten.
counter = Counter(players[['player', 'player_id']].itertuples(index=False))
lst = [key for key, value in counter.items() if value > 1]
is_complete = len(lst) == 0

if is_complete:
    print('completed')
else:
    print('incomplete')
    print(lst)
    for (name, _) in lst:
        # Keep the first row carrying this name and drop the others.
        idx = players.loc[players['player'] == name].index
        players.drop(idx[1:], inplace=True)

In [None]:
# Persist only the two real columns; the helper 'lower' column is not written.
if is_complete:
    players.to_csv('./csv/players_id.csv', columns=['player', 'player_id'], index=False)
    print('Save complete')