In [1]:
from utility import scraper
from bs4 import BeautifulSoup as soup
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np

import re
import string

## 1. Scrape Champion Strategies (Tencent)
- Updated when a new champion is released
- Can be updated manually

### 1.1 Scrape Champion Catalog Page
- Champion name (in Chinese)
- Champion page url

In [None]:
def scrape_champion_catalog():
    """Scrape the Tencent champion catalog page.

    Returns:
        pandas.DataFrame with one row per ``<li>`` of the ``imgtextlist`` list
        and two columns:
        ``champion_chinese`` (the ``title`` of the entry's first link) and
        ``champion_url`` (``https://lol.qq.com/data/`` + the ``href`` of the
        entry's first link that has one).
    """
    catalog_soup = scraper.get_page_selenium('https://lol.qq.com/data/info-heros.shtml')
    entries = catalog_soup.find('ul', {'class': 'imgtextlist'}).findAll('li')

    # Title comes from the first <a>; the href from the first <a> that carries one.
    titles = [entry.find('a')['title'] for entry in entries]
    page_urls = ['https://lol.qq.com/data/' + entry.find('a', href=True)['href']
                 for entry in entries]

    return pd.DataFrame({'champion_chinese': titles, 'champion_url': page_urls})

In [None]:
# Scrape the catalog page into a DataFrame of champion titles and page URLs.
df_champion_strats = scrape_champion_catalog()

In [None]:
# Save the catalog as CSV under ./temp (the DataFrame index is not written).
df_champion_strats.to_csv('./temp/tencent_champion_catalog.csv',index=False)

### 1.2 Scrape Individual Champion Page

In [None]:
# Reload the catalog CSV written in section 1.1.
df_champion_strats = pd.read_csv('./temp/tencent_champion_catalog.csv')

In [None]:
def extract_strats(url, max_attempts=3):
    """Scrape the ally ("play") and enemy ("counter") tips from a champion page.

    The page is fetched up to ``max_attempts`` times; the first attempt that
    parses successfully wins.

    Args:
        url: Address of the champion page.
        max_attempts: How many times to fetch and parse before giving up.

    Returns:
        dict with keys ``'play_strats'`` and ``'counter_strats'``. Each value is
        the tips concatenated with every tip preceded by ``'\\n'`` (so the text
        starts with a newline; an empty tip list gives ``''``). If every attempt
        fails, ``'Error:' + url`` is printed and both values are NaN.
    """
    # The patterns do not change between attempts, so compile them once.
    tips_class = re.compile('.*arttips.*')
    ally_id = re.compile('.*allytips.*')
    enemy_id = re.compile('.*enemytips.*')

    for _ in range(max_attempts):
        try:
            champ_page_soup = scraper.get_page_selenium(url)
            tips = champ_page_soup.find('div', {'class': tips_class})
            ally_tips = tips.find('dl', {'id': ally_id})
            enemy_tips = tips.find('dl', {'id': enemy_id})

            # Each tip is prefixed with '\n'; downstream cleaning relies on this
            # exact format (e.g. the '\n-' placeholder check).
            play_strats = ''.join('\n' + tip.text.strip() for tip in ally_tips.findAll('p'))
            counter_strats = ''.join('\n' + tip.text.strip() for tip in enemy_tips.findAll('p'))
            return {'play_strats': play_strats, 'counter_strats': counter_strats}
        except Exception:
            # Best-effort retry on scraping/parsing errors. Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit propagate,
            # so a long scraping loop can still be stopped by the user.
            continue

    print('Error:'+url)
    return {'play_strats': float('nan'), 'counter_strats': float('nan')}

In [None]:
# Scrape every champion page; results are keyed by page URL. A plain loop is
# used so that whatever has been collected so far stays in the dict if the
# cell is interrupted.
url_strats_dict = {}
champion_urls = df_champion_strats['champion_url'].values
for url in tqdm(champion_urls):
    url_strats_dict[url] = extract_strats(url)

In [None]:
# One row per URL: turn the {url: {...}} dict into a table, blank out cells
# that are exactly '\n-', and expose the URL index as a 'champion_url' column.
df_url_strats = (
    pd.DataFrame.from_dict(url_strats_dict, orient='index')
    .replace('\n-', float('nan'))
    .reset_index()
    .rename(columns={'index': 'champion_url'})
)

In [None]:
# Save the scraped strategies as CSV under ./temp (the index is not written).
df_url_strats.to_csv('./temp/tencent_url_strats.csv',index=False)

### 1.3 Preprocess Strats Table

Inner join the champion catalog with the scraped strategies on `champion_url`.

In [None]:
# Keep only champions present in both the catalog and the scraped strategies.
df_strats = df_champion_strats.merge(df_url_strats, how="inner", on="champion_url")

In [None]:
# Save the joined table as CSV under ./temp (the index is not written).
df_strats.to_csv('./temp/tencent_champion_strats.csv',index=False)

Get the Chinese-to-English champion name mapping and use it to map names to English.

In [None]:
# Fetch the name-translation page and collect the rows of its first <tbody>.
url = 'https://weixia.info/league-of-legends-name.html'
champ_translation_soup = scraper.get_page(url)
translation_table_body = champ_translation_soup.find('tbody')
champion_names = translation_table_body.findAll('tr')

In [None]:
# Read each row's cells once; cell 3 is used as the Chinese name and cell 4 as
# the English name (both stripped of surrounding whitespace).
name_row_cells = [row.findAll('td') for row in champion_names]
champion_names_cn = [cells[3].text.strip() for cells in name_row_cells]
champion_names_eng = [cells[4].text.strip() for cells in name_row_cells]
cn_eng_name_mapping = dict(zip(champion_names_cn, champion_names_eng))

In [None]:
# The Chinese name is the second space-separated token of the catalog title.
df_strats['champion_name_cn'] = [
    title.split(' ')[1] for title in df_strats['champion_chinese']
]
# Names without an entry in the mapping become NaN.
df_strats['champion_name_eng'] = df_strats['champion_name_cn'].map(cn_eng_name_mapping)

In [None]:
def replace_names(txt,cn_eng_name_mapping):
    """Replace every mapped name occurring in ``txt`` with its translation.

    All names are substituted in a single left-to-right pass:

    * when several names match at the same position the longest one wins, so
      a short name that is a substring of a longer name cannot clobber it;
    * text inserted by a replacement is never scanned again;
    * empty names are ignored (``str.replace('', x)`` would insert ``x``
      between every character).

    Args:
        txt: Text to rewrite. Non-string values (e.g. a NaN standing for a
            missing strategy) are returned unchanged.
        cn_eng_name_mapping: dict mapping names to their replacements.

    Returns:
        The rewritten string, or ``txt`` itself when it is not a string.
    """
    if not isinstance(txt, str):
        return txt

    # Longest first: regex alternation takes the first alternative that matches.
    names = sorted((name for name in cn_eng_name_mapping if name), key=len, reverse=True)
    if not names:
        return txt

    # Names are literal text, so escape any regex metacharacters.
    pattern = re.compile('|'.join(re.escape(name) for name in names))
    return pattern.sub(lambda match: cn_eng_name_mapping[match.group(0)], txt)

In [None]:
# Swap mapped Chinese champion names for English ones in both strategy columns.
for strats_col in ('play_strats', 'counter_strats'):
    df_strats[strats_col] = df_strats[strats_col].apply(
        replace_names, args=(cn_eng_name_mapping,)
    )

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top if importing it early has no side effects.
from utility.translation import google_translate

# Translate each strategy column into a new `*_en` column; originals are kept.
for source_col, translated_col in (('play_strats', 'play_strats_en'),
                                   ('counter_strats', 'counter_strats_en')):
    df_strats[translated_col] = df_strats[source_col].apply(google_translate)

In [None]:
# Save the table including the translated columns as CSV under ./temp
# (the index is not written).
df_strats.to_csv('./temp/tencent_champion_strats_translated.csv',index=False)

In [None]:
# Final export: keep the name columns and the translated strategies, renamed
# to the column names used in ./preprocessed_data.
export_columns = {
    'champion_name_cn': 'champion_cn',
    'champion_name_eng': 'champion',
    'play_strats_en': 'play_strats',
    'counter_strats_en': 'counter_strats',
}
df_champ_strats = (
    df_strats[['champion_name_cn', 'champion_name_eng', 'play_strats_en', 'counter_strats_en']]
    .copy(deep=True)
    .rename(columns=export_columns)
)
df_champ_strats.to_csv('./preprocessed_data/tencent_champion_strats.csv', index=False)