In [1]:
import os
import typing
import time
import random
from typing import Union, Tuple, List, Literal
from pathlib import Path

from tqdm.auto import tqdm
import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd

from crawler import wait_random_time, PlayerCrawler

### `__get_sub_result` 파싱 규칙
- 순위, 점수, 점수 세부 링크 모두 있음
- 순위만 있고, `FNR`(Final Not Reached) 혹은 `FNR J` 라고 적혀있음 + `DNQ`
- 순위 없고, `WD`(Withdrawn)라고 적혀있음
- 아예 비어있음

In [2]:
main_path = Path('../olympic-historical-dataset')
root_url = 'https://skatingscores.com/'

In [3]:
crawler = PlayerCrawler()
crawler.get_player_page(noc='kor', gender='women', name='yuna_kim').head().loc[0, 'short-program-link']

'https://skatingscores.com/0405/jgphun/jr/women/i/short/kor/yuna_kim/'

In [4]:
olympic_result_df = pd.read_csv(main_path / 'Olympic_Results.csv')
olympic_result_df.head()

Unnamed: 0,result_id,event_title,edition,edition_id,sport,sport_url,result_date,result_location,result_participants,result_format,result_detail,result_description
0,30359,"Super-Heavyweight (>105 kilograms), Men",2004 Summer Olympics,26,Weightlifting,/editions/26/sports/WLF,"25 August 2004 — 16:30 (B), 20:00 (A)","Olympiako Gymnastirio Arsis Varon Nikaias, Nikaia",17 from 15 countries,Total of best lifts in snatch and clean & jerk...,na,"Not so much a competition as a coronation, the..."
1,1626,"Giant Slalom, Women1",1998 Winter Olympics,46,Snowboarding,/editions/46/sports/SBD,9 February 1998,"Mt. Yakebitai, Shiga Kogen, Yamanouchi",31 from 14 countries,"Two runs, total time determined placement.",Gates: 38 / 36Length: 936 mStart Altitude: 196...,The women’s giant slalom was postponed one day...
2,76,"Singles, Men",1976 Winter Olympics,40,Luge,/editions/40/sports/LUG,4 – 7 February 1976,"Kunsteis-Bob- und Rodelbahn, Igls",43 from 15 countries,"Four runs, total time determined placement.",Curves: 14Length: 1220 mStart Altitude: ?Verti...,"Once more, the competitors from East and West ..."
3,962,"1,500 metres, Men",1928 Winter Olympics,30,Speed Skating,/editions/30/sports/SSK,14 February 1928 — 9:00,"Olympia-Eisstadion Badrutts Park, St. Moritz",30 from 14 countries,na,na,There was little doubt that the Olympic 1500 m...
4,258824,"Canadian Singles, Slalom, Men",2008 Summer Olympics,53,Canoe Slalom,/editions/53/sports/CSL,11 – 12 August 2008,"Shunyi Aolinpike Shuishang Gongyuan, Mapo, Shunyi",16 from 16 countries,na,na,Two former Olympic champions in the C-1 slalom...


In [5]:
sport = olympic_result_df['sport'].unique()
sport.sort()
sport

array(['3x3 Basketball', 'Aeronautics', 'Alpine Skiing', 'Alpinism',
       'American Football', 'Archery', 'Art Competitions',
       'Artistic Gymnastics', 'Artistic Swimming', 'Athletics',
       'Australian Rules Football', 'Automobile Racing', 'Badminton',
       'Ballooning', 'Bandy', 'Baseball', 'Basketball', 'Basque pelota',
       'Beach Volleyball', 'Biathlon', 'Bicycle Polo', 'Bobsleigh',
       'Boules', 'Bowling', 'Boxing', 'Canne De Combat', 'Canoe Marathon',
       'Canoe Slalom', 'Canoe Sprint', 'Cricket', 'Croquet',
       'Cross Country Skiing', 'Curling', 'Cycling BMX Freestyle',
       'Cycling BMX Racing', 'Cycling Mountain Bike', 'Cycling Road',
       'Cycling Track', 'Diving', 'Dogsled Racing', 'Equestrian Dressage',
       'Equestrian Driving', 'Equestrian Eventing', 'Equestrian Jumping',
       'Equestrian Vaulting', 'Fencing', 'Figure Skating', 'Firefighting',
       'Fishing', 'Football', 'Freestyle Skiing', 'Gliding', 'Glíma',
       'Golf', 'Handball', 'Ho

In [6]:
event_df = pd.read_csv(main_path / 'Olympic_Athlete_Event_Results.csv')
event_df.head()

Unnamed: 0,edition,edition_id,country_noc,sport,event,result_id,athlete,athlete_id,pos,medal,isTeamSport
0,1908 Summer Olympics,5,ANZ,Athletics,"100 metres, Men",56265,Ernest Hutcheon,64710,DNS,,False
1,1908 Summer Olympics,5,ANZ,Athletics,"400 metres, Men",56313,Henry Murray,64756,DNS,,False
2,1908 Summer Olympics,5,ANZ,Athletics,"800 metres, Men",56338,Harvey Sutton,64808,3 h8 r1/2,,False
3,1908 Summer Olympics,5,ANZ,Athletics,"800 metres, Men",56338,Guy Haskins,922519,DNS,,False
4,1908 Summer Olympics,5,ANZ,Athletics,"800 metres, Men",56338,Joseph Lynch,64735,DNS,,False


In [7]:
athlete_bio_df = pd.read_csv(main_path / 'Olympic_Athlete_Bio.csv')
athlete_bio_df

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc,description,special_notes
0,65649,Ivanka Bonova,Female,4 April 1949,166.0,55,Bulgaria,BUL,Personal Best: 400 – 53.54 (1980).,
1,112510,Nataliya Uryadova,Female,15 March 1977,184.0,70,Russian Federation,RUS,,Listed in Olympians Who Won a Medal at the Eur...
2,114973,Essa Ismail Rashed,Male,14 December 1986,165.0,55,Qatar,QAT,Personal Best: 10000 – 27:20.97 (2006).,Listed in Olympians Who Won a Medal at the Asi...
3,30359,Péter Boros,Male,12 January 1908,,,Hungary,HUN,"Between 1927 and 1938, Péter Boros competed as...",
4,50557,Rudolf Piowatý,Male,28 April 1900,,,Czechoslovakia,TCH,Rudolf Piowaty joined the Czechoslovak militar...,
...,...,...,...,...,...,...,...,...,...,...
155856,23748,Todd Makler,Male,8 January 1946,183.0,75,United States,USA,"Todd Makler grew up in suburban Philadelphia, ...","Brother of Brooke Makler Son of Paul Makler, Sr."
155857,58581,Géza Hollósi,Male,2 May 1938,175.0,79,Hungary,HUN,Géza Hollósi had the following finishes at maj...,
155858,30387,József Keresztessy,Male,19 September 1885,,,Hungary,HUN,József Keresztessy was the grandson of József ...,
155859,69900,Alexander Thieme,Male,13 January 1954,187.0,84,East Germany,GDR,East German Sprinter Alexander Thieme reached ...,Listed in Olympians Who Won a Medal at the Eur...


In [8]:
df = pd.merge(event_df, olympic_result_df, how='left', on='result_id', suffixes=('', '_duplicate'))
df = df.drop(columns=[col for col in df.columns if col.endswith('_duplicate')])
figure_df = df[df['sport'] == 'Figure Skating']
figure_df.head()

Unnamed: 0,edition,edition_id,country_noc,sport,event,result_id,athlete,athlete_id,pos,medal,isTeamSport,event_title,sport_url,result_date,result_location,result_participants,result_format,result_detail,result_description
1867,1998 Winter Olympics,46,AZE,Figure Skating,"Singles, Men",14168,Igor Pashkevich,96229,16,,False,"Singles, Men",/editions/46/sports/FSK,12 – 14 February 1998,"White Ring, Nagano",29 from 24 countries,"In each section, skaters were ranked on Ordina...",na,"Since Lillehammer, Elvis Stojko (CAN), silver ..."
1868,1998 Winter Olympics,46,AZE,Figure Skating,"Singles, Women",14189,Yuliya Vorobyova,82967,16,,False,"Singles, Women",/editions/46/sports/FSK,18 – 20 February 1998,"White Ring, Nagano",28 from 21 countries,"In each section, skaters were ranked on Ordina...",na,Michelle Kwan had a chance to be on the 1994 U...
1869,1998 Winter Olympics,46,AZE,Figure Skating,"Pairs, Mixed",14210,Inga Rodionova,100207,18,,True,"Pairs, Mixed",/editions/46/sports/FSK,8 – 10 February 1998,"White Ring, Nagano",40 from 14 countries,The pairs were ranked on Ordinal Placement for...,na,Artur Dmitriyev (RUS) had won the pairs gold m...
1870,1998 Winter Olympics,46,AZE,Figure Skating,"Pairs, Mixed",14210,Aleksandr Anishchenko,100208,18,,True,"Pairs, Mixed",/editions/46/sports/FSK,8 – 10 February 1998,"White Ring, Nagano",40 from 14 countries,The pairs were ranked on Ordinal Placement for...,na,Artur Dmitriyev (RUS) had won the pairs gold m...
1904,2002 Winter Olympics,47,AZE,Figure Skating,"Singles, Men",14274,Sergey Rylov,101980,24,,False,"Singles, Men",/editions/47/sports/FSK,12 – 14 February 2002,"Salt Lake Ice Center, Salt Lake City, Utah",28 from 20 countries,"In each section, skaters were ranked on Ordina...",na,"The co-favorites were the two Russians, Yevgen..."


In [9]:
figure_df['edition'].unique()

array(['1998 Winter Olympics', '2002 Winter Olympics',
       '2006 Winter Olympics', '2014 Winter Olympics',
       '2022 Winter Olympics', '1994 Winter Olympics',
       '1988 Winter Olympics', '1992 Winter Olympics',
       '2010 Winter Olympics', '2018 Winter Olympics',
       '1936 Winter Olympics', '1968 Winter Olympics',
       '1972 Winter Olympics', '1976 Winter Olympics',
       '1980 Winter Olympics', '1984 Winter Olympics',
       '1908 Summer Olympics', '1920 Summer Olympics',
       '1924 Winter Olympics', '1928 Winter Olympics',
       '1932 Winter Olympics', '1948 Winter Olympics',
       '1956 Winter Olympics', '1960 Winter Olympics',
       '1964 Winter Olympics', '1952 Winter Olympics'], dtype=object)

In [10]:
sochi_df = figure_df[figure_df['edition'] == '2014 Winter Olympics']
sochi_df.head()

Unnamed: 0,edition,edition_id,country_noc,sport,event,result_id,athlete,athlete_id,pos,medal,isTeamSport,event_title,sport_url,result_date,result_location,result_participants,result_format,result_detail,result_description
2059,2014 Winter Olympics,58,AZE,Figure Skating,"Ice Dancing, Mixed",350316,Yuliya Zlobina,127391,12,,True,"Ice Dancing, Mixed",/editions/58/sports/FSK,16 – 17 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",48 from 15 countries,"Total of points from compulsory dance, optiona...",na,The gold and silver medals were somewhat pre-o...
2060,2014 Winter Olympics,58,AZE,Figure Skating,"Ice Dancing, Mixed",350316,Aleksey Sitnikov,127392,12,,True,"Ice Dancing, Mixed",/editions/58/sports/FSK,16 – 17 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",48 from 15 countries,"Total of points from compulsory dance, optiona...",na,The gold and silver medals were somewhat pre-o...
17228,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Men",350307,Michal Březina,119015,10,,False,"Singles, Men",/editions/58/sports/FSK,13 – 14 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",29 from 20 countries,Total of points from short program and free sk...,na,The heavy favorite in the men’s competition wa...
17229,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Men",350307,Tomáš Verner,110135,11,,False,"Singles, Men",/editions/58/sports/FSK,13 – 14 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",29 from 20 countries,Total of points from short program and free sk...,na,The heavy favorite in the men’s competition wa...
17230,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Women",350310,Elizaveta Ukolova,127604,22,,False,"Singles, Women",/editions/58/sports/FSK,19 – 20 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",30 from 20 countries,Total of points from short program and free sk...,na,Yu-Na Kim was the defending champion and consi...


In [11]:
sochi_df = sochi_df[
    sochi_df['event'].str.match('Singles, Men')
    | sochi_df['event'].str.match('Singles, Women')
]
sochi_df.head()

Unnamed: 0,edition,edition_id,country_noc,sport,event,result_id,athlete,athlete_id,pos,medal,isTeamSport,event_title,sport_url,result_date,result_location,result_participants,result_format,result_detail,result_description
17228,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Men",350307,Michal Březina,119015,10,,False,"Singles, Men",/editions/58/sports/FSK,13 – 14 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",29 from 20 countries,Total of points from short program and free sk...,na,The heavy favorite in the men’s competition wa...
17229,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Men",350307,Tomáš Verner,110135,11,,False,"Singles, Men",/editions/58/sports/FSK,13 – 14 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",29 from 20 countries,Total of points from short program and free sk...,na,The heavy favorite in the men’s competition wa...
17230,2014 Winter Olympics,58,CZE,Figure Skating,"Singles, Women",350310,Elizaveta Ukolova,127604,22,,False,"Singles, Women",/editions/58/sports/FSK,19 – 20 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",30 from 20 countries,Total of points from short program and free sk...,na,Yu-Na Kim was the defending champion and consi...
18327,2014 Winter Olympics,58,GEO,Figure Skating,"Singles, Women",350310,Elene Gedevanishvili,110861,19,,False,"Singles, Women",/editions/58/sports/FSK,19 – 20 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",30 from 20 countries,Total of points from short program and free sk...,na,Yu-Na Kim was the defending champion and consi...
22131,2014 Winter Olympics,58,EST,Figure Skating,"Singles, Men",350307,Viktor Romanenkov,127650,24,,False,"Singles, Men",/editions/58/sports/FSK,13 – 14 February 2014,"Dvorets Zimnego Sporta Aisberg, Coastal Cluste...",29 from 20 countries,Total of points from short program and free sk...,na,The heavy favorite in the men’s competition wa...


In [12]:
player_noc = sochi_df.groupby(['athlete_id', 'athlete'])['country_noc'].unique()
(player_noc.apply(len) > 1).sum()

np.int64(0)

In [13]:
player_noc = player_noc.apply(lambda x: x[0])
player_noc

athlete_id  athlete                   
101967      Yevgeny Plyushchenko          RUS
101973      Brian Joubert                 FRA
110128      Daisuke Takahashi             JPN
110132      Viktor Pfeifer                AUT
110135      Tomáš Verner                  CZE
110860      Jelena Glebova                EST
110861      Elene Gedevanishvili          GEO
110867      Carolina Kostner              ITA
119015      Michal Březina                CZE
119048      Abzal Rakhimgaliyev           KAZ
119050      Akiko Suzuki                  JPN
119072      Denis Ten                     KAZ
119077      Florent Amodio                FRA
119083      Javier Fernández              ESP
119084      Jenna McCorkell               GBR
119086      Jeremy Abbott                 USA
119090      Yu-Na Kim                     KOR
119096      Mao Asada                     JPN
119113      Patrick Chan                  CAN
119141      Zoltan Kelemen                ROU
127304      Brooklee Han                 

In [14]:
len(player_noc)

60

In [15]:
indexing = athlete_bio_df['athlete_id'].isin(player_noc.index.get_level_values('athlete_id'))
indexing.sum()

np.int64(60)

In [16]:
athlete_bio_df = athlete_bio_df[indexing]
athlete_bio_df

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc,description,special_notes
1524,127416,Isadora Williams,Female,8 February 1996,156.0,47.0,Brazil,BRA,,
12322,128089,Park So-Yeon,Female,24 October 1997,,,Republic of Korea,KOR,,Listed in Olympians Who Won a Medal at the Win...
16729,128657,Nataliya Popova,Female,15 September 1993,168.0,52.0,Ukraine,UKR,,
18830,119072,Denis Ten,Male,13 June 1993,164.0,55.0,Kazakhstan,KAZ,"Denis Ten’s first passion was music and, at th...",Listed in Olympians Who Were Murdered (Stabbed...
21833,127908,Alexei Bychenko,Male,5 February 1988,174.0,65.0,Israel,ISR,,Listed in Olympians Who Won a Medal at the Eur...
22234,127943,Paul Bonifacio Parkinson,Male,16 February 1991,,,Italy,ITA,,
24492,110860,Jelena Glebova,Female,16 June 1989,165.0,49.0,Estonia,EST,,Sister of Ilja Glebov
25677,127754,Maé-Bérénice Meité,Female,21 September 1994,168.0,68.0,France,FRA,,Listed in Olympians Who Won a Medal at the Win...
28431,128601,Viktoria Helgesson,Female,13 September 1988,165.0,56.0,Sweden,SWE,,
31121,110861,Elene Gedevanishvili,Female,7 January 1990,160.0,41.0,Georgia,GEO,,Listed in Olympians Who Won a Medal at the Eur...


In [17]:
athlete_bio_df.duplicated('name').sum()

np.int64(0)

In [18]:
athlete_bio_df.isna().mean()

athlete_id       0.000000
name             0.000000
sex              0.000000
born             0.000000
height           0.233333
weight           0.233333
country          0.000000
country_noc      0.000000
description      0.950000
special_notes    0.433333
dtype: float64

In [19]:
athlete_bio_df = athlete_bio_df.drop(columns=['description', 'special_notes'])
athlete_bio_df.head()

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc
1524,127416,Isadora Williams,Female,8 February 1996,156.0,47.0,Brazil,BRA
12322,128089,Park So-Yeon,Female,24 October 1997,,,Republic of Korea,KOR
16729,128657,Nataliya Popova,Female,15 September 1993,168.0,52.0,Ukraine,UKR
18830,119072,Denis Ten,Male,13 June 1993,164.0,55.0,Kazakhstan,KAZ
21833,127908,Alexei Bychenko,Male,5 February 1988,174.0,65.0,Israel,ISR


In [20]:
athlete_bio_df['country_noc'] = athlete_bio_df['country_noc'].str.lower()
athlete_bio_df.head()

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc
1524,127416,Isadora Williams,Female,8 February 1996,156.0,47.0,Brazil,bra
12322,128089,Park So-Yeon,Female,24 October 1997,,,Republic of Korea,kor
16729,128657,Nataliya Popova,Female,15 September 1993,168.0,52.0,Ukraine,ukr
18830,119072,Denis Ten,Male,13 June 1993,164.0,55.0,Kazakhstan,kaz
21833,127908,Alexei Bychenko,Male,5 February 1988,174.0,65.0,Israel,isr


In [21]:
athlete_bio_df.dtypes

athlete_id       int64
name            object
sex             object
born            object
height         float64
weight          object
country         object
country_noc     object
dtype: object

In [22]:
athlete_bio_df['born'] = pd.to_datetime(athlete_bio_df['born'])
athlete_bio_df['weight'] = athlete_bio_df['weight'].astype(float)
athlete_bio_df.dtypes

athlete_id              int64
name                   object
sex                    object
born           datetime64[ns]
height                float64
weight                float64
country                object
country_noc            object
dtype: object

In [23]:
athlete_bio_df.head()

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc
1524,127416,Isadora Williams,Female,1996-02-08,156.0,47.0,Brazil,bra
12322,128089,Park So-Yeon,Female,1997-10-24,,,Republic of Korea,kor
16729,128657,Nataliya Popova,Female,1993-09-15,168.0,52.0,Ukraine,ukr
18830,119072,Denis Ten,Male,1993-06-13,164.0,55.0,Kazakhstan,kaz
21833,127908,Alexei Bychenko,Male,1988-02-05,174.0,65.0,Israel,isr


In [24]:
athlete_bio_df['sex'] = athlete_bio_df['sex'].replace({'Male': 'men', 'Female': 'women'})
athlete_bio_df.head()

Unnamed: 0,athlete_id,name,sex,born,height,weight,country,country_noc
1524,127416,Isadora Williams,women,1996-02-08,156.0,47.0,Brazil,bra
12322,128089,Park So-Yeon,women,1997-10-24,,,Republic of Korea,kor
16729,128657,Nataliya Popova,women,1993-09-15,168.0,52.0,Ukraine,ukr
18830,119072,Denis Ten,men,1993-06-13,164.0,55.0,Kazakhstan,kaz
21833,127908,Alexei Bychenko,men,1988-02-05,174.0,65.0,Israel,isr


In [25]:
url = 'https://skatingscores.com/kaz/men/denis_ten/'
soup = crawler.get_page(url)
soup

(<!DOCTYPE html>
 <html><head><meta content="text/html; charset=utf-8" http-equiv="Content-type"/><meta content="Latest Figure Skating Results, Scores, Recaps, Rankings, Standings, Statistics" name="description"/><meta content="figure,skating,isu,results,scores,recaps,rankings,statistics,icecalc,score calculator" name="keywords"/><meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/><script src="/static/scores/script15.js" type="text/javascript"></script><link href="/static/scores/style144.css" rel="stylesheet" type="text/css"/><link href="/static/scores/i/icon_16x16%402x.png" rel="icon" type="image/png"/><link href="https://skatingscores.com/static/scores/i/icon_512.png" rel="image_src"/><meta content="summary" name="twitter:card"/><meta content="@SkatingScores" name="twitter:site"/><meta content="@SkatingScores" name="twitter:creator"/><meta content="The best resource for Figure Skating scores and stats on the web." name="twitter:

In [26]:
url

'https://skatingscores.com/kaz/men/denis_ten/'

In [27]:
soup, _ = soup
soup

<!DOCTYPE html>
<html><head><meta content="text/html; charset=utf-8" http-equiv="Content-type"/><meta content="Latest Figure Skating Results, Scores, Recaps, Rankings, Standings, Statistics" name="description"/><meta content="figure,skating,isu,results,scores,recaps,rankings,statistics,icecalc,score calculator" name="keywords"/><meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/><script src="/static/scores/script15.js" type="text/javascript"></script><link href="/static/scores/style144.css" rel="stylesheet" type="text/css"/><link href="/static/scores/i/icon_16x16%402x.png" rel="icon" type="image/png"/><link href="https://skatingscores.com/static/scores/i/icon_512.png" rel="image_src"/><meta content="summary" name="twitter:card"/><meta content="@SkatingScores" name="twitter:site"/><meta content="@SkatingScores" name="twitter:creator"/><meta content="The best resource for Figure Skating scores and stats on the web." name="twitter:ti

In [28]:
seasonal_results = soup.find_all('div', attrs='event-grid-wrap')[-1].find_all('table', attrs='event-grid stab')
seasonal_results

[<table class="event-grid stab"><tbody><tr class="group-row"><th colspan="5">2017/18</th></tr><tr><th class="l" width="15%">Event</th><th class="l">Location</th><th class="c" width="10%">SP</th><th class="c" width="10%">FS</th><th class="c" width="10%">Total</th></tr><tr class="o"><td class="event-title"><a href="/1718/oly/">Olympics</a></td><td class="l"><span title="KOR">🇰🇷</span> PyeongChang <br/> <small class="gray">Feb. 14, 2018</small></td><td class="abs-rec-box c"><span class="abs-rec"></span> <span class="abs-tm" title=""></span> 27 <br/> <span><a href="/1718/oly/sr/men/i/short/kaz/denis_ten/">70.12</a></span></td><td class="abs-rec-box c"></td><td class="abs-rec-box c _">27 <br/> <a href="/1718/oly/sr/men/results/">FNR</a></td></tr><tr class=""><td class="event-title"><a href="/1718/4cc/">4CC</a></td><td class="l"><span title="TPE">🇹🇼</span> Taipei <br/> <small class="gray">Jan. 22, 2018</small></td><td class="abs-rec-box c _"><span class="abs-rec"></span> <span class="abs-tm"

In [29]:
def get_sub_result(sub_result: BeautifulSoup) -> Tuple[str, float, str]:
    if sub_result.text.strip() == '':
        return '', 0., ''
    
    br_tag = sub_result.find('br')
    if br_tag is not None:
        sub_result_ranking = list(br_tag.previous_siblings)[0].strip()
    else:
        sub_result_ranking = sub_result.text.strip()
    
    sub_result_score_link_tag = sub_result.find('a')
    if sub_result_score_link_tag is None:
        sub_result_link = ''
        sub_result_score = 0.
    elif sub_result_score_link_tag.text == 'WD':
        sub_result_link = sub_result_score_link_tag['href']
        sub_result_score = 0.
        sub_result_ranking = 'WD'
    else:
        sub_result_link = sub_result_score_link_tag['href']
        if 'FNR' in sub_result_score_link_tag.text:
            sub_result_score = 0.
        else:
            sub_result_score = float(sub_result_score_link_tag.text.strip())
    
    return sub_result_link, sub_result_score, sub_result_ranking

In [30]:
events = seasonal_results[0].find_all('tr')[2:]
events

[<tr class="o"><td class="event-title"><a href="/1718/oly/">Olympics</a></td><td class="l"><span title="KOR">🇰🇷</span> PyeongChang <br/> <small class="gray">Feb. 14, 2018</small></td><td class="abs-rec-box c"><span class="abs-rec"></span> <span class="abs-tm" title=""></span> 27 <br/> <span><a href="/1718/oly/sr/men/i/short/kaz/denis_ten/">70.12</a></span></td><td class="abs-rec-box c"></td><td class="abs-rec-box c _">27 <br/> <a href="/1718/oly/sr/men/results/">FNR</a></td></tr>,
 <tr class=""><td class="event-title"><a href="/1718/4cc/">4CC</a></td><td class="l"><span title="TPE">🇹🇼</span> Taipei <br/> <small class="gray">Jan. 22, 2018</small></td><td class="abs-rec-box c _"><span class="abs-rec"></span> <span class="abs-tm" title=""></span> 11 <br/> <span><a href="/1718/4cc/sr/men/i/short/kaz/denis_ten/">75.30</a></span></td><td class="abs-rec-box c _"><span class="abs-rec"></span> <span class="abs-tm" title=""></span> 15 <br/> <span><a href="/1718/4cc/sr/men/i/long/kaz/denis_ten/">

In [31]:
sub_results = events[3].find_all('td')[-3:]
get_sub_result(sub_results[2])

('', 0.0, '3')

In [32]:
'''
for seasonal_result in seasonal_results:
    season = seasonal_result.find('tr', attrs='group-row').text
    events = seasonal_result.find_all('tr')[2:]
    for event in events:
        # Title Name and URL
        title = event.find('td', attrs='event-title').find('a')
        title_name = title.text
        event_url = root_url + title['href']

        # Location and Date
        l_tag = event.find('td', attrs='l').find('br')
        location = list(l_tag.previous_siblings)[0].strip()
        date = list(l_tag.next_siblings)[-1].text.strip()
                
        sub_results = event.find_all('td')[-3:]
        # Short Program
        short_program_link, short_program_score, short_program_ranking = self.__get_sub_result(sub_results[0])

        # Freeskating
        freeskating_link, freeskating_score, freeskating_ranking = self.__get_sub_result(sub_results[1])

        # Final Result
        final_result_link, final_result_score, final_result_ranking = self.__get_sub_result(sub_results[2])
        '''


"\nfor seasonal_result in seasonal_results:\n    season = seasonal_result.find('tr', attrs='group-row').text\n    events = seasonal_result.find_all('tr')[2:]\n    for event in events:\n        # Title Name and URL\n        title = event.find('td', attrs='event-title').find('a')\n        title_name = title.text\n        event_url = root_url + title['href']\n\n        # Location and Date\n        l_tag = event.find('td', attrs='l').find('br')\n        location = list(l_tag.previous_siblings)[0].strip()\n        date = list(l_tag.next_siblings)[-1].text.strip()\n                \n        sub_results = event.find_all('td')[-3:]\n        # Short Program\n        short_program_link, short_program_score, short_program_ranking = self.__get_sub_result(sub_results[0])\n\n        # Freeskating\n        freeskating_link, freeskating_score, freeskating_ranking = self.__get_sub_result(sub_results[1])\n\n        # Final Result\n        final_result_link, final_result_score, final_result_ranking = sel

In [33]:
for i, row in tqdm(athlete_bio_df.iterrows(), total=len(athlete_bio_df)):
    name = row['name']
    name = name.lower().replace(' ', '_')
    noc = row['country_noc']
    gender = row['sex']
    try:
        crawled_df = crawler.get_player_page(name=name, noc=noc, gender=gender)
        if len(crawled_df) != 0:
            crawled_df.to_csv(f'./data/{noc}-{name}.csv')
        wait_random_time()
    except KeyboardInterrupt:
        raise KeyboardInterrupt
    except requests.HTTPError as e:
        print(e, end='\t')
    finally:
        print(f'{noc}, {gender}, {name}')

  0%|          | 0/60 [00:00<?, ?it/s]

bra, women, isadora_williams
404 Client Error: Not Found for url: https://skatingscores.com/kor/women/park_so-yeon	kor, women, park_so-yeon
404 Client Error: Not Found for url: https://skatingscores.com/ukr/women/nataliya_popova/	ukr, women, nataliya_popova
kaz, men, denis_ten
isr, men, alexei_bychenko
404 Client Error: Not Found for url: https://skatingscores.com/ita/men/paul_bonifacio_parkinson/	ita, men, paul_bonifacio_parkinson
404 Client Error: Not Found for url: https://skatingscores.com/est/women/jelena_glebova/	est, women, jelena_glebova
404 Client Error: Not Found for url: https://skatingscores.com/fra/women/ma%C3%A9-b%C3%A9r%C3%A9nice_meit%C3%A9	fra, women, maé-bérénice_meité
swe, women, viktoria_helgesson
geo, women, elene_gedevanishvili
ita, women, valentina_marchei
jpn, women, mao_asada
can, men, kevin_reynolds
aus, men, brendan_kerry
404 Client Error: Not Found for url: https://skatingscores.com/esp/men/javier_fern%C3%A1ndez/	esp, men, javier_fernández
404 Client Error: N