# Import Libraries

In [5]:
import re
from io import StringIO

import httpx
import numpy as np
import pandas as pd
from selectolax.parser import HTMLParser

# Get Data From vlr.gg

## Data Extracting & Cleaning Function

In [54]:
def extract_df(df):
    """Split every multi-value stat cell of a scraped table into all/atk/def columns.

    The first column of ``df`` is read as "<name> <team>" and becomes the
    ``('Name', 'Name')`` and ``('Team', 'Team')`` columns.  The second column
    is ignored (the original code overwrote it with the team).  Every column
    from the third one onwards is expected to hold three whitespace-separated
    values per cell, which are spread over ``(<column>, 'all')``,
    ``(<column>, 'atk')`` and ``(<column>, 'def')``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with at least one column; stat columns start at position 2.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Frame of strings (object dtype) indexed like ``df`` whose columns are
        a two-level MultiIndex with level names ``'Type'`` and ``'Side'``.

    Notes
    -----
    * A cell with fewer than three numeric tokens (including a missing cell)
      becomes ``'0'``, ``'0'``, ``'0'``.
    * A cell with more than three tokens keeps only the first three; the
      previous implementation crashed on such a cell.
    * A missing first-column cell yields the placeholder ``'Nan'`` for both
      name and team, as before.  A cell with a name but no team yields an
      empty team instead of raising ``IndexError``.
    """
    sides = ('all', 'atk', 'def')

    def extract_data(data) -> str:
        # Raw string avoids the invalid-escape warning; the character class
        # spells out what the old "+-." range actually matched: + , - .
        # str() lets numeric cells (columns parsed as numbers) through.
        tokens = re.findall(r"[\d+,\-.]+", str(data))

        if len(tokens) < len(sides):
            return '0 0 0'
        # Never return more tokens than there are target columns.
        return ' '.join(tokens[:len(sides)])

    def split_player(cell):
        # Missing cells keep the historical 'Nan' placeholder for both parts.
        text = 'Nan Nan Nan' if pd.isna(cell) else str(cell)
        parts = text.split()
        name = parts[0] if parts else ''
        team = parts[1] if len(parts) > 1 else ''
        return name, team

    players = [split_player(cell) for cell in df.iloc[:, 0]]

    labels = [('Name', 'Name'), ('Team', 'Team')]
    data = [
        [name for name, _ in players],
        [team for _, team in players],
    ]

    # Work by position so duplicated column labels cannot break the lookup.
    for position in range(2, df.shape[1]):
        column = str(df.columns[position])
        # The tokens never contain a space, so this always yields 3 items.
        triples = [extract_data(cell).split(' ') for cell in df.iloc[:, position]]
        for offset, side in enumerate(sides):
            labels.append((column, side))
            data.append([triple[offset] for triple in triples])

    # Build with throw-away integer labels first: the final labels may repeat.
    result_df = pd.DataFrame(dict(enumerate(data)), index=df.index, dtype=object)
    result_df.columns = pd.MultiIndex.from_tuples(labels, names=['Type', 'Side'])

    return result_df

## Scraping Data

### Preparing URL(s)

In [55]:
# Site root; page paths are appended to it to form full URLs.
base_url = "https://www.vlr.gg"

# Path (relative to base_url) of the event's match list, all series included.
event_url = "/event/matches/1188/champions-tour-2023-lock-in-s-o-paulo/?series_id=all"

### Scraping All Matches in the Event

In [56]:
# Download the event's match-list page.
res = httpx.get(base_url + event_url)
res.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page

# Parse the returned HTML into a node tree that can be queried with CSS selectors.
html = HTMLParser(res.text)

# One <a class="wf-module-item"> element per match listed for the event.
matches = html.css('a.wf-module-item')


def _is_completed(match):
    """Return True when the match entry carries a 'Completed' status label."""
    status = match.css_first('div.ml-status')
    # css_first returns None when the entry has no status element at all.
    return status is not None and status.text().strip() == 'Completed'


# Keep only the matches whose status label reads "Completed".
completed_matches = [match for match in matches if _is_completed(match)]

In [57]:
# In case only matches with no missing data point(s) are needed for VCT LOCK//IN:
# perfect_completed_matches = completed_matches[:21] + completed_matches[22:30] + completed_matches[31:]

### Scraping Map(s) played and players data

In [73]:
def re_strip(sp, st):
    """Drop all whitespace from `st` and join the remaining chunks with `sp`."""
    return sp.join(re.findall(r'\S+', st))


def _node_text(node, sep=' '):
    """Whitespace-normalised text of a parsed node, or '' if the selector found nothing."""
    return re_strip(sep, node.text()) if node is not None else ''


# Number of completed matches to scrape; None scrapes all of them.
MATCH_LIMIT = 2

for index, match in enumerate(completed_matches[:MATCH_LIMIT]):

    match_url = f'{base_url}{match.attributes["href"]}'
    match_res = httpx.get(match_url)
    match_res.raise_for_status()  # do not parse an error page as if it were a match

    match_html = HTMLParser(match_res.text)

    # The page title is expected to look like "<team A> vs. <team B> | ...";
    # keep the two team names so the score can be placed between them.
    match_name = match_html.css_first('title').text().strip().split(' | ')[0].split(' vs. ')

    messy_match_result = match_html.css_first('div.js-spoiler').text()
    match_result = ' ' + re_strip('', messy_match_result) + ' '
    match_sum = match_result.join(match_name)

    print(index, '>> ', end='')
    print(match_sum)
    print()

    date = match_html.css_first('div.match-header-date')
    print('\t' + _node_text(date))
    print()

    stage = match_html.css_first('div.match-header-event-series')
    print('\t' + _node_text(stage))
    print()

    # The header note holds the ban/pick sequence separated by ';'.
    # Skip it when the page has no such note.
    ban_pick = match_html.css_first('div.match-header-note')
    if ban_pick is not None:
        for bp in _node_text(ban_pick).split(';'):
            print('\t' + bp.strip())

    maps = match_html.css('div.vm-stats-game')

    # Show the second stats block first (it has no map header and is printed
    # as "Overall" below), then the remaining blocks in page order. Every
    # block is kept, however many maps the match had.
    if len(maps) > 1:
        maps = [maps[1], maps[0], *maps[2:]]

    for m in maps:
        current_map = m.css_first('div.map')
        if not current_map:
            match_header = 'Overall'
        else:
            # Keep every second token of the map header text.
            # NOTE(review): presumably this skips a label between the map name
            # and its duration; confirm against the page markup.
            match_header = ' '.join(re_strip(' ', current_map.text()).split(' ')[::2])

        print(match_header)

        tables = m.css('table.wf-table-inset')

        for table in tables:
            # read_html expects a file-like object; literal HTML strings are deprecated.
            df = pd.read_html(StringIO(table.html))[0]
            display(extract_df(df))
        

0 >> KOI 0:2 NRG Esports

	Tuesday, February 14th 12:10 AM +07 Patch 6.02

	Bracket Stage: Alpha - Round of 16

	NRG ban Ascent
	KOI ban Split
	NRG pick Icebox
	KOI pick Haven
	NRG ban Fracture
	KOI ban Lotus
	Pearl remains
Overall


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,trexx,KOI,1.24,1.45,1.03,275,329,229,46,28,...,37,3,2,1,4,1,3,-1,1,-2
1,sheydos,KOI,0.95,1.16,0.75,158,188,135,27,18,...,25,2,1,1,4,1,3,-2,0,-2
2,starxo,KOI,0.76,0.79,0.72,123,123,124,17,9,...,13,0,0,0,6,1,5,-6,-1,-5
3,Wolfen,KOI,0.75,0.81,0.7,187,214,161,28,16,...,40,10,6,4,13,7,6,-3,-1,-2
4,koldamenta,KOI,0.72,0.77,0.64,110,115,103,17,8,...,20,1,1,0,3,2,1,-2,-1,-1


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,crashies,NRG,1.26,1.44,1.08,220,252,188,36,24,...,22,5,1,4,4,1,3,1,0,1
1,s0m,NRG,1.25,1.29,1.23,227,209,253,36,17,...,41,4,2,2,4,2,2,0,0,0
2,FNS,NRG,1.12,1.15,1.1,173,161,186,24,12,...,17,5,4,1,0,0,0,5,4,1
3,ardiis,NRG,1.08,1.27,0.88,190,217,159,27,16,...,12,10,8,2,4,1,3,6,7,-1
4,Victor,NRG,0.98,1.05,0.92,172,149,198,29,14,...,30,6,3,3,4,2,2,2,1,1


Icebox 1:12:59


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,trexx,KOI,1.24,1.72,0.77,304,391,218,27,18,...,36,2,1,1,1,1,0,1,0,1
1,sheydos,KOI,1.15,1.56,0.73,193,241,147,17,13,...,28,1,1,0,2,0,2,-1,1,-2
2,Wolfen,KOI,0.81,0.81,0.81,168,176,162,14,7,...,50,5,4,1,8,5,3,-3,-1,-2
3,starxo,KOI,0.73,1.0,0.46,134,149,119,10,6,...,20,0,0,0,4,0,4,-4,0,-4
4,koldamenta,KOI,0.59,1.08,0.09,73,134,12,5,5,...,0,0,0,0,1,0,1,-1,0,-1


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,s0m,NRG,1.42,1.77,1.07,295,353,238,24,15,...,29,4,2,2,0,0,0,4,2,2
1,crashies,NRG,1.27,1.66,0.87,244,325,163,22,16,...,16,3,1,2,2,0,2,1,1,0
2,Victor,NRG,1.0,1.23,0.76,161,181,143,14,9,...,26,2,2,0,3,2,1,-1,0,-1
3,FNS,NRG,1.0,1.21,0.8,165,158,174,12,6,...,14,4,3,1,0,0,0,4,3,1
4,ardiis,NRG,0.94,1.18,0.69,156,136,177,11,5,...,14,3,2,1,3,0,3,0,2,-2


Haven 53:43


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,trexx,KOI,1.24,1.19,1.29,246,254,240,19,10,...,38,1,1,0,3,0,3,-2,1,-3
1,koldamenta,KOI,0.85,0.45,1.19,147,92,194,12,3,...,21,1,1,0,2,2,0,-1,-1,0
2,starxo,KOI,0.8,0.59,0.98,112,92,130,7,3,...,6,0,0,0,2,1,1,-2,-1,-1
3,sheydos,KOI,0.76,0.76,0.77,123,125,123,10,5,...,19,1,0,1,2,1,1,-1,-1,0
4,Wolfen,KOI,0.68,0.8,0.59,205,260,160,14,9,...,33,5,2,3,5,2,3,0,0,0


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,crashies,NRG,1.24,1.21,1.28,196,178,218,14,8,...,31,2,0,2,2,1,1,0,-1,1
1,ardiis,NRG,1.23,1.36,1.06,224,299,136,16,11,...,8,7,6,1,1,1,0,6,5,1
2,FNS,NRG,1.23,1.09,1.4,180,163,202,12,6,...,22,1,1,0,0,0,0,1,1,0
3,s0m,NRG,1.07,0.81,1.4,158,66,270,12,2,...,59,0,0,0,4,2,2,-4,-2,-2
4,Victor,NRG,0.96,0.86,1.08,183,117,263,15,5,...,32,4,1,3,1,0,1,3,1,2


1 >> DetonatioN FocusMe 0:2 Giants Gaming

	Tuesday, February 14th 3:05 AM +07 Patch 6.02

	Bracket Stage: Alpha - Round of 16

	DFM ban Split
	GIA ban Fracture
	DFM pick Haven
	GIA pick Icebox
	DFM ban Lotus
	GIA ban Ascent
	Pearl remains
Overall


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,takej,DFM,1.0,1.11,0.93,197,200,195,13,6,...,64,2,2,0,1,0,1,1,2,-1
1,Reita,DFM,0.79,0.61,0.92,208,184,226,25,12,...,58,3,1,2,8,6,2,-5,-5,0
2,Suggest,DFM,0.72,0.6,0.87,155,131,183,21,10,...,29,4,2,2,5,2,3,-1,0,-1
3,Anthem,DFM,0.71,0.89,0.5,159,188,131,19,12,...,42,3,0,3,2,2,0,1,-2,3
4,xnfri,DFM,0.66,0.66,0.51,151,158,151,19,11,...,14,1,1,0,2,0,2,-1,1,-2
5,Seoldam,DFM,0.27,0.37,0.05,129,153,74,7,6,...,27,2,2,0,4,2,2,-2,0,-2


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,Cloud,GIA,1.58,1.71,1.59,287,250,306,42,18,...,43,4,3,1,1,0,1,3,3,0
1,nukkye,GIA,1.41,1.27,1.49,233,223,245,32,15,...,35,5,1,4,4,2,2,1,-1,2
2,rhyme,GIA,1.32,1.27,1.33,201,180,218,28,11,...,35,3,1,2,4,2,2,-1,-1,0
3,hoody,GIA,1.25,1.15,1.39,208,202,219,27,13,...,16,5,3,2,2,1,1,3,2,1
4,Fit1nho,GIA,0.97,1.18,0.73,161,224,108,20,12,...,13,5,2,3,4,2,2,1,0,1


Haven 50:48


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,Reita,DFM,0.8,0.79,0.84,263,247,303,15,10,...,46,2,1,1,3,3,0,-1,-2,1
1,Suggest,DFM,0.75,0.67,0.94,150,107,254,10,5,...,67,3,2,1,1,1,0,2,1,1
2,xnfri,DFM,0.58,0.75,0.17,102,118,65,7,6,...,20,0,0,0,1,0,1,-1,0,-1
3,Anthem,DFM,0.47,0.59,0.21,128,155,64,6,5,...,50,0,0,0,1,1,0,-1,-1,0
4,Seoldam,DFM,0.27,0.37,0.05,129,153,74,7,6,...,27,2,2,0,4,2,2,-2,0,-2


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,Cloud,GIA,1.96,2.35,1.8,372,389,366,26,9,...,37,3,2,1,1,0,1,2,2,0
1,nukkye,GIA,1.52,1.3,1.61,216,188,228,12,3,...,35,1,0,1,2,1,1,-1,-1,0
2,rhyme,GIA,1.45,1.33,1.5,204,204,204,13,3,...,35,1,0,1,1,0,1,0,0,0
3,hoody,GIA,1.2,1.21,1.2,178,199,171,10,4,...,10,2,1,1,1,1,0,1,0,1
4,Fit1nho,GIA,1.01,1.14,0.96,159,168,156,10,3,...,8,3,0,3,2,0,2,1,0,1


Icebox 51:43


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,takej,DFM,1.0,1.11,0.93,197,200,195,13,6,...,64,2,2,0,1,0,1,1,2,-1
1,Anthem,DFM,0.95,1.19,0.79,190,238,159,13,7,...,41,3,0,3,1,1,0,2,-1,3
2,Reita,DFM,0.77,0.44,0.99,152,91,194,10,2,...,73,1,0,1,5,3,2,-4,-3,-1
3,xnfri,DFM,0.74,0.57,0.85,200,220,187,12,5,...,12,1,1,0,1,0,1,0,1,-1
4,Suggest,DFM,0.69,0.53,0.79,159,169,153,11,5,...,17,1,0,1,4,1,3,-3,-1,-2


Type,Name,Team,R,R,R,ACS,ACS,ACS,K,K,...,HS%,FK,FK,FK,FD,FD,FD,+/–.1,+/–.1,+/–.1
Side,Name,Team,all,atk,def,all,atk,def,all,atk,...,def,all,atk,def,all,atk,def,all,atk,def
0,nukkye,GIA,1.3,1.24,1.37,249,237,269,20,12,...,35,4,1,3,2,1,1,2,0,2
1,hoody,GIA,1.3,1.1,1.59,238,203,291,17,9,...,28,3,2,1,1,0,1,2,2,0
2,rhyme,GIA,1.2,1.22,1.17,198,170,240,15,8,...,35,2,1,1,3,2,1,-1,-1,0
3,Cloud,GIA,1.19,1.06,1.39,202,193,217,16,9,...,67,1,1,0,0,0,0,1,1,0
4,Fit1nho,GIA,0.93,1.22,0.5,162,247,37,10,9,...,50,2,2,0,2,2,0,0,0,0


# OOP Design

In [None]:
class Event:
    """An event, identified by the URL it was created from (design sketch)."""

    def __init__(self, url):
        # Planned attributes, not populated yet:
        #   dates      -> str (maybe a pandas datetime)
        #   players    -> List[Player]
        #   prize_pool -> int or float
        #   region     -> str
        #   matches    -> List[Match]
        self.url = url

In [None]:
class Match:
    """A single match built from its page HTML (design sketch)."""

    def __init__(self, html):
        # A body made only of comments is a syntax error, so the constructor
        # now keeps its input; the planned attributes can be derived from it.
        self.html = html
        # Planned attributes, not populated yet:
        #   date   -> str
        #   stage  -> str
        #   maps   -> List[Map]
        #   winner -> Team
        