In [2]:
!pip install requests
!pip install bs4
!pip install lxml
!pip install pandas



In [1]:
# import libraries
import os
import pandas as pd
import lxml
import requests  
from bs4 import BeautifulSoup
from scraper import *



# scraping of the performance info

In [2]:
from scraper import *
from utils import *

list_url = ['https://www.vlr.gg/318937/karmine-corp-vs-team-heretics-champions-tour-2024-emea-stage-1-w5']


In [4]:
def general_data_scraper(list_url):
    """
    Function that extract the general data from a vlr.gg url (page of a match between two teams)
    The general data aims the overview display in the vlr.gg page and gather the data displayed in the tables in a textual form

    parameter:
        list_url : list of url representing the different matches
    
    return:
        result : a dataframe of all the extracted data flatten
    """

    match_stats = []

    for matchnum in range(len(list_url)):

        url = list_url[matchnum]

        unique_match_id = url.split('/')[3]

        source_match = requests.get(url=url).text
        soup_match = BeautifulSoup(source_match, features="html.parser")
        
        table_match = soup_match.findAll('table', {'class':'wf-table-inset mod-overview'})

        stage = soup_match.findAll('div', {'class':'match-header-event-series'})[0].text.strip().split(":", 1)[0]

        series = soup_match.findAll('div', {'class':'match-header-event-series'})[0].text.strip().split("\n", 1)[1].strip()

        try:
            winner = soup_match.findAll('div', {'class':'match-bet-item-team'})[0].text.strip().split("\n")[2].strip()

            rounds = soup_match.findAll('div', {'class':'vlr-rounds'})
        
            for a in [a for a in list(range(len(table_match))) if a not in [2,3]]:
                if a in [0,1]:
                    map_num = 1
                else:
                    map_num = a//2
                
                # need to check if this is a BO1 for map name
                bo_x = soup_match.findAll('div', {'class':'match-header-vs-note'})[1].text.strip()

                if "1" in bo_x:
                    map_name = soup_match.findAll('div', {'class':'map'})[0].text.strip().split('\n')[0].strip()
                else:
                    map_name = soup_match.findAll('div', {'class':'vm-stats-gamesnav-item js-map-switch'})[map_num-1].text.strip()[1:].strip()

                stage = soup_match.findAll('div', {'class':'match-header-event-series'})[0].text.strip().split(":", 1)[0]
                team_name = soup_match.findAll('div', {'class':'wf-title-med'})[a % 2].text.strip()
                round_played = rounds[map_num-1]
                
                table = table_match[a]
                headers_match = []
                
                for i in table.find_all('th'):
                    title_match = i.text.strip()
                    headers_match.append(title_match)
                
                print(headers_match)
                    
                headers_match[0] = 'Player Name'
                headers_match[1] = 'Agent Name'
                headers_match[6] = 'K/D +/–'   
                headers_match[12] = 'FK/FD +/–'   
                df_match = pd.DataFrame(columns=headers_match)
                
                for row in table.find_all('tr')[1:]:
                    data = row.find_all('td')
                    row_data = [td.text.strip() for td in data]
                    row_data[1] = str(data[1]).split('title="',1)[1].split('"',1)[0].title()
                    length = len(df_match)
                    df_match.loc[length] = row_data
                
                df_match['Team Name'] = team_name
                df_match['Map Name'] = map_name
                df_match['Map #'] = map_num
                df_match['Id'] = unique_match_id
                df_match['Unique Enum'] = unique_match_id + str(map_num)
                df_match['Stage'] = stage
                df_match['Series'] = series
                df_match['winner'] = winner

                try:
                    all_rounds = round_played.find_all('div', class_='vlr-rounds-row-col')
                    scoring_one_by_one_for_all = []
                    for i,item in enumerate(all_rounds):
                        try:
                            title_value = item['title']
                            if len(title_value) > 0:
                                scoring_one_by_one_for_all.append(title_value)
                        except:
                            continue
                    
                    scoring_round_per_team = reorganize_rounds_based_on_titles(scoring_one_by_one_for_all)
                    df_match['rounds'] = ', '.join(map(str, scoring_round_per_team[a%2]))
                except:
                    continue
                
                match_stats.append(df_match)

        except NameError:
            print(f'Matches on the {series}, {stage} are not played yet or')
            print(NameError)
    try:
        result = pd.concat(match_stats).reset_index(drop=True)
        result['Player Name'] = result['Player Name'].str.split("\n").str[0].str.strip()
        result['D'] = result['D'].str[1:-1]
        result = result.apply(pd.to_numeric, errors='ignore')
        result.drop('KAST', axis=1, inplace = True)
    except:
        result = None
        
    # result = create_id_column(result)
    return result

In [5]:
t = general_data_scraper(list_url)

['', '', 'R', 'ACS', 'K', 'D', 'A', '+/–', 'KAST', 'ADR', 'HS%', 'FK', 'FD', '+/–']


In [1]:
from scraper import *

main("https://www.vlr.gg/event/matches/1924/champions-tour-2024-pacific-kickoff/?series_id=all")

Get all the matches...
Fetching General Data...
Fetching Performance Data...
Fetching Economy Data...
Fetching Picks and Bans Data...
Saving the data...
DataFrame saved as CSV file: champions-tour-2024-pacific-kickoff_data\general_data_champions-tour-2024-pacific-kickoff.csv
DataFrame saved as CSV file: champions-tour-2024-pacific-kickoff_data\performance_data_champions-tour-2024-pacific-kickoff.csv
DataFrame saved as CSV file: champions-tour-2024-pacific-kickoff_data\economy_data_champions-tour-2024-pacific-kickoff.csv
DataFrame saved as CSV file: champions-tour-2024-pacific-kickoff_data\pick_ban_data_champions-tour-2024-pacific-kickoff.csv


0

In [6]:
result.head()

Unnamed: 0,Stage,Series,Team Name,Bans,Picks,Decider
0,Group Stage,Decider (C),M8,"[Bind, Ascent]",[Breeze],[Sunset]
1,Group Stage,Decider (C),VIT,"[Icebox, Split]",[Lotus],[Sunset]
2,Play-Ins,Group B,GTZ,"[Lotus, Split, Haven]",[],[Pearl]
3,Play-Ins,Group B,MOUZ,"[Fracture, Ascent, Bind]",[],[Pearl]
