### **Install Dependencies**

In [1]:
# !pip install -r requirements.txt

### **Actual Scraping**

In [2]:
from ExtractMatchData._functions.get_matches_url import get_matches_url
from ExtractMatchData._functions.get_match_id import get_match_ids
from ExtractMatchData._functions.get_request import get_request
from ExtractMatchData._functions.download_files import download_files
from ExtractMatchData._functions.unrar import unrar
from ExtractMatchData._functions.extrair_dados import run_csda_on_demos
from ExtractMatchData._ETL.merge_files import merge_csv_files
from ExtractMatchData._functions.chromelib import pd
import os

# -------------------------------------------------------------------------
#   The ID of each event can be found in HLTV in either of the links below
#   For example: 
#   https://www.hltv.org/events/7909/blast-bounty-2025-season-1-finals
#   https://www.hltv.org/results?event=7909
# -------------------------------------------------------------------------

# id_event = [ 7909 ]
id_event = [ 8043, 8034, 8229, 7909 ]

root = os.path.join(os.getcwd(), 'ExtractMatchData') # Get the root directory

# Gets the URLs from all the matches in the event and other info into a pandas dataframe
tournaments_df = get_matches_url(*id_event, root=root)

# Get an ID for each match used in a url
tournaments_df = get_match_ids(tournaments_df)

# Makes a get request to fetch the direct links to download the demos
tournaments_df = get_request(tournaments_df)

# Makes the request to fetch the direct link to download the demos
tournaments_df = download_files(tournaments_df)

unrar(tournaments_df)

tournaments_df = run_csda_on_demos(tournaments_df)

match_data = merge_csv_files(tournaments_df)

matches_df = match_data['_match']
teams_df = match_data['_teams']
kills_df = match_data['_kills']
players_df = match_data['_players']
players_economy_df = match_data['_players_economy']
clutches_df = match_data['_clutches']

tournaments_df['event_id'] = tournaments_df['url_event'].str.replace('https://www.hltv.org/results?event=', '')
tournaments_df.rename(columns={'url': 'url_match'}, inplace=True)

output_dir = os.path.join(os.getcwd(), "ExtractMatchData" ,"tournaments_tables")
os.makedirs(output_dir, exist_ok=True)

dataframes = {
    'tournaments': tournaments_df,
    'matches': matches_df,
    'teams': teams_df, 
    'kills': kills_df,
    'players': players_df,
    'players_economy': players_economy_df,
    'clutches': clutches_df
}

for name, df in dataframes.items():
    print(f"\n{name}:")
    display(df.head(5))
    output_path = os.path.join(output_dir, f"{name}.csv")
    df.to_csv(output_path, index=False)
    print(f"Saved {output_path}")

---------------------------------------------
       MATCHUPS CAN BE Bo1, Bo3 or Bo5
---------------------------------------------

Event: PGL Cluj-Napoca 2025
Number of matchups: 41

Event: IEM Katowice 2025
Number of matchups: 29

Event: IEM Katowice 2025 Play-in
Number of matchups: 20

Event: BLAST Bounty 2025 Season 1 Finals
Number of matchups: 7


Total number of matchups: 97
---------------------------------------------


Fetching Match IDs : 100%|███████████████████████████████████████████████████████████| 97/97 [22:22]
Downloading .rar   : 100%|███████████████████████████████████████████████████████████| 97/97 [15:00]
Exctracting .rar   : 100%|███████████████████████████████████████████████████████████| 97/97 [11:04]



---------------------------------------------

Number of games: 18


Processing BLAST Bounty 2025 Season 1 Finals: 100%|██████████████████████████████████| 18/18 [03:11]



Number of games: 87


Processing IEM Katowice 2025: 100%|██████████████████████████████████████████████████| 87/87 [12:55]



Number of games: 52


Processing IEM Katowice 2025 Play-in: 100%|██████████████████████████████████████████| 52/52 [07:51]



Number of games: 96


Processing PGL Cluj-Napoca 2025: 100%|███████████████████████████████████████████████| 96/96 [16:40]



tournaments:


Unnamed: 0,url_match,url_event,tournament,nspc_tournament,match_id,match_format,match_date,team_1,team_1_result,team_2,team_2_result,event_id,demo_path,output_data_path
0,https://www.hltv.org/matches/2379366/mouz-vs-f...,https://www.hltv.org/results?event=8043,PGL Cluj-Napoca 2025,pgl-cluj-napoca-2025,94567,Bo5,2025-02-23,MOUZ,3,Falcons,1,8043,c:\Projects\cs2-match-and-player-scraper\Extra...,c:\Projects\cs2-match-and-player-scraper\Extra...
1,https://www.hltv.org/matches/2379365/astralis-...,https://www.hltv.org/results?event=8043,PGL Cluj-Napoca 2025,pgl-cluj-napoca-2025,94558,Bo3,2025-02-23,Astralis,2,FaZe,1,8043,c:\Projects\cs2-match-and-player-scraper\Extra...,c:\Projects\cs2-match-and-player-scraper\Extra...
2,https://www.hltv.org/matches/2379364/falcons-v...,https://www.hltv.org/results?event=8043,PGL Cluj-Napoca 2025,pgl-cluj-napoca-2025,94531,Bo3,2025-02-22,Falcons,2,FaZe,0,8043,c:\Projects\cs2-match-and-player-scraper\Extra...,c:\Projects\cs2-match-and-player-scraper\Extra...
3,https://www.hltv.org/matches/2379363/mouz-vs-a...,https://www.hltv.org/results?event=8043,PGL Cluj-Napoca 2025,pgl-cluj-napoca-2025,94527,Bo3,2025-02-22,MOUZ,2,Astralis,1,8043,c:\Projects\cs2-match-and-player-scraper\Extra...,c:\Projects\cs2-match-and-player-scraper\Extra...
4,https://www.hltv.org/matches/2379362/faze-vs-s...,https://www.hltv.org/results?event=8043,PGL Cluj-Napoca 2025,pgl-cluj-napoca-2025,94505,Bo3,2025-02-21,FaZe,2,SAW,0,8043,c:\Projects\cs2-match-and-player-scraper\Extra...,c:\Projects\cs2-match-and-player-scraper\Extra...


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\tournaments.csv

matches:


Unnamed: 0,checksum,date,source,map,kill_count,assist_count,death_count,tournament
0,59e5b2c76e05bce0,2025-01-26,esl,de_anubis,204,72,207,BLAST Bounty 2025 Season 1 Finals
1,2a4c3f36d8d46afa,2025-01-26,esl,de_nuke,130,42,133,BLAST Bounty 2025 Season 1 Finals
2,d8b7e0bddd1b3eb3,2025-01-26,esl,de_dust2,110,45,110,BLAST Bounty 2025 Season 1 Finals
3,9cb48de6a17ebe1,2025-01-26,esl,de_mirage,147,42,149,BLAST Bounty 2025 Season 1 Finals
4,f101e58241ef725a,2025-01-23,esl,de_anubis,107,38,108,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\matches.csv

teams:


Unnamed: 0,name,team,match_checksum,tournament
0,Team Spirit,Team A,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
1,Eternal Fire,Team B,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
2,Eternal Fire,Team A,2a4c3f36d8d46afa,BLAST Bounty 2025 Season 1 Finals
3,Team Spirit,Team B,2a4c3f36d8d46afa,BLAST Bounty 2025 Season 1 Finals
4,Team Spirit,Team A,d8b7e0bddd1b3eb3,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\teams.csv

kills:


Unnamed: 0,killer_name,killer_steamid,killer_team_name,victim_name,victim_steamid,victim_side,victim_team_name,weapon_name,headshot,is_trade_kill,match_checksum,tournament
0,chopper,76561198045898864,Team Spirit,jottAAA,76561198410750263,Terrorist,Eternal Fire,USP-S,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
1,MAJ3R,76561197967432889,Eternal Fire,zont1x,76561198995880877,Counter Terrorist,Team Spirit,Glock-18,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
2,sh1ro,76561198081484775,Team Spirit,XANTARES,76561198044118796,Terrorist,Eternal Fire,Dual Berettas,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
3,Wicadia,76561198812513923,Eternal Fire,sh1ro,76561198081484775,Counter Terrorist,Team Spirit,Glock-18,1,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
4,chopper,76561198045898864,Team Spirit,MAJ3R,76561197967432889,Terrorist,Eternal Fire,USP-S,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\kills.csv

players:


Unnamed: 0,name,steamid,team_name,kills,assists,deaths,headshots,hs_%,kd,kast,...,1k,2k,3k,4k,5k,htlv_2,htlv,crosshair_share_code,match_checksum,tournament
0,donk,76561198386265483,Team Spirit,16,7,24,9,56,0.666667,76.666664,...,9,2,1,0,0,0.866753,0.705846,CSGO-EvvTA-D6U88-mXTHk-acm3G-bkMHA,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
1,zont1x,76561198995880877,Team Spirit,24,9,22,7,29,1.090909,76.666664,...,9,4,1,1,0,1.238397,1.137852,CSGO-YRxPX-pKo3w-qZ2eU-GFa9n-biaAB,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
2,Wicadia,76561198812513923,Eternal Fire,20,4,22,10,50,0.909091,56.666668,...,6,4,2,0,0,0.884602,0.968446,CSGO-6pOpF-xdjGY-EF69i-LRLJ6-OAtKG,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
3,sh1ro,76561198081484775,Team Spirit,23,7,16,5,21,1.4375,86.666664,...,11,5,1,0,0,1.349885,1.186563,CSGO-9vc2t-THGUY-Sf8QP-hMYOK-HzQRO,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
4,XANTARES,76561198044118796,Eternal Fire,23,10,21,14,60,1.095238,76.666664,...,8,4,1,1,0,1.247204,1.137264,CSGO-Zwh2K-kGAt3-9txk7-rqbm6-8LhWA,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\players.csv

players_economy:


Unnamed: 0,steamid,name,player_side,equipment_value,type,round,match_checksum,tournament
0,76561197967432889,MAJ3R,Terrorist,850,pistol,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
1,76561198045898864,chopper,Counter Terrorist,400,pistol,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
2,76561198995880877,zont1x,Counter Terrorist,1150,pistol,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
3,76561198812513923,Wicadia,Terrorist,1150,pistol,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
4,76561199063238565,magixx,Counter Terrorist,900,pistol,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\players_economy.csv

clutches:


Unnamed: 0,round,won,steamid,name,survived,kill_count,match_checksum,tournament
0,1,0,76561199063238565,magixx,0,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
1,2,0,76561198081484775,sh1ro,0,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
2,3,0,76561198083485506,woxic,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
3,4,0,76561198083485506,woxic,1,0,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals
4,5,0,76561198812513923,Wicadia,0,1,59e5b2c76e05bce0,BLAST Bounty 2025 Season 1 Finals


Saved c:\Projects\cs2-match-and-player-scraper\ExtractMatchData\tournaments_tables\clutches.csv


In [None]:
display(tournaments_df.head(5))

In [9]:
%pip install PyMySQL[rsa]

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import pandas as pd
# Get list of CSV files in tournaments_tables directory
csv_dir = os.path.join(os.getcwd(), "ExtractMatchData", "tournaments_tables")
csv_files = [f for f in os.listdir(csv_dir) if f.endswith('.csv')]

# Dictionary to store column names for each table
table_columns = {}

# Read column names from each CSV file
for csv_file in csv_files:
    # Get table name without .csv extension
    table_name = csv_file.replace('.csv', '')
    
    # Read CSV file
    df = pd.read_csv(os.path.join(csv_dir, csv_file))
    
    # Store column names
    table_columns[table_name] = list(df.columns)
    
    print(f"\nColumns in {table_name}:")
    print(table_columns[table_name])



Columns in clutches:
['round', 'won', 'steamid', 'name', 'survived', 'kill_count', 'match_checksum', 'tournament']

Columns in kills:
['killer_name', 'killer_steamid', 'killer_team_name', 'victim_name', 'victim_steamid', 'victim_side', 'victim_team_name', 'weapon_name', 'headshot', 'is_trade_kill', 'match_checksum', 'tournament']

Columns in matches:
['checksum', 'date', 'source', 'map', 'kill_count', 'assist_count', 'death_count', 'tournament']

Columns in players:
['name', 'steamid', 'team_name', 'kills', 'assists', 'deaths', 'headshots', 'hs_%', 'kd', 'kast', 'avg_damages_per_round', 'avg_kills_per_round', 'avg_death_per_round', 'utility_damage_per_round', 'win_count', 'health_damage', 'armor_damage', 'utility_damage', 'first_kill', 'first_death', 'trade_kill', 'trade_death', 'first_trade_kill', 'first_trade_death', '1k', '2k', '3k', '4k', '5k', 'htlv_2', 'htlv', 'crosshair_share_code', 'match_checksum', 'tournament']

Columns in players_economy:
['steamid', 'name', 'player_side', 

In [8]:
import pymysql
import pandas as pd
import os
import numpy as np

# Database connection parameters
db_params = {
    'host': 'localhost',
    'user': 'root', 
    'password': 'root'
}

# Create database connection without specifying database
connection = pymysql.connect(**db_params)
cursor = connection.cursor()

try:
    # Create database if it doesn't exist
    cursor.execute("CREATE DATABASE IF NOT EXISTS dbCS2")
    cursor.execute("USE dbCS2")
    
    # Create tables for each CSV file
    for table_name, columns in table_columns.items():
        # Generate column definitions
        column_defs = []
        for col in columns:
            # Handle special column types
            if 'date' in col.lower():
                col_type = 'DATE'
            elif any(num in col.lower() for num in ['count', 'score', 'tick', 'round']):
                col_type = 'INT'
            elif any(float_col in col.lower() for float_col in ['rate', 'kd']):
                col_type = 'FLOAT'
            else:
                col_type = 'VARCHAR(255)'
            
            column_defs.append(f"`{col}` {col_type}")
        
        # Create table
        create_table_sql = f"""
        CREATE TABLE IF NOT EXISTS `{table_name}` (
            {', '.join(column_defs)}
        )
        """
        cursor.execute(create_table_sql)
        
        # Read CSV file
        csv_path = os.path.join(csv_dir, f"{table_name}.csv")
        if os.path.exists(csv_path):
            df = pd.read_csv(csv_path)
            
            # Replace NaN values with None for MySQL compatibility
            df = df.replace({np.nan: None})
            
            # Insert data row by row using pymysql
            columns_str = '`, `'.join(df.columns)
            placeholders = ', '.join(['%s'] * len(df.columns))
            insert_sql = f"INSERT INTO `{table_name}` (`{columns_str}`) VALUES ({placeholders})"
            
            values = [tuple(row) for row in df.values]
            cursor.executemany(insert_sql, values)
            
            print(f"Created and populated table: {table_name}")

    connection.commit()
    print("\nAll tables created and populated successfully!")

except Exception as e:
    print(f"An error occurred: {str(e)}")
    connection.rollback()

finally:
    cursor.close()
    connection.close()


Created and populated table: clutches
Created and populated table: kills
Created and populated table: matches
An error occurred: not enough arguments for format string
