In [113]:
import polars as pl
import sys
from pathlib import Path
import numpy as np
import pandas as pd
from collections import defaultdict
import warnings

warnings.filterwarnings("ignore")

sys.path.insert(0, str(Path.cwd().parent / "src"))

from db.config import get_database_url

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_curve,
    auc,
    precision_recall_curve,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
)

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (12, 6)

In [114]:
# Cada linha
# Winner 0 = t1 win
# Winner 1 = t2 win
# History_Size: amount of previous matches to consider for average opening kills/deaths
# t1_id, t1_avg_opening_kills, t1_avg_opening_deaths, t2_id, t2_avg_opening_kills, t2_avg_opening_deaths, t1_old1_match_avg_opening, winner

In [115]:
db_url = get_database_url(make_ipv6_in_bracket=True)

player_map_stats = pl.read_database_uri(
    """
    SELECT * FROM player_map_stats JOIN map_stats USING(map_stat_id) JOIN matches USING (match_id)
                                        """,
    db_url,
)
player_map_stats

match_id,map_stat_id,player_id,team_id,opening_kills_ct,opening_deaths_ct,multikills_ct,kast_ct,clutches_ct,kills_ct,headshot_kills_ct,assists_ct,flash_assists_ct,deaths_ct,traded_deaths_ct,adr_ct,swing_ct,rating_3_dot_0_ct,opening_kills_tr,opening_deaths_tr,multikills_tr,kast_tr,clutches_tr,kills_tr,headshot_kills_tr,assists_tr,flash_assists_tr,deaths_tr,traded_deaths_tr,adr_tr,swing_tr,rating_3_dot_0_tr,map_name,team_1_score,team_2_score,team_1_overtime_score,team_2_overtime_score,team_1_ct_score,team_1_tr_score,team_2_ct_score,team_2_tr_score,picked_by,starting_ct,event_id,match_date,team_1_id,team_2_id,team_1_map_score,team_2_map_score,team_winner_id
i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]",i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]","decimal[38,10]","decimal[38,10]",i32,i32,i32,"decimal[38,10]",i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]","decimal[38,10]","decimal[38,10]",str,i32,i32,i32,i32,i32,i32,i32,i32,str,str,i32,"datetime[μs, UTC]",i32,i32,i32,i32,i32
2388750,215707,19617,11446,1,0,4,80.0000000000,1,17,5,2,1,6,0,151.4000000000,17.2300000000,3.0000000000,1,1,4,75.0000000000,0,12,6,5,1,6,2,107.9000000000,10.0900000000,1.9700000000,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602
2388750,215707,22649,11446,1,5,2,60.0000000000,0,5,2,2,0,8,1,52.0000000000,-6.8300000000,0.5500000000,2,1,3,91.7000000000,1,12,4,3,0,6,3,95.0000000000,5.5900000000,1.5700000000,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602
2388750,215707,21065,11446,0,0,1,80.0000000000,0,7,1,2,0,8,2,72.7000000000,-4.7600000000,0.7200000000,1,1,2,83.3000000000,0,9,1,3,0,5,1,70.1000000000,2.4500000000,1.2000000000,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602
2388750,215707,21651,11446,0,3,0,60.0000000000,0,2,2,3,0,9,2,39.5000000000,-4.4000000000,0.4000000000,0,2,1,75.0000000000,0,9,5,2,0,8,1,81.9000000000,-1.4200000000,1.0500000000,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602
2388750,215707,21650,11446,0,0,0,60.0000000000,0,4,2,0,0,10,3,41.7000000000,-5.1700000000,0.5400000000,1,2,1,83.3000000000,0,6,1,3,0,6,1,54.4000000000,1.1900000000,0.9800000000,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2388696,215735,7528,4991,0,1,2,91.7000000000,1,9,4,9,0,6,2,88.8000000000,0.4100000000,1.1500000000,0,0,3,90.0000000000,0,10,4,5,0,7,2,110.8000000000,-0.6900000000,1.6700000000,"""Train""",9,13,,,4,5,8,5,"""leftover""","""team_1""",8886,2025-12-19 11:15:00 UTC,11176,4991,1,2,4991
2388696,215735,23046,4991,1,0,5,91.7000000000,0,15,5,1,0,5,0,95.7000000000,8.6900000000,1.7100000000,2,0,2,90.0000000000,0,9,6,8,4,6,1,83.3000000000,5.4700000000,1.5300000000,"""Train""",9,13,,,4,5,8,5,"""leftover""","""team_1""",8886,2025-12-19 11:15:00 UTC,11176,4991,1,2,4991
2388696,215735,17803,4991,0,4,0,83.3000000000,0,4,2,7,0,7,1,67.7000000000,-6.3900000000,0.5900000000,0,2,2,70.0000000000,0,6,2,4,2,8,2,61.1000000000,-3.4000000000,1.0100000000,"""Train""",9,13,,,4,5,8,5,"""leftover""","""team_1""",8886,2025-12-19 11:15:00 UTC,11176,4991,1,2,4991
2388696,215735,23414,4991,2,1,4,83.3000000000,0,12,5,6,0,8,2,118.2000000000,3.1800000000,1.3400000000,3,0,2,60.0000000000,0,8,5,2,0,9,2,94.2000000000,-3.2400000000,0.9600000000,"""Train""",9,13,,,4,5,8,5,"""leftover""","""team_1""",8886,2025-12-19 11:15:00 UTC,11176,4991,1,2,4991


para cada player, agregando por time e mapa
estatisca do time naquele mapa terao as seguintes metricas agregadas tanto para a soma de ct e tr
stats dos players: avg(), median(), 25_percentil(), 75_percentil(), std() 
kills, clutches, headshot, flash, assist, deaths, traded_deaths, adr, swing, rating_3_dot_0, opening_kills, opening_deaths, multikills, kast

In [116]:
# List of stats to aggregate
stat_columns = [
    'kills', 'clutches', 'headshot', 'flash', 'assist', 'deaths', 
    'traded_deaths', 'adr', 'swing', 'rating_3_dot_0', 
    'opening_kills', 'opening_deaths', 'multikills', 'kast'
]

# Map column names from database to desired names
column_mapping = {
    'headshot_kills': 'headshot',
    'flash_assists': 'flash',
    'assists': 'assist'
}

# Create combined CT + TR columns for each stat
for stat in stat_columns:
    # Map the column name if needed
    if stat in column_mapping.values():
        # Find the original name
        original_stat = [k for k, v in column_mapping.items() if v == stat][0]
        ct_col = f"{original_stat}_ct"
        tr_col = f"{original_stat}_tr"
    else:
        ct_col = f"{stat}_ct"
        tr_col = f"{stat}_tr"
    
    # Sum CT and TR values
    player_map_stats = player_map_stats.with_columns(
        (pl.col(ct_col) + pl.col(tr_col)).alias(stat)
    )

player_map_stats.head()

match_id,map_stat_id,player_id,team_id,opening_kills_ct,opening_deaths_ct,multikills_ct,kast_ct,clutches_ct,kills_ct,headshot_kills_ct,assists_ct,flash_assists_ct,deaths_ct,traded_deaths_ct,adr_ct,swing_ct,rating_3_dot_0_ct,opening_kills_tr,opening_deaths_tr,multikills_tr,kast_tr,clutches_tr,kills_tr,headshot_kills_tr,assists_tr,flash_assists_tr,deaths_tr,traded_deaths_tr,adr_tr,swing_tr,rating_3_dot_0_tr,map_name,team_1_score,team_2_score,team_1_overtime_score,team_2_overtime_score,team_1_ct_score,team_1_tr_score,team_2_ct_score,team_2_tr_score,picked_by,starting_ct,event_id,match_date,team_1_id,team_2_id,team_1_map_score,team_2_map_score,team_winner_id,kills,clutches,headshot,flash,assist,deaths,traded_deaths,adr,swing,rating_3_dot_0,opening_kills,opening_deaths,multikills,kast
i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]",i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]","decimal[38,10]","decimal[38,10]",i32,i32,i32,"decimal[38,10]",i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]","decimal[38,10]","decimal[38,10]",str,i32,i32,i32,i32,i32,i32,i32,i32,str,str,i32,"datetime[μs, UTC]",i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,"decimal[38,10]","decimal[38,10]","decimal[38,10]",i32,i32,i32,"decimal[38,10]"
2388750,215707,19617,11446,1,0,4,80.0,1,17,5,2,1,6,0,151.4,17.23,3.0,1,1,4,75.0,0,12,6,5,1,6,2,107.9,10.09,1.97,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602,29,1,11,2,7,12,2,259.3,27.32,4.97,2,1,8,155.0
2388750,215707,22649,11446,1,5,2,60.0,0,5,2,2,0,8,1,52.0,-6.83,0.55,2,1,3,91.7,1,12,4,3,0,6,3,95.0,5.59,1.57,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602,17,1,6,0,5,14,4,147.0,-1.24,2.12,3,6,5,151.7
2388750,215707,21065,11446,0,0,1,80.0,0,7,1,2,0,8,2,72.7,-4.76,0.72,1,1,2,83.3,0,9,1,3,0,5,1,70.1,2.45,1.2,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602,16,0,2,0,5,13,3,142.8,-2.31,1.92,1,1,3,163.3
2388750,215707,21651,11446,0,3,0,60.0,0,2,2,3,0,9,2,39.5,-4.4,0.4,0,2,1,75.0,0,9,5,2,0,8,1,81.9,-1.42,1.05,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602,11,0,7,0,5,17,3,121.4,-5.82,1.45,0,5,1,135.0
2388750,215707,21650,11446,0,0,0,60.0,0,4,2,0,0,10,3,41.7,-5.17,0.54,1,2,1,83.3,0,6,1,3,0,6,1,54.4,1.19,0.98,"""Ancient""",13,9,,,3,6,9,4,"""team_1""","""team_2""",8895,2025-12-19 08:30:00 UTC,11446,4602,1,2,4602,10,0,3,0,3,16,4,96.1,-3.98,1.52,1,2,1,143.3


In [117]:
# Group by team_id and map_stat_id, then calculate aggregations
agg_expressions = []

for stat in stat_columns:
    agg_expressions.extend([
        pl.col(stat).mean().alias(f"{stat}_avg"),
        pl.col(stat).median().alias(f"{stat}_median"),
        pl.col(stat).quantile(0.25).alias(f"{stat}_p25"),
        pl.col(stat).quantile(0.75).alias(f"{stat}_p75"),
        pl.col(stat).std().alias(f"{stat}_std")
    ])

team_map_stats = player_map_stats.group_by(['team_id', 'map_stat_id']).agg(agg_expressions)

# Sort by team_id and map_stat_id for better readability
team_map_stats = team_map_stats.sort(['team_id', 'map_stat_id'])

print(f"Shape: {team_map_stats.shape}")
print(f"Columns: {len(team_map_stats.columns)}")
team_map_stats.head(10)

Shape: (158, 72)
Columns: 72


team_id,map_stat_id,kills_avg,kills_median,kills_p25,kills_p75,kills_std,clutches_avg,clutches_median,clutches_p25,clutches_p75,clutches_std,headshot_avg,headshot_median,headshot_p25,headshot_p75,headshot_std,flash_avg,flash_median,flash_p25,flash_p75,flash_std,assist_avg,assist_median,assist_p25,assist_p75,assist_std,deaths_avg,deaths_median,deaths_p25,deaths_p75,deaths_std,traded_deaths_avg,traded_deaths_median,traded_deaths_p25,traded_deaths_p75,traded_deaths_std,adr_avg,adr_median,adr_p25,adr_p75,adr_std,swing_avg,swing_median,swing_p25,swing_p75,swing_std,rating_3_dot_0_avg,rating_3_dot_0_median,rating_3_dot_0_p25,rating_3_dot_0_p75,rating_3_dot_0_std,opening_kills_avg,opening_kills_median,opening_kills_p25,opening_kills_p75,opening_kills_std,opening_deaths_avg,opening_deaths_median,opening_deaths_p25,opening_deaths_p75,opening_deaths_std,multikills_avg,multikills_median,multikills_p25,multikills_p75,multikills_std,kast_avg,kast_median,kast_p25,kast_p75,kast_std
i32,i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
4602,215707,14.4,15.0,11.0,16.0,4.037326,0.6,1.0,0.0,1.0,0.547723,7.2,8.0,5.0,8.0,2.774887,1.2,1.0,0.0,1.0,1.643168,6.8,7.0,6.0,7.0,1.48324,16.6,16.0,15.0,18.0,1.81659,2.6,3.0,1.0,4.0,2.073644,150.68,142.5,126.1,145.9,51.68996,-2.794,-3.91,-11.29,3.93,9.315392,2.016,1.87,1.49,2.34,0.715493,3.0,3.0,3.0,3.0,0.707107,1.4,1.0,0.0,2.0,1.67332,3.2,4.0,2.0,4.0,1.643168,138.02,136.7,130.0,146.7,14.274698
4602,215715,20.2,19.0,16.0,23.0,5.357238,1.2,1.0,1.0,2.0,0.83666,9.4,8.0,8.0,13.0,4.09878,0.4,0.0,0.0,1.0,0.547723,6.6,5.0,5.0,8.0,4.159327,22.6,22.0,22.0,24.0,1.341641,4.6,5.0,4.0,5.0,1.140175,152.08,157.7,121.4,172.9,31.87957,3.506,5.09,4.81,5.68,4.726947,2.22,2.23,2.06,2.41,0.44911,2.6,3.0,2.0,4.0,1.67332,3.2,4.0,2.0,4.0,1.643168,4.0,4.0,3.0,4.0,1.870829,138.94,131.4,130.4,144.3,21.184853
4602,215718,15.8,17.0,14.0,17.0,2.167948,0.4,0.0,0.0,1.0,0.547723,7.6,8.0,7.0,8.0,2.50998,1.0,0.0,0.0,1.0,1.732051,4.6,5.0,4.0,5.0,1.81659,12.6,12.0,12.0,14.0,1.341641,2.6,2.0,1.0,4.0,1.81659,161.04,165.5,151.3,168.7,18.387441,5.664,5.27,2.55,9.37,4.558517,2.468,2.42,2.25,2.75,0.384148,1.8,2.0,1.0,2.0,0.83666,2.2,2.0,2.0,2.0,0.447214,4.4,5.0,3.0,5.0,1.341641,149.16,150.0,137.5,162.5,13.639208
4602,215774,12.8,13.0,11.0,13.0,2.683282,0.2,0.0,0.0,0.0,0.447214,6.6,7.0,4.0,7.0,4.159327,1.6,1.0,0.0,2.0,2.073644,6.4,6.0,6.0,8.0,1.67332,6.8,6.0,5.0,9.0,2.04939,1.8,2.0,1.0,2.0,0.83666,179.26,199.9,124.3,211.9,58.770469,12.89,17.02,10.66,17.67,7.635011,3.352,3.44,2.89,3.67,0.989328,2.0,2.0,1.0,3.0,1.0,1.0,1.0,0.0,2.0,1.0,3.2,3.0,2.0,4.0,1.30384,171.68,175.0,158.3,191.7,21.739641
4602,215779,13.6,14.0,11.0,16.0,4.929503,0.2,0.0,0.0,0.0,0.447214,7.0,8.0,2.0,10.0,4.898979,1.4,1.0,0.0,2.0,1.67332,4.4,4.0,4.0,6.0,2.302173,10.6,12.0,9.0,12.0,2.50998,1.8,2.0,1.0,2.0,0.83666,160.9,161.2,128.0,184.3,56.44267,9.752,12.36,7.07,15.88,10.990381,2.834,2.83,2.7,3.34,1.120661,2.2,2.0,2.0,3.0,0.83666,1.4,1.0,0.0,1.0,2.073644,3.6,4.0,3.0,5.0,1.67332,150.0,141.7,141.7,183.3,34.840924
4869,215705,15.0,13.0,13.0,17.0,5.291503,0.4,0.0,0.0,1.0,0.547723,5.6,6.0,5.0,6.0,1.81659,0.2,0.0,0.0,0.0,0.447214,3.2,3.0,3.0,4.0,0.83666,16.4,17.0,16.0,17.0,1.516575,2.2,2.0,2.0,3.0,0.83666,144.8,130.8,117.8,162.3,47.139421,-2.37,-4.05,-7.16,2.3,7.07509,2.014,1.84,1.48,2.2,0.823456,2.6,2.0,1.0,5.0,2.302173,2.0,2.0,1.0,3.0,1.0,3.0,3.0,2.0,4.0,1.0,138.64,130.3,121.2,156.0,25.442936
4869,215713,13.4,13.0,12.0,15.0,1.516575,0.0,0.0,0.0,0.0,0.0,5.8,5.0,5.0,8.0,2.167948,1.4,1.0,1.0,2.0,1.140175,5.6,6.0,5.0,6.0,1.140175,14.0,15.0,13.0,16.0,2.54951,2.4,2.0,1.0,4.0,2.073644,135.58,147.3,144.8,150.0,29.662636,-3.89,-2.56,-3.47,-1.07,4.7517,1.898,1.94,1.93,1.98,0.231236,2.4,2.0,2.0,3.0,1.140175,2.0,2.0,0.0,4.0,2.0,3.0,3.0,3.0,3.0,0.707107,145.0,145.0,136.7,145.0,17.816986
4869,215720,10.2,8.0,8.0,13.0,4.764452,0.2,0.0,0.0,0.0,0.447214,3.8,3.0,3.0,3.0,3.03315,0.6,0.0,0.0,1.0,0.894427,4.8,5.0,3.0,6.0,2.387467,13.6,14.0,13.0,14.0,1.81659,2.4,2.0,2.0,4.0,1.67332,120.44,106.3,102.0,126.3,38.183085,-6.182,-6.34,-12.99,-3.29,9.06487,1.542,1.2,1.1,1.77,0.74978,1.6,1.0,0.0,3.0,1.81659,2.2,2.0,1.0,4.0,1.788854,2.0,1.0,1.0,3.0,1.414214,120.68,132.1,111.9,132.1,21.609535
4869,215725,10.8,12.0,10.0,13.0,3.563706,0.2,0.0,0.0,0.0,0.447214,4.8,6.0,4.0,6.0,3.03315,0.8,1.0,0.0,1.0,0.83666,4.2,4.0,3.0,6.0,1.788854,6.0,6.0,6.0,6.0,0.707107,2.8,3.0,2.0,3.0,0.83666,180.8,220.8,129.6,238.1,75.214659,17.784,24.99,11.41,27.05,13.098896,3.938,4.55,3.37,4.93,1.480716,1.6,2.0,1.0,2.0,0.547723,1.0,1.0,1.0,1.0,0.707107,2.4,3.0,1.0,4.0,1.81659,195.0,200.0,191.7,200.0,7.462908
4869,215732,16.0,15.0,15.0,18.0,2.44949,0.2,0.0,0.0,0.0,0.447214,8.0,7.0,7.0,8.0,1.732051,1.4,1.0,1.0,2.0,1.140175,7.4,7.0,7.0,8.0,0.547723,16.0,17.0,15.0,17.0,2.0,3.0,2.0,2.0,4.0,2.0,148.06,148.5,135.1,165.0,18.723862,-2.278,-3.1,-6.75,-0.76,9.68797,1.99,1.76,1.63,2.27,0.558435,2.6,2.0,2.0,3.0,0.894427,2.2,2.0,1.0,3.0,1.30384,3.4,4.0,2.0,5.0,1.81659,138.32,133.3,125.0,150.0,15.130003


## Create Match-Level Dataset

Now we'll create the final dataset where each row represents a match with:
- Team 1 and Team 2 IDs
- All statistics prefixed with `t1_` or `t2_`
- Winner column (0 = team 1 wins, 1 = team 2 wins)

In [118]:
# First, aggregate team statistics by map_stat_id (which is unique per map per match)
team_map_stats = player_map_stats.group_by(['team_id', 'map_stat_id']).agg(agg_expressions)

# Get match information (one row per map_stat_id)
match_info = player_map_stats.select([
    'map_stat_id',
    'match_id',
    'team_1_id',
    'team_2_id',
    'team_winner_id',
    'map_name',
    'match_date',
    'event_id'
]).unique()

print(f"Team stats shape: {team_map_stats.shape}")
print(f"Match info shape: {match_info.shape}")
team_map_stats.head()

Team stats shape: (158, 72)
Match info shape: (79, 8)


team_id,map_stat_id,kills_avg,kills_median,kills_p25,kills_p75,kills_std,clutches_avg,clutches_median,clutches_p25,clutches_p75,clutches_std,headshot_avg,headshot_median,headshot_p25,headshot_p75,headshot_std,flash_avg,flash_median,flash_p25,flash_p75,flash_std,assist_avg,assist_median,assist_p25,assist_p75,assist_std,deaths_avg,deaths_median,deaths_p25,deaths_p75,deaths_std,traded_deaths_avg,traded_deaths_median,traded_deaths_p25,traded_deaths_p75,traded_deaths_std,adr_avg,adr_median,adr_p25,adr_p75,adr_std,swing_avg,swing_median,swing_p25,swing_p75,swing_std,rating_3_dot_0_avg,rating_3_dot_0_median,rating_3_dot_0_p25,rating_3_dot_0_p75,rating_3_dot_0_std,opening_kills_avg,opening_kills_median,opening_kills_p25,opening_kills_p75,opening_kills_std,opening_deaths_avg,opening_deaths_median,opening_deaths_p25,opening_deaths_p75,opening_deaths_std,multikills_avg,multikills_median,multikills_p25,multikills_p75,multikills_std,kast_avg,kast_median,kast_p25,kast_p75,kast_std
i32,i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
12485,215740,16.0,15.0,10.0,20.0,7.106335,0.2,0.0,0.0,0.0,0.447214,9.8,10.0,8.0,12.0,2.863564,1.2,0.0,0.0,1.0,2.167948,5.2,5.0,4.0,6.0,1.923538,14.8,14.0,14.0,16.0,1.095445,3.6,4.0,2.0,5.0,1.516575,140.64,114.9,105.9,182.5,44.464233,-3.026,-5.91,-6.27,-2.3,6.631744,2.166,1.85,1.63,2.42,0.801704,2.2,2.0,1.0,3.0,1.923538,2.4,2.0,1.0,4.0,1.516575,3.6,3.0,3.0,5.0,1.949359,148.04,137.2,136.4,146.2,24.683456
13547,215741,11.8,12.0,11.0,13.0,2.588436,0.4,0.0,0.0,1.0,0.547723,6.0,6.0,5.0,7.0,1.581139,1.0,0.0,0.0,0.0,2.236068,3.8,3.0,3.0,4.0,1.923538,15.6,15.0,15.0,16.0,0.894427,2.0,2.0,1.0,2.0,1.224745,138.22,143.1,125.6,149.1,23.504404,-5.252,-6.1,-10.95,1.64,8.190609,1.678,1.62,1.32,1.93,0.426814,1.8,1.0,1.0,1.0,1.788854,2.0,1.0,1.0,2.0,2.345208,2.0,2.0,1.0,3.0,1.0,129.16,133.3,116.7,141.7,16.137472
11711,215681,11.4,11.0,9.0,11.0,5.22494,0.0,0.0,0.0,0.0,0.0,5.8,6.0,6.0,6.0,3.193744,0.4,0.0,0.0,1.0,0.547723,3.8,3.0,2.0,5.0,2.167948,14.2,13.0,13.0,16.0,1.643168,1.4,2.0,1.0,2.0,0.894427,131.46,114.4,112.0,158.2,52.035546,-6.684,-10.83,-11.02,-0.26,7.004779,1.738,1.47,1.2,1.94,0.712299,2.0,2.0,2.0,3.0,1.224745,1.8,2.0,0.0,2.0,2.04939,2.2,2.0,0.0,3.0,2.48998,127.14,129.7,117.9,138.1,16.860694
13547,215705,16.2,17.0,14.0,18.0,3.193744,0.0,0.0,0.0,0.0,0.0,7.2,7.0,6.0,8.0,1.923538,0.0,0.0,0.0,0.0,0.0,5.4,6.0,3.0,7.0,2.302173,15.0,15.0,13.0,16.0,2.738613,3.2,3.0,2.0,4.0,1.923538,151.32,161.9,125.5,172.7,32.566117,2.368,4.4,-0.78,6.9,9.793269,2.256,2.21,1.99,2.47,0.663611,2.0,1.0,1.0,3.0,1.414214,2.6,2.0,1.0,5.0,2.302173,4.2,4.0,3.0,5.0,1.30384,136.64,132.6,131.0,140.9,19.305258
13581,215697,5.0,4.0,3.0,8.0,2.828427,0.2,0.0,0.0,0.0,0.447214,3.8,4.0,3.0,5.0,1.923538,0.8,0.0,0.0,2.0,1.095445,2.4,2.0,1.0,3.0,1.67332,14.4,14.0,14.0,15.0,0.547723,2.4,2.0,1.0,4.0,1.516575,90.2,68.4,60.5,123.0,44.216004,-10.854,-12.67,-13.79,-7.36,3.748377,1.028,0.76,0.65,1.42,0.504153,0.4,0.0,0.0,1.0,0.547723,2.6,3.0,1.0,3.0,1.67332,0.8,0.0,0.0,2.0,1.095445,73.34,75.0,50.0,83.3,24.600163


In [119]:
# Separate team 1 and team 2 statistics by matching team_id with team_1_id or team_2_id
# For team 1
team1_match_mapping = match_info.select(['map_stat_id', 'team_1_id'])
team1_stats = team_map_stats.join(
    team1_match_mapping,
    left_on=['map_stat_id', 'team_id'],
    right_on=['map_stat_id', 'team_1_id'],
    how='inner'
)

# For team 2
team2_match_mapping = match_info.select(['map_stat_id', 'team_2_id'])
team2_stats = team_map_stats.join(
    team2_match_mapping,
    left_on=['map_stat_id', 'team_id'],
    right_on=['map_stat_id', 'team_2_id'],
    how='inner'
)

print(f"Team 1 stats: {team1_stats.shape}")
print(f"Team 2 stats: {team2_stats.shape}")
print(f"Team 1 columns: {team1_stats.columns}")

Team 1 stats: (79, 72)
Team 2 stats: (79, 72)
Team 1 columns: ['team_id', 'map_stat_id', 'kills_avg', 'kills_median', 'kills_p25', 'kills_p75', 'kills_std', 'clutches_avg', 'clutches_median', 'clutches_p25', 'clutches_p75', 'clutches_std', 'headshot_avg', 'headshot_median', 'headshot_p25', 'headshot_p75', 'headshot_std', 'flash_avg', 'flash_median', 'flash_p25', 'flash_p75', 'flash_std', 'assist_avg', 'assist_median', 'assist_p25', 'assist_p75', 'assist_std', 'deaths_avg', 'deaths_median', 'deaths_p25', 'deaths_p75', 'deaths_std', 'traded_deaths_avg', 'traded_deaths_median', 'traded_deaths_p25', 'traded_deaths_p75', 'traded_deaths_std', 'adr_avg', 'adr_median', 'adr_p25', 'adr_p75', 'adr_std', 'swing_avg', 'swing_median', 'swing_p25', 'swing_p75', 'swing_std', 'rating_3_dot_0_avg', 'rating_3_dot_0_median', 'rating_3_dot_0_p25', 'rating_3_dot_0_p75', 'rating_3_dot_0_std', 'opening_kills_avg', 'opening_kills_median', 'opening_kills_p25', 'opening_kills_p75', 'opening_kills_std', 'opening

In [120]:
# Rename team1 columns with t1_ prefix
t1_rename = {col: f't1_{col}' for col in team1_stats.columns if col not in ['map_stat_id', 'team_id']}
team1_stats = team1_stats.rename(t1_rename).rename({'team_id': 't1_id'})

# Rename team2 columns with t2_ prefix  
t2_rename = {col: f't2_{col}' for col in team2_stats.columns if col not in ['map_stat_id', 'team_id']}
team2_stats = team2_stats.rename(t2_rename).rename({'team_id': 't2_id'})

print("Team 1 columns sample:", team1_stats.columns[:10])
print("Team 2 columns sample:", team2_stats.columns[:10])

Team 1 columns sample: ['t1_id', 'map_stat_id', 't1_kills_avg', 't1_kills_median', 't1_kills_p25', 't1_kills_p75', 't1_kills_std', 't1_clutches_avg', 't1_clutches_median', 't1_clutches_p25']
Team 2 columns sample: ['t2_id', 'map_stat_id', 't2_kills_avg', 't2_kills_median', 't2_kills_p25', 't2_kills_p75', 't2_kills_std', 't2_clutches_avg', 't2_clutches_median', 't2_clutches_p25']


In [121]:
# Join team1 and team2 stats
match_dataset = team1_stats.join(
    team2_stats,
    on='map_stat_id',
    how='inner'
)

# Join with match info to get winner
match_dataset = match_dataset.join(
    match_info.select(['map_stat_id', 'team_1_id', 'team_2_id', 'team_winner_id', 'map_name', 'match_date', 'event_id']),
    on='map_stat_id',
    how='inner'
)

# Create winner column: 0 if team_1 wins, 1 if team_2 wins
match_dataset = match_dataset.with_columns(
    (pl.col('team_winner_id') == pl.col('team_2_id')).cast(pl.Int32).alias('winner')
)

# Drop unnecessary columns (match_id, map_stat_id, player_id are already not present)
# Also drop team_winner_id since we have the winner column now
match_dataset = match_dataset.drop(['map_stat_id', 'team_winner_id', 'team_1_id', 'team_2_id'])

print(f"\nFinal match dataset shape: {match_dataset.shape}")
print(f"Columns: {match_dataset.columns}")
print(f"\nWinner distribution:")
print(match_dataset.group_by('winner').len())
match_dataset.head()


Final match dataset shape: (79, 146)
Columns: ['t1_id', 't1_kills_avg', 't1_kills_median', 't1_kills_p25', 't1_kills_p75', 't1_kills_std', 't1_clutches_avg', 't1_clutches_median', 't1_clutches_p25', 't1_clutches_p75', 't1_clutches_std', 't1_headshot_avg', 't1_headshot_median', 't1_headshot_p25', 't1_headshot_p75', 't1_headshot_std', 't1_flash_avg', 't1_flash_median', 't1_flash_p25', 't1_flash_p75', 't1_flash_std', 't1_assist_avg', 't1_assist_median', 't1_assist_p25', 't1_assist_p75', 't1_assist_std', 't1_deaths_avg', 't1_deaths_median', 't1_deaths_p25', 't1_deaths_p75', 't1_deaths_std', 't1_traded_deaths_avg', 't1_traded_deaths_median', 't1_traded_deaths_p25', 't1_traded_deaths_p75', 't1_traded_deaths_std', 't1_adr_avg', 't1_adr_median', 't1_adr_p25', 't1_adr_p75', 't1_adr_std', 't1_swing_avg', 't1_swing_median', 't1_swing_p25', 't1_swing_p75', 't1_swing_std', 't1_rating_3_dot_0_avg', 't1_rating_3_dot_0_median', 't1_rating_3_dot_0_p25', 't1_rating_3_dot_0_p75', 't1_rating_3_dot_0_std'

t1_id,t1_kills_avg,t1_kills_median,t1_kills_p25,t1_kills_p75,t1_kills_std,t1_clutches_avg,t1_clutches_median,t1_clutches_p25,t1_clutches_p75,t1_clutches_std,t1_headshot_avg,t1_headshot_median,t1_headshot_p25,t1_headshot_p75,t1_headshot_std,t1_flash_avg,t1_flash_median,t1_flash_p25,t1_flash_p75,t1_flash_std,t1_assist_avg,t1_assist_median,t1_assist_p25,t1_assist_p75,t1_assist_std,t1_deaths_avg,t1_deaths_median,t1_deaths_p25,t1_deaths_p75,t1_deaths_std,t1_traded_deaths_avg,t1_traded_deaths_median,t1_traded_deaths_p25,t1_traded_deaths_p75,t1_traded_deaths_std,t1_adr_avg,…,t2_adr_p25,t2_adr_p75,t2_adr_std,t2_swing_avg,t2_swing_median,t2_swing_p25,t2_swing_p75,t2_swing_std,t2_rating_3_dot_0_avg,t2_rating_3_dot_0_median,t2_rating_3_dot_0_p25,t2_rating_3_dot_0_p75,t2_rating_3_dot_0_std,t2_opening_kills_avg,t2_opening_kills_median,t2_opening_kills_p25,t2_opening_kills_p75,t2_opening_kills_std,t2_opening_deaths_avg,t2_opening_deaths_median,t2_opening_deaths_p25,t2_opening_deaths_p75,t2_opening_deaths_std,t2_multikills_avg,t2_multikills_median,t2_multikills_p25,t2_multikills_p75,t2_multikills_std,t2_kast_avg,t2_kast_median,t2_kast_p25,t2_kast_p75,t2_kast_std,map_name,match_date,event_id,winner
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,"datetime[μs, UTC]",i32,i32
13581,4.6,5.0,4.0,6.0,1.67332,0.0,0.0,0.0,0.0,0.0,3.2,3.0,3.0,4.0,1.48324,0.2,0.0,0.0,0.0,0.447214,1.4,1.0,1.0,2.0,0.547723,13.0,13.0,13.0,13.0,0.0,1.8,2.0,1.0,3.0,1.30384,99.8,…,149.2,243.2,56.77658,18.37,16.77,13.8,22.07,10.600134,3.942,4.06,3.15,4.77,1.174168,1.6,1.0,1.0,2.0,1.516575,1.0,0.0,0.0,2.0,1.414214,3.4,3.0,2.0,4.0,1.67332,193.32,200.0,183.3,200.0,9.146967,"""Dust2""",2025-12-19 08:20:00 UTC,8892,1
12087,15.8,15.0,12.0,20.0,5.118594,0.6,1.0,0.0,1.0,0.547723,8.2,8.0,7.0,10.0,2.387467,1.6,2.0,1.0,2.0,1.140175,7.0,7.0,6.0,7.0,3.24037,16.0,16.0,15.0,17.0,2.236068,4.0,3.0,3.0,4.0,2.345208,147.04,…,135.1,165.0,18.723862,-2.278,-3.1,-6.75,-0.76,9.68797,1.99,1.76,1.63,2.27,0.558435,2.6,2.0,2.0,3.0,0.894427,2.2,2.0,1.0,3.0,1.30384,3.4,4.0,2.0,5.0,1.81659,138.32,133.3,125.0,150.0,15.130003,"""Ancient""",2025-12-19 11:00:00 UTC,8886,0
13404,15.4,14.0,13.0,18.0,3.435113,0.4,0.0,0.0,0.0,0.894427,6.6,7.0,5.0,8.0,2.073644,1.2,1.0,1.0,2.0,0.83666,4.8,5.0,4.0,6.0,2.588436,9.0,10.0,9.0,10.0,1.732051,1.8,2.0,2.0,2.0,1.095445,190.94,…,104.8,117.2,16.09882,-9.448,-10.23,-11.88,-3.64,7.583889,1.37,1.5,1.48,1.53,0.325346,0.8,0.0,0.0,1.0,1.30384,2.6,2.0,2.0,2.0,1.949359,1.4,1.0,1.0,2.0,0.547723,117.02,106.7,106.7,126.7,19.608978,"""Ancient""",2025-12-20 07:00:00 UTC,8892,0
13419,12.6,14.0,11.0,14.0,3.781534,0.2,0.0,0.0,0.0,0.447214,6.0,6.0,6.0,7.0,1.870829,0.4,0.0,0.0,1.0,0.547723,3.4,4.0,3.0,4.0,1.516575,16.6,16.0,16.0,18.0,1.341641,3.6,3.0,3.0,4.0,1.516575,140.24,…,166.2,169.6,26.076081,3.21,-0.16,-2.65,0.84,9.95151,2.614,2.35,2.25,2.37,0.751984,2.0,1.0,0.0,4.0,2.345208,2.2,2.0,1.0,3.0,1.30384,4.2,4.0,3.0,4.0,1.643168,150.02,147.3,144.4,152.8,14.273122,"""Nuke""",2025-12-19 16:00:00 UTC,8886,1
11514,14.8,15.0,12.0,16.0,4.207137,0.0,0.0,0.0,0.0,0.0,7.6,8.0,3.0,10.0,4.722288,0.6,1.0,0.0,1.0,0.547723,5.2,4.0,4.0,5.0,2.167948,7.0,6.0,6.0,9.0,2.44949,2.6,2.0,2.0,4.0,1.341641,196.46,…,68.5,159.7,56.908769,-11.214,-9.14,-14.47,-6.21,6.040449,1.538,1.29,0.98,1.84,0.78627,0.6,0.0,0.0,1.0,0.894427,2.4,3.0,1.0,4.0,1.81659,1.2,1.0,1.0,2.0,0.83666,111.64,91.6,91.6,141.7,31.549374,"""Ancient""",2025-12-19 02:35:00 UTC,8890,0


In [122]:
# Get unique maps and sort them
unique_maps = match_dataset['map_name'].unique().sort()
print(f"Unique maps in dataset: {unique_maps.to_list()}")

map_to_encoding = {map_name: idx for idx, map_name in enumerate(unique_maps)}
print(f"\nMap encoding dictionary:")
for map_name, encoding in map_to_encoding.items():
    print(f"  {map_name}: {encoding}")

match_dataset = match_dataset.with_columns(
    pl.col('map_name').replace(map_to_encoding, return_dtype=pl.Int32).alias('map_encoding')
)

match_dataset = match_dataset.drop('map_name')

print(match_dataset.group_by('map_encoding').len().sort('map_encoding'))
match_dataset.head()

Unique maps in dataset: ['Ancient', 'Dust2', 'Inferno', 'Mirage', 'Nuke', 'Overpass', 'Train']

Map encoding dictionary:
  Ancient: 0
  Dust2: 1
  Inferno: 2
  Mirage: 3
  Nuke: 4
  Overpass: 5
  Train: 6
shape: (7, 2)
┌──────────────┬─────┐
│ map_encoding ┆ len │
│ ---          ┆ --- │
│ i32          ┆ u32 │
╞══════════════╪═════╡
│ 0            ┆ 14  │
│ 1            ┆ 13  │
│ 2            ┆ 17  │
│ 3            ┆ 7   │
│ 4            ┆ 10  │
│ 5            ┆ 9   │
│ 6            ┆ 9   │
└──────────────┴─────┘


t1_id,t1_kills_avg,t1_kills_median,t1_kills_p25,t1_kills_p75,t1_kills_std,t1_clutches_avg,t1_clutches_median,t1_clutches_p25,t1_clutches_p75,t1_clutches_std,t1_headshot_avg,t1_headshot_median,t1_headshot_p25,t1_headshot_p75,t1_headshot_std,t1_flash_avg,t1_flash_median,t1_flash_p25,t1_flash_p75,t1_flash_std,t1_assist_avg,t1_assist_median,t1_assist_p25,t1_assist_p75,t1_assist_std,t1_deaths_avg,t1_deaths_median,t1_deaths_p25,t1_deaths_p75,t1_deaths_std,t1_traded_deaths_avg,t1_traded_deaths_median,t1_traded_deaths_p25,t1_traded_deaths_p75,t1_traded_deaths_std,t1_adr_avg,…,t2_adr_p25,t2_adr_p75,t2_adr_std,t2_swing_avg,t2_swing_median,t2_swing_p25,t2_swing_p75,t2_swing_std,t2_rating_3_dot_0_avg,t2_rating_3_dot_0_median,t2_rating_3_dot_0_p25,t2_rating_3_dot_0_p75,t2_rating_3_dot_0_std,t2_opening_kills_avg,t2_opening_kills_median,t2_opening_kills_p25,t2_opening_kills_p75,t2_opening_kills_std,t2_opening_deaths_avg,t2_opening_deaths_median,t2_opening_deaths_p25,t2_opening_deaths_p75,t2_opening_deaths_std,t2_multikills_avg,t2_multikills_median,t2_multikills_p25,t2_multikills_p75,t2_multikills_std,t2_kast_avg,t2_kast_median,t2_kast_p25,t2_kast_p75,t2_kast_std,match_date,event_id,winner,map_encoding
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[μs, UTC]",i32,i32,i32
13581,4.6,5.0,4.0,6.0,1.67332,0.0,0.0,0.0,0.0,0.0,3.2,3.0,3.0,4.0,1.48324,0.2,0.0,0.0,0.0,0.447214,1.4,1.0,1.0,2.0,0.547723,13.0,13.0,13.0,13.0,0.0,1.8,2.0,1.0,3.0,1.30384,99.8,…,149.2,243.2,56.77658,18.37,16.77,13.8,22.07,10.600134,3.942,4.06,3.15,4.77,1.174168,1.6,1.0,1.0,2.0,1.516575,1.0,0.0,0.0,2.0,1.414214,3.4,3.0,2.0,4.0,1.67332,193.32,200.0,183.3,200.0,9.146967,2025-12-19 08:20:00 UTC,8892,1,1
12087,15.8,15.0,12.0,20.0,5.118594,0.6,1.0,0.0,1.0,0.547723,8.2,8.0,7.0,10.0,2.387467,1.6,2.0,1.0,2.0,1.140175,7.0,7.0,6.0,7.0,3.24037,16.0,16.0,15.0,17.0,2.236068,4.0,3.0,3.0,4.0,2.345208,147.04,…,135.1,165.0,18.723862,-2.278,-3.1,-6.75,-0.76,9.68797,1.99,1.76,1.63,2.27,0.558435,2.6,2.0,2.0,3.0,0.894427,2.2,2.0,1.0,3.0,1.30384,3.4,4.0,2.0,5.0,1.81659,138.32,133.3,125.0,150.0,15.130003,2025-12-19 11:00:00 UTC,8886,0,0
13404,15.4,14.0,13.0,18.0,3.435113,0.4,0.0,0.0,0.0,0.894427,6.6,7.0,5.0,8.0,2.073644,1.2,1.0,1.0,2.0,0.83666,4.8,5.0,4.0,6.0,2.588436,9.0,10.0,9.0,10.0,1.732051,1.8,2.0,2.0,2.0,1.095445,190.94,…,104.8,117.2,16.09882,-9.448,-10.23,-11.88,-3.64,7.583889,1.37,1.5,1.48,1.53,0.325346,0.8,0.0,0.0,1.0,1.30384,2.6,2.0,2.0,2.0,1.949359,1.4,1.0,1.0,2.0,0.547723,117.02,106.7,106.7,126.7,19.608978,2025-12-20 07:00:00 UTC,8892,0,0
13419,12.6,14.0,11.0,14.0,3.781534,0.2,0.0,0.0,0.0,0.447214,6.0,6.0,6.0,7.0,1.870829,0.4,0.0,0.0,1.0,0.547723,3.4,4.0,3.0,4.0,1.516575,16.6,16.0,16.0,18.0,1.341641,3.6,3.0,3.0,4.0,1.516575,140.24,…,166.2,169.6,26.076081,3.21,-0.16,-2.65,0.84,9.95151,2.614,2.35,2.25,2.37,0.751984,2.0,1.0,0.0,4.0,2.345208,2.2,2.0,1.0,3.0,1.30384,4.2,4.0,3.0,4.0,1.643168,150.02,147.3,144.4,152.8,14.273122,2025-12-19 16:00:00 UTC,8886,1,4
11514,14.8,15.0,12.0,16.0,4.207137,0.0,0.0,0.0,0.0,0.0,7.6,8.0,3.0,10.0,4.722288,0.6,1.0,0.0,1.0,0.547723,5.2,4.0,4.0,5.0,2.167948,7.0,6.0,6.0,9.0,2.44949,2.6,2.0,2.0,4.0,1.341641,196.46,…,68.5,159.7,56.908769,-11.214,-9.14,-14.47,-6.21,6.040449,1.538,1.29,0.98,1.84,0.78627,0.6,0.0,0.0,1.0,0.894427,2.4,3.0,1.0,4.0,1.81659,1.2,1.0,1.0,2.0,0.83666,111.64,91.6,91.6,141.7,31.549374,2025-12-19 02:35:00 UTC,8890,0,0


-----

In [None]:
# Identify columns to normalize (all numerical stats, excluding IDs, metadata, map_encoding, and target)
# Columns to exclude from normalization
columns_to_exclude = ['t1_id', 't2_id', 'match_date', 'event_id', 'map_encoding', 'winner']

# Get columns to normalize
columns_to_normalize = [col for col in match_dataset.columns if col not in columns_to_exclude]

print(f"Columns to exclude from normalization: {len(columns_to_exclude)}")
print(f"  - {columns_to_exclude}")
print(f"\nColumns to normalize: {len(columns_to_normalize)}")
print(f"  - Sample: {columns_to_normalize[:5]}")

# Convert to pandas for easier sklearn integration
match_df = match_dataset.to_pandas()

# Initialize the scaler
scaler = StandardScaler()

# Fit and transform the numerical columns
match_df[columns_to_normalize] = scaler.fit_transform(match_df[columns_to_normalize])

# Convert back to polars
match_dataset_normalized = pl.from_pandas(match_df)

match_dataset_normalized.head()

Columns to exclude from normalization: 6
  - ['t1_id', 't2_id', 'match_date', 'event_id', 'map_encoding', 'winner']

Columns to normalize: 140
  - Sample: ['t1_kills_avg', 't1_kills_median', 't1_kills_p25', 't1_kills_p75', 't1_kills_std']

Normalized dataset shape: (79, 146)
Total columns: 146

✅ Normalization complete!
   - Normalized features: 140
   - Excluded features: 6


t1_id,t1_kills_avg,t1_kills_median,t1_kills_p25,t1_kills_p75,t1_kills_std,t1_clutches_avg,t1_clutches_median,t1_clutches_p25,t1_clutches_p75,t1_clutches_std,t1_headshot_avg,t1_headshot_median,t1_headshot_p25,t1_headshot_p75,t1_headshot_std,t1_flash_avg,t1_flash_median,t1_flash_p25,t1_flash_p75,t1_flash_std,t1_assist_avg,t1_assist_median,t1_assist_p25,t1_assist_p75,t1_assist_std,t1_deaths_avg,t1_deaths_median,t1_deaths_p25,t1_deaths_p75,t1_deaths_std,t1_traded_deaths_avg,t1_traded_deaths_median,t1_traded_deaths_p25,t1_traded_deaths_p75,t1_traded_deaths_std,t1_adr_avg,…,t2_adr_p25,t2_adr_p75,t2_adr_std,t2_swing_avg,t2_swing_median,t2_swing_p25,t2_swing_p75,t2_swing_std,t2_rating_3_dot_0_avg,t2_rating_3_dot_0_median,t2_rating_3_dot_0_p25,t2_rating_3_dot_0_p75,t2_rating_3_dot_0_std,t2_opening_kills_avg,t2_opening_kills_median,t2_opening_kills_p25,t2_opening_kills_p75,t2_opening_kills_std,t2_opening_deaths_avg,t2_opening_deaths_median,t2_opening_deaths_p25,t2_opening_deaths_p75,t2_opening_deaths_std,t2_multikills_avg,t2_multikills_median,t2_multikills_p25,t2_multikills_p75,t2_multikills_std,t2_kast_avg,t2_kast_median,t2_kast_p25,t2_kast_p75,t2_kast_std,match_date,event_id,winner,map_encoding
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[μs, UTC]",i32,i32,i32
13581,-2.454467,-2.138643,-2.182169,-2.176593,-1.346583,-1.012739,-0.4022,-0.113228,-0.700404,-1.189982,-1.92263,-1.796755,-1.242346,-1.581538,-1.218992,-1.182578,-0.845102,-0.415619,-1.230383,-1.140768,-2.151954,-2.114257,-1.741161,-2.116546,-1.887471,-0.318765,-0.298634,-0.088742,-0.550052,-2.339843,-1.161034,-0.663403,-1.015073,-0.625656,-0.575983,-2.20726,…,1.21811,2.591226,1.082831,2.660284,2.195874,2.416465,2.470011,0.935318,3.063255,3.097274,2.324434,3.484042,1.831806,-0.684693,-0.989361,-0.284566,-0.697489,0.188738,-1.696993,-2.286385,-1.452421,-0.806078,-0.241356,0.036909,-0.248219,-0.300261,-0.058415,0.092552,2.337034,2.305718,2.036746,2.117576,-1.301107,2025-12-19 08:20:00 UTC,8892,1,1
12087,0.413616,0.257851,0.02444,0.882253,0.574498,1.387453,2.486326,-0.113228,1.427747,0.569326,0.557575,0.500068,0.791754,0.591358,-0.495871,0.982871,1.936692,1.636502,0.657,-0.053911,1.193178,1.128972,1.413695,0.580343,1.163961,0.463299,0.454305,0.430562,0.454668,0.537123,0.940921,0.142887,0.894231,0.123235,0.873887,0.048155,…,0.562372,-0.027511,-1.195464,-0.434122,-0.496913,-0.458104,-0.665498,0.596832,-0.383656,-0.654307,-0.427667,-0.334272,-0.342814,0.72392,0.111477,0.898631,0.304359,-0.945903,0.152144,0.074718,-0.11822,0.158773,-0.442162,0.036909,0.478051,-0.300261,0.710717,0.284297,-0.25825,-0.445034,-0.410846,-0.246725,-0.430701,2025-12-19 11:00:00 UTC,8886,0,0
13404,0.311184,0.018201,0.300266,0.445275,-0.364209,0.587389,-0.4022,-0.113228,-0.700404,1.682956,-0.23609,0.040703,-0.225296,-0.132941,-0.746839,0.364171,0.545795,1.636502,0.657,-0.529952,-0.120981,0.047896,0.151753,0.040965,0.425159,-1.361517,-1.051573,-1.127351,-1.303592,-0.111355,-1.161034,-0.663403,-0.060421,-1.374547,-0.866126,2.144105,…,-0.846766,-1.628222,-1.352631,-1.508652,-1.463173,-1.175696,-1.061042,-0.183952,-1.478474,-1.078399,-0.699255,-1.464493,-1.16603,-1.811584,-2.090199,-1.467763,-1.699337,-0.199236,0.768523,0.074718,1.21598,-0.806078,0.732255,-1.678287,-1.700758,-1.147424,-1.596679,-1.413882,-1.263333,-1.542035,-1.179129,-1.34849,0.220896,2025-12-20 07:00:00 UTC,8892,0,0
13419,-0.405836,0.018201,-0.251386,-0.428681,-0.171045,-0.212675,-0.4022,-0.113228,-0.700404,0.246487,-0.533715,-0.418661,0.283229,-0.49509,-0.909033,-0.873228,-0.845102,-0.415619,-0.286691,-0.983128,-0.957264,-0.492642,-0.479219,-1.037791,-0.789522,0.619711,0.454305,0.690215,0.705848,-0.613664,0.558747,0.142887,0.894231,0.123235,-0.279798,-0.276503,…,2.008715,0.126533,-0.755271,0.388335,-0.098484,0.115411,-0.445752,0.694626,0.718226,0.308055,0.694901,-0.181539,0.340753,-0.121248,-0.989361,-1.467763,1.306207,1.699956,0.152144,0.074718,-0.11822,0.158773,-0.442162,0.722988,0.478051,0.546903,-0.058415,0.052198,0.293838,0.132336,0.403619,-0.114324,-0.555359,2025-12-19 16:00:00 UTC,8886,1,4
11514,0.157537,0.257851,0.02444,0.008297,0.066271,-1.012739,-0.4022,-0.113228,-0.700404,-1.189982,0.259951,0.500068,-1.242346,0.591358,1.371312,-0.563878,0.545795,-0.415619,-0.286691,-0.983128,0.117957,-0.492642,0.151753,-0.498413,-0.051357,-1.882893,-2.055491,-1.906307,-1.554772,0.811715,-0.396686,-0.663403,-0.060421,0.123235,-0.523354,2.407651,…,-2.534942,-0.204995,1.090746,-1.773313,-1.315456,-1.53799,-1.41401,-0.756693,-1.181813,-1.420935,-1.604552,-0.991022,0.461842,-2.093307,-2.090199,-1.467763,-1.699337,-0.945903,0.460334,1.25527,-0.11822,1.123624,0.490704,-1.849806,-1.700758,-1.147424,-1.596679,-1.027185,-1.517199,-2.164769,-1.813068,-0.639199,1.957973,2025-12-19 02:35:00 UTC,8890,0,0
