## Configuration

Specify the path to your CSV file and whether it's a full or minimal export.

In [1]:
# Configuration Parameters
CSV_FILE_PATH = r"results_20260120_123810\all_games_full.csv"  # Update this path
CSV_TYPE = "full"  # Options: "full" or "minimal"
INCLUDE_OT_GAMES = False  # Set to False to exclude overtime games

## Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set visualization style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

print("Libraries imported successfully")

Libraries imported successfully


## Load and Prepare Data

In [3]:
# Load CSV data
df = pd.read_csv(CSV_FILE_PATH, sep=';')

print(f"Loaded {len(df)} rows from {CSV_FILE_PATH}")
print(f"\nColumns available: {list(df.columns)}")
print(f"\nNumber of unique games: {df['game_id'].nunique()}")
print(f"\nFirst few rows:")
df.head()

Loaded 272420 rows from results_20260120_123810\all_games_full.csv

Columns available: ['round_number', 'is_t1_ct', 'is_t1_winner', 'is_ot', 'is_ct_winner', 'outcome_reason_code', 'outcome_ct_wins', 'outcome_bomb_planted', 'outcome_ct_survivors', 'outcome_t_survivors', 'csf', 'csf_key', 'ct_equipment_share_per_player', 't_equipment_share_per_player', 'ct_equipment_per_player', 't_equipment_per_player', 'rng_csf', 'rng_round_outcome', 'rng_bombplant', 'rng_survivors_ct', 'rng_survivors_t', 'rng_equipment_ct', 'rng_equipment_t', 't1_funds', 't1_funds_start', 't1_earned', 't1_rs_eq_value', 't1_fte_eq_value', 't1_re_eq_value', 't1_survivors', 't1_score_start', 't1_score_end', 't1_consecutive_loss', 't1_consecutive_losses_start', 't1_consecutive_wins', 't1_consecutive_wins_start', 't1_loss_bonus_level', 't1_spent', 't2_funds', 't2_funds_start', 't2_earned', 't2_rs_eq_value', 't2_fte_eq_value', 't2_re_eq_value', 't2_survivors', 't2_score_start', 't2_score_end', 't2_consecutive_loss', 't2_con

Unnamed: 0,round_number,is_t1_ct,is_t1_winner,is_ot,is_ct_winner,outcome_reason_code,outcome_ct_wins,outcome_bomb_planted,outcome_ct_survivors,outcome_t_survivors,...,t2_consecutive_losses_start,t2_consecutive_wins,t2_consecutive_wins_start,t2_loss_bonus_level,t2_spent,t1_name,t1_strategy,t2_name,t2_strategy,game_id
0,1,False,False,False,True,4,True,False,4,0,...,0,1,0,0,4000.0,Team A,all_in,Team B,anti_allin_v2,sim_1_20260120_123810_DPE1-2_7374
1,2,False,True,False,False,1,False,True,2,5,...,0,0,1,1,9369.0,Team A,all_in,Team B,anti_allin_v2,sim_1_20260120_123810_DPE1-2_7374
2,3,False,True,False,False,1,False,True,1,2,...,1,0,0,2,0.0,Team A,all_in,Team B,anti_allin_v2,sim_1_20260120_123810_DPE1-2_7374
3,4,False,True,False,False,2,False,True,0,2,...,2,0,0,3,0.0,Team A,all_in,Team B,anti_allin_v2,sim_1_20260120_123810_DPE1-2_7374
4,5,False,True,False,False,1,False,True,0,3,...,3,0,0,4,30944.8,Team A,all_in,Team B,anti_allin_v2,sim_1_20260120_123810_DPE1-2_7374


## Filter Data

In [4]:
# Apply OT filter
if not INCLUDE_OT_GAMES:
    df_filtered = df[df['is_ot'] == False].copy()
    print(f"Filtered out OT games. Remaining rows: {len(df_filtered)}")
else:
    df_filtered = df.copy()
    print(f"Including all games (with OT). Total rows: {len(df_filtered)}")

print(f"\nGames after filter: {df_filtered['game_id'].nunique()}")
print(f"Rounds with OT: {df_filtered['is_ot'].sum()}")
print(f"Rounds without OT: {(~df_filtered['is_ot']).sum()}")

Filtered out OT games. Remaining rows: 265575

Games after filter: 10000
Rounds with OT: 0
Rounds without OT: 265575


## Data Summary Statistics

## Round Outcome & RNG Analysis

Analyzing the relationship between round outcomes (outcome_reason_code, outcome_ct_wins, outcome_bomb_planted, outcome_ct_survivors, outcome_t_survivors, csf, csf_key) and their corresponding RNG values (rng_csf, rng_round_outcome, rng_bombplant, rng_survivors_ct, rng_survivors_t).

In [5]:
# Check which columns are available (full vs minimal export)
outcome_cols = ['outcome_reason_code', 'outcome_ct_wins', 'outcome_bomb_planted', 
                'outcome_ct_survivors', 'outcome_t_survivors', 'csf', 'csf_key']
rng_cols = ['rng_csf', 'rng_round_outcome', 'rng_bombplant', 
            'rng_survivors_ct', 'rng_survivors_t']

available_outcome_cols = [col for col in outcome_cols if col in df_filtered.columns]
available_rng_cols = [col for col in rng_cols if col in df_filtered.columns]

print(f"Available outcome columns: {available_outcome_cols}")
print(f"Available RNG columns: {available_rng_cols}")

# Create working dataframe for RNG analysis
if len(available_outcome_cols) > 0 and len(available_rng_cols) > 0:
    df_rng = df_filtered[['round_number', 'game_id'] + available_outcome_cols + available_rng_cols].copy()
    print(f"\nRNG analysis dataframe created with {len(df_rng)} rows")
    print(f"\nFirst few rows:")
    display(df_rng.head())
else:
    print("\nWARNING: Not all required columns available. Use full CSV export (mode 1 or 2) for complete RNG analysis.")
    df_rng = None

Available outcome columns: ['outcome_reason_code', 'outcome_ct_wins', 'outcome_bomb_planted', 'outcome_ct_survivors', 'outcome_t_survivors', 'csf', 'csf_key']
Available RNG columns: ['rng_csf', 'rng_round_outcome', 'rng_bombplant', 'rng_survivors_ct', 'rng_survivors_t']

RNG analysis dataframe created with 265575 rows

First few rows:


Unnamed: 0,round_number,game_id,outcome_reason_code,outcome_ct_wins,outcome_bomb_planted,outcome_ct_survivors,outcome_t_survivors,csf,csf_key,rng_csf,rng_round_outcome,rng_bombplant,rng_survivors_ct,rng_survivors_t
0,1,sim_1_20260120_123810_DPE1-2_7374,4,True,False,4,0,0.5,50,0.168061,60.136808,0.0,75.980044,23.617581
1,2,sim_1_20260120_123810_DPE1-2_7374,1,False,True,2,5,0.608283,61,0.731593,23.580757,0.0,67.542782,94.422331
2,3,sim_1_20260120_123810_DPE1-2_7374,1,False,True,1,2,0.136204,14,0.786853,23.134467,0.0,50.733517,13.221199
3,4,sim_1_20260120_123810_DPE1-2_7374,2,False,True,0,2,0.10307,10,0.772724,57.407526,0.385598,27.422233,18.277501
4,5,sim_1_20260120_123810_DPE1-2_7374,1,False,True,0,3,0.506593,51,0.639253,13.176475,0.0,1.16554,57.345864


## Survivor Distribution Analysis by Side, Round End Reason, and CSF

Detailed breakdown showing survivor count percentages for each combination of side, round end reason, and CSF (in 1% increments).


In [6]:
if df_rng is not None:
    # Determine which survivor column to use based on availability
    if 'outcome_ct_survivors' in df_rng.columns:
        ct_survivors_col = 'outcome_ct_survivors'
    else:
        ct_survivors_col = None
    
    if 'outcome_t_survivors' in df_rng.columns:
        t_survivors_col = 'outcome_t_survivors'
    else:
        t_survivors_col = None
    
    if 'outcome_reason_code' not in df_rng.columns:
        print("ERROR: outcome_reason_code column not available")
    else:
        # Prepare data with CSF binned to 1% increments
        df_analysis = df_rng.copy()
        if 'csf' in df_rng.columns:
            df_analysis['csf_bin'] = (df_rng['csf'] * 100).round(0).astype(int)
        else:
            print("WARNING: csf column not available")
            df_analysis['csf_bin'] = 0
        
        # Get unique values
        sides = ['CT', 'T']
        reasons = sorted(df_analysis['outcome_reason_code'].unique())
        csf_bins = sorted(df_analysis['csf_bin'].unique())
        
        print(f"Sides: {sides}")
        print(f"Round end reasons: {reasons}")
        print(f"CSF bins (1% steps): {csf_bins[:10]}...{csf_bins[-10:]}")  # Show first and last 10
        print(f"\nTotal combinations to analyze: {len(sides) * len(reasons) * len(csf_bins)}")


Sides: ['CT', 'T']
Round end reasons: [np.int64(1), np.int64(2), np.int64(3), np.int64(4)]
CSF bins (1% steps): [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10)]...[np.int64(90), np.int64(91), np.int64(92), np.int64(93), np.int64(94), np.int64(95), np.int64(96), np.int64(97), np.int64(98), np.int64(99)]

Total combinations to analyze: 792


In [7]:
# Create comprehensive survivor distribution analysis
if df_rng is not None and ct_survivors_col and t_survivors_col:
    results = []
    
    for side in sides:
        for reason in reasons:
            for csf_bin in csf_bins:
                # Filter data for this combination
                mask = (df_analysis['outcome_reason_code'] == reason) & (df_analysis['csf_bin'] == csf_bin)
                subset = df_analysis[mask]
                
                if len(subset) == 0:
                    continue
                
                # Get survivor data based on side
                if side == 'CT':
                    survivors = subset[ct_survivors_col]
                    rng_vals = subset.get('rng_survivors_ct', pd.Series([np.nan] * len(subset)))
                else:
                    survivors = subset[t_survivors_col]
                    rng_vals = subset.get('rng_survivors_t', pd.Series([np.nan] * len(subset)))
                
                # Calculate survivor distribution
                survivor_counts = survivors.value_counts().sort_index()
                total = len(survivors)
                
                # Calculate percentages for each survivor count
                for survivor_count, count in survivor_counts.items():
                    percentage = (count / total) * 100
                    results.append({
                        'Side': side,
                        'Round_End_Reason': reason,
                        'CSF_Percentage': csf_bin,
                        'Survivor_Count': survivor_count,
                        'Percentage': percentage,
                        'Count': count
                    })
    
    df_survivors = pd.DataFrame(results)
    print(f"Analysis completed. Generated {len(df_survivors)} result rows.")
    print(f"\nSample results:")
    display(df_survivors.head(15))
else:
    print("Cannot perform analysis - missing required columns")


Analysis completed. Generated 3300 result rows.

Sample results:


Unnamed: 0,Side,Round_End_Reason,CSF_Percentage,Survivor_Count,Percentage,Count
0,CT,1,1,0,5.696203,9
1,CT,1,1,1,51.265823,81
2,CT,1,1,2,25.949367,41
3,CT,1,1,3,8.227848,13
4,CT,1,1,4,6.329114,10
5,CT,1,1,5,2.531646,4
6,CT,1,2,0,10.21645,236
7,CT,1,2,1,46.406926,1072
8,CT,1,2,2,25.151515,581
9,CT,1,2,3,10.779221,249


## Summary Table: Survivor Percentages by Side, Reason, and CSF


In [8]:
if 'df_survivors' in locals():
    # Create pivot tables for better visualization
    print("=" * 80)
    print("SURVIVOR DISTRIBUTION BY SIDE, ROUND END REASON, AND CSF")
    print("=" * 80)
    
    for side in ['CT', 'T']:
        print(f"\n{'='*80}\n{side} SIDE - Survivor Percentages\n{'='*80}")
        
        side_data = df_survivors[df_survivors['Side'] == side].copy()
        
        for reason in sorted(side_data['Round_End_Reason'].unique()):
            reason_data = side_data[side_data['Round_End_Reason'] == reason]
            csf_values = sorted(reason_data['CSF_Percentage'].unique())
            
            print(f"\nRound End Reason: {reason}")
            print("-" * 80)
            
            # Create a table for this reason across all CSF values
            for csf_pct in csf_values:
                csf_data = reason_data[reason_data['CSF_Percentage'] == csf_pct].sort_values('Survivor_Count')
                total_records = csf_data['Count'].sum()
                
                print(f"  CSF {csf_pct}% (n={total_records:,}):", end="")
                survivor_strs = []
                for _, row in csf_data.iterrows():
                    survivor_strs.append(f"{int(row['Survivor_Count'])} surv: {row['Percentage']:.1f}%")
                print(" | ".join(survivor_strs))


SURVIVOR DISTRIBUTION BY SIDE, ROUND END REASON, AND CSF

CT SIDE - Survivor Percentages

Round End Reason: 1
--------------------------------------------------------------------------------
  CSF 1% (n=158):0 surv: 5.7% | 1 surv: 51.3% | 2 surv: 25.9% | 3 surv: 8.2% | 4 surv: 6.3% | 5 surv: 2.5%
  CSF 2% (n=2,310):0 surv: 10.2% | 1 surv: 46.4% | 2 surv: 25.2% | 3 surv: 10.8% | 4 surv: 4.6% | 5 surv: 2.8%
  CSF 3% (n=2,159):0 surv: 11.6% | 1 surv: 48.6% | 2 surv: 22.6% | 3 surv: 9.0% | 4 surv: 4.6% | 5 surv: 3.6%
  CSF 4% (n=368):0 surv: 8.2% | 1 surv: 53.8% | 2 surv: 18.2% | 3 surv: 10.6% | 4 surv: 5.4% | 5 surv: 3.8%
  CSF 5% (n=29):1 surv: 41.4% | 2 surv: 41.4% | 3 surv: 3.4% | 4 surv: 10.3% | 5 surv: 3.4%
  CSF 6% (n=81):0 surv: 14.8% | 1 surv: 46.9% | 2 surv: 23.5% | 3 surv: 9.9% | 4 surv: 3.7% | 5 surv: 1.2%
  CSF 7% (n=74):0 surv: 12.2% | 1 surv: 67.6% | 2 surv: 14.9% | 3 surv: 5.4%
  CSF 8% (n=111):0 surv: 10.8% | 1 surv: 53.2% | 2 surv: 18.9% | 3 surv: 12.6% | 4 surv: 4.5%
  C

## RNG Value Boxplots by Side, Round End Reason, and CSF

Boxplots showing the distribution of RNG values for survivors.


In [18]:
from scipy import stats

if df_rng is not None:
    # Test for uniform distribution of RNG values
    rng_cols_available = [col for col in ['rng_survivors_ct', 'rng_survivors_t'] if col in df_rng.columns]
    
    if len(rng_cols_available) > 0:
        # Prepare results for uniformity tests
        test_results = []
        
        for side in ['CT', 'T']:
            rng_col = f'rng_survivors_{side.lower()}'
            
            if rng_col not in df_rng.columns:
                continue
            
            print(f"\n{'='*80}")
            print(f"{side} SIDE - Uniformity Tests for RNG Values")
            print(f"{'='*80}\n")
            
            top_reasons = df_rng['outcome_reason_code'].value_counts().head(6).index
            
            for reason in top_reasons:
                reason_data = df_rng[df_rng['outcome_reason_code'] == reason].copy()
                reason_data['csf_bin'] = (reason_data['csf'] * 100).round(0).astype(int)
                
                csf_bins_test = sorted(reason_data['csf_bin'].unique())
                csf_bins_test = csf_bins_test[::max(1, len(csf_bins_test)//4)]  # Sample ~4 CSF bins
                
                print(f"Round End Reason: {reason}")
                print("-" * 80)
                
                for csf in csf_bins_test:
                    rng_values = reason_data[reason_data['csf_bin'] == csf][rng_col].dropna().values
                    
                    if len(rng_values) < 5:
                        print(f"  CSF {csf}% - Insufficient data (n={len(rng_values)})")
                        continue
                    
                    # Kolmogorov-Smirnov test (test against uniform distribution [0,1])
                    ks_stat, ks_pval = stats.ks_1samp(rng_values, stats.uniform.cdf)
                    
                    # Chi-square goodness-of-fit test (bin the data within [0,1])
                    n_bins = 10
                    # Clip values to [0, 1] range to handle floating point edge cases
                    clipped_values = np.clip(rng_values, 0, 1)
                    observed, bin_edges = np.histogram(clipped_values, bins=n_bins, range=(0, 1))
                    expected = np.ones(n_bins) * (len(rng_values) / n_bins)
                    
                    # Verify counts match before performing chi-square
                    obs_sum = np.sum(observed)
                    exp_sum = np.sum(expected)
                    
                    if obs_sum == exp_sum and np.all(observed > 0):
                        chi2_stat, chi2_pval = stats.chisquare(observed, expected)
                        chi2_str = f"stat={chi2_stat:.4f}, p-value={chi2_pval:.6f}"
                    else:
                        chi2_stat, chi2_pval = np.nan, np.nan
                        if np.any(observed == 0):
                            chi2_str = "N/A (bins with zero observations)"
                        else:
                            chi2_str = f"N/A (count mismatch: {obs_sum} vs {exp_sum})"
                    
                    # Calculate actual min, max, mean, std
                    mean_val = np.mean(rng_values)
                    std_val = np.std(rng_values)
                    min_val = np.min(rng_values)
                    max_val = np.max(rng_values)
                    
                    # Determine if uniform (using alpha=0.05)
                    is_uniform_ks = "âœ“ Uniform" if ks_pval > 0.05 else "âœ— Not Uniform"
                    is_uniform_chi2 = "âœ“ Uniform" if (not np.isnan(chi2_pval) and chi2_pval > 0.05) else ("N/A" if np.isnan(chi2_pval) else "âœ— Not Uniform")
                    
                    print(f"  CSF {csf}% (n={len(rng_values)}):")
                    print(f"    Range: [{min_val:.4f}, {max_val:.4f}]  Mean: {mean_val:.4f}  Std: {std_val:.4f}")
                    print(f"    KS Test:    stat={ks_stat:.4f}, p-value={ks_pval:.6f} {is_uniform_ks}")
                    print(f"    Chi-Square: {chi2_str} {is_uniform_chi2}")
                    
                    test_results.append({
                        'Side': side,
                        'Reason': reason,
                        'CSF': csf,
                        'N': len(rng_values),
                        'Mean': mean_val,
                        'Std': std_val,
                        'Min': min_val,
                        'Max': max_val,
                        'KS_Stat': ks_stat,
                        'KS_Pval': ks_pval,
                        'Chi2_Stat': chi2_stat,
                        'Chi2_Pval': chi2_pval
                    })
                
                print()
        
        # Create summary dataframe
        df_uniformity_tests = pd.DataFrame(test_results)
        print(f"\n{'='*80}")
        print("UNIFORMITY TEST SUMMARY")
        print(f"{'='*80}\n")
        display(df_uniformity_tests)
    else:
        print("No survivor RNG columns available for uniformity tests")



CT SIDE - Uniformity Tests for RNG Values

Round End Reason: 4
--------------------------------------------------------------------------------
  CSF 1% (n=5):
    Range: [8.7554, 65.5750]  Mean: 35.0569  Std: 22.8642
    KS Test:    stat=1.0000, p-value=0.000000 âœ— Not Uniform
    Chi-Square: N/A (bins with zero observations) N/A
  CSF 25% (n=257):
    Range: [0.0325, 99.2112]  Mean: 49.6395  Std: 27.6558
    KS Test:    stat=0.9883, p-value=0.000000 âœ— Not Uniform
    Chi-Square: N/A (bins with zero observations) N/A
  CSF 49% (n=624):
    Range: [0.0191, 99.9441]  Mean: 51.6061  Std: 28.6847
    KS Test:    stat=0.9856, p-value=0.000000 âœ— Not Uniform
    Chi-Square: N/A (bins with zero observations) N/A
  CSF 73% (n=1065):
    Range: [0.0048, 99.7540]  Mean: 49.5069  Std: 29.0874
    KS Test:    stat=0.9887, p-value=0.000000 âœ— Not Uniform
    Chi-Square: N/A (bins with zero observations) N/A
  CSF 97% (n=9150):
    Range: [0.0077, 99.9915]  Mean: 49.9170  Std: 28.6424
    KS 

Unnamed: 0,Side,Reason,CSF,N,Mean,Std,Min,Max,KS_Stat,KS_Pval,Chi2_Stat,Chi2_Pval
0,CT,4,1,5,35.056879,22.864192,8.755441,65.574998,1.0,0.0,,
1,CT,4,25,257,49.639453,27.655768,0.032466,99.211225,0.988327,0.0,,
2,CT,4,49,624,51.606091,28.684738,0.019144,99.944064,0.985577,0.0,,
3,CT,4,73,1065,49.506916,29.087371,0.004754,99.754044,0.988732,0.0,,
4,CT,4,97,9150,49.916986,28.642392,0.007731,99.991545,0.98929,0.0,80619.254645,0.0
5,CT,2,1,508,49.669614,28.873745,0.212808,99.95977,0.996063,0.0,,
6,CT,2,25,847,48.466391,29.395192,0.179065,99.753477,0.988194,0.0,,
7,CT,2,49,695,50.780275,28.588039,0.017602,99.990451,0.991367,0.0,,
8,CT,2,73,379,48.517298,29.140429,0.298412,99.415378,0.994723,0.0,,
9,CT,2,97,247,50.538905,29.313202,0.118877,99.460324,0.983806,0.0,,
