In [1]:
#Getting dataframe setup
import pandas as pd
import numpy as np

#Causal inference library
from dowhy import CausalModel
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#Getting libraries for Logistic Regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

#testing effect modification 
import statsmodels.api as sm
import statsmodels.formula.api as smf

#for modelling 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Load dataframe
df = pd.read_csv('./out/df.csv')

In [3]:
#Add kill participation 
df['adc_killpart'] = ((df['adc_kills'] + df['adc_assists']) / df['kills']) 
df['jng_killpart'] = ((df['jng_kills'] + df['jng_assists']) / df['kills']) 

#add pre 10 kill participation for jng
#df['jng_killpartat10'] = ((df['jng_killsat10']+df['jng_assistsat10']) / df['killsat10'])

In [4]:
df = df.dropna()

In [5]:
#Define performance features for both roles 
#mb add adc_totaldamage
#for now add only killpart and maybe show trough other stuff that kills also should be add idk
#For adc adc_deaths was added looking at TP vs FP
#used tp fp analysis with model ['adc_killpart', 'adc_dpm', 'adc_cspm', 'adc_damagetakenperminute','adc_deaths' to add earnedgold -_> 0.77... -> 0.81
#remove killpart as it doesnt increase probability
adc_performance_features = ['adc_dpm', 'adc_cspm', 'adc_damagetakenperminute','adc_deaths', 'adc_earnedgold']
jng_performance_features = ['jng_killpart', 'jng_cspm', 'jng_dpm', 'jng_deaths', 'jng_assists'] 
#assists and deaths for jungle were added using the distribution analysis of TP and FP while earned gold was added using TN and FN

In [6]:
#use pearsoin correlation of variables to avoid double counting in the performance index
#use this maybe as defense 
corr_matrix = df[adc_performance_features].corr(method='pearson')
print(corr_matrix)

                           adc_dpm  adc_cspm  adc_damagetakenperminute  \
adc_dpm                   1.000000  0.145925                  0.298941   
adc_cspm                  0.145925  1.000000                 -0.136674   
adc_damagetakenperminute  0.298941 -0.136674                  1.000000   
adc_deaths               -0.017151 -0.309463                  0.552302   
adc_earnedgold            0.550871  0.464974                  0.041858   

                          adc_deaths  adc_earnedgold  
adc_dpm                    -0.017151        0.550871  
adc_cspm                   -0.309463        0.464974  
adc_damagetakenperminute    0.552302        0.041858  
adc_deaths                  1.000000       -0.197960  
adc_earnedgold             -0.197960        1.000000  


In [7]:
#Define the causal graph
#TODO: Draw the proper graph up again please so I can be sure of results
causal_graph = '''
digraph {
    opp_rating_before -> win_prob;
    opp_rating_before -> rating_after;
    rating_before -> win_prob;
    rating_before -> rating_after;
    side_adv -> win_prob;
    win_prob -> result;
    win_prob -> golddiffat15;
    win_prob -> gamelength;
    visionscore -> gamelength;
    visionscore -> kills;
    kills -> golddiffat15;
    kills -> jng_killpart;
    kills -> adc_killpart;
    golddiffat15 -> result;
    adc_dpm -> adc_kills;
    adc_dpm -> adc_assists;
    adc_dpm -> result;
    jng_dpm -> jng_kills;
    jng_dpm -> jng_assists;
    adc_kills -> kills;
    adc_kills -> adc_killpart;
    adc_assists -> adc_killpart;
    jng_kills -> kills; 
    jng_kills -> jng_killpart;
    jng_assists -> jng_killpart;
    adc_killpart -> result;
    jng_killpart -> result;
    result -> rating_after;
    adc_cspm -> golddiffat15;
    adc_cspm -> result;
    jng_cspm -> golddiffat15;
    jng_cspm -> result;
    adc_deaths -> result;
    jng_deaths -> result;
    adc_damagetakenperminute -> adc_deaths;
}
'''

causal_graph_perf_ind = '''
digraph {
    opp_rating_before -> win_prob;
    opp_rating_before -> rating_after;
    rating_before -> win_prob;
    rating_before -> rating_after;
    side_adv -> win_prob;
    win_prob -> result;
    win_prob -> golddiffat15;
    win_prob -> gamelength;
    visionscore -> gamelength;
    visionscore -> kills;
    kills -> golddiffat15;
    kills -> jng_killpart;
    kills -> adc_killpart;
    golddiffat15 -> result;
    adc_dpm -> adc_kills;
    adc_dpm -> adc_assists;
    jng_dpm -> jng_kills;
    jng_dpm -> jng_assists;
    adc_kills -> kills;
    adc_kills -> adc_killpart;
    adc_assists -> adc_killpart;
    jng_kills -> kills; 
    jng_kills -> jng_killpart;
    jng_assists -> jng_killpart;
    result -> rating_after;
    adc_cspm -> golddiffat15;
    jng_cspm -> golddiffat15;
    adc_damagetakenperminute -> adc_deaths;

    opp_rating_before -> adc_performance_index_std;
    rating_before -> adc_performance_index_std;
    win_prob -> adc_performance_index_std;
    side_adv -> adc_performance_index_std;

    opp_rating_before -> jng_performance_index_std;
    rating_before -> jng_performance_index_std;
    win_prob -> jng_performance_index_std;
    side_adv -> jng_performance_index_std;

    adc_performance_index_std -> adc_kills;
    adc_performance_index_std -> adc_assists;
    adc_performance_index_std -> adc_deaths;
    adc_performance_index_std -> adc_dpm;
    adc_performance_index_std -> adc_killpart;

    jng_performance_index_std -> jng_kills;
    jng_performance_index_std -> jng_assists;
    jng_performance_index_std -> jng_deaths;
    jng_performance_index_std -> jng_dpm;
    jng_performance_index_std -> jng_killpart;

    adc_performance_index_std -> result;
    jng_performance_index_std -> result;
}
'''

In [8]:
def causal_inference(treatment, outcome, method_name = 'backdoor.linear_regression', data = df):
    #Create causal model
    model = CausalModel(
        data = data,
        treatment = treatment,
        outcome = outcome,
        graph = causal_graph_perf_ind
    )

    #Identify causal effects
    identified_estimand = model.identify_effect()

    #Estimate the causal effect using backdoor adjustment with linearregression
    estimate = model.estimate_effect(
        identified_estimand,
        method_name = method_name
    )
    
    refute_results = None
    #refute_results = model.refute_estimate(identified_estimand, estimate, method_name='random_common_cause')

    return estimate, refute_results

In [9]:
'''for feature in adc_performance_features:
    estimate, _ = causal_inference(feature, 'result')
    print(f'Estimate for the causal effect of {feature} on the result: {estimate.value}')
    
for feature in jng_performance_features:
    estimate, _ = causal_inference(feature, 'result')
    print(f'Estimate for the causal effect of {feature} on the result: {estimate.value}')
'''

"for feature in adc_performance_features:\n    estimate, _ = causal_inference(feature, 'result')\n    print(f'Estimate for the causal effect of {feature} on the result: {estimate.value}')\n    \nfor feature in jng_performance_features:\n    estimate, _ = causal_inference(feature, 'result')\n    print(f'Estimate for the causal effect of {feature} on the result: {estimate.value}')\n"

Estimate for the causal effect of adc_killpart on the result: 0.1600550424721764
Estimate for the causal effect of adc_dpm on the result: 0.0002592780663185912
Estimate for the causal effect of adc_cspm on the result: 0.006768097456448019
Estimate for the causal effect of adc_damagetakenperminute on the result: -0.0007198617017317721
Estimate for the causal effect of adc_deaths on the result: -0.1134743559687198
Estimate for the causal effect of jng_killpart on the result: 0.005632731760707443
Estimate for the causal effect of jng_cspm on the result: 0.016706927469950283
Estimate for the causal effect of jng_dpm on the result: 0.0003182545692026473
Estimate for the causal effect of jng_deaths on the result: -0.10252868545166982
Estimate for the causal effect of jng_assists on the result: 0.03617036774012511

In [10]:
#We want to add a model for each set of performance variables which tries to predict outcomes for both games
#Model using the adc performance features
X_adc = df[adc_performance_features]
y = df['result']    #same for adc and jng

#Create the train and test split with size of test set being 0.3
X_adc_train, X_adc_test, y_adc_train, y_adc_test = train_test_split(X_adc, y, random_state=42, test_size=0.3)

#Scale data
scaler = StandardScaler()
X_adc_train_scaled = scaler.fit_transform(X_adc_train)
X_adc_test_scaled = scaler.fit_transform(X_adc_test)

log_reg_adc = LogisticRegression()
#Train model on the training data
log_reg_adc.fit(X_adc_train_scaled, y_adc_train)

#Predict using the testing data
y_adc_pred = log_reg_adc.predict(X_adc_test_scaled)
y_adc_prob = log_reg_adc.predict_proba(X_adc_test_scaled)[:,1]

#Print results
print(f'Accuracy:{accuracy_score(y_adc_test, y_adc_pred)}')
print("AUC:", roc_auc_score(y_adc_test, y_adc_prob))
print("Classification Report:\n", classification_report(y_adc_test, y_adc_pred))

Accuracy:0.8110632183908046
AUC: 0.8804574733194628
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.80      0.81     20772
           1       0.80      0.83      0.81     20988

    accuracy                           0.81     41760
   macro avg       0.81      0.81      0.81     41760
weighted avg       0.81      0.81      0.81     41760



In [11]:
#define function to print out coefficient matrix of a give logistic regression model
def log_reg_coefficient_summary(log_reg, features):
    coef_matrix = log_reg.coef_
    constant_coef = log_reg.intercept_[0]
    print("\nLogistic Regression Coefficients\n")

    print(f"{'Term':<30} {'Value':>12}")
    print("-" * 45)

    print(f"{'Intercept (β₀)':<30} {constant_coef:>12.6f}")

    for name, coef in zip(features, coef_matrix[0]):
        print(f"{name:<30} {coef:>12.6f}")

log_reg_coefficient_summary(log_reg_adc, adc_performance_features)


Logistic Regression Coefficients

Term                                  Value
---------------------------------------------
Intercept (β₀)                    -0.070335
adc_dpm                            0.337668
adc_cspm                          -0.704063
adc_damagetakenperminute          -0.165350
adc_deaths                        -1.523100
adc_earnedgold                     1.288004


In [12]:
#Create a results dataframe 
full_test_adc = df.loc[X_adc_test.index].copy()

# Add the model predictions
full_test_adc['predicted_result_adc'] = y_adc_pred
full_test_adc['predicted_prob_adc'] = y_adc_prob

#create tp, fp, tn, fn
tp_adc = full_test_adc[
    (full_test_adc['result'] == 1) &
    (full_test_adc['predicted_result_adc'] == 1)
]

fp_adc = full_test_adc[
    (full_test_adc['result'] == 0) &
    (full_test_adc['predicted_result_adc'] == 1)
]

tn_adc = full_test_adc[
    (full_test_adc['result'] == 0) &
    (full_test_adc['predicted_result_adc'] == 0)
]

fn_adc = full_test_adc[
    (full_test_adc['result'] == 1) &
    (full_test_adc['predicted_result_adc'] == 0)
]

#filter for features to look at 
numeric_features_adc = full_test_adc.select_dtypes(include=['number']).columns
other_features_adc = [f for f in numeric_features_adc if f not in adc_performance_features + ['result']]

In [13]:
#Model other features as distribution of tp vs fp

'''for feature in other_features_adc:
    plt.figure(figsize=(8,4))
    sns.kdeplot(tp_adc[feature], label="TP", fill=True)
    sns.kdeplot(fp_adc[feature], label="FP", fill=True)
    plt.title(f"Distribution of {feature}")
    plt.legend()
    plt.show()

    #from this the most noticable is definetely the adc_deaths as a strong indicator increasing accuracy by around 9% 
    #have closer look at FN and TN again
    #also look at the papers '''

'for feature in other_features_adc:\n    plt.figure(figsize=(8,4))\n    sns.kdeplot(tp_adc[feature], label="TP", fill=True)\n    sns.kdeplot(fp_adc[feature], label="FP", fill=True)\n    plt.title(f"Distribution of {feature}")\n    plt.legend()\n    plt.show()\n\n    #from this the most noticable is definetely the adc_deaths as a strong indicator increasing accuracy by around 9% \n    #have closer look at FN and TN again\n    #also look at the papers '

In [14]:
#Now look at the same comparison but between FN and TN
'''for feature in other_features_adc:
    plt.figure(figsize=(8,4))
    sns.kdeplot(tn_adc[feature], label="tn", fill=True)
    sns.kdeplot(fn_adc[feature], label="fn", fill=True)
    plt.title(f"Distribution of {feature}")
    plt.legend()
    plt.show()'''

'for feature in other_features_adc:\n    plt.figure(figsize=(8,4))\n    sns.kdeplot(tn_adc[feature], label="tn", fill=True)\n    sns.kdeplot(fn_adc[feature], label="fn", fill=True)\n    plt.title(f"Distribution of {feature}")\n    plt.legend()\n    plt.show()'

In [15]:
#Run same logistic regression but with jungle 
#Model using the jng performance features
X_jng = df[jng_performance_features]
y = df['result']    #y values are same for jungle and adc

#Create the train and test split with size of test set being 0.3
X_jng_train, X_jng_test, y_jng_train, y_jng_test = train_test_split(X_jng, y, random_state=42, test_size=0.3)

#Scale data
scaler = StandardScaler()
X_jng_train_scaled = scaler.fit_transform(X_jng_train)
X_jng_test_scaled = scaler.fit_transform(X_jng_test)

log_reg_jng = LogisticRegression()
#Train model on the training data
log_reg_jng.fit(X_jng_train_scaled, y_jng_train)

#Predict using the testing data
y_jng_pred = log_reg_jng.predict(X_jng_test_scaled)
y_jng_prob = log_reg_jng.predict_proba(X_jng_test_scaled)[:,1]

#Print results
print(f'Accuracy:{accuracy_score(y_jng_test, y_jng_pred)}')
print("AUC:", roc_auc_score(y_jng_test, y_jng_prob))
print("Classification Report:\n", classification_report(y_jng_test, y_jng_pred))
log_reg_coefficient_summary(log_reg_jng, jng_performance_features)

Accuracy:0.8833333333333333
AUC: 0.9501222921034242
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88     20772
           1       0.88      0.89      0.88     20988

    accuracy                           0.88     41760
   macro avg       0.88      0.88      0.88     41760
weighted avg       0.88      0.88      0.88     41760


Logistic Regression Coefficients

Term                                  Value
---------------------------------------------
Intercept (β₀)                     0.006484
jng_killpart                      -1.032801
jng_cspm                           0.279170
jng_dpm                            0.301663
jng_deaths                        -2.071417
jng_assists                        2.538956


In [16]:
#Create a results dataframe 
full_test_jng = df.loc[X_jng_test.index].copy()

# Add the model predictions
full_test_jng['predicted_result_jng'] = y_jng_pred
full_test_jng['predicted_prob_jng'] = y_jng_prob

#create tp, fp, tn, fn
tp_jng = full_test_jng[
    (full_test_jng['result'] == 1) &
    (full_test_jng['predicted_result_jng'] == 1)
]

fp_jng = full_test_jng[
    (full_test_jng['result'] == 0) &
    (full_test_jng['predicted_result_jng'] == 1)
]

tn_jng = full_test_jng[
    (full_test_jng['result'] == 0) &
    (full_test_jng['predicted_result_jng'] == 0)
]

fn_jng = full_test_jng[
    (full_test_jng['result'] == 1) &
    (full_test_jng['predicted_result_jng'] == 0)
]

#filter for features to look at 
numeric_features_jng = full_test_jng.select_dtypes(include=['number']).columns
other_features_jng = [f for f in numeric_features_jng if f not in jng_performance_features + ['result']+ adc_performance_features]

In [17]:
#Look at what are the major differences by looking at the distributions of correct win pred and wrong win pred 
'''for feature in other_features_jng:
    plt.figure(figsize=(8,4))
    sns.kdeplot(tp_jng[feature], label="TP", fill=True)
    sns.kdeplot(fp_jng[feature], label="FP", fill=True)
    plt.title(f"Distribution of {feature}")
    plt.legend()
    plt.show()'''

'for feature in other_features_jng:\n    plt.figure(figsize=(8,4))\n    sns.kdeplot(tp_jng[feature], label="TP", fill=True)\n    sns.kdeplot(fp_jng[feature], label="FP", fill=True)\n    plt.title(f"Distribution of {feature}")\n    plt.legend()\n    plt.show()'

In [18]:
'''for feature in other_features_jng:
    plt.figure(figsize=(8,4))
    sns.kdeplot(tn_jng[feature], label="TN", fill=True)
    sns.kdeplot(fn_jng[feature], label="FN", fill=True)
    plt.title(f"Distribution of {feature}")
    plt.legend()
    plt.show()'''

'for feature in other_features_jng:\n    plt.figure(figsize=(8,4))\n    sns.kdeplot(tn_jng[feature], label="TN", fill=True)\n    sns.kdeplot(fn_jng[feature], label="FN", fill=True)\n    plt.title(f"Distribution of {feature}")\n    plt.legend()\n    plt.show()'

In [19]:
model_base = smf.logit(
    'result ~ Q("total cs")',
    data=df
).fit()

print(model_base.summary())


Optimization terminated successfully.
         Current function value: 0.684605
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:                 result   No. Observations:               139197
Model:                          Logit   Df Residuals:                   139195
Method:                           MLE   Df Model:                            1
Date:                Thu, 15 Jan 2026   Pseudo R-squ.:                 0.01232
Time:                        13:51:43   Log-Likelihood:                -95295.
converged:                       True   LL-Null:                       -96483.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        -1.4247      0.030    -46.923      0.000      -1.484      -1.365
Q("total cs")     0.

In [None]:
model_adc = smf.logit(
    "result ~ adc_golddiffat15 + jng_golddiffat15",
    data=df
).fit()

print(model_adc.summary())
#kind of implies that cs is stronger performance indicator for jungle than for adc 
#adc_total_cs    1.000581
#jng_total_cs    1.007934
#adc_kills    1.497634
#jng_kills    1.358577
import numpy as np

np.exp(model_adc.params)

Optimization terminated successfully.
         Current function value: 0.592880
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                 result   No. Observations:               139197
Model:                          Logit   Df Residuals:                   139194
Method:                           MLE   Df Model:                            2
Date:                Thu, 15 Jan 2026   Pseudo R-squ.:                  0.1446
Time:                        13:51:44   Log-Likelihood:                -82527.
converged:                       True   LL-Null:                       -96483.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            0.0054      0.006      0.905      0.365      -0.006       0.017
adc_golddif

Intercept           1.005386
adc_golddiffat15    1.000670
jng_golddiffat15    1.000809
dtype: float64

In [21]:
'''#Try to create latent variable 
#using or atleast assuming latent variable model  

def create_perf_ind(features, log_reg: LogisticRegression, name):
    scaler = StandardScaler()
    Z = scaler.fit_transform(df[features])

    #get the weights based on the logistic regression object
    weights = log_reg.coef_[0]

    df[name] = np.dot(Z,weights)
    df[name + '_std'] = (df[name] - df[name].mean()) / df[name].std()

#create adc and jng performance index
create_perf_ind(adc_performance_features, log_reg_adc, 'adc_performance_index')
create_perf_ind(jng_performance_features, log_reg_jng, 'jng_performance_index')

print(df[['adc_performance_index_std', 'jng_performance_index_std']]) '''

"#Try to create latent variable \n#using or atleast assuming latent variable model  \n\ndef create_perf_ind(features, log_reg: LogisticRegression, name):\n    scaler = StandardScaler()\n    Z = scaler.fit_transform(df[features])\n\n    #get the weights based on the logistic regression object\n    weights = log_reg.coef_[0]\n\n    df[name] = np.dot(Z,weights)\n    df[name + '_std'] = (df[name] - df[name].mean()) / df[name].std()\n\n#create adc and jng performance index\ncreate_perf_ind(adc_performance_features, log_reg_adc, 'adc_performance_index')\ncreate_perf_ind(jng_performance_features, log_reg_jng, 'jng_performance_index')\n\nprint(df[['adc_performance_index_std', 'jng_performance_index_std']]) "

In [22]:
'''estimate, _ = causal_inference('jng_performance_index_std', 'result')
print(f'Estimate for the causal effect of jng_performance_index_std on the result: {estimate.value}')

estimate, _ = causal_inference('adc_performance_index_std', 'result')
print(f'Estimate for the causal effect of adc_performance_index_std on the result: {estimate.value}')'''

"estimate, _ = causal_inference('jng_performance_index_std', 'result')\nprint(f'Estimate for the causal effect of jng_performance_index_std on the result: {estimate.value}')\n\nestimate, _ = causal_inference('adc_performance_index_std', 'result')\nprint(f'Estimate for the causal effect of adc_performance_index_std on the result: {estimate.value}')"

In [28]:
from semopy import Model

df['adc_deaths_rev'] = -df['adc_deaths']
df['adc_damagetakenperminute_rev'] = -df['adc_damagetakenperminute']

#Define CFA model
model_desc = '''
ADC_Performance =~ adc_dpm + adc_cspm + adc_damagetakenperminute_rev + adc_deaths_rev + adc_earnedgold
'''

#Estimate both the factor loadings for each indicator and also the measurement errors by fitting a model to the data
model_adc = Model(model_desc)
model_adc.fit(df)

#Estimate factor scores using MAP
factor_scores = model_adc.predict_factors(df)

factor_scores['ADC_Performance_std'] = (
    factor_scores['ADC_Performance'] - factor_scores['ADC_Performance'].mean()
) / factor_scores['ADC_Performance'].std()

print(factor_scores['ADC_Performance_std'])

factor_scores = factor_scores.reset_index(drop=True)
df = df.reset_index(drop=True)

df['adc_performance_index_std'] = factor_scores['ADC_Performance_std']

0        -0.273911
1        -1.753341
2        -0.130052
3        -0.623404
4        -0.810846
            ...   
139192    1.708834
139193    0.634976
139194    1.538290
139195    2.164805
139196    1.153990
Name: ADC_Performance_std, Length: 139197, dtype: float64


In [None]:

#jng_performance_features = ['jng_killpart', 'jng_cspm', 'jng_dpm', 'jng_deaths', 'jng_assists'] 

df['jng_deaths_rev'] = -df['jng_deaths']

#measurement model e.g. CFA model
model_desc_jng = '''
JNG_Performance =~ jng_killpart + jng_cspm + jng_dpm + jng_deaths + jng_assists
'''

model_jng = Model(model_desc_jng)
model_jng.fit(df)

factor_scores = model_jng.predict_factors(df)

factor_scores['JNG_Performance_std'] = (
    factor_scores['JNG_Performance'] - factor_scores['JNG_Performance'].mean()
) / factor_scores['JNG_Performance'].std()

print(model_jng.inspect())

factor_scores = factor_scores.reset_index(drop=True)
df = df.reset_index(drop=True)

df['jng_performance_index_std'] = factor_scores['JNG_Performance_std']

0         0.465460
1        -0.390623
2         1.156382
3        -0.152914
4        -0.660452
            ...   
139192    0.289807
139193    0.597777
139194    0.175794
139195   -0.235345
139196    0.869370
Name: JNG_Performance_std, Length: 139197, dtype: float64
               lval  op             rval      Estimate   Std. Err     z-value  \
0      jng_killpart   ~  JNG_Performance      1.000000          -           -   
1          jng_cspm   ~  JNG_Performance      1.658021   0.046147    35.92898   
2           jng_dpm   ~  JNG_Performance    150.719974   4.545619   33.157196   
3        jng_deaths   ~  JNG_Performance     -4.299091   0.083972  -51.196798   
4       jng_assists   ~  JNG_Performance     24.060488   0.440356   54.638729   
5   JNG_Performance  ~~  JNG_Performance      0.011209   0.000225   49.806033   
6       jng_assists  ~~      jng_assists     13.013608   0.128476  101.292358   
7          jng_cspm  ~~         jng_cspm      1.470994   0.005684  258.817725   
8   

In [25]:
print(len(factor_scores))
print(len(df))
print(df.isnull().sum())

139197
139197
gameid                          0
teamid                          0
result                          0
side                            0
kills                           0
                               ..
adc_deaths_rev                  0
adc_damagetakenperminute_rev    0
adc_performance_index_std       0
jng_deaths_rev                  0
jng_performance_index_std       0
Length: 71, dtype: int64


In [26]:
estimate, _ = causal_inference('jng_performance_index_std', 'result', data=df)
print(f'Estimate for the causal effect of jng_performance_index_std on the result: {estimate.value}')

estimate, _ = causal_inference('adc_performance_index_std', 'result', data=df)
print(f'Estimate for the causal effect of adc_performance_index_std on the result: {estimate.value}')

Estimate for the causal effect of jng_performance_index_std on the result: 0.19494863576654764
Estimate for the causal effect of adc_performance_index_std on the result: 0.12346041371413508
