# Purpose of notebook
This notebook is explore the win/loss classification problem. The goal of this analysis is to identify what attributes are good at predicting match outcomes, in different senarios. 

Topics covered:
- basic decision tree model
    - Data set broken up by spike planted vs not spike planted

## Additional background / hypothesis
* To win a match of valorant, you need to win 13 rounds (5 if swiftplay)
    - To win a round, how you win is dependent on if you are attacker or defender
        - If attacker, you win by either planting the spike and it detonates, or you eliminate all opponents
        - If defender, you win by either deativating a planted spike, eliminating all opponents, or just suriving without the spike being planted


* Hypothesis is depending on if you are attacker or defender, and if the spike has been planted or not, your strategy will change
    - More specifically, if you are attacker what will help you win will probably be elims, not dying, spike planting
    - If you are defender what will help you win is not dying, deacitivaitng spike
    - Depending on what is happning in the match (most obviously pre spike and post spike) the things you should focus on to win will change
    - Ideas: 
        - if you are an attacker and the spike is not planted, at least 2? or more of your teammates should focus on planting the spike (depending on your role!!)
        - if you are an attacker and the spike is planted, all teammates should focus on defending the spike and killing the other team
        - if you are a defender and the spike is not planted, the team should focus on kills
        - if you are a defender and the spike is planted, the team should focus on kills/deativating the spike
        - also just stay alive
    
### Questions:
-


In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import tree
import matplotlib.pyplot as plt

## Loading the data

In [2]:
import pandas as pd

# Read the CSV files into separate DataFrames
df = pd.read_csv('balanced_onehot.csv')

In [3]:
# Find rows with NaN values
nan_rows = df[df.isnull().any(axis=1)]

# Print the rows with NaN values
nan_rows

Unnamed: 0,user_id,round_info_round_end,round_info_round_won,round_info_round_start,ally4_elims,ally4_deaths,ally4_assists,ally4_headshots,ally4_wallbangs,ally4_first_bloods,...,self_longest_gun_primary_spectre,self_longest_gun_primary_stinger,self_longest_gun_primary_vandal,self_longest_gun_secondary_classic,self_longest_gun_secondary_frenzy,self_longest_gun_secondary_ghost,self_longest_gun_secondary_none,self_longest_gun_secondary_sheriff,self_longest_gun_secondary_shorty,round_info_round_length


In [14]:
df.iloc[2][:50]

user_id                       63.000000
round_info_round_end     1647064.000000
round_info_round_won           0.000000
round_info_round_start   1611891.000000
ally4_elims                    0.000000
ally4_deaths                   1.000000
ally4_assists                  0.000000
ally4_headshots                0.000000
ally4_wallbangs                0.000000
ally4_first_bloods             0.000000
self_elims                     0.000000
self_deaths                    1.000000
self_assists                   0.000000
self_headshots                 0.000000
self_wallbangs                 0.000000
self_first_bloods              0.000000
ally1_elims                    0.000000
ally1_deaths                   1.000000
ally1_assists                  0.000000
ally1_headshots                0.000000
ally1_wallbangs                0.000000
ally1_first_bloods             0.000000
ally2_elims                    1.000000
ally2_deaths                   1.000000
ally2_assists                  0.000000


In [15]:
df.iloc[2][50:100]

opponent2_wallbangs                0.000000
opponent2_first_bloods             1.000000
opponent3_elims                    0.000000
opponent3_deaths                   1.000000
opponent3_assists                  2.000000
opponent3_headshots                0.000000
opponent3_wallbangs                0.000000
opponent3_first_bloods             0.000000
opponent4_elims                    1.000000
opponent4_deaths                   0.000000
opponent4_assists                  0.000000
opponent4_headshots                0.000000
opponent4_wallbangs                0.000000
opponent4_first_bloods             0.000000
ally4_avg_health                  91.555556
self_avg_health                   64.500000
ally1_avg_health                  94.000000
ally2_avg_health                  87.428571
ally3_avg_health                  80.400000
self_avg_shield                   10.000000
self_avg_credits                 165.000000
self_avg_ammo_mag                 11.000000
ally4_ultimate_usage            

## splitting

In [None]:
def calculate_cumulative_features(df, start_time, end_time):
    # Filter dataframe to only include events between start_time and end_time
    df_time_filtered = df[(df['round_info_round_start'] >= start_time) & (df['round_info_round_end'] <= end_time)]

    # Calculate cumulative features
    cumulative_damage = df_time_filtered['damage'].sum()
    cumulative_eliminations = df_time_filtered['eliminations'].sum()

    # Create a dataframe to hold these features
    df_cumulative_features = pd.DataFrame({
        'cumulative_damage': [cumulative_damage],
        'cumulative_eliminations': [cumulative_eliminations],
    })

    return df_cumulative_features

In [None]:
calculate_cumulative_features(df, start_time, end_time)

In [4]:
df_pre_spike = pd.DataFrame()
df_post_spike = pd.DataFrame()

for round_idx in range(len(df)):
    round_data = df.iloc[round_idx]

    if round_data['spike_planted']:
        spike_time = round_data['spike_time']

        pre_spike_data = df.iloc[:round_idx+1]
        post_spike_data = df.iloc[round_idx+1:]

        pre_spike_features = calculate_cumulative_features(pre_spike_data, start_time, spike_time)
        post_spike_features = calculate_cumulative_features(post_spike_data, spike_time, end_time)

        df_pre_spike = df_pre_spike.append(pre_spike_features, ignore_index=True)
        df_post_spike = df_post_spike.append(post_spike_features, ignore_index=True)



user_id
round_info_round_end
round_info_round_won
round_info_round_start
ally4_elims
ally4_deaths
ally4_assists
ally4_headshots
ally4_wallbangs
ally4_first_bloods
self_elims
self_deaths
self_assists
self_headshots
self_wallbangs
self_first_bloods
ally1_elims
ally1_deaths
ally1_assists
ally1_headshots
ally1_wallbangs
ally1_first_bloods
ally2_elims
ally2_deaths
ally2_assists
ally2_headshots
ally2_wallbangs
ally2_first_bloods
ally3_elims
ally3_deaths
ally3_assists
ally3_headshots
ally3_wallbangs
ally3_first_bloods
opponent0_elims
opponent0_deaths
opponent0_assists
opponent0_headshots
opponent0_wallbangs
opponent0_first_bloods
opponent1_elims
opponent1_deaths
opponent1_assists
opponent1_headshots
opponent1_wallbangs
opponent1_first_bloods
opponent2_elims
opponent2_deaths
opponent2_assists
opponent2_headshots
opponent2_wallbangs
opponent2_first_bloods
opponent3_elims
opponent3_deaths
opponent3_assists
opponent3_headshots
opponent3_wallbangs
opponent3_first_bloods
opponent4_elims
opponent4_d

## Decision Tree Classifier

### All data

In [28]:
# train test split
X = downsampled_df.drop('round_info_round_won', axis=1)
y = downsampled_df['round_info_round_won']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [49]:
# Create a DecisionTreeClassifier object
dtc = DecisionTreeClassifier(random_state=1)

# Train the model using the training sets
dtc.fit(X_train_scaled, y_train)

# Predict the response for test dataset
y_pred = dtc.predict(X_test_scaled)


y_pred_train = dtc.predict(X_train_scaled)
# Model Accuracy
print("Train Accuracy:", accuracy_score(y_train, y_pred_train))

# Confusion Matrix
print(confusion_matrix(y_train, y_pred_train))

# Classification Report
print(classification_report(y_test, y_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Confusion Matrix
print(confusion_matrix(y_test, y_pred))

# Classification Report
print(classification_report(y_test, y_pred))

Train Accuracy: 1.0
[[11206     0]
 [    0 11069]]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      2716
           1       0.94      0.94      0.94      2853

    accuracy                           0.94      5569
   macro avg       0.94      0.94      0.94      5569
weighted avg       0.94      0.94      0.94      5569

Test Accuracy: 0.9389477464535824
[[2549  167]
 [ 173 2680]]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      2716
           1       0.94      0.94      0.94      2853

    accuracy                           0.94      5569
   macro avg       0.94      0.94      0.94      5569
weighted avg       0.94      0.94      0.94      5569



In [50]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)


In [51]:
feature_importances[:50]

Unnamed: 0,importance
self_deaths,0.290048
ally1_deaths,0.128782
ally4_deaths,0.062055
opponent0_deaths,0.059486
opponent1_deaths,0.055189
self_avg_health,0.053299
ally2_deaths,0.040181
opponent4_deaths,0.031365
opponent2_deaths,0.025491
ally2_elims,0.024368


In [52]:
threshold = 0.01

unimportant_features = feature_importances[feature_importances['importance'] < threshold].index

In [53]:
unimportant_features

Index(['round_info_round_end', 'ally1_avg_health', 'ally3_avg_health',
       'self_avg_shield', 'self_avg_ammo_mag', 'spike_time', 'ally4_elims',
       'round_info_round_start', 'self_total_firing_time', 'self_avg_credits',
       ...
       'opponent7_character_sage', 'ally3_character_viper',
       'opponent5_character_astra', 'opponent5_character_chamber',
       'opponent5_character_cypher', 'self_character_breach',
       'self_character_fade', 'opponent7_character_jett',
       'self_character_harbor', 'ally2_character_sova'],
      dtype='object', length=325)

In [56]:
X_train_reduced = X_train.drop(unimportant_features, axis=1)
X_test_reduced = X_test.drop(unimportant_features, axis=1)

In [54]:
threshold = 0.01

important_features = feature_importances[feature_importances['importance'] > threshold].index

In [55]:
important_features

Index(['self_deaths', 'ally1_deaths', 'ally4_deaths', 'opponent0_deaths',
       'opponent1_deaths', 'self_avg_health', 'ally2_deaths',
       'opponent4_deaths', 'opponent2_deaths', 'ally2_elims', 'ally3_deaths',
       'ally3_elims', 'opponent3_deaths', 'round_info_round_length'],
      dtype='object')

In [58]:
# Train the model using the training sets
dtc.fit(X_train_reduced, y_train)

# Predict the response for test dataset
y_pred = dtc.predict(X_test_reduced)


y_pred_train = dtc.predict(X_train_reduced)

# Model Accuracy
print("Train Accuracy:", accuracy_score(y_train, y_pred_train))

# Confusion Matrix
print(confusion_matrix(y_train, y_pred_train))

# Classification Report
print(classification_report(y_test, y_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Confusion Matrix
print(confusion_matrix(y_test, y_pred))

# Classification Report
print(classification_report(y_test, y_pred))

Train Accuracy: 1.0
[[11206     0]
 [    0 11069]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2716
           1       0.95      0.95      0.95      2853

    accuracy                           0.95      5569
   macro avg       0.95      0.95      0.95      5569
weighted avg       0.95      0.95      0.95      5569

Test Accuracy: 0.949901239001616
[[2579  137]
 [ 142 2711]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2716
           1       0.95      0.95      0.95      2853

    accuracy                           0.95      5569
   macro avg       0.95      0.95      0.95      5569
weighted avg       0.95      0.95      0.95      5569



In [59]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_train_reduced.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)

feature_importances

Unnamed: 0,importance
self_deaths,0.292928
ally1_deaths,0.132278
self_avg_health,0.078032
round_info_round_length,0.077378
ally4_deaths,0.069674
opponent0_deaths,0.06349
opponent1_deaths,0.059705
ally2_deaths,0.045105
opponent4_deaths,0.037354
ally2_elims,0.03463


In [65]:
fig, ax = plt.subplots(figsize=(20, 20)) 
tree.plot_tree(dtc, 
               feature_names=X_test_reduced.columns, 
               class_names=['Loss', 'Win'], 
               filled=True,
               rounded=True,
               ax=ax)

plt.show()

KeyboardInterrupt: 

## Attacker/defender
- unfortunatly some rows are unknown so this is a slightly smaller dataset (4k rows were unknown)

In [67]:
downsampled_df

Unnamed: 0,user_id,round_info_round_end,round_info_round_won,round_info_round_start,ally4_elims,ally4_deaths,ally4_assists,ally4_headshots,ally4_wallbangs,ally4_first_bloods,...,self_longest_gun_primary_spectre,self_longest_gun_primary_stinger,self_longest_gun_primary_vandal,self_longest_gun_secondary_classic,self_longest_gun_secondary_frenzy,self_longest_gun_secondary_ghost,self_longest_gun_secondary_none,self_longest_gun_secondary_sheriff,self_longest_gun_secondary_shorty,round_info_round_length
0,69,169029,0,127194,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,1,0,0,0,0,0,41835
1,65,1399018,1,1357408,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,41610
2,63,1647064,0,1611891,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,35173
3,71,1724602,1,1659100,0.0,1.0,1.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,65502
4,74,1502801,1,1471811,1.0,1.0,0.0,1.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,30990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27839,63,2226164,0,2128770,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,97394
27840,3804,1900614,1,1841602,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,59012
27841,69,410603,1,355602,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,55001
27842,997,711200,0,645601,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,65599


In [70]:
attack_df = downsampled_df[downsampled_df['round_info_ally_side_attacker'] == 1]
attack_df

Unnamed: 0,user_id,round_info_round_end,round_info_round_won,round_info_round_start,ally4_elims,ally4_deaths,ally4_assists,ally4_headshots,ally4_wallbangs,ally4_first_bloods,...,self_longest_gun_primary_spectre,self_longest_gun_primary_stinger,self_longest_gun_primary_vandal,self_longest_gun_secondary_classic,self_longest_gun_secondary_frenzy,self_longest_gun_secondary_ghost,self_longest_gun_secondary_none,self_longest_gun_secondary_sheriff,self_longest_gun_secondary_shorty,round_info_round_length
0,69,169029,0,127194,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,1,0,0,0,0,0,41835
2,63,1647064,0,1611891,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,35173
3,71,1724602,1,1659100,0.0,1.0,1.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,65502
5,3806,1559606,1,1495813,0.0,1.0,2.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,63793
11,57,144601,1,47599,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,97002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27829,3806,162003,0,111608,3.0,1.0,0.0,3.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,50395
27831,1012,1193195,0,1131000,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,1,0,0,0,0,0,62195
27837,61,112403,1,47800,2.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,64603
27838,69,1047795,1,964446,1.0,1.0,0.0,1.0,0.0,0.0,...,1,0,0,0,0,0,1,0,0,83349


In [71]:
defend_df = downsampled_df[downsampled_df['round_info_ally_side_defender'] == 1]
defend_df

Unnamed: 0,user_id,round_info_round_end,round_info_round_won,round_info_round_start,ally4_elims,ally4_deaths,ally4_assists,ally4_headshots,ally4_wallbangs,ally4_first_bloods,...,self_longest_gun_primary_spectre,self_longest_gun_primary_stinger,self_longest_gun_primary_vandal,self_longest_gun_secondary_classic,self_longest_gun_secondary_frenzy,self_longest_gun_secondary_ghost,self_longest_gun_secondary_none,self_longest_gun_secondary_sheriff,self_longest_gun_secondary_shorty,round_info_round_length
1,65,1399018,1,1357408,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,41610
4,74,1502801,1,1471811,1.0,1.0,0.0,1.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,30990
6,1007,240024,0,206985,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,33039
7,80,2926742,0,2856128,1.0,1.0,0.0,1.0,0.0,0.0,...,0,0,1,1,0,0,0,0,0,70614
9,3806,532598,1,493914,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,38684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27833,1001,2128364,1,2084153,2.0,1.0,2.0,2.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,44211
27835,1016,641030,1,589591,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,51439
27836,58,1866667,0,1808809,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,57858
27839,63,2226164,0,2128770,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,97394


In [72]:
# train test split
X_attack = attack_df.drop('round_info_round_won', axis=1)
y_attack = attack_df['round_info_round_won']

X_attack_train, X_attack_test, y_attack_train, y_attack_test = train_test_split(X_attack, y_attack, test_size=0.2, random_state=1)

# Scale the data
scaler = StandardScaler()
X_attack_train_scaled = scaler.fit_transform(X_attack_train)
X_attack_test_scaled = scaler.transform(X_attack_test)

In [73]:
# Train the model using the training sets
dtc.fit(X_attack_train, y_attack_train)

# Predict the response for test dataset
y_attack_pred = dtc.predict(X_attack_test)


y_attack_pred_train = dtc.predict(X_attack_train)

# Model Accuracy
print("Train Accuracy:", accuracy_score(y_attack_train, y_attack_pred_train))

# Confusion Matrix
print(confusion_matrix(y_attack_train, y_attack_pred_train))

# Classification Report
print(classification_report(y_attack_test, y_attack_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_attack_test, y_attack_pred))

# Confusion Matrix
print(confusion_matrix(y_attack_test, y_attack_pred))

# Classification Report
print(classification_report(y_attack_test, y_attack_pred))

Train Accuracy: 1.0
[[4610    0]
 [   0 4583]]
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      1196
           1       0.94      0.94      0.94      1103

    accuracy                           0.94      2299
   macro avg       0.94      0.94      0.94      2299
weighted avg       0.94      0.94      0.94      2299

Test Accuracy: 0.9421487603305785
[[1126   70]
 [  63 1040]]
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      1196
           1       0.94      0.94      0.94      1103

    accuracy                           0.94      2299
   macro avg       0.94      0.94      0.94      2299
weighted avg       0.94      0.94      0.94      2299



In [74]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)

In [76]:
feature_importances[:50]

Unnamed: 0,importance
self_deaths,0.297159
ally1_deaths,0.126002
ally2_deaths,0.052805
self_avg_health,0.05079
opponent1_deaths,0.044883
ally4_deaths,0.037215
spike_planted,0.031853
ally2_elims,0.031458
opponent3_deaths,0.02999
ally4_elims,0.029619


In [77]:
threshold = 0.01

unimportant_features = feature_importances[feature_importances['importance'] < threshold].index

In [78]:
unimportant_features

Index(['ally1_avg_health', 'ally3_ultimate_usage', 'user_id',
       'ally2_headshots', 'ally3_elims', 'round_info_round_end',
       'self_avg_ammo_reserve', 'self_movement_metric', 'self_first_bloods',
       'self_avg_credits',
       ...
       'ally3_character_reyna', 'ally3_character_sage',
       'ally4_character_astra', 'map_unknown', 'ally3_character_viper',
       'ally3_character_yoru', 'opponent5_character_astra',
       'opponent5_character_brimstone', 'opponent5_character_chamber',
       'ally2_character_cypher'],
      dtype='object', length=323)

In [79]:
X_attack_train_reduced = X_attack_train.drop(unimportant_features, axis=1)
X_attack_test_reduced = X_attack_test.drop(unimportant_features, axis=1)

In [80]:
# Train the model using the training sets
dtc.fit(X_attack_train_reduced, y_attack_train)

# Predict the response for test dataset
y_attack_pred = dtc.predict(X_attack_test_reduced)


y_attack_pred_train = dtc.predict(X_attack_train_reduced)

# Model Accuracy
print("Train Accuracy:", accuracy_score(y_attack_train, y_attack_pred_train))

# Confusion Matrix
print(confusion_matrix(y_attack_train, y_attack_pred_train))

# Classification Report
print(classification_report(y_attack_test, y_attack_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_attack_test, y_attack_pred))

# Confusion Matrix
print(confusion_matrix(y_attack_test, y_attack_pred))

# Classification Report
print(classification_report(y_attack_test, y_attack_pred))

Train Accuracy: 1.0
[[4610    0]
 [   0 4583]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      1196
           1       0.95      0.95      0.95      1103

    accuracy                           0.95      2299
   macro avg       0.95      0.95      0.95      2299
weighted avg       0.95      0.95      0.95      2299

Test Accuracy: 0.9491083079599826
[[1136   60]
 [  57 1046]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      1196
           1       0.95      0.95      0.95      1103

    accuracy                           0.95      2299
   macro avg       0.95      0.95      0.95      2299
weighted avg       0.95      0.95      0.95      2299



In [82]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_attack_train_reduced.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
feature_importances

Unnamed: 0,importance
self_deaths,0.297981
ally1_deaths,0.128197
self_avg_health,0.072957
ally2_deaths,0.062492
opponent1_deaths,0.057163
round_info_round_length,0.056158
ally4_deaths,0.041886
ally2_elims,0.04041
spike_planted,0.038524
opponent3_deaths,0.037859


In [83]:
# train test split
X_defend = defend_df.drop('round_info_round_won', axis=1)
y_defend = defend_df['round_info_round_won']

X_defend_train, X_defend_test, y_defend_train, y_defend_test = train_test_split(X_defend, y_defend, test_size=0.2, random_state=1)

# Scale the data
scaler = StandardScaler()
X_defend_train_scaled = scaler.fit_transform(X_train)
X_defend_test_scaled = scaler.transform(X_test)

In [84]:
# Train the model using the training sets
dtc.fit(X_defend_train, y_defend_train)

# Predict the response for test dataset
y_defend_pred = dtc.predict(X_defend_test)


y_defend_pred_train = dtc.predict(X_defend_train)

# Model Accuracy
print("Train Accuracy:", accuracy_score(y_defend_train, y_defend_pred_train))

# Confusion Matrix
print(confusion_matrix(y_defend_train, y_defend_pred_train))

# Classification Report
print(classification_report(y_defend_test, y_defend_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_defend_test, y_defend_pred))

# Confusion Matrix
print(confusion_matrix(y_defend_test, y_defend_pred))

# Classification Report
print(classification_report(y_defend_test, y_defend_pred))

Train Accuracy: 1.0
[[4643    0]
 [   0 4608]]
              precision    recall  f1-score   support

           0       0.93      0.95      0.94      1158
           1       0.94      0.93      0.94      1155

    accuracy                           0.94      2313
   macro avg       0.94      0.94      0.94      2313
weighted avg       0.94      0.94      0.94      2313

Test Accuracy: 0.9386078685689581
[[1095   63]
 [  79 1076]]
              precision    recall  f1-score   support

           0       0.93      0.95      0.94      1158
           1       0.94      0.93      0.94      1155

    accuracy                           0.94      2313
   macro avg       0.94      0.94      0.94      2313
weighted avg       0.94      0.94      0.94      2313



In [85]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)

In [86]:
feature_importances[:50]

Unnamed: 0,importance
self_avg_health,0.299398
ally4_deaths,0.149237
ally1_deaths,0.084401
opponent0_deaths,0.060168
opponent4_deaths,0.041811
ally2_deaths,0.032105
opponent2_deaths,0.025413
opponent3_deaths,0.024067
ally3_elims,0.023506
ally3_deaths,0.02308


In [87]:
threshold = 0.01

unimportant_features = feature_importances[feature_importances['importance'] < threshold].index

In [88]:
unimportant_features

Index(['self_prec_map_covered', 'spike_planted', 'ally3_avg_health',
       'round_info_round_length', 'self_avg_ammo_reserve', 'ally2_elims',
       'ally4_avg_health', 'self_total_firing_time', 'ally2_avg_health',
       'user_id',
       ...
       'ally2_character_fade', 'ally2_character_gekko',
       'ally2_character_harbor', 'ally2_character_jett',
       'ally2_character_kay/o', 'ally2_character_killjoy',
       'ally2_character_omen', 'ally2_character_phoenix',
       'ally2_character_raze', 'ally2_character_cypher'],
      dtype='object', length=327)

In [89]:
X_defend_train_reduced = X_defend_train.drop(unimportant_features, axis=1)
X_defend_test_reduced = X_defend_test.drop(unimportant_features, axis=1)

In [90]:
# Train the model using the training sets
dtc.fit(X_defend_train_reduced, y_defend_train)

# Predict the response for test dataset
y_defend_pred = dtc.predict(X_defend_test_reduced)


y_defend_pred_train = dtc.predict(X_defend_train_reduced)

# Model Accuracy
print("Train Accuracy:", accuracy_score(y_defend_train, y_defend_pred_train))

# Confusion Matrix
print(confusion_matrix(y_defend_train, y_defend_pred_train))

# Classification Report
print(classification_report(y_defend_test, y_defend_pred))

# Model Accuracy
print("Test Accuracy:", accuracy_score(y_defend_test, y_defend_pred))

# Confusion Matrix
print(confusion_matrix(y_defend_test, y_defend_pred))

# Classification Report
print(classification_report(y_defend_test, y_defend_pred))

Train Accuracy: 0.9910279969733001
[[4623   20]
 [  63 4545]]
              precision    recall  f1-score   support

           0       0.94      0.95      0.94      1158
           1       0.95      0.94      0.94      1155

    accuracy                           0.94      2313
   macro avg       0.94      0.94      0.94      2313
weighted avg       0.94      0.94      0.94      2313

Test Accuracy: 0.9429312581063554
[[1098   60]
 [  72 1083]]
              precision    recall  f1-score   support

           0       0.94      0.95      0.94      1158
           1       0.95      0.94      0.94      1155

    accuracy                           0.94      2313
   macro avg       0.94      0.94      0.94      2313
weighted avg       0.94      0.94      0.94      2313



In [93]:
feature_importances = pd.DataFrame(dtc.feature_importances_,
                                   index = X_defend_train_reduced.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
feature_importances

Unnamed: 0,importance
self_avg_health,0.360931
ally4_deaths,0.160256
ally1_deaths,0.093785
opponent0_deaths,0.070062
opponent4_deaths,0.053211
opponent2_deaths,0.041622
ally2_deaths,0.041576
opponent3_deaths,0.038462
ally3_elims,0.038137
opponent1_deaths,0.037896


In [95]:
downsampled_df.to_csv('balanced_onehot.csv', index=False)

In [96]:
df1 = pd.read_csv('balanced_onehot.csv')

In [97]:
df1

Unnamed: 0,user_id,round_info_round_end,round_info_round_won,round_info_round_start,ally4_elims,ally4_deaths,ally4_assists,ally4_headshots,ally4_wallbangs,ally4_first_bloods,...,self_longest_gun_primary_spectre,self_longest_gun_primary_stinger,self_longest_gun_primary_vandal,self_longest_gun_secondary_classic,self_longest_gun_secondary_frenzy,self_longest_gun_secondary_ghost,self_longest_gun_secondary_none,self_longest_gun_secondary_sheriff,self_longest_gun_secondary_shorty,round_info_round_length
0,69,169029,0,127194,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,1,0,0,0,0,0,41835
1,65,1399018,1,1357408,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,41610
2,63,1647064,0,1611891,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,35173
3,71,1724602,1,1659100,0.0,1.0,1.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,65502
4,74,1502801,1,1471811,1.0,1.0,0.0,1.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,30990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27839,63,2226164,0,2128770,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,97394
27840,3804,1900614,1,1841602,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,59012
27841,69,410603,1,355602,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0,0,0,1,0,0,55001
27842,997,711200,0,645601,0.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,65599
