# Import Libraries

In [1]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.stats import sem
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from keras.initializers import HeNormal
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from sklearn.utils import shuffle


# Connect to SQLite Database and Load Data

In [2]:
# Connect to SQLite database and load data into pandas DataFrame
conn = sqlite3.connect('valorant.sqlite')

# Load data from the 'Games' and 'Game_Scoreboard' tables
query_games = "SELECT * FROM Games"
query_scoreboard = "SELECT * FROM Game_Scoreboard"
df_games = pd.read_sql_query(query_games, conn)
df_scoreboard = pd.read_sql_query(query_scoreboard, conn)

# Close the connection
conn.close()

# Display the first few rows of both datasets
print("Games Table:")
print(df_games.head())
print("\nGame_Scoreboard Table:")
print(df_scoreboard.head())


Games Table:
  GameID MatchID     Map  Team1ID  Team2ID                Team1     Team2  \
0  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   
1  60895   62393    Bind     6903     6020  Booster Seat Gaming  Pho Real   
2  60896   62393   Haven     6903     6020  Booster Seat Gaming  Pho Real   
3  60924   62403  Icebox     7046     7047       Bjor's Kittens  Mugiwara   
4  60925   62403   Haven     7046     7047       Bjor's Kittens  Mugiwara   

                Winner  Team1_TotalRounds  Team2_TotalRounds  ...  \
0  Booster Seat Gaming                 13                  7  ...   
1             Pho Real                  2                 13  ...   
2  Booster Seat Gaming                 13                  8  ...   
3       Bjor's Kittens                 13                  6  ...   
4       Bjor's Kittens                 13                  9  ...   

  Team1_FullBuyWon Team2_PistolWon  Team2_Eco  Team2_EcoWon  Team2_SemiEco  \
0              8.0             

# Data Preprocessing

In [3]:
# Clean the data by removing rows with missing values
df_cleaned = df_games.dropna()

# Convert the 'Winner' column to binary (1 for Team1 win, 0 for Team2 win)
df_cleaned['Winner'] = df_cleaned['Winner'].apply(lambda x: 1 if x == df_cleaned['Team1'].iloc[0] else 0)

# Merge the cleaned Games table with the Game_Scoreboard table
df_cleaned = df_cleaned.merge(df_scoreboard, on='GameID')

# Display the cleaned DataFrame
print("Cleaned and Merged Data:")
print(df_cleaned.head())


Cleaned and Merged Data:
  GameID MatchID     Map  Team1ID  Team2ID                Team1     Team2  \
0  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   
1  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   
2  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   
3  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   
4  60894   62393  Breeze     6903     6020  Booster Seat Gaming  Pho Real   

   Winner  Team1_TotalRounds  Team2_TotalRounds  ... Num_4Ks Num_5Ks  OnevOne  \
0       1                 13                  7  ...     2.0     0.0      1.0   
1       1                 13                  7  ...     0.0     0.0      0.0   
2       1                 13                  7  ...     0.0     0.0      1.0   
3       1                 13                  7  ...     0.0     0.0      1.0   
4       1                 13                  7  ...     0.0     0.0      0.0   

   OnevTwo  OnevThree  On

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['Winner'] = df_cleaned['Winner'].apply(lambda x: 1 if x == df_cleaned['Team1'].iloc[0] else 0)


# Feature Selection and Target Variable

In [4]:
# Select relevant features for the model
X = df_cleaned[['ACS', 'Kills', 'Deaths', 'Assists', 'PlusMinus', 'KAST_Percent', 'ADR', 'HS_Percent',
                'FirstKills', 'FirstDeaths', 'FKFD_PlusMinus', 'Num_2Ks', 'Num_3Ks', 'Num_4Ks', 'Num_5Ks']]

# The target variable is the 'Winner' column
y = df_cleaned['Winner']

# Display the selected features and the target variable
print("Feature Matrix (X):")
print(X.head())
print("\nTarget Variable (y):")
print(y.head())


Feature Matrix (X):
     ACS  Kills  Deaths  Assists  PlusMinus  KAST_Percent    ADR  HS_Percent  \
0  313.0   24.0    10.0      3.0       14.0          0.65  195.0        0.31   
1  227.0   16.0    10.0      7.0        6.0          0.90  161.0        0.16   
2  226.0   17.0     9.0      8.0        8.0          0.85  148.0        0.27   
3  218.0   17.0    12.0      2.0        5.0          0.70  141.0        0.19   
4   80.0    5.0    13.0      3.0       -8.0          0.70   55.0        0.22   

   FirstKills  FirstDeaths  FKFD_PlusMinus  Num_2Ks  Num_3Ks  Num_4Ks  Num_5Ks  
0         4.0          4.0             0.0      2.0      2.0      2.0      0.0  
1         1.0          1.0             0.0      3.0      1.0      0.0      0.0  
2         3.0          0.0             3.0      1.0      2.0      0.0      0.0  
3         3.0          0.0             3.0      3.0      1.0      0.0      0.0  
4         3.0          1.0             2.0      1.0      0.0      0.0      0.0  

Target Varia

# Handle Missing Data

In [5]:
# Impute missing values with the mean of each column
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)

# Shuffle the data
X_imputed, y = shuffle(X_imputed, y, random_state=42)

# Display the first few rows of the imputed feature matrix
print("Imputed Feature Matrix (X_imputed):")
print(X_imputed[:5])


Imputed Feature Matrix (X_imputed):
[[ 2.98000000e+02  2.00000000e+01  1.10000000e+01  4.00000000e+00
   9.00000000e+00  6.97199287e-01  1.91000000e+02  3.40000000e-01
   6.00000000e+00  1.00000000e+00  5.00000000e+00  3.00000000e+00
   0.00000000e+00  1.00000000e+00  0.00000000e+00]
 [ 1.96000000e+02  1.40000000e+01  1.40000000e+01  2.00000000e+00
   0.00000000e+00  6.97199287e-01  1.39000000e+02  1.40000000e-01
   2.00000000e+00  3.00000000e+00 -1.00000000e+00  3.00000000e+00
   1.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 1.94000000e+02  1.50000000e+01  1.40000000e+01  4.00000000e+00
   1.00000000e+00  6.97199287e-01  1.43000000e+02  2.80000000e-01
   1.00000000e+00  2.00000000e+00 -1.00000000e+00  4.00000000e+00
   1.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 3.00000000e+02  2.40000000e+01  1.80000000e+01  1.00000000e+01
   6.00000000e+00  6.97199287e-01  2.07000000e+02  2.20000000e-01
   5.00000000e+00  3.00000000e+00  2.00000000e+00  5.00000000e+00
   2.00000000e+00  

# Standardize the Features

In [6]:
# Standardize the features (mean=0, variance=1)
scaler = StandardScaler()
X_processed = scaler.fit_transform(X_imputed)

# Display the standardized feature matrix
print("Standardized Feature Matrix (X_processed):")
print(X_processed[:5])


Standardized Feature Matrix (X_processed):
[[ 1.52073439e+00  1.00017938e+00 -9.03400618e-01 -3.89000817e-01
   1.42414841e+00  2.99926918e-11  1.52023229e+00  1.15948736e+00
   2.26139133e+00 -6.51672922e-01  2.33896814e+00  2.38833356e-01
  -8.60774273e-01  1.98134166e+00 -1.42671856e-01]
 [-1.07815447e-01 -9.05643186e-02 -1.29824668e-01 -1.03512991e+00
   8.37055596e-04  2.99926918e-11  2.09166270e-01 -1.04984261e+00
  -3.04780593e-02  5.87653154e-01 -4.68656378e-01  2.38833356e-01
   2.07755144e-01 -4.03843116e-01 -1.42671856e-01]
 [-1.39747797e-01  9.12262971e-02 -1.29824668e-01 -3.89000817e-01
   1.58982761e-01  2.99926918e-11  3.10017502e-01  4.96688370e-01
  -6.03445407e-01 -3.20098840e-02 -4.68656378e-01  8.38459426e-01
   2.07755144e-01 -4.03843116e-01 -1.42671856e-01]
 [ 1.55266674e+00  1.72734184e+00  9.01609933e-01  1.54938646e+00
   9.49711290e-01  2.99926918e-11  1.92363722e+00 -1.66110623e-01
   1.68842399e+00  5.87653154e-01  9.35155881e-01  1.43808550e+00
   1.2762845

# Define the DNN Model

In [7]:
# Define the DNN model architecture
def build_dnn_model(input_dim):
    initializer = HeNormal()
    model = Sequential(
        [
            Dense(128, activation="elu", kernel_initializer=initializer, input_shape=(input_dim,)),
            Dropout(0.1),
            BatchNormalization(),
            Dense(64, activation="elu", kernel_initializer=initializer),
            Dropout(0.1),
            BatchNormalization(),
            Dense(32, activation="elu", kernel_initializer=initializer),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

# Display the DNN model summary
model = build_dnn_model(X_processed.shape[1])
model.summary()
model.save('model.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




 # Evaluate the DNN Model using K-Fold Cross-Validation

In [8]:
# Evaluate DNN with K-Fold Cross-Validation
def evaluate_with_kfold(X, y, model_builder, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)
    accuracy_scores = []
    
    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = X[train_index], X[test_index]
        y_train_fold, y_test_fold = y[train_index], y[test_index]

        model = model_builder(input_dim=X.shape[1])
        model.fit(x_train_fold, y_train_fold, epochs=5, batch_size=512, verbose=0)

        y_pred_fold = model.predict(x_test_fold)
        y_pred_fold = np.round(y_pred_fold)
        score = accuracy_score(y_test_fold, y_pred_fold)
        accuracy_scores.append(score * 100)

    return accuracy_scores

# Evaluate the DNN model
dnn_accuracy = evaluate_with_kfold(X_processed, y, build_dnn_model, n_splits=5)
print("DNN Model Accuracy Scores:", dnn_accuracy)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 629us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 646us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 653us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 661us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 649us/step
DNN Model Accuracy Scores: [99.92554739585096, 99.93231581440996, 99.91201055873296, 99.91539476801246, 99.94246649519425]


# Evaluate the Gradient Boosting Model

In [9]:
# Evaluate Gradient Boosting Classifier
def evaluate_gboost_with_kfold(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)
    accuracy_scores = []
    
    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = X[train_index], X[test_index]
        y_train_fold, y_test_fold = y[train_index], y[test_index]

        gbc = GradientBoostingClassifier(n_estimators=100, random_state=42)
        gbc.fit(x_train_fold, y_train_fold)

        y_pred_fold = gbc.predict(x_test_fold)
        score = accuracy_score(y_test_fold, y_pred_fold)
        accuracy_scores.append(score * 100)

    return accuracy_scores

# Evaluate the Gradient Boosting model
gboost_accuracy = evaluate_gboost_with_kfold(X_processed, y, n_splits=5)
print("Gradient Boosting Model Accuracy Scores:", gboost_accuracy)


Gradient Boosting Model Accuracy Scores: [99.91539476801246, 99.91539476801246, 99.90185793089445, 99.90862634945346, 99.91200758088534]


# Print Statistics

In [10]:
# Print model statistics (maximum, minimum, overall accuracy, etc.)
def print_statistics(accuracy_scores, model_name):
    print(f"\n{model_name} Model:")
    print(f"List of Accuracy Scores: {accuracy_scores}")
    print(f"Maximum Accuracy: {max(accuracy_scores):.2f}%")
    print(f"Minimum Accuracy: {min(accuracy_scores):.2f}%")
    print(f"Overall Accuracy: {np.mean(accuracy_scores):.2f}%")
    print(f"Standard Deviation: {np.std(accuracy_scores):.2f}%")
    print(f"Standard Error: {sem(accuracy_scores):.2f}%")

# Print DNN statistics
print_statistics(dnn_accuracy, "DNN")

# Print Gradient Boosting statistics
print_statistics(gboost_accuracy, "Gradient Boosting")



DNN Model:
List of Accuracy Scores: [99.92554739585096, 99.93231581440996, 99.91201055873296, 99.91539476801246, 99.94246649519425]
Maximum Accuracy: 99.94%
Minimum Accuracy: 99.91%
Overall Accuracy: 99.93%
Standard Deviation: 0.01%
Standard Error: 0.01%

Gradient Boosting Model:
List of Accuracy Scores: [99.91539476801246, 99.91539476801246, 99.90185793089445, 99.90862634945346, 99.91200758088534]
Maximum Accuracy: 99.92%
Minimum Accuracy: 99.90%
Overall Accuracy: 99.91%
Standard Deviation: 0.01%
Standard Error: 0.00%


# Retrieve Player Stats for Each Team

In [11]:
import random
import pandas as pd

# Function to retrieve and aggregate player stats for a given team based on player IDs
def get_team_stats(player_ids, df_scoreboard):
    # Filter the scoreboard for the given player IDs
    team_stats = df_scoreboard[df_scoreboard['PlayerID'].isin(player_ids)]
    
    # Aggregate the statistics by averaging the player's stats (ACS, Kills, Deaths, etc.)
    team_stats_mean = team_stats[['ACS', 'Kills', 'Deaths', 'Assists', 'PlusMinus', 
                                  'KAST_Percent', 'ADR', 'HS_Percent', 'FirstKills', 
                                  'FirstDeaths', 'FKFD_PlusMinus', 'Num_2Ks', 
                                  'Num_3Ks', 'Num_4Ks', 'Num_5Ks']].mean()
    
    return team_stats_mean

# Assuming df_scoreboard is already loaded as a DataFrame
# Retrieve unique player IDs from the scoreboard
unique_player_ids = df_scoreboard['PlayerID'].unique()

# Randomly select 5 player IDs for Team 1 and Team 2
team1_player_ids = random.sample(list(unique_player_ids), 5)
team2_player_ids = random.sample(list(set(unique_player_ids) - set(team1_player_ids)), 5)

# Display the selected player IDs
print(f"Team 1 Player IDs: {team1_player_ids}")
print(f"Team 2 Player IDs: {team2_player_ids}")

# Get stats for both teams using the randomly selected player IDs
team1_stats = get_team_stats(team1_player_ids, df_scoreboard)
team2_stats = get_team_stats(team2_player_ids, df_scoreboard)

# Display the aggregated stats for both teams
print("Team 1 Stats:")
print(team1_stats)
print("\nTeam 2 Stats:")
print(team2_stats)


Team 1 Player IDs: ['3145', '2083', '8669', '14799', '19697']
Team 2 Player IDs: ['8179', '2124', '450', '14991', '7119']
Team 1 Stats:
ACS               205.513514
Kills              14.864865
Deaths             14.702703
Assists             5.810811
PlusMinus           0.162162
KAST_Percent             NaN
ADR               137.055556
HS_Percent          0.226944
FirstKills          1.216216
FirstDeaths         1.388889
FKFD_PlusMinus     -0.194444
Num_2Ks             3.083333
Num_3Ks             0.555556
Num_4Ks             0.222222
Num_5Ks             0.027778
dtype: float64

Team 2 Stats:
ACS               176.019608
Kills              12.607843
Deaths             13.745098
Assists             5.686275
PlusMinus          -1.137255
KAST_Percent        0.605000
ADR               124.634146
HS_Percent          0.186750
FirstKills          1.313725
FirstDeaths         1.325000
FKFD_PlusMinus     -0.125000
Num_2Ks             2.200000
Num_3Ks             0.700000
Num_4Ks             0.

# Prepare the Input for Prediction

In [12]:
# Concatenate the statistics for both teams into a single feature vector
match_features = pd.concat([team1_stats, team2_stats], axis=0)  # This still gives 30 features

# Instead of using both team's stats, you can calculate differences or means
# For example, let's create a feature vector based on the difference of means
match_features = (team1_stats - team2_stats).values.reshape(1, -1)

# Ensure match_features has the same length as the original model's input (15 features)
print("Match Feature Vector (Differences):")
print(match_features)

# Standardize the features using the same scaler used for training
match_features_scaled = scaler.transform(match_features)

# Display the final feature matrix
print("Match Feature Vector (Standardized):")
print(match_features_scaled)


Match Feature Vector (Differences):
[[ 2.94939057e+01  2.25702173e+00  9.57604663e-01  1.24536301e-01
   1.29941706e+00             nan  1.24214092e+01  4.01944444e-02
  -9.75092740e-02  6.38888889e-02 -6.94444444e-02  8.83333333e-01
  -1.44444444e-01  9.72222222e-02  2.77777778e-03]]
Match Feature Vector (Standardized):
[[-2.76628087 -2.22532757 -3.49291913 -1.64102574  0.20633428         nan
  -2.98223544 -2.15235964 -1.23228239 -1.23174638 -0.03321461 -1.03037516
  -1.01511741 -0.17195015 -0.12335396]]


# Predict the Winner Using DNN Model

In [13]:
# Predict the winner using the trained DNN model
dnn_winner_prob = model.predict(match_features_scaled)
dnn_winner = np.round(dnn_winner_prob).astype(int)

# Print the DNN model's prediction
print(f"DNN Prediction: {'Team 1 wins' if dnn_winner == 1 else 'Team 2 wins'}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
DNN Prediction: Team 2 wins
