### Model Training

#### Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the processed match data (one CSV, path relative to this notebook)
df = pd.read_csv(filepath_or_buffer='../../data/processed/tft_match_data_with_features.csv')

In [None]:
# Column groups excluded from the feature matrix, selected by name suffix.
unit_tier_columns = [col for col in df.columns if col.endswith('_tier')]
unit_rarity_columns = [col for col in df.columns if col.endswith('_rarity')]
trait_columns = [col for col in df.columns if col.endswith('_style')]

# Select features and target.
# 'placement' is the prediction target, so it has to be removed from X too:
# leaving it in lets the model read the answer straight from its input
# (target leakage) and makes every downstream metric meaningless.
# NOTE(review): columns such as 'last_round' and 'time_eliminated' look like
# end-of-game outcomes as well -- confirm they should remain features.
X = df.drop(
    columns=['match_id', 'puuid', 'augments', 'activated_traits_list', 'units_on_board', 'placement']
    + unit_tier_columns
    + unit_rarity_columns
    + trait_columns
)
y = df['placement']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build a random forest with a fixed seed, then fit it on the training split
model = RandomForestClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

In [None]:
# Predict placements for the held-out rows
y_pred = model.predict(X=X_test)

In [None]:
# Overall accuracy on the held-out split
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix
# Pin the label order explicitly (sorted union of true and predicted labels,
# which is also what confusion_matrix uses by default) so the heatmap ticks
# can show the real class labels instead of 0-based cell positions.
class_labels = sorted(set(y_test) | set(y_pred))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Rank every model feature by its importance, largest first
feature_importance = (
    pd.Series(data=model.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)

# Figure height grows with the number of features (a quarter inch per bar)
plt.figure(figsize=(12, X.shape[1] / 4))
sns.barplot(y=feature_importance.index, x=feature_importance.values)
plt.title('Feature Importance')
plt.show()

In [None]:
# Separate columns into different categories.
# Every list is derived from X.columns (the index of feature_importance), so a
# column that was excluded from the feature matrix can never end up in a list
# and trigger a KeyError when the category is plotted.
value_columns = [col for col in X.columns if col.endswith('_value')]
activated_trait_columns = [col for col in X.columns if col.endswith('_style_activated')]
augment_columns = [col for col in X.columns if 'Augment' in col]
# Remaining features are listed by hand.
other_columns = ['total_damage_to_players', 'last_round', 'time_eliminated', 'total_unit_values', 'avg_unit_tiers', 'avg_unit_rarities', 'gold_left', 'num_activated_traits', 'level']

# Function to plot feature importance
def plot_feature_importance(columns, title):
    """Draw a bar chart of the importances for a subset of features.

    Args:
        columns: Feature names to look up in the module-level
            ``feature_importance`` series.
        title: Text appended to the chart title.
    """
    ranked = feature_importance[columns].sort_values(ascending=False)

    # Shorten the axis labels by stripping naming markers, in this order.
    short_labels = []
    for label in ranked.index:
        for marker in ('_style_activated', '_value', 'TFT'):
            label = label.replace(marker, '')
        short_labels.append(label)
    ranked.index = short_labels

    # A quarter inch of figure height per requested column.
    plt.figure(figsize=(12, len(columns) / 4))
    sns.barplot(y=ranked.index, x=ranked.values)
    plt.title(f'Feature Importance: {title}')
    plt.show()

# One chart per feature category
plot_feature_importance(columns=value_columns, title='Units\' Values')
plot_feature_importance(columns=activated_trait_columns, title='Traits')
plot_feature_importance(columns=augment_columns, title='Augments')
plot_feature_importance(columns=other_columns, title='Other Variables')

#### Correlation Analysis

In [None]:
# Calculate correlations between features and placement
correlations = (
    df.drop(columns=['match_id', 'puuid', 'augments', 'activated_traits_list', 'units_on_board'] + unit_tier_columns + unit_rarity_columns + trait_columns)
    .corr()['placement']
    # The target correlates perfectly with itself; without this drop it is
    # always reported as the strongest "positive" feature.
    .drop('placement')
    # Constant columns produce NaN correlations, which sort_values() puts
    # last and which would otherwise crowd the real values out of tail(10).
    .dropna()
    .sort_values()
)

# Shorten the labels for display
correlations.index = [index.replace('_style_activated', '').replace('_value', '').replace('TFT', '') for index in correlations.index]


# Display the top features negatively correlated with placement
print("Top features negatively correlated with placement (contributing to better performance):")
print(correlations.head(10))

# Display the top features positively correlated with placement
print("\nTop features positively correlated with placement (contributing to worse performance):")
print(correlations.tail(10))

# Plot the correlations (a quarter inch of figure height per feature)
plt.figure(figsize=(12, len(correlations)/4))
sns.barplot(x=correlations.values, y=correlations.index)
plt.title('Correlation with Placement')
plt.xlabel('Correlation Coefficient')
plt.ylabel('Features')
plt.show()

In [None]:
# Example function to make predictions given a board configuration
def predict_placement(model, board_config, feature_columns):
    """Predict the placement for a single board configuration.

    Args:
        model: Object exposing ``predict``; it is called with a one-row
            DataFrame.
        board_config: Mapping of feature name to value for one board.
        feature_columns: Column names, in order, that the frame passed to
            ``model.predict`` must have.

    Returns:
        The first element of whatever ``model.predict`` returns.
    """
    # Align the single row to feature_columns: features missing from
    # board_config become 0, and keys not in feature_columns are discarded.
    single_row = pd.DataFrame([board_config]).reindex(
        columns=feature_columns,
        fill_value=0,
    )
    return model.predict(single_row)[0]

# Example board configuration (update this with real values).
# Any model feature left out here is filled with 0 by predict_placement.
example_board = {
    'total_damage_to_players': 44,
    'time_eliminated': 1743.713134765625,
    'gold_left': 1,
    'last_round': 28,
    'level': 8,
    'num_activated_traits': 3,
    'total_unit_values': 1000,
    'avg_unit_tiers': 2.5,
    'avg_unit_rarities': 3.5,
    'TFT9_Augment_WhatDoesntKillYou': 1,
    'TFT9_Augment_BigGrabBag': 1,
    'TFT9_Augment_JeweledLotus': 1,

    # ... add more features based on your model
}

# Column order the model was fitted with
feature_columns = list(X.columns)

# Predict the placement
predicted_placement = predict_placement(
    model=model,
    board_config=example_board,
    feature_columns=feature_columns,
)
print(f"Predicted Placement: {predicted_placement}")