# 0. Imports and Useful Values

In [1]:
import tftpredict
import os
import pickle
from collections import defaultdict
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


# Some useful global parameters
rng_seed = 42
api_key = "RGAPI-ba2fb131-9dfa-424f-b69f-44af41b5bc6f"
data_out = "Set10" # This is where all of your data will be stored

# 1a. Data Collection: High Rank IDs

In [None]:
%%time
puuids = list(set(tftpredict.get_player_puuids('challenger', api_key) + tftpredict.get_player_puuids('grandmaster')))# + tftpredict.get_player_puuids('master')))
print(len(puuids))

# 1b. Data Collection: Match IDs

In [None]:
%%time
match_ids = tftpredict.get_match_ids(puuids, 1701388800, api_key) # 1701388800 is Dec 01 2023; the goal was to give some time between set launch
print(len(match_ids))

# 1c. Data Collection: Match Data

In [None]:
%%time
tftpredict.get_match_data(match_ids, data_out, api_key)

# 1d. Data Collection: Data Compilation

In [4]:
data = tftpredict.compile_data(data_out)

# 2. Feature Engineering

In [5]:
def get_match_vector(p1, p2, q_features = True):
    match_vector = defaultdict(float)

    # Level
    match_vector["p1_level"] = p1['level']
    match_vector["p2_level"] = p2['level']

    # Traits: # units and tier
    for trait in p1['traits']:
        match_vector[f"p1_{trait['name']}_units"] = trait['num_units']
        match_vector[f"p1_{trait['name']}_tier"] = trait['tier_current']
        if q_features:
            match_vector[f"p1_{trait['name']}_tier_q"] = trait['tier_current'] ** 2
    for trait in p2['traits']:
        match_vector[f"p2_{trait['name']}_units"] = trait['num_units']
        match_vector[f"p2_{trait['name']}_tier"] = trait['tier_current']
        if q_features:
            match_vector[f"p2_{trait['name']}_tier_q"] = trait['tier_current'] ** 2

    # Units: Binary feature, tier, and item OHE
    for unit in p1['units']:
        match_vector[f"p1_{unit['character_id']}"] = 1
        match_vector[f"p1_{unit['character_id']}_tier"] = unit['tier']
        match_vector[f"p1_{unit['character_id']}_items"] = len(unit['itemNames'])
        if q_features:
            match_vector[f"p1_{unit['character_id']}_tier_q"] = unit['tier'] ** 2
            match_vector[f"p1_{unit['character_id']}_items_q"] = len(unit['itemNames']) ** 2
        for item in unit['itemNames']:
            match_vector[f"p1_{unit['character_id']}_{item}"] += 1
    for unit in p2['units']:
        match_vector[f"p2_{unit['character_id']}"] = 1
        match_vector[f"p2_{unit['character_id']}_tier"] = unit['tier']
        match_vector[f"p2_{unit['character_id']}_items"] = len(unit['itemNames'])
        if q_features:
            match_vector[f"p2_{unit['character_id']}_tier_q"] = unit['tier'] ** 2
            match_vector[f"p2_{unit['character_id']}_items_q"] = len(unit['itemNames']) ** 2
        for item in unit['itemNames']:
            match_vector[f"p2_{unit['character_id']}_{item}"] += 1

    # Augments: OHE
    for augment in p1['augments']:
        match_vector[f"p1_{augment}"] = 1
    for augment in p2['augments']:
        match_vector[f"p2_{augment}"] = 1

    return pd.Series(match_vector)


In [6]:
#y, X = tftpredict.fit_features(data, get_match_vector, data_out, 'linear_features')
y, X = tftpredict.fit_features(data, get_match_vector, data_out, 'quadratic_features')

# 3. Model Building - Preprocessing

In [7]:
X = X.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = rng_seed)

# 3. Model Building - Logistic Regression

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create a Logistic Regression model
logreg_model = LogisticRegression()

# Train the model
logreg_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = logreg_model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# Display confusion matrix and classification report
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print('Confusion Matrix:')
print(conf_matrix)

print('\nClassification Report:')
print(class_report)

Accuracy: 0.7720
Confusion Matrix:
[[2347  702]
 [ 662 2272]]

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.77      0.77      3049
           1       0.76      0.77      0.77      2934

    accuracy                           0.77      5983
   macro avg       0.77      0.77      0.77      5983
weighted avg       0.77      0.77      0.77      5983



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load a dataset (e.g., the famous Iris dataset)
X = X.values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVC model
svc_model = SVC(kernel='linear', C=1)

# Train the model
svc_model.fit(X_train, y_train)

# Make predictions
y_pred = svc_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
