### Modules

In [1]:
import archives_manager

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Display configuration: never truncate rows and use a very wide console so
# printed frames are not wrapped across several lines.
pd.options.display.max_rows = None
pd.options.display.width = 10000

### Functions

In [3]:
def get_player_analysis(player_name, num_games, time_class='rapid', max_elo_diff=150):
    """Build a per-game feature table from a player's most recent games.

    Games are requested from ``archives_manager.get_most_recent_games`` with a
    filter built by ``archives_manager.build_archive_filter(rated=True,
    exclude_draws=True, max_elo_diff=max_elo_diff)``.

    Parameters
    ----------
    player_name : str
        Player whose games are fetched; also passed to ``get_won``/``get_elo``.
    num_games : int
        Number of games requested from the archive helper.
    time_class : str, default 'rapid'
        Forwarded as ``time_class`` to ``get_most_recent_games``.
    max_elo_diff : int, default 150
        Forwarded as ``max_elo_diff`` to ``build_archive_filter``.

    Returns
    -------
    (pandas.DataFrame, sequence)
        The frame has one row per archived game with the columns
        ``player_name``, ``unix`` (the game's ``'end_time'``), ``player_elo``,
        ``opp_elo``, ``elo_diff`` (player minus opponent), ``won``, plus the
        derived ``ma20``, ``ma5``, ``x-ma20``, ``x-ma5`` and ``ma5-ma20``.
        The second element is the archive exactly as returned by
        ``archives_manager``.

    Notes
    -----
    * ``rolling(window=k)`` yields NaN until ``k`` rows are available, so the
      first 19 rows have no ``ma20`` (and the first 4 no ``ma5``).
    * Each rolling window ends at, and therefore includes, the current row.
    * NOTE(review): the moving averages are only meaningful if the archive is
      in chronological order -- confirm against ``archives_manager``.
    * If no game is returned, an empty frame with the full column set is
      returned instead of failing on the missing ``player_elo`` column.
    """
    base_columns = ['player_name', 'unix', 'player_elo', 'opp_elo', 'elo_diff', 'won']
    feature_columns = ['ma20', 'ma5', 'x-ma20', 'x-ma5', 'ma5-ma20']

    archive_filter = archives_manager.build_archive_filter(
        rated=True, exclude_draws=True, max_elo_diff=max_elo_diff
    )
    recent_archive = archives_manager.get_most_recent_games(
        player_name, num_games, time_class=time_class, filter_func=archive_filter
    )

    records = []
    for archived_game in recent_archive:
        actual = archives_manager.get_won(archived_game, player_name)
        elo = archives_manager.get_elo(archived_game, player_name)

        records.append({
            'player_name': player_name,
            'unix': archived_game['end_time'],
            'player_elo': elo['Player'],
            'opp_elo': elo['Opponent'],
            'elo_diff': elo['Player'] - elo['Opponent'],
            'won': actual,
        })

    if not records:
        # pd.DataFrame([]) has no columns at all, so the rolling features below
        # would raise KeyError; hand back an empty frame with the same schema.
        return pd.DataFrame(columns=base_columns + feature_columns), recent_archive

    df = pd.DataFrame(records)

    rating = df['player_elo']
    df['ma20'] = rating.rolling(window=20).mean()
    df['ma5'] = rating.rolling(window=5).mean()
    # Deviation of the current rating from its long/short moving average.
    df['x-ma20'] = rating - df['ma20']
    df['x-ma5'] = rating - df['ma5']
    # Gap between the short and the long moving average.
    df['ma5-ma20'] = df['ma5'] - df['ma20']

    return df, recent_archive

### Data Gathering/Compiling

In [4]:
# Number of games requested per player.
num_games_per_player = 1000

analyses = []
stats_list = []  # not populated in this cell; kept in case another cell references it

players = [
    'BIG_TONKA_T',
    'UnderTheBeer',
    'Dandres0_0',
    'Ale9800',
    'MrOGH'
]

for player_name in players:
    # A distinct name keeps the per-player frame from shadowing the combined `df` built below.
    player_df, recent_archive = get_player_analysis(player_name, num_games=num_games_per_player)

    # The archive is ordered oldest-first: in the preview output the 'unix'
    # end times increase with the row index. The most recent game, and hence
    # the current rating, is therefore the LAST element, not the first.
    if len(recent_archive) > 0:
        current_rating = archives_manager.get_elo(recent_archive[-1], player_name)['Player']
    else:
        current_rating = None  # no games matched the filter for this player

    player_info = {
        'Name': player_name,
        'Current Rating': current_rating,
    }

    analyses.append({
        'player_name': player_name,
        'stats': player_info,
        'data': player_df,
        'archive': recent_archive
    })

# Stack every player's games into one modelling table.
data_list = [analysis['data'] for analysis in analyses]

# dropna() removes each player's warm-up rows, where the 20-game moving average
# is still NaN. reset_index() (without drop=True) keeps the old per-player row
# position as an 'index' column.
df = pd.concat(data_list).dropna().reset_index()

In [5]:
# Preview the combined table; head() shows the first five rows by default.
df.head()

Unnamed: 0,index,player_name,unix,player_elo,opp_elo,elo_diff,won,ma20,ma5,x-ma20,x-ma5,ma5-ma20
0,19,BIG_TONKA_T,1697192205,1264,1258,6,0,1330.4,1280.0,-66.4,-16.0,-50.4
1,20,BIG_TONKA_T,1697192679,1256,1234,22,0,1325.4,1272.2,-69.4,-16.2,-53.2
2,21,BIG_TONKA_T,1697192866,1247,1257,-10,0,1319.6,1263.8,-72.6,-16.8,-55.8
3,22,BIG_TONKA_T,1697193148,1239,1254,-15,0,1313.05,1255.6,-74.05,-16.6,-57.45
4,23,BIG_TONKA_T,1697193242,1231,1237,-6,0,1305.75,1247.4,-74.75,-16.4,-58.35


### Logistic Regression

In [6]:
# Baseline model: the rating gap (player minus opponent) is the only feature,
# and the game result column 'won' is the label.
X = df[['elo_diff']]
y = df['won']

# Keep 20% of the rows aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out rows: overall accuracy first, then the per-class report.
predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

0.5384615384615384
              precision    recall  f1-score   support

           0       0.57      0.23      0.32       444
           1       0.53      0.83      0.65       466

    accuracy                           0.54       910
   macro avg       0.55      0.53      0.49       910
weighted avg       0.55      0.54      0.49       910



In [7]:
# Second model: rating gap plus the deviation of the player's rating from its
# 20-game moving average. The label `y` is reused from the baseline cell.
X = df[['elo_diff', 'x-ma20']]

# Same 80/20 split and seed as the baseline, so the scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

0.5175824175824176
              precision    recall  f1-score   support

           0       0.51      0.36      0.42       444
           1       0.52      0.67      0.59       466

    accuracy                           0.52       910
   macro avg       0.52      0.51      0.50       910
weighted avg       0.52      0.52      0.51       910



In [8]:
# Third model: rating gap plus the deviation of the player's rating from its
# 5-game moving average. The label `y` is reused from the baseline cell.
X = df[['elo_diff', 'x-ma5']]

# Same 80/20 split and seed as the earlier models, so the scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

0.5186813186813187
              precision    recall  f1-score   support

           0       0.51      0.40      0.45       444
           1       0.53      0.63      0.57       466

    accuracy                           0.52       910
   macro avg       0.52      0.52      0.51       910
weighted avg       0.52      0.52      0.51       910



In [9]:
# Interaction feature: product of the two rating-vs-moving-average deviations.
# Note that this adds a new column to the shared `df`.
df['interaction'] = df['x-ma20'].mul(df['x-ma5'])
X = df[['elo_diff', 'x-ma20', 'interaction']]

# Same 80/20 split and seed as the earlier models; `y` is reused from the baseline cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

0.5131868131868131
              precision    recall  f1-score   support

           0       0.50      0.29      0.36       444
           1       0.52      0.73      0.61       466

    accuracy                           0.51       910
   macro avg       0.51      0.51      0.48       910
weighted avg       0.51      0.51      0.49       910

