# CS179 Group Project
### Due: Wed, June 12, 2024 (11:59 PM)

In [None]:
import matplotlib.pyplot as plt

import pyGMs as gm
import numpy as np
import torch
import random
import pandas as pd

import requests                                      # reading data
from io import StringIO
import time

import pyro
import pyro.infer
import pyro.optim
import pyro.distributions as dist
import torch.distributions.constraints as constraints
import pyro.poutine as poutine

from IPython.display import display, clear_output  # for iterative plotting

# Fix the random seeds so that sampling and training runs are repeatable.
seed = 123
random.seed(seed)        # seed Python's built-in `random` module
pyro.set_rng_seed(seed)  # seed Pyro's random number generation

### Data Processing
---

In [None]:
# READ DATA
# Plain relative file name (the former '.\\' prefix only resolved on Windows;
# on Linux/macOS the backslash became part of the file name). The CSV is
# expected to sit in the current working directory.
file_path = '2023_LoL_esports_match_data_from_OraclesElixir.csv'
# low_memory=False: parse the file in one pass instead of in chunks, which
# avoids mixed-dtype columns at the cost of more memory while reading.
data = pd.read_csv(file_path, low_memory=False)

In [None]:
# DATA ANALYSIS

# Completeness Information: number of rows for each 'datacompleteness' value
completeness_column = data['datacompleteness']
completeness_categories = completeness_column.unique()
completeness_values = [
    int((completeness_column == category).sum())
    for category in completeness_categories
]

# Bar chart: one bar per completeness category
plt.figure()
plt.bar(completeness_categories, completeness_values)
plt.title("Data Completeness")
plt.xlabel("Completeness")
plt.ylabel("Frequency")
plt.show()

# League Information: rows with complete data per league, top five leagues only
complete_rows = data['datacompleteness'] == 'complete'
league_counts = [
    (name, int(((data['league'] == name) & complete_rows).sum()))
    for name in data['league'].unique()
]

# Descending by row count; sorted() is stable, so ties keep their first-seen order
top_5_freq_leagues = sorted(league_counts, key=lambda pair: pair[1], reverse=True)[:5]

# Split the (league, count) pairs back into parallel lists for plotting
league_categories = []
league_values = []
for name, count in top_5_freq_leagues:
    league_categories.append(name)
    league_values.append(count)

plt.figure()
plt.bar(league_categories, league_values)
plt.title("League Information")
plt.xlabel("League")
plt.ylabel("Frequency")
plt.show()


# Position Information: rows per 'position' value among complete NACL rows
# NOTE(review): this plot filters on league 'NACL' while the later team/player
# cells filter on 'LCK' -- confirm that the difference is intended.
nacl_complete = (data['datacompleteness'] == 'complete') & (data['league'] == 'NACL')
position_categories = data['position'].unique()
position_values = [
    int(((data['position'] == role) & nacl_complete).sum())
    for role in position_categories
]

plt.figure()
plt.bar(position_categories, position_values)
plt.title("Position Information")
plt.xlabel("Position")
plt.ylabel("Frequency")
plt.show()

In [None]:
# TEAM MATCHES (not used yet)
# Team-level rows (position == 'team') of LCK games with complete data.
team_data = data[(data['datacompleteness'] == 'complete') &
                 (data['league'] == 'LCK') &
                 (data['position'] == 'team')]

# Number of team-level rows per team name
teams = team_data['teamname'].value_counts()

game_ids = team_data['gameid'].unique()

# One (winner, loser) tuple per game, in order of first appearance.
game_results = []
# groupby splits the frame in a single pass instead of re-filtering all of
# team_data once per game id; sort=False keeps first-appearance order.
for gameid, game in team_data.groupby('gameid', sort=False):
    winners = game.loc[game['result'] == 1, 'teamname']
    losers = game.loc[game['result'] == 0, 'teamname']
    if winners.empty or losers.empty:
        # The game has no winning or no losing team row; skip it rather than
        # failing with an IndexError on the missing side.
        continue
    team1, team2 = winners.iloc[0], losers.iloc[0]
    # Team 1 beats Team 2
    game_results.append((team1, team2))

In [None]:
# PLAYER DATA CALCULATIONS 

def normalize_by_position(df, column, role):
    """Standardise ``column`` in place for the rows of ``df`` played in ``role``.

    The rows whose ``'position'`` equals ``role`` are replaced by their
    z-scores, ``(value - mean) / std``, where mean and std are computed over
    those rows only. Rows of other positions are left untouched.

    Args:
        df: DataFrame with a ``'position'`` column and the column to scale.
            It is modified in place.
        column: Name of the numeric column to standardise.
        role: Position value selecting the rows to standardise.

    Returns:
        None. The result is written back into ``df``.

    Notes:
        When the standard deviation is zero (all values equal) or undefined
        (a single row), the values are only centred, i.e. set to 0, instead
        of being divided by zero/NaN.
    """
    role_mask = df['position'] == role
    if not role_mask.any():
        # No rows for this role: nothing to normalise.
        return

    role_values = df.loc[role_mask, column]
    centered = role_values - role_values.mean()
    std = role_values.std()

    # `std > 0` is False for both 0 and NaN, so degenerate groups fall back
    # to the centred values rather than producing NaN/inf.
    scaled = centered / std if std > 0 else centered
    df.loc[role_mask, column] = scaled


# Player-level rows (the five lane roles) of LCK games with complete data
lane_roles = ['top', 'jng', 'mid', 'bot', 'sup']
complete_mask = data['datacompleteness'] == 'complete'
lck_mask = data['league'] == 'LCK'
lane_mask = data['position'].isin(lane_roles)
player_data = data[complete_mask & lck_mask & lane_mask]


# Identifier columns, the three performance metrics and the game result
relevant_columns = ['gameid', 'playername', 'teamname', 'position', 'xpdiffat15', 'golddiffat15', 'damageshare', 'result']
player_subset = player_data[relevant_columns]
# Count of missing entries per column, taken before the incomplete rows are dropped
missing_values = player_subset.isnull().sum()
player_data = player_subset.dropna()


roles = player_data['position'].unique()
metrics = ['xpdiffat15', 'golddiffat15', 'damageshare']

# Standardise every metric separately within each role
for position in roles:
    for metric in metrics:
        normalize_by_position(player_data, metric, position)

# Feature matrix: one row per player-game, one column per metric (same order as `metrics`)
X = torch.tensor(player_data[metrics].values, dtype=torch.float)

In [None]:
# MODEL
# Empty Pyro's parameter store before defining the model (presumably so that
# re-running the notebook re-initialises 'skill_mean' / 'skill_std' instead of
# reusing values from an earlier run -- confirm).
pyro.get_param_store().clear()

def model(xpdiffat15, golddiffat15, damageshare):
    """Generative model: one latent 'skill' per data point explains three metrics.

    Inside a plate over the data points, 'skill' is drawn from Normal(0, 1)
    and each observed metric is modelled as Normal(skill, scale), with scale
    1 for 'xpdiffat15' and 'golddiffat15' and 0.5 for 'damageshare'.

    Args:
        xpdiffat15: Observed values for the 'xpdiffat15' site; its length
            sets the plate size.
        golddiffat15: Observed values for the 'golddiffat15' site.
        damageshare: Observed values for the 'damageshare' site.
    """
    n_obs = len(xpdiffat15)
    # (site name, observed values, likelihood scale)
    observed_sites = (
        ('xpdiffat15', xpdiffat15, 1),
        ('golddiffat15', golddiffat15, 1),
        ('damageshare', damageshare, 0.5),
    )
    with pyro.plate("data", n_obs):
        skill = pyro.sample('skill', dist.Normal(0, 1))
        for site, values, scale in observed_sites:
            pyro.sample(site, dist.Normal(skill, scale), obs=values)

def guide(xpdiffat15, golddiffat15, damageshare):
    """Variational guide: 'skill' ~ Normal(skill_mean, skill_std) for every data point.

    Registers two learnable parameters, 'skill_mean' (initial value 0.0) and
    'skill_std' (initial value 1.0, constrained positive), and samples the
    'skill' site from the resulting Normal inside a plate over the data.
    The arguments mirror the model's signature; only the length of
    ``xpdiffat15`` is used, to size the plate.
    """
    loc = pyro.param('skill_mean', torch.tensor(0.0))
    scale = pyro.param('skill_std', torch.tensor(1.0), constraint=dist.constraints.positive)
    skill_posterior = dist.Normal(loc, scale)
    n_obs = len(xpdiffat15)
    with pyro.plate("data", n_obs):
        pyro.sample('skill', skill_posterior)

In [None]:
# TRAINING
# Stochastic variational inference with Adam (learning rate 0.01) and the Trace_ELBO loss
optimizer = pyro.optim.Adam({'lr': 0.01})
elbo = pyro.infer.Trace_ELBO()
svi = pyro.infer.SVI(model, guide, optimizer, loss=elbo)

num_steps = 10000
losses = []
start_time = time.time()

# Train loop
for step in range(num_steps):
    loss = svi.step(X[:, 0], X[:, 1], X[:, 2])
    if step % 100 != 0:
        continue

    # Every 100th step: record the loss and redraw the loss curve in place
    losses.append(loss)
    clear_output(wait=True)
    plt.figure()
    plt.plot(losses)
    plt.title("Loss Over Steps")
    plt.xlabel("Step Count (In 100's)")
    plt.ylabel("Loss")
    plt.show()

end_time = time.time()
elapsed = end_time - start_time
print(f"Execution time: {elapsed} seconds")


In [None]:
# ANALYSIS
# Read the two learned guide parameters back as plain Python floats
fitted_params = {
    name: pyro.param(name).item()
    for name in ('skill_mean', 'skill_std')
}
skill_mean = fitted_params['skill_mean']
skill_std = fitted_params['skill_std']

print(f"Estimated skill: Mean = {skill_mean}, Std = {skill_std}")

In [None]:
# INDIVIDUAL PLAYER SKILL ANALYSIS
# Draw one 'skill' sample per player-game row from the fitted guide. A single
# traced call over all rows yields the whole batch at once (the guide's plate
# is sized by the input length), replacing one Python-level trace per row.
# NOTE(review): the guide's parameters ('skill_mean', 'skill_std') are shared
# by all rows and the guide uses its arguments only for their length, so every
# draw comes from the same Normal and does not depend on that row's metrics.
with torch.no_grad():
    guide_trace = pyro.poutine.trace(guide).get_trace(X[:, 0], X[:, 1], X[:, 2])
player_skills = guide_trace.nodes['skill']['value'].tolist()

# Positional assignment: X was built from player_data, so row order matches.
player_data['estimated_skill'] = player_skills

# Per-player summary: mean sampled skill, games played, wins, team(s) and the
# mean of each (normalised) metric.
aggregated_skills = player_data.groupby('playername').agg(
    estimated_skill=('estimated_skill', 'mean'),
    num_games=('playername', 'count'),
    wins=('result', lambda x: (x == 1).sum()),
    teamname=('teamname', lambda x: ', '.join(x.unique())),
    xp_diff=('xpdiffat15', 'mean'),
    gold_diff=('golddiffat15', 'mean'),  # column was previously misspelled 'good_diff'
    damage_share=('damageshare', 'mean'),
).reset_index()

# Highest estimated skill first
sorted_players = aggregated_skills.sort_values(by='estimated_skill', ascending=False)

print(sorted_players.to_string(index=False))

In [None]:
# Histogram of synthetic draws from Normal(skill_mean, skill_std): it shows the
# fitted distribution only, not the actual data.
n_draws = 10000
skill_distribution = torch.normal(mean=skill_mean, std=skill_std, size=(n_draws,))
skill_samples = skill_distribution.numpy()

plt.hist(skill_samples, bins=50, label='Estimated Skill')
plt.title('Estimated Skill')
plt.xlabel('Skill')
plt.ylabel('Frequency')
plt.autoscale(enable=True, axis='x', tight=True)
plt.show()