In [338]:
from collections import defaultdict as ddict
from typing import DefaultDict, Set, List, Tuple, Dict
import itertools
import os

import freeman as fm
from redes import constraint

import pandas as pd
import numpy as np
import networkx as nx
import statsmodels.api as sm

In [339]:
bruno = pd.read_csv(
    "csv/players.csv",
     usecols=("date", "player_name", "team", "player_id", "match_id", "rating"),
)

In [340]:
bruno.loc[:, "date"] = pd.to_datetime(bruno["date"], format="%Y-%m-%d")
bruno.loc[:, "player_name"] = bruno["player_name"].apply(
    lambda x: str(x).encode('ascii', 'backslashreplace').decode('ascii')
)

In [341]:
cutoff_date = "2017-06-01"
if cutoff_date:
    bruno = bruno[bruno["date"] >= cutoff_date]

In [342]:
bruno.cabeca = bruno.head
print(bruno.shape)
bruno.cabeca()

(266769, 6)


Unnamed: 0,date,player_name,team,player_id,match_id,rating
0,2020-02-26,Brehze,Evil Geniuses,9136,2339385,1.04
1,2020-02-26,CeRq,Evil Geniuses,11219,2339385,0.98
2,2020-02-26,EliGE,Liquid,8738,2339385,1.08
3,2020-02-26,Ethan,Evil Geniuses,10671,2339385,0.83
4,2020-02-26,NAF,Liquid,8520,2339385,1.08


In [343]:
upid_rating = bruno.groupby(["player_id", "player_name"])["rating"]
# median = upid_rating.median()
# median.name = "median"

mean = upid_rating.mean()
mean.name = "mean"

# mdiff = median - mean
# mdiff.name = "mdiff"

# var = upid_rating.var().fillna(upid_rating.last())
# var.name = "var"

helionp = pd.concat([mean], axis=1)
helionp.ceara = helionp.head
helionp.ceara()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean
player_id,player_name,Unnamed: 2_level_1
7,Friis,1.085161
11,Vertigo,0.925
13,RashiE,1.294444
15,m1kkis,0.903333
17,Zyppe,1.019643


In [344]:
mdate = max(bruno["date"])

In [345]:
teams: DefaultDict[str, Set[str]] = ddict(set)
# teams_date: Dict[Tuple[str, str], str] = {}
player_date = ddict(list)

def create_team(x):
    global teams
    xp = x["player_id"]
    teams[x.name[0]].update(xp.values)
    for player1 in xp:
        for player2 in xp:
            if player1 != player2:
                player_date[(player1, player2)].append(x["date"].max())
#     player_date[x.name[0]].append(x["date"])

bruno.groupby(["team", "match_id"]).apply(
    lambda x: create_team(x)
);

In [346]:
if not os.path.isdir("gml"):
    os.mkdir("gml")

In [347]:
def cached_write(path: str, func, args: list=[]):
    if os.path.isfile(path):
        return
    data = func(*args)
    with open(path, "w+") as f:
        f.write(data)

In [348]:
def create_team_gml(helionp, teams) -> str:
    global player_date
    global mdate
    gml = "graph [\n    directed 0\n"
    for (p_id, p_name) in helionp.index:
        gml += "    node [\n"
        gml += f"        id {p_id}\n"
        gml += f"        label \"{p_name}\"\n"
        gml += "    ]\n"

    edges = set()
    for team in teams:
        for edge in itertools.combinations(teams[team], 2):
            edge = tuple(sorted(edge)) # Probably not necessary
            if edge not in edges:
                edges.add(edge)
                
                e01 = (edge[0], edge[1])
                ds = player_date[e01]
                if len(ds) == 0:
#                     print("E", e01)
                    continue
#                 print("OK", e01)
                
                d = (mdate - max(ds)).asm8.astype(int)/10**9
                d = int(d)
#                 if d != 0:
#                     d = 1/d                    

                gml += "    edge [\n"
                gml += f"        source {edge[0]}\n"
                gml += f"        target {edge[1]}\n"
                gml += f"        distance {d}\n"
                gml += "    ]\n"
    gml += "]\n"
    return gml

In [349]:
if cutoff_date:
    sm_filename = "same_team_" + cutoff_date.replace("-","_") + ".gml"
else:
    sm_filename = "same_team.gml"
cached_write(f"gml/{sm_filename}", create_team_gml, [helionp, teams])

In [350]:
gsame = fm.load(f"gml/{sm_filename}")

In [351]:
# gagainst.set_all_nodes(size=10, labpos='hover')
# gagainst.set_all_edges(color=(0, 0, 0, 0.25))
# # g.move("kamada_kawai")
# gagainst.draw()

In [352]:
cc = nx.closeness_centrality(gsame, distance='distance')

In [353]:
helionp.loc[:, "prox"] = pd.DataFrame.from_records(list(cc.items()))[1].values

In [354]:
helionp.ceara()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,prox
player_id,player_name,Unnamed: 2_level_1,Unnamed: 3_level_1
7,Friis,1.085161,7.990833e-09
11,Vertigo,0.925,5.733023e-09
13,RashiE,1.294444,7.394917e-09
15,m1kkis,0.903333,4.52067e-09
17,Zyppe,1.019643,5.797083e-09


In [355]:
# x = sm.add_constant(helionp[['ties', "challengers"]])
x = helionp[['prox']]
y = helionp['mean']
model = sm.OLS(y, x) # , missing='drop')
result = model.fit()
print(cutoff_date)
print(result.pvalues)
result.summary()

2017-06-01
prox    0.0
dtype: float64


0,1,2,3
Dep. Variable:,mean,R-squared (uncentered):,0.797
Model:,OLS,Adj. R-squared (uncentered):,0.797
Method:,Least Squares,F-statistic:,36250.0
Date:,"Sat, 12 Dec 2020",Prob (F-statistic):,0.0
Time:,01:08:34,Log-Likelihood:,-5032.6
No. Observations:,9242,AIC:,10070.0
Df Residuals:,9241,BIC:,10070.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
prox,1.384e+08,7.27e+05,190.390,0.000,1.37e+08,1.4e+08

0,1,2,3
Omnibus:,689.703,Durbin-Watson:,0.807
Prob(Omnibus):,0.0,Jarque-Bera (JB):,851.468
Skew:,0.734,Prob(JB):,1.2800000000000002e-185
Kurtosis:,3.242,Cond. No.,1.0


In [316]:
helionp.to_csv("helionp_" + cutoff_date.replace("-","_") + ".csv")

In [281]:
def create_against(bruno) -> List[Tuple[Tuple[str], Tuple[str]]]:
    against = []
    def f(x: pd.Series):
        try:
            t1, t2 = x["team"].unique()
        except ValueError:
            return
        for i in range(len(x["player_id"][x["team"] == t1].values)):
            for j in range(len(x["player_id"][x["team"] == t2].values)):
                against.append(
                    (
                        x["player_id"][x["team"] == t1].values[i],
                        x["player_id"][x["team"] == t2].values[j]
                    )
                )

    bruno.groupby(["match_id"])[["team", "player_id"]].apply(f);
    return against

In [None]:
def create_against_gml(helionp, bruno) -> str:
    gml = "graph [\n    directed 0\n"
    for (p_id, p_name) in helionp.index:
        gml += "    node [\n"
        gml += f"        id {p_id}\n"
        gml += f"        label \"{p_name}\"\n"
        gml += "    ]\n"

    against = create_against(bruno)

    edges = set()
    for ag in against:
        for edge in itertools.combinations(ag, 2):
            edge = tuple(sorted(edge)) # Probably not necessary
            if edge not in edges:
                edges.add(edge)

                gml += "    edge [\n"
                gml += f"        source {edge[0]}\n"
                gml += f"        target {edge[1]}\n"
                gml += "    ]\n"
    gml += "]\n"
    return gml

In [None]:
if cutoff_date:
    ag_filename = "against_" + cutoff_date.replace("-","_") + ".gml"
else:
    ag_filename = "against.gml"

cached_write(f"gml/{ag_filename}", create_against_gml, [helionp, bruno])

In [None]:
gagainst = fm.load(f"gml/{ag_filename}")

In [None]:
dc = nx.degree_centrality(gagainst)
helionp.loc[:, "challengers"] = pd.DataFrame.from_records(list(dc.items()))[1].values