In [1]:
from collections import defaultdict as ddict
from typing import DefaultDict, Set
import itertools
import os

import freeman as fm
from redes import constraint

import pandas as pd
import numpy as np
import networkx as nx
import statsmodels.api as sm

In [2]:
# Load only the columns this analysis uses from the per-player match export.
player_columns = ["date", "player_name", "team", "player_id", "match_id", "rating"]
bruno = pd.read_csv("csv/players.csv", usecols=player_columns)

In [3]:
# Parse the ISO-formatted date strings into datetimes.
# Plain column assignment is used instead of `bruno.loc[:, "date"] = ...`:
# on pandas >= 2.0 the `.loc[:, col]` form writes into the existing object
# column in place, so the dtype would stay `object` and the later comparison
# against the cutoff string would fail.
bruno["date"] = pd.to_datetime(bruno["date"], format="%Y-%m-%d")

# Force player names to pure ASCII; any non-ASCII character is replaced by its
# backslash escape so the names can be written to plain-text files later.
bruno["player_name"] = bruno["player_name"].apply(
    lambda name: str(name).encode('ascii', 'backslashreplace').decode('ascii')
)

In [4]:
# Keep only rows dated on or after the cutoff; an empty/None cutoff disables
# the filter. `cutoff_date` is also used further down to name the GML files.
cutoff_date = "2017-06-01"
if cutoff_date:
    on_or_after_cutoff = bruno["date"] >= cutoff_date
    bruno = bruno.loc[on_or_after_cutoff]

In [5]:
# Sanity check after filtering: print (rows, columns), then show the first rows.
# `cabeca` is just an alias for `DataFrame.head` stored on this one instance.
bruno.cabeca = bruno.head
print(bruno.shape)
bruno.head()

(266769, 6)


Unnamed: 0,date,player_name,team,player_id,match_id,rating
0,2020-02-26,Brehze,Evil Geniuses,9136,2339385,1.04
1,2020-02-26,CeRq,Evil Geniuses,11219,2339385,0.98
2,2020-02-26,EliGE,Liquid,8738,2339385,1.08
3,2020-02-26,Ethan,Evil Geniuses,10671,2339385,0.83
4,2020-02-26,NAF,Liquid,8520,2339385,1.08


In [6]:
# Per-player feature table, indexed by (player_id, player_name).
# Only the mean rating is computed; median / mean-median difference / variance
# were tried earlier and are intentionally left out.
upid_rating = bruno.groupby(["player_id", "player_name"])["rating"]

mean = upid_rating.mean().rename("mean")

# One-column frame that later cells extend with network measures.
helionp = mean.to_frame()
helionp.ceara = helionp.head  # instance-level alias for .head, used again further down
helionp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean
player_id,player_name,Unnamed: 2_level_1
7,Friis,1.085161
11,Vertigo,0.925
13,RashiE,1.294444
15,m1kkis,0.903333
17,Zyppe,1.019643


In [7]:
# Roster per team: team name -> ids of every player that appears for that team
# in any match of `bruno`.
# The ids shown in the preview above are integers, hence Set[int].
teams: DefaultDict[str, Set[int]] = ddict(set)

# Explicit loop instead of `groupby(...).apply(...)`: the callback was used only
# for its side effect, and apply built a throwaway result of None values.
# Groups are visited in the same (team, match_id) order as before.
for (team_name, _match_id), lineup in bruno.groupby(["team", "match_id"])["player_id"]:
    teams[team_name].update(lineup.values)

In [81]:
# Accumulator of (player_id, player_id) pairs of players that faced each other;
# filled as a side effect of f() below. Re-created on every run of this cell.
against = []
def f(x: pd.DataFrame):
    """Record every cross-team player pairing of one match in ``against``.

    Parameters
    ----------
    x : pd.DataFrame
        Rows of a single match; must provide the ``team`` and ``player_id``
        columns.

    Returns
    -------
    None
        The result is delivered by appending ``(player_of_first_team,
        player_of_second_team)`` tuples to the module-level list ``against``.
        Matches that do not contain exactly two distinct teams are skipped
        silently.
    """
    try:
        t1, t2 = x["team"].unique()
    except ValueError:
        # Unpacking fails when the match has fewer or more than two teams.
        return

    # Filter each side once; the original re-evaluated these boolean masks on
    # every iteration of both nested loops.
    first_side = x["player_id"][x["team"] == t1].values
    second_side = x["player_id"][x["team"] == t2].values

    # product() yields the pairs in the same order as the nested index loops:
    # first-team player outermost, second-team player innermost.
    against.extend(itertools.product(first_side, second_side))

# Call f once per match on its team/player_id rows; f fills `against`.
# The trailing ";" suppresses the cell's displayed result.
bruno.groupby(["match_id"])[["team", "player_id"]].apply(f);

In [None]:
# Make sure the output folder for the GML exports exists.
os.makedirs("gml", exist_ok=True)

In [53]:
# Export the "played for the same team" network as a GML file.
# The file name carries the cutoff date, so each cutoff gets its own file.
if cutoff_date:
    sm_filename = "same_team_" + cutoff_date.replace("-","_") + ".gml"
else:
    sm_filename = "same_team.gml"

# The export is skipped when the file already exists.
# NOTE(review): an existing file is reused even if `bruno` has changed since it
# was written; delete it to force a rebuild.
if not os.path.isfile(f"gml/{sm_filename}"):
    # Collect the pieces in a list and join once instead of growing one string
    # with `+=` inside the loops.
    gml_parts = ["graph [\n    directed 0\n"]

    # One node per row of `helionp`, written in index order.
    for (p_id, p_name) in helionp.index:
        gml_parts.append(
            "    node [\n"
            f"        id {p_id}\n"
            f"        label \"{p_name}\"\n"
            "    ]\n"
        )

    seen_edges = set()
    for roster in teams.values():
        for pair in itertools.combinations(roster, 2):
            # Rosters are sets, so the same two players can come out as (a, b)
            # for one team and (b, a) for another; sorting gives one canonical
            # form, which the duplicate check below relies on.
            edge = tuple(sorted(pair))
            if edge in seen_edges:
                continue
            seen_edges.add(edge)

            gml_parts.append(
                "    edge [\n"
                f"        source {edge[0]}\n"
                f"        target {edge[1]}\n"
                "    ]\n"
            )
    gml_parts.append("]\n")

    # The handle is not called `f`, so the function `f` defined earlier is not
    # overwritten by a closed file object.
    with open(f"gml/{sm_filename}", "w") as gml_file:
        gml_file.write("".join(gml_parts))

    # Release the potentially large intermediates.
    del gml_parts
    del seen_edges

In [90]:
# Export the "played against each other" network as a GML file.
# The file name carries the cutoff date, so each cutoff gets its own file.
if cutoff_date:
    ag_filename = "against_" + cutoff_date.replace("-","_") + ".gml"
else:
    ag_filename = "against.gml"

# The export is skipped when the file already exists.
# NOTE(review): an existing file is reused even if `bruno` has changed since it
# was written; delete it to force a rebuild.
if not os.path.isfile(f"gml/{ag_filename}"):
    # Collect the pieces in a list and join once instead of growing one string
    # with `+=` inside the loops.
    gml_parts = ["graph [\n    directed 0\n"]

    # One node per row of `helionp`, written in index order.
    for (p_id, p_name) in helionp.index:
        gml_parts.append(
            "    node [\n"
            f"        id {p_id}\n"
            f"        label \"{p_name}\"\n"
            "    ]\n"
        )

    seen_edges = set()
    # Each entry of `against` is already a 2-tuple, so wrapping it in
    # itertools.combinations(..., 2) only ever produced that same pair.
    for pairing in against:
        # The same two players can be recorded as (a, b) in one match and
        # (b, a) in another; sorting gives one canonical form, which the
        # duplicate check below relies on.
        edge = tuple(sorted(pairing))
        if edge in seen_edges:
            continue
        seen_edges.add(edge)

        gml_parts.append(
            "    edge [\n"
            f"        source {edge[0]}\n"
            f"        target {edge[1]}\n"
            "    ]\n"
        )
    gml_parts.append("]\n")

    # The handle is not called `f`, so the function `f` defined earlier is not
    # overwritten by a closed file object.
    with open(f"gml/{ag_filename}", "w") as gml_file:
        gml_file.write("".join(gml_parts))

    # Release the potentially large intermediates.
    del gml_parts
    del seen_edges

In [91]:
# Load the same-team GML file (named by `sm_filename`) through freeman.
gsame = fm.load(f"gml/{sm_filename}")

In [116]:
# gagainst.set_all_nodes(size=10, labpos='hover')
# gagainst.set_all_edges(color=(0, 0, 0, 0.25))
# # g.move("kamada_kawai")
# gagainst.draw()

In [None]:
# Betweenness centrality of every node in the same-team graph, stored as the
# "ties" column of `helionp`.
bc = nx.betweenness_centrality(gsame)

# The values are attached by position, not by player id: this is only correct
# while the graph's nodes come in the same order as helionp's rows (the GML
# export writes nodes in helionp.index order). A stale GML file from another
# run can break that, so fail with a clear message when the sizes differ.
if len(bc) != len(helionp):
    raise ValueError(
        f"same-team graph has {len(bc)} nodes but helionp has {len(helionp)} rows; "
        f"delete gml/{sm_filename} and re-run the export cell"
    )

# Take the values straight from the dict; no intermediate DataFrame is needed.
helionp["ties"] = np.fromiter(bc.values(), dtype=float, count=len(bc))

In [92]:
# Load the opponents GML file (named by `ag_filename`) through freeman.
gagainst = fm.load(f"gml/{ag_filename}")

In [95]:
# Degree centrality of every node in the opponents graph, stored as the
# "challengers" column of `helionp`.
dc = nx.degree_centrality(gagainst)

# The values are attached by position, not by player id: this is only correct
# while the graph's nodes come in the same order as helionp's rows (the GML
# export writes nodes in helionp.index order). A stale GML file from another
# run can break that, so fail with a clear message when the sizes differ.
if len(dc) != len(helionp):
    raise ValueError(
        f"opponents graph has {len(dc)} nodes but helionp has {len(helionp)} rows; "
        f"delete gml/{ag_filename} and re-run the export cell"
    )

# Take the values straight from the dict; no intermediate DataFrame is needed.
helionp["challengers"] = np.fromiter(dc.values(), dtype=float, count=len(dc))

In [96]:
# Preview the feature table now that both network columns are attached.
helionp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,ties,challengers
player_id,player_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,Friis,1.085161,0.000284,0.015799
11,Vertigo,0.925,0.0,0.001082
13,RashiE,1.294444,0.0,0.00487
15,m1kkis,0.903333,0.0,0.001082
17,Zyppe,1.019643,1.8e-05,0.014392


In [103]:
# Ordinary least squares: mean rating explained by the two network measures.
#
# NOTE(review): no constant column is added, so the model is fitted without an
# intercept and the summary reports the *uncentered* R-squared. If an intercept
# is wanted, build the regressors with
#     sm.add_constant(helionp[["ties", "challengers"]])
# instead. Confirm which specification is intended.
x = helionp[["ties", "challengers"]]
y = helionp["mean"]

model = sm.OLS(y, x)
result = model.fit()  # `result` is inspected again in a later cell
result.summary()

0,1,2,3
Dep. Variable:,mean,R-squared (uncentered):,0.313
Model:,OLS,Adj. R-squared (uncentered):,0.313
Method:,Least Squares,F-statistic:,2103.0
Date:,"Thu, 26 Nov 2020",Prob (F-statistic):,0.0
Time:,18:00:11,Log-Likelihood:,-10664.0
No. Observations:,9242,AIC:,21330.0
Df Residuals:,9240,BIC:,21350.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
ties,48.1553,7.483,6.435,0.000,33.486,62.825
challengers,32.5999,0.571,57.122,0.000,31.481,33.719

0,1,2,3
Omnibus:,4033.125,Durbin-Watson:,0.45
Prob(Omnibus):,0.0,Jarque-Bera (JB):,21393.847
Skew:,-2.071,Prob(JB):,0.0
Kurtosis:,9.197,Cond. No.,14.2


In [1]:
# # # helionpc = helionp.copy()
# # # helionpc = helionpc[['median', 'mean', 'mdiff', 'con']]
# # helionp[helionp.isna().any(axis=1)]
# # helionp.corr()
# #  0.7137
# x = helionp["con"]
# y = helionp["var"]
# import matplotlib.pyplot as plt
# plt.hist(x)
# plt.title("con")
# # plt.plot()
# plt.show()
# plt.hist(y)
# plt.title("var")
# # plt.plot()
# plt.show()

In [None]:
# Show the p-values attribute of the fitted OLS result from the regression cell.
result.pvalues

In [None]:
# Compare how many distinct names and distinct ids the data contains; differing
# counts mean that names and ids are not in one-to-one correspondence.
for column in ("player_name", "player_id"):
    print(bruno[column].unique().shape)