Reading Match Data

In [42]:
import pandas as pd
# Load the match results table from results.csv and preview its first rows.
matches = pd.read_csv('results.csv')
matches.head()

Unnamed: 0,date,team_1,team_2,_map,result_1,result_2,map_winner,starting_ct,ct_1,t_2,t_1,ct_2,event_id,match_id,rank_1,rank_2,map_wins_1,map_wins_2,match_winner
0,2020-03-18,Recon 5,TeamOne,Dust2,0,16,2,2,0,1,0,15,5151,2340454,62,63,0,2,2
1,2020-03-18,Recon 5,TeamOne,Inferno,13,16,2,2,8,6,5,10,5151,2340454,62,63,0,2,2
2,2020-03-18,New England Whalers,Station7,Inferno,12,16,2,1,9,6,3,10,5243,2340461,140,118,12,16,2
3,2020-03-18,Rugratz,Bad News Bears,Inferno,7,16,2,2,0,8,7,8,5151,2340453,61,38,0,2,2
4,2020-03-18,Rugratz,Bad News Bears,Vertigo,8,16,2,2,4,5,4,11,5151,2340453,61,38,0,2,2


In [43]:
# Size of the loaded table as (rows, columns).
matches.shape

(45773, 19)

Cleaning Match Data

In [44]:
# Inspect the dtype pandas inferred for each column; "date" is still a plain object column here.
matches.dtypes

Unnamed: 0,0
date,object
team_1,object
team_2,object
_map,object
result_1,int64
result_2,int64
map_winner,int64
starting_ct,int64
ct_1,int64
t_2,int64


In [45]:
# Parse the "date" strings into datetime64 values so rows can be sorted and
# filtered chronologically, then re-check the dtypes to confirm the conversion.
matches["date"] = pd.to_datetime(matches["date"])
matches.dtypes

Unnamed: 0,0
date,datetime64[ns]
team_1,object
team_2,object
_map,object
result_1,int64
result_2,int64
map_winner,int64
starting_ct,int64
ct_1,int64
t_2,int64


Creating Match Predictors

In [46]:
# Encode the categorical inputs and the label as integer category codes:
#   map_code <- _map, opp_code <- team_2, ct_code <- starting_ct, target <- map_winner.
matches = matches.assign(
    map_code=matches["_map"].astype("category").cat.codes,
    opp_code=matches["team_2"].astype("category").cat.codes,
    ct_code=matches["starting_ct"].astype("category").cat.codes,
    target=matches["map_winner"].astype("category").cat.codes,
)
matches

Unnamed: 0,date,team_1,team_2,_map,result_1,result_2,map_winner,starting_ct,ct_1,t_2,...,match_id,rank_1,rank_2,map_wins_1,map_wins_2,match_winner,map_code,opp_code,ct_code,target
0,2020-03-18,Recon 5,TeamOne,Dust2,0,16,2,2,0,1,...,2340454,62,63,0,2,2,3,1053,1,1
1,2020-03-18,Recon 5,TeamOne,Inferno,13,16,2,2,8,6,...,2340454,62,63,0,2,2,4,1053,1,1
2,2020-03-18,New England Whalers,Station7,Inferno,12,16,2,1,9,6,...,2340461,140,118,12,16,2,4,1014,0,1
3,2020-03-18,Rugratz,Bad News Bears,Inferno,7,16,2,2,0,8,...,2340453,61,38,0,2,2,4,136,1,1
4,2020-03-18,Rugratz,Bad News Bears,Vertigo,8,16,2,2,4,5,...,2340453,61,38,0,2,2,9,136,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45768,2015-11-05,G2,E-frag.net,Inferno,13,16,2,1,8,7,...,2299059,7,16,1,2,2,4,295,0,1
45769,2015-11-05,G2,E-frag.net,Dust2,16,13,1,1,10,5,...,2299059,7,16,1,2,2,3,295,0,0
45770,2015-11-04,CLG,Liquid,Inferno,16,12,1,1,7,8,...,2299011,10,14,16,12,1,4,620,0,0
45771,2015-11-03,NiP,Dignitas,Train,16,4,1,2,4,1,...,2299001,6,12,16,4,1,8,274,1,0


Creating Initial Model

In [52]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with 50 trees; random_state is fixed so repeated fits give the same model.
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

# Time-based split: rows dated before 2018-01-01 train the model, the rest evaluate it.
train = matches[matches["date"] < '2018-01-01']
test = matches[matches["date"] >= '2018-01-01']

# Baseline features: the integer codes derived from _map, team_2 and starting_ct.
predictors = ["map_code", "opp_code", "ct_code"]
rf.fit(train[predictors], train["target"])

In [53]:
# Predict the target class for every row of the held-out test split.
preds = rf.predict(test[predictors])

In [54]:
from sklearn.metrics import accuracy_score
# Share of test rows whose predicted class equals the actual target.
accuracy_score(test["target"], preds)

0.515420981539847

In [59]:
# Confusion table: rows are the actual target, columns the predicted target.
combined = pd.DataFrame(dict(actual=test["target"], predicted=preds))
pd.crosstab(index=combined["actual"], columns=combined["predicted"])

predicted,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,8535,5747
1,7168,5202


In [56]:
from sklearn.metrics import precision_score
# Precision for class 1: of the rows predicted as target == 1, the share that really are 1.
precision_score(test["target"], preds)

0.4751118823636862

Improving Model Precision

In [62]:
# Split the matches by team_1 and pull out one team's rows ("fnatic") as a sample group.
grouped_matches = matches.groupby("team_1")
group = grouped_matches.get_group("fnatic")
group

Unnamed: 0,date,team_1,team_2,_map,result_1,result_2,map_winner,starting_ct,ct_1,t_2,...,match_id,rank_1,rank_2,map_wins_1,map_wins_2,match_winner,map_code,opp_code,ct_code,target
9,2020-03-17,fnatic,BIG,Mirage,12,16,2,1,9,6,...,2340397,5,18,1,2,2,5,121,0,1
21,2020-03-17,fnatic,BIG,Overpass,16,7,1,2,4,4,...,2340397,5,18,1,2,2,7,121,1,0
26,2020-03-17,fnatic,BIG,Inferno,7,16,2,1,5,10,...,2340397,5,18,1,2,2,4,121,0,1
405,2020-03-07,fnatic,Dignitas,Overpass,16,3,1,1,15,0,...,2340160,4,44,2,1,1,7,274,0,0
406,2020-03-07,fnatic,Dignitas,Inferno,19,15,1,2,4,11,...,2340160,4,44,2,1,1,4,274,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45621,2015-11-18,fnatic,TSM,Dust2,16,11,1,2,5,7,...,2299361,4,3,2,0,1,3,1043,1,0
45629,2015-11-18,fnatic,TSM,Inferno,16,14,1,2,8,7,...,2299361,4,3,2,0,1,4,1043,1,0
45671,2015-11-17,fnatic,Envy,Inferno,16,10,1,1,7,8,...,2299339,4,1,16,10,1,4,338,0,0
45675,2015-11-16,fnatic,Natus Vincere,Dust2,16,3,1,2,3,1,...,2299286,4,5,2,0,1,3,711,1,0




In [63]:
def rolling_averages(group, cols, new_cols, window=3):
    """Add trailing rolling means of ``cols`` to one group of rows.

    The rows are sorted by ``"date"`` and, for each row, the mean of the
    ``window`` preceding rows is computed for every column in ``cols``.
    ``closed='left'`` leaves the current row out of its own window, so each
    value summarises earlier rows only.

    Args:
        group: DataFrame with a ``"date"`` column and the columns in ``cols``.
            It is not modified; a sorted copy is returned.
        cols: Names of the columns to average.
        new_cols: Names for the resulting columns, one per entry of ``cols``
            and in the same order.
        window: Number of preceding rows in each average. Defaults to 3.

    Returns:
        The date-sorted rows with ``new_cols`` added. Rows whose averages are
        NaN (for example the first ``window`` rows, which lack enough
        history) are dropped.

    Raises:
        ValueError: If ``cols`` and ``new_cols`` differ in length.
    """
    if len(cols) != len(new_cols):
        raise ValueError("cols and new_cols must have the same length")
    ordered = group.sort_values("date")
    rolling_stats = ordered[cols].rolling(window, closed='left').mean()
    # Column-wise assignment pairs cols[i] with new_cols[i]; rows align on the index.
    ordered[new_cols] = rolling_stats
    return ordered.dropna(subset=new_cols)

In [64]:
# Columns to average and the names of the derived columns (same name plus "_rolling").
cols = ["ct_1", "ct_2", "t_1", "t_2"]
new_cols = [f"{c}_rolling" for c in cols]

# Try the helper on the single sample group before running it for every team.
rolling_averages(group, cols, new_cols)

Unnamed: 0,date,team_1,team_2,_map,result_1,result_2,map_winner,starting_ct,ct_1,t_2,...,map_wins_2,match_winner,map_code,opp_code,ct_code,target,ct_1_rolling,ct_2_rolling,t_1_rolling,t_2_rolling
45629,2015-11-18,fnatic,TSM,Inferno,16,14,1,2,8,7,...,0,1,4,1043,1,0,7.333333,1.333333,8.666667,4.000000
45621,2015-11-18,fnatic,TSM,Dust2,16,11,1,2,5,7,...,0,1,3,1043,1,0,6.000000,3.666667,10.000000,5.333333
45577,2015-11-24,fnatic,Dignitas,Cache,16,10,1,2,10,1,...,1,1,0,274,1,0,6.666667,4.333333,9.333333,7.333333
45579,2015-11-24,fnatic,Dignitas,Train,16,8,1,2,5,4,...,1,1,8,274,1,0,7.666667,6.666667,8.333333,5.000000
45578,2015-11-24,fnatic,Dignitas,Cobblestone,17,19,2,2,11,4,...,1,1,1,274,1,1,6.666667,5.666667,9.333333,4.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
406,2020-03-07,fnatic,Dignitas,Inferno,19,15,1,2,4,11,...,1,1,4,274,1,0,7.333333,4.333333,8.333333,5.000000
405,2020-03-07,fnatic,Dignitas,Overpass,16,3,1,1,15,0,...,1,1,7,274,0,0,5.666667,5.333333,9.666667,6.666667
26,2020-03-17,fnatic,BIG,Inferno,7,16,2,1,5,10,...,2,2,4,121,0,1,8.666667,4.666667,6.666667,6.333333
21,2020-03-17,fnatic,BIG,Overpass,16,7,1,2,4,4,...,2,2,7,121,1,0,8.000000,4.333333,4.666667,7.000000


In [68]:
# Build the rolling features team by team and stack the results into one frame.
# Iterating over the groups keeps the "team_1" column in each piece and avoids
# the deprecated DataFrameGroupBy.apply behaviour of operating on grouping columns.
matches_rolling = pd.concat(
    [
        rolling_averages(team_matches, cols, new_cols)
        for _, team_matches in matches.groupby("team_1")
    ],
    # Replace the original row labels with a fresh 0..n-1 index.
    ignore_index=True,
)
matches_rolling

  matches_rolling = matches.groupby("team_1").apply(lambda x: rolling_averages(x, cols, new_cols))


Unnamed: 0,date,team_1,team_2,_map,result_1,result_2,map_winner,starting_ct,ct_1,t_2,...,map_wins_2,match_winner,map_code,opp_code,ct_code,target,ct_1_rolling,ct_2_rolling,t_1_rolling,t_2_rolling
0,2019-11-08,100 Thieves,ENCE,Inferno,16,9,1,2,7,3,...,0,1,4,302,1,0,6.333333,2.666667,9.666667,5.000000
1,2019-11-10,100 Thieves,Astralis,Train,3,16,2,2,0,4,...,3,2,8,107,1,1,7.333333,4.000000,8.666667,2.333333
2,2019-11-10,100 Thieves,Astralis,Nuke,5,16,2,1,4,11,...,3,2,6,107,0,1,6.000000,7.000000,5.666667,3.666667
3,2019-11-10,100 Thieves,Astralis,Vertigo,14,16,2,2,7,8,...,3,2,9,107,1,1,3.666667,7.666667,4.333333,6.000000
4,2019-11-17,100 Thieves,eUnited,Mirage,16,7,1,2,6,2,...,0,1,5,1262,1,0,3.666667,8.333333,3.666667,7.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42382,2017-02-06,zARLANS,E-Corp,Nuke,10,16,2,1,7,8,...,1,1,6,294,0,1,5.333333,8.000000,5.333333,6.666667
42383,2017-02-06,zARLANS,E-Corp,Overpass,16,8,1,1,11,4,...,1,1,7,294,0,0,5.666667,8.333333,5.333333,6.333333
42384,2017-02-22,zARLANS,spray'n'pray,Inferno,13,16,2,1,5,10,...,16,2,4,1383,0,1,9.000000,6.666667,5.000000,5.333333
42385,2017-05-09,zARLANS,spray'n'pray,Mirage,19,22,2,2,10,5,...,22,2,5,1383,1,1,7.666667,6.000000,5.333333,7.333333


Retraining Model

In [69]:
def make_predictions(data, predictors, cutoff='2018-01-01', model=None):
    """Fit a classifier on rows before ``cutoff`` and score it on the rest.

    Args:
        data: DataFrame with a ``"date"`` column, a ``"target"`` column and
            every column named in ``predictors``.
        predictors: Names of the feature columns.
        cutoff: Rows dated before this value are used for training, rows dated
            on or after it for testing. Defaults to ``'2018-01-01'``.
        model: Estimator exposing ``fit`` and ``predict``. When omitted, the
            notebook-level ``rf`` is used (and refitted in place).

    Returns:
        A ``(combined, precision)`` tuple: ``combined`` holds the ``actual``
        and ``predicted`` labels indexed like the test rows, and ``precision``
        is ``precision_score`` of the predictions on the test rows.
    """
    # Resolve the global lazily so existing two-argument calls behave as before.
    estimator = rf if model is None else model
    train = data[data["date"] < cutoff]
    test = data[data["date"] >= cutoff]
    estimator.fit(train[predictors], train["target"])
    preds = estimator.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], predicted=preds), index=test.index)
    precision = precision_score(test["target"], preds)
    return combined, precision

In [71]:
# Refit on the rolling-feature table using the baseline predictors plus the rolling averages.
combined, precision = make_predictions(matches_rolling, predictors + new_cols)
precision

0.4894106256807455

In [72]:
# Attach match context to each prediction. `combined` is indexed like the test rows of
# matches_rolling, so merging on the index lines each prediction up with its source row.
combined = combined.merge(matches_rolling[["date", "team_1", "team_2", "map_winner"]], left_index=True, right_index=True)
combined

Unnamed: 0,actual,predicted,date,team_1,team_2,map_winner
0,0,0,2019-11-08,100 Thieves,ENCE,1
1,1,0,2019-11-10,100 Thieves,Astralis,2
2,1,0,2019-11-10,100 Thieves,Astralis,2
3,1,1,2019-11-10,100 Thieves,Astralis,2
4,0,1,2019-11-17,100 Thieves,eUnited,1
...,...,...,...,...,...,...
42364,1,0,2019-02-21,x6tence Galaxy,Sprout,2
42365,1,1,2019-02-21,x6tence Galaxy,Sprout,2
42366,1,0,2019-03-07,x6tence Galaxy,Epsilon,2
42367,0,1,2019-03-07,x6tence Galaxy,Epsilon,1


**Next Steps:**
Add more informative predictors, such as the team ranks already in the dataset (`rank_1`, `rank_2`) or player statistics like kills, deaths, and assists.