In [1]:
#import libraries and read in data
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import *

from IPython.display import clear_output

def read_dataset(template, start_idx, end_idx):
    frames = [ pd.read_json(f) for f in [template.format(i) for i in range(start_idx, end_idx+1)] ]
    return pd.concat(frames, ignore_index = True)

dftrain = read_dataset("../datasets/dataset_initial/dataset_{:02}.json", 0, 13)
y_train = dftrain.pop('round_winner').map(lambda s: 0 if s == "CT" else 1)

dfeval = read_dataset("../datasets/dataset_initial/dataset_{:02}.json", 13, 17)
y_eval = dfeval.pop('round_winner').map(lambda s: 0 if s == "CT" else 1)

dftest = read_dataset("../datasets/dataset_second/dataset_{:02}.json", 0, 6 )
y_test = dftest.pop('round_winner').map(lambda s: 0 if s == "CT" else 1)

In [3]:
game_map_coords = {
    "de_cache":    (-2000, 3250),
    "de_dust2":    (-2476, 3239),
    "de_inferno":  (-2087, 3870),
    "de_mirage":   (-3230, 1713),
    "de_nuke":     (-3453, 2887),
    "de_overpass": (-4831, 1781),
    "de_train":    (-2477, 2392),
    "de_vertigo":  (-3168, 1762),
}
game_map_scales = {
    "de_cache":    5.5,
    "de_dust2":    4.4,
    "de_inferno":  4.9,
    "de_mirage":   5.0,
    "de_nuke":     7.0,
    "de_overpass": 5.2,
    "de_train":    4.7,
    "de_vertigo":  4.0,
}
map_names = ["de_cache","de_dust2","de_inferno","de_mirage","de_nuke","de_overpass","de_train","de_vertigo"]


def coordx(x, map_name):
    '''Calculate radar coordinates from game coordinates for the chosen map'''
    return int(round((x['x']-game_map_coords[map_name][0]) / game_map_scales[map_name]))
    
def coordy(y, map_name):
    return int(abs(round((y['y']-game_map_coords[map_name][1]) / game_map_scales[map_name])))

In [4]:
#feature helper functions and lists of features and vocabs
CATEGORICAL_COLUMNS = ['round_status', 'map']
NUMERIC_COLUMNS = ['round_status_time_left']
INTEGER_COLUMNS = ['alive_players_t', 'alive_players_ct', "health_t", "health_ct", "armor_t",\
                   "armor_ct", "helmets_t", "helmets_ct", "has_defuser", "money_t", "money_ct", \
                  "primary0_t", "primary0_ct","primary1_t", "primary1_ct","primary2_t", "primary2_ct",\
                   "primary3_t", "primary3_ct","primary4_t", "primary4_ct", "secondary0_t", "secondary0_ct", \
                  "secondary1_t", "secondary1_ct", "secondary2_t", "secondary2_ct", "secondary3_t", "secondary3_ct",\
                  "secondary4_t", "secondary4_ct", 'planted', 'posx0_t', 'posx0_ct', 'posx1_t', 'posx1_ct', \
                   'posx2_t', 'posx3_ct', 'posx4_t', 'posx4_ct', 'posy0_t', 'posy0_ct', 'posy1_t', 'posy1_ct',\
                  'posy2_t', 'posy2_ct', 'posy3_t', 'posy3_ct', 'posy4_t', 'posy4_ct', 'map_num', 'grenades_t', \
                  'grenades_ct']

feature_names =  NUMERIC_COLUMNS + INTEGER_COLUMNS

#features used in example doc from competition
features_original = ['round_status_time_left', 'alive_players_t', 'alive_players_ct', "health_t", "health_ct", "armor_t",\
                   "armor_ct", "helmets_t", "helmets_ct", "money_t", "money_ct" ]

#a set of features that has basically the same info as feature_names but in a compacted form.
feature_names_reduced = ['round_status_time_left', 'alive_players_t', 'alive_players_ct', "health_t",\
                         "health_ct", "armor_t", "armor_ct", "helmets_t", "helmets_ct", "has_defuser",\
                         "money_t", "money_ct", "best_gun0_t", "best_gun0_ct","best_gun1_t", "best_gun1_ct",\
                         "best_gun2_t", "best_gun2_ct", "best_gun3_t", "best_gun3_ct","best_gun4_t",\
                         "best_gun4_ct", 'planted', 'map_num', 'pos0_t', 'pos0_ct', 'pos1_t', 'pos1_ct',\
                         'pos2_t', 'pos2_ct', 'pos3_t', 'pos3_ct','pos4_t', 'pos4_ct', 'grenades_t',\
                         'grenades_ct', 'smokes_t', 'smokes_ct', 'flashes_t','flashes_ct', "damage_grenades_t",\
                         "damage_grenades_ct"]

#equipment slots: 0-8 to get all equipment, 0-3 to get all guns.
primaries = ['Ak47', 'Aug', 'Awp', 'Bizon', 'Famas', 'G3sg1', 'GalilAr','M4a1S',\
             'M4a4', 'Mac10', 'Mag7','Mp5sd', 'Mp7', 'Mp9', 'Negev','Nova', 'P90', 'Sawedoff', 'Scar20',\
             'Sg553', 'Ssg08', 'Ump45', 'Xm1014' ]
pistols = ['Cz75Auto', 'Deagle', 'Elite', 'FiveSeven', 'Glock', 'P2000', 'P250','R8Revolver', 'Tec9', 'UspS']

# melee = ['ZeusX27']

grenades = ['MolotovGrenade', 'HeGrenade', 'DecoyGrenade', 'Flashbang', 'IncendiaryGrenade', 'SmokeGrenade']

equipment = ['C4']

def get_attr(ds, team, attr=None):
    """helper function provided by competition"""
    team_players = map(lambda players: filter(lambda p: p["team"] == team, players), ds['alive_players'])
    if attr:
        team_players = map(lambda players: map(lambda p: p[attr], players), team_players)

    return list(map(lambda p: list(p), team_players))

#finds primary and secondary (pistol) weapons players have. returns -1 if no primary or secondary. weapons in 
#alphabetical order and numbered on index. ex. primary 0 is ak47, pistol 2 is elite 
index = 0
def get_primary(inventory):
    """searches the inventory of a player to see if they are holding a non pistol weapon"""
    if len(inventory) > index:
        for i in range(0,4):
            if len(inventory[index]) > i and inventory[index][i] and inventory[index][i]['item_type'] in primaries:
                return primaries.index(inventory[index][i]['item_type']) + len(pistols)
    return -1

def get_secondary(inventory):
    """searches the inventory for the pistol the player is carrying"""
    if len(inventory) > index:
        for i in range(0,4):
            if len(inventory[index]) > i and inventory[index][i] and inventory[index][i]['item_type'] in pistols:
                return pistols.index(inventory[index][i]['item_type']) 
    return -1
            
def get_best_gun(inventory):
    """returns the primary gun if one exists. if not, returns secondary. This gets the gun most likely to be used"""
    x = get_primary(inventory)
    if x > -1:
        return x
    else:
        return get_secondary(inventory)

def is_planted(l):
    """converts to number"""
    if l:
        return 1
    return -1

def get_posx(pos):
    """gets the x-coord of the player. returns -1 if player is dead"""
    if len(pos[0]) > index and pos[0][index]:
        if coordx(pos[0][index][-1], pos[1]):
            return coordx(pos[0][index][-1], pos[1])
    return -1

def get_posy(pos):
    """gets the y-coord of the player. returns -1 if player is dead"""
    if len(pos[0]) > index and pos[0][index]:
        return coordy(pos[0][index][-1], pos[1])
    return -1

def get_pos(pos):
    """encodes the coord of the player as 1000*y + x. reduces number of features."""
    if len(pos[0]) > index and pos[0][index]:
        return coordx(pos[0][index][-1], pos[1]) + coordy(pos[0][index][-1], pos[1]) * 1000
    return -1

def get_map(m):
    """converts map name to a number associated with the list included above"""
    return map_names.index(m)

def get_gren(inventory):
    count = 0
    for x in range(0,5): 
        if len(inventory) > x:
            for i in range(0,11):
                if len(inventory[x]) > i and inventory[x][i] and inventory[x][i]['item_type'] in grenades:
                    count+=1
    return count

def get_smokes(inventory):
    count = 0
    for x in range(0,5): 
        if len(inventory) > x:
            for i in range(0,11):
                if len(inventory[x]) > i and inventory[x][i] and inventory[x][i]['item_type'] =='SmokeGrenade':
                    count+=1
    return count

def get_flashes(inventory):
    count = 0
    for x in range(0,5): 
        if len(inventory) > x:
            for i in range(0,11):
                if len(inventory[x]) > i and inventory[x][i] and inventory[x][i]['item_type'] == 'Flashbang':
                    count+=1
    return count
def get_damage_grenades(inventory):
    count = 0
    for x in range(0,5): 
        if len(inventory) > x:
            for i in range(0,11):
                if len(inventory[x]) > i and inventory[x][i] and inventory[x][i]['item_type'] in ['MolotovGrenade', 'HeGrenade','IncendiaryGrenade']:
                    count+=1
    return count

In [5]:
#build features
for ds in [dftrain, dfeval]:
    ds['alive_players_t']  = list(map(len ,get_attr(ds, "Terrorist")))
    ds['alive_players_ct'] = list(map(len, get_attr(ds, "CT")))
    ds['health_ct']        = list(map(sum, get_attr(ds, "CT", "health")))
    ds['health_t']         = list(map(sum, get_attr(ds, "Terrorist", "health")))
    ds['armor_t']          = list(map(sum, get_attr(ds, "Terrorist", "armor")))
    ds['armor_ct']         = list(map(sum, get_attr(ds, "CT", "armor")))
    ds['helmets_t']        = list(map(sum, get_attr(ds, "Terrorist", "has_helmet")))
    ds['helmets_ct']       = list(map(sum, get_attr(ds, "CT", "has_helmet")))
    ds['has_defuser']      = list(map(sum, get_attr(ds, "has_defuser")))
    ds['money_t']          = list(map(sum, get_attr(ds, "Terrorist", "money")))
    ds['money_ct']         = list(map(sum, get_attr(ds, "CT", "money")))
    ds['planted']          = list(map(is_planted, list(ds['planted_bomb'])))
    ds['map_num']          = list(map(get_map, list(ds['map'])))
    ds['grenades_t']         = list(map(get_gren, get_attr(ds, "Terrorist", 'inventory')))
    ds['grenades_ct']        = list(map(get_gren, get_attr(ds, "CT", 'inventory')))
    ds['smokes_t']         = list(map(get_smokes, get_attr(ds, "Terrorist", 'inventory')))
    ds['smokes_ct']        = list(map(get_smokes, get_attr(ds, "CT", 'inventory')))
    ds['flashes_t']         = list(map(get_flashes, get_attr(ds, "Terrorist", 'inventory')))
    ds['flashes_ct']        = list(map(get_flashes, get_attr(ds, "CT", 'inventory')))
    ds['damage_grenades_t']         = list(map(get_damage_grenades, get_attr(ds, "Terrorist", 'inventory')))
    ds['damage_grenades_ct']        = list(map(get_damage_grenades, get_attr(ds, "CT", 'inventory')))
#     for i in range(0,5):
#         index = i
#         ds['primary{}_t'.format(i)]  = list(map(get_primary, get_attr(ds, "Terrorist", "inventory")))
#         ds['primary{}_ct'.format(i)] = list(map(get_primary, get_attr(ds, "CT", "inventory")))
#     for i in range(0,5):
#         index = i
#         ds['secondary{}_t'.format(i)]  = list(map(get_secondary, get_attr(ds, "Terrorist", "inventory")))
#         ds['secondary{}_ct'.format(i)] = list(map(get_secondary, get_attr(ds, "CT", "inventory"))) 
    for i in range(0,5):
        index = i
        ds['best_gun{}_t'.format(i)]  = list(map(get_best_gun, get_attr(ds, "Terrorist", "inventory")))
        ds['best_gun{}_ct'.format(i)] = list(map(get_best_gun, get_attr(ds, "CT", "inventory"))) 
    for i in range(0,5):
        index = i
        ds['pos{}_t'.format(i)]  = list(map(get_pos, zip(get_attr(ds, "Terrorist", "position_history"), ds['map']))) 
        ds['pos{}_ct'.format(i)] = list(map(get_pos, zip(get_attr(ds, "CT", "position_history"), ds['map'])))

print(dftrain.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71445 entries, 0 to 71444
Data columns (total 50 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   map                     71445 non-null  object 
 1   current_score           71445 non-null  object 
 2   round_status            71445 non-null  object 
 3   round_status_time_left  71445 non-null  float64
 4   alive_players           71445 non-null  object 
 5   active_smokes           71445 non-null  object 
 6   active_molotovs         71445 non-null  object 
 7   previous_kills          71445 non-null  object 
 8   planted_bomb            8070 non-null   object 
 9   alive_players_t         71445 non-null  int64  
 10  alive_players_ct        71445 non-null  int64  
 11  health_ct               71445 non-null  int64  
 12  health_t                71445 non-null  int64  
 13  armor_t                 71445 non-null  int64  
 14  armor_ct                71445 non-null

In [7]:
# guns = []
# for i in range(0,5):
#     guns = np.concatenate([guns, dftrain['primary{}_t'.format(i)].unique()])
#     guns = np.concatenate([guns, dftrain['primary{}_ct'.format(i)].unique()])
    
# l = []
# for datum in list(dftrain['posx0_t']) + list(dftrain['posx0_ct']):
#     l.append(datum)
# print(max(l), min(l))

print(dftrain['alive_players'][0])


[{'health': 100, 'armor': 0, 'has_helmet': False, 'has_defuser': False, 'money': 800, 'team': 'CT', 'position_history': [{'x': 160.123, 'y': 2369.68, 'z': -119.91875}], 'inventory': [{'item_type': 'UspS', 'clip_ammo': 12, 'reserve_ammo': 24}]}, {'health': 100, 'armor': 0, 'has_helmet': False, 'has_defuser': False, 'money': 800, 'team': 'CT', 'position_history': [{'x': 182.25, 'y': 2439.01, 'z': -120.96875}], 'inventory': [{'item_type': 'UspS', 'clip_ammo': 12, 'reserve_ammo': 24}]}, {'health': 100, 'armor': 0, 'has_helmet': False, 'has_defuser': False, 'money': 800, 'team': 'CT', 'position_history': [{'x': 334.369, 'y': 2433.73, 'z': -120.36257}], 'inventory': [{'item_type': 'UspS', 'clip_ammo': 12, 'reserve_ammo': 24}]}, {'health': 100, 'armor': 0, 'has_helmet': False, 'has_defuser': False, 'money': 800, 'team': 'CT', 'position_history': [{'x': 351.392, 'y': 2352.94, 'z': -120.50442}], 'inventory': [{'item_type': 'P2000', 'clip_ammo': 13, 'reserve_ammo': 52}]}, {'health': 100, 'armor'

In [9]:
optimized_features = ['round_status_time_left', 'alive_players_t', 'alive_players_ct', "health_t",\
                         "health_ct", "armor_t", "armor_ct", "helmets_t", "helmets_ct", 'map_num', \
                         "money_t", "money_ct", "best_gun0_t", "best_gun0_ct","best_gun1_t", "best_gun1_ct",\
                         "best_gun2_t", "best_gun2_ct", "best_gun3_t", "best_gun3_ct","best_gun4_t",\
                         "best_gun4_ct", 'planted', 'smokes_t', 'smokes_ct']


model1 = ensemble.ExtraTreesClassifier(n_estimators=75, max_depth=93, criterion='entropy') #.769

#model1 = tree.DecisionTreeClassifier() #.73
#model1 = tree.DecisionTreeClassifier() #0.7
#model1 = naive_bayes.GaussianNB() #.67
#model1 = naive_bayes.MultinomialNB() #.67
#model1 = naive_bayes.BernoulliNB() #.67

# model1.fit(df_train[optimized_features], y_train)

#model2 = neighbors.KNeighborsClassifier() #.72
#model2.fit(principalDf, y_train)

#model3 = naive_bayes.GaussianNB()
#model4 = linear_model.SGDClassifier() #.67
#model = ensemble.VotingClassifier(estimators=[('etc', model1), ('knn', model2), ('gaus', model3), ('sgd', model4)],\
#                                 weights=[.4,.2,.2,.2])

#model.fit(df_train, y_train)
#print(metrics.accuracy_score(model1.predict(df_eval[optimized_features]), y_eval) )

In [15]:
cv = model_selection.ShuffleSplit(n_splits=10, test_size=0.2)#, random_state=0
scores = model_selection.cross_val_score(model1, pd.concat([df_train, df_eval, df_test])[optimized_features], pd.concat([y_train, y_eval, y_test]), cv=cv)

import statistics 

print(scores, scores.mean(), statistics.variance(scores))
#w/o pos: [0.83238652 0.83162906 0.82886977] 0.8309617847030605 3.4258208461162907e-06
#full features: [0.81501921 0.81853595 0.81069091] 0.8147486879835525 1.5441075160863312e-05

[0.84274415 0.84337066 0.8430574  0.83730128 0.84505443 0.84082544
 0.8440755  0.84309656 0.84481948 0.84548516] 0.8429830057169708 5.808963146058534e-06


In [29]:
for ds in [dftest]:
    ds['alive_players_t']  = list(map(len ,get_attr(ds, "Terrorist")))
    ds['alive_players_ct'] = list(map(len, get_attr(ds, "CT")))
    ds['health_ct']        = list(map(sum, get_attr(ds, "CT", "health")))
    ds['health_t']         = list(map(sum, get_attr(ds, "Terrorist", "health")))
    ds['armor_t']          = list(map(sum, get_attr(ds, "Terrorist", "armor")))
    ds['armor_ct']         = list(map(sum, get_attr(ds, "CT", "armor")))
    ds['helmets_t']        = list(map(sum, get_attr(ds, "Terrorist", "has_helmet")))
    ds['helmets_ct']       = list(map(sum, get_attr(ds, "CT", "has_helmet")))
    ds['has_defuser']      = list(map(sum, get_attr(ds, "has_defuser")))
    ds['money_t']          = list(map(sum, get_attr(ds, "Terrorist", "money")))
    ds['money_ct']         = list(map(sum, get_attr(ds, "CT", "money")))
    ds['planted']          = list(map(is_planted, list(ds['planted_bomb'])))
    ds['map_num']          = list(map(get_map, list(ds['map'])))
    ds['smokes_t']         = list(map(get_smokes, get_attr(ds, "Terrorist", 'inventory')))
    ds['smokes_ct']        = list(map(get_smokes, get_attr(ds, "CT", 'inventory')))
    ds['flashes_t']         = list(map(get_flashes, get_attr(ds, "Terrorist", 'inventory')))
    ds['flashes_ct']        = list(map(get_flashes, get_attr(ds, "CT", 'inventory')))
 
    for i in range(0,5):
        index = i
        ds['best_gun{}_t'.format(i)]  = list(map(get_best_gun, get_attr(ds, "Terrorist", "inventory")))
        ds['best_gun{}_ct'.format(i)] = list(map(get_best_gun, get_attr(ds, "CT", "inventory"))) 

0.7745981780893786


In [None]:
model1.fit(pd.concat([dftrain, dfeval])[optimized_features], pd.concat([y_train, y_eval]))

print(metrics.accuracy_score(model1.predict(dftest[optimized_features]), y_test) )

In [32]:
import pickle
model1.fit(pd.concat([dftrain, dftest, dfeval])[optimized_features], pd.concat([y_train, y_test, y_eval]))
pickle.dump(model1, open("StaccAttacc_model.sav", "wb") )

In [13]:
#not done with this yet but it is a paramater optimizer for the ExtraTreesClassifier. I don't think it
# will make a big difference, but it might help push it a little better. 
#so one run i got: n_estimators=158, max_depth=24, criterion='gini'
#and another: 'criterion': 'entropy', 'max_depth': 55, 'n_estimators': 111
#so the hyper params seem to not really be a factor. 

# from scipy.stats import uniform
# from scipy.stats import randint
# params = {"n_estimators": randint(25,175), "max_depth": randint(10, 100), "criterion": ['gini', 'entropy']}

# opt_param = model_selection.RandomizedSearchCV(estimator=model1, param_distributions=params, cv = 5, n_iter = 20, n_jobs=-1)
# opt_param.fit(pd.concat([df_train, df_eval])[optimized_features], pd.concat([y_train, y_eval]) )
# print("\n========================================================")
# print(" Results from Random Search " )
# print("========================================================")
# print("\n The best estimator across ALL searched params:\n",
#           opt_param.best_estimator_)
# print("\n The best score across ALL searched params:\n",
#           opt_param.best_score_)
# print("\n The best parameters across ALL searched params:\n",
#           opt_param.best_params_)
# print("\n ========================================================")

KeyboardInterrupt: 