In [1]:
import copy
import lightgbm as lgb
import os
import numpy as np
import sklearn.metrics

from preprocess import *


In [19]:
# Glob-style partition names. load_vectors() uses each string verbatim as the
# prefix of the saved .npy file names.
# Character classes [0-7][0-9] cover partitions 000-079 (training split).
train_base = 'gameevents_0[0-7][0-9].csv'
# Character classes [8-9][0-9] cover partitions 080-099 (held-out split).
test_base = 'gameevents_0[8-9][0-9].csv'
# Experiment tag inserted between the partition prefix and the array name
# ('game_states.npy' / 'labels.npy') when load_vectors() builds a path.
expt_suffix = 'no_team_diff_'

def load_vectors(pattern):
    """Load the saved feature matrix and label vector for one partition.

    ``pattern`` is used verbatim as the file-name prefix, and the module-level
    ``expt_suffix`` selects which experiment's arrays are read. Returns the
    pair ``(X, y)`` exactly as produced by ``np.load``.
    """
    prefix = f'{pattern}_{expt_suffix}'
    features = np.load(prefix + 'game_states.npy')
    labels = np.load(prefix + 'labels.npy')
    return features, labels

# Read both partitions up front; every later cell uses these four arrays as
# notebook-level globals.
train_X, train_y = load_vectors(pattern=train_base)
test_X, test_y = load_vectors(pattern=test_base)

In [20]:
def train(t_y, num_leaves, objective='binary', num_boost_round=200):
    """Fit a LightGBM GBDT on the module-level ``train_X`` against ``t_y``.

    Args:
        t_y: Targets passed to ``lgb.Dataset`` as the label for ``train_X``.
        num_leaves: Value for LightGBM's ``num_leaves`` parameter.
        objective: LightGBM objective name (``'binary'`` by default).
        num_boost_round: Number of boosting iterations; defaults to the
            previously hard-coded 200.

    Returns:
        The booster returned by ``lgb.train``.
    """
    param = {'num_leaves': num_leaves, 'objective': objective, 'boosting': 'gbdt'}
    if objective == 'binary':
        param['metric'] = 'binary_logloss'
    # For any other objective the metric is left unset so LightGBM uses the
    # metric that corresponds to that objective, rather than pairing e.g. a
    # regression objective with binary log-loss.
    train_data = lgb.Dataset(train_X, t_y)
    return lgb.train(param, train_data, num_boost_round=num_boost_round)

In [21]:
# First model: default (binary) objective on the original labels, num_leaves=1000.
bst = train(train_y, 1000)

[LightGBM] [Info] Number of positive: 433643, number of negative: 441060
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 488
[LightGBM] [Info] Number of data points in the train set: 874703, number of used features: 77
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.495760 -> initscore=-0.016959
[LightGBM] [Info] Start training from score -0.016959


In [5]:
# Score the training rows with the fitted booster. These predictions are later
# used as the regression targets when fitting bst2.
new_train_y = bst.predict(train_X)

In [6]:
#print(list(zip(train_y[:100], new_train_y[:100])))

In [8]:
def avg(l):
    """Return the arithmetic mean of a sized collection of numbers.

    Raises ZeroDivisionError when ``l`` is empty.
    """
    total = sum(l)
    return total / len(l)

# Mean label next to mean prediction over all training rows.
print(avg(train_y), avg(new_train_y))
# Mean prediction over the rows whose label is 0 ...
print(avg([pred for label, pred in zip(train_y, new_train_y) if label == 0]))
# ... and over the rows whose label is 1.
print(avg([pred for label, pred in zip(train_y, new_train_y) if label == 1]))

0.49576027520198285 0.4957446793531138
0.27992061485335434
0.7152601837847815


In [9]:
# Second model: same training features, but fit with a regression objective to
# the first model's predictions on the training rows instead of the labels.
bst2 = train(new_train_y, 1000, objective='regression')

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 622
[LightGBM] [Info] Number of data points in the train set: 874703, number of used features: 109
[LightGBM] [Info] Start training from score 0.495745


In [22]:
# Held-out report for the first model; predictions above 0.5 count as class 1.
print(
    sklearn.metrics.classification_report(
        y_true=test_y,
        y_pred=bst.predict(test_X) > 0.5,
    )
)

              precision    recall  f1-score   support

           0       0.68      0.70      0.69    629982
           1       0.68      0.66      0.67    622564

    accuracy                           0.68   1252546
   macro avg       0.68      0.68      0.68   1252546
weighted avg       0.68      0.68      0.68   1252546



In [12]:
# Held-out report for the second model; predictions above 0.5 count as class 1.
print(
    sklearn.metrics.classification_report(
        y_true=test_y,
        y_pred=bst2.predict(test_X) > 0.5,
    )
)

              precision    recall  f1-score   support

           0       0.68      0.70      0.69    629982
           1       0.69      0.67      0.68    622564

    accuracy                           0.68   1252546
   macro avg       0.68      0.68      0.68   1252546
weighted avg       0.68      0.68      0.68   1252546



In [13]:
def iterate_unseen_states():
    """Yield whatever ``iterate_game_events_with_state`` produces for partitions 080-099.

    The events are read with ``iterate_events_from_csv`` from the
    ``validated_all_gameevent_partitioned`` directory, and a fresh
    ``map_structure.MapStructureInfos()`` is supplied as the map metadata.
    """
    held_out_events = iterate_events_from_csv(
        'validated_all_gameevent_partitioned/gameevents_0[8-9][0-9].csv'
    )
    structure_infos = map_structure.MapStructureInfos()
    yield from iterate_game_events_with_state(held_out_events, structure_infos)

In [14]:
# Peek at the first yielded item only, to inspect its structure.
for x in iterate_unseen_states():
    print(x)
    break

(642637, <preprocess.MapStartEvent object at 0x7f13759563e0>, <preprocess.GameState object at 0x7f134788fca0>, [<preprocess.MapStartEvent object at 0x7f13759563e0>, <preprocess.SpawnEvent object at 0x7f13759575b0>, <preprocess.SpawnEvent object at 0x7f1375956d10>, <preprocess.SpawnEvent object at 0x7f1375955c00>, <preprocess.SpawnEvent object at 0x7f1375957460>, <preprocess.SpawnEvent object at 0x7f1375956530>, <preprocess.SpawnEvent object at 0x7f1375956860>, <preprocess.SpawnEvent object at 0x7f1375956d40>, <preprocess.SpawnEvent object at 0x7f1375956fb0>, <preprocess.SpawnEvent object at 0x7f1375955930>, <preprocess.BlessMaidenEvent object at 0x7f1375956140>, <preprocess.BlessMaidenEvent object at 0x7f1375956350>, <preprocess.SpawnEvent object at 0x7f1375956020>, <preprocess.GameStartEvent object at 0x7f13759573d0>, <preprocess.CarryFoodEvent object at 0x7f1375956e00>, <preprocess.CarryFoodEvent object at 0x7f1375956c80>, <preprocess.CarryFoodEvent object at 0x7f1375955780>, <prepro

In [15]:
def compute_seq(max_samples=1000, progress_every=100):
    """Yield how the model's prediction changes when Blue is given one more egg.

    Walks the items from ``iterate_unseen_states()``. For every state in which
    Blue's egg count is not 2, a deep copy with one additional Blue egg is
    vectorized alongside the original state, both rows are scored by the
    module-level ``bst``, and ``prediction(extra egg) - prediction(original)``
    is yielded.

    Args:
        max_samples: Stop once this many differences have been yielded
            (defaults to the previously hard-coded 1000).
        progress_every: Print the running count whenever it is a multiple of
            this value (defaults to 100). Pass 0 or None to disable printing.
    """
    count = 0

    for _game_id, event, game_state, _all_game_events in iterate_unseen_states():
        # Skip states where Blue already holds 2 eggs.
        # NOTE(review): presumably 2 is the maximum egg count -- confirm.
        if game_state.get_team(Team.BLUE).eggs == 2:
            continue

        if progress_every and count % progress_every == 0:
            print(count)
        count += 1
        if count > max_samples:
            break

        # Work on a deep copy so the state object handed out by the upstream
        # iterator is never modified.
        new_gs = copy.deepcopy(game_state)
        new_gs.get_team(Team.BLUE).eggs += 1

        new_encoded = vectorize_game_state(new_gs, event)
        old_encoded = vectorize_game_state(game_state, event)

        # Build the 2-row matrix explicitly: Booster.predict documents numpy
        # arrays (not plain Python lists) as an accepted input type.
        preds = bst.predict(np.array([old_encoded, new_encoded]))
        yield preds[1] - preds[0]

# Drain the generator into a list so the differences can be summarised afterwards.
deltas = list(compute_seq())

0
100
200
300
400
500
600
700
800
900
1000


In [16]:
# Number of sampled states where the extra Blue egg did not lower the prediction.
sum(d >= 0 for d in deltas)

963