In [1]:
import copy
import lightgbm as lgb
import os
import numpy as np
import sklearn.metrics

from preprocess import *

In [2]:
# File-name stems for the train / test splits. load_vectors() interpolates
# these strings verbatim into the .npy file names it opens.
train_base = 'gameevents_0[0-7][0-9].csv'
test_base = 'gameevents_0[8-9][0-9].csv'
# Experiment tag: part of the input .npy file names and the prefix of the
# predictions file written further down.
expt_suffix = 'gold_symmetry_no_id'

In [3]:
def load_vectors(pattern):
    """Load the saved state and label arrays for one data split.

    The file names are built from ``pattern`` and the module-level
    ``expt_suffix``: ``<pattern>_<expt_suffix>_states.npy`` and
    ``<pattern>_<expt_suffix>_labels.npy``.

    Args:
        pattern: File-name stem identifying the split.

    Returns:
        A ``(states, labels)`` tuple of the two loaded arrays.
    """
    states_path = f'{pattern}_{expt_suffix}_states.npy'
    labels_path = f'{pattern}_{expt_suffix}_labels.npy'
    return np.load(states_path), np.load(labels_path)

# Load the state vectors and labels for the training and held-out test splits.
train_X, train_y = load_vectors(train_base)
test_X, test_y = load_vectors(test_base)

In [4]:
def train(t_y, num_leaves, objective='binary', num_boost_round=200):
    """Fit a LightGBM gradient-boosted model on the module-level ``train_X``.

    Args:
        t_y: Training labels passed to ``lgb.Dataset`` alongside ``train_X``.
        num_leaves: Value for LightGBM's ``num_leaves`` parameter.
        objective: LightGBM objective name (defaults to ``'binary'``).
        num_boost_round: Number of boosting iterations. Defaults to 200,
            the value that used to be hard-coded here.

    Returns:
        The booster returned by ``lgb.train``.
    """
    train_data = lgb.Dataset(train_X, t_y)
    param = {
        'num_leaves': num_leaves,
        'objective': objective,
        # The metric is fixed to binary log loss whatever `objective` is.
        'metric': 'binary_logloss',
        'boosting': 'gbdt',
    }
    return lgb.train(param, train_data, num_boost_round)

In [5]:
# Train the main model on the full training labels with num_leaves=1000.
bst = train(train_y, 1000)

[LightGBM] [Info] Number of positive: 433643, number of negative: 441060
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 486
[LightGBM] [Info] Number of data points in the train set: 874703, number of used features: 76
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.495760 -> initscore=-0.016959
[LightGBM] [Info] Start training from score -0.016959


In [6]:
# Disabled sanity check: compares the mean training label with the mean
# prediction on the training set, then prints the mean prediction separately
# for rows labelled 0 and rows labelled 1.
#new_train_y = bst.predict(train_X)
#def avg(l):
#    return sum(l)/len(l)

#print(avg(train_y), avg(new_train_y))
#print(avg([new_y for old_y, new_y in zip(train_y, new_train_y) if old_y == 0]))
#print(avg([new_y for old_y, new_y in zip(train_y, new_train_y) if old_y == 1]))

In [7]:
# Threshold the predicted scores at 0.5 to get boolean labels, then report
# per-class precision / recall / F1 on the test split.
print(sklearn.metrics.classification_report(test_y, bst.predict(test_X) > .5))

              precision    recall  f1-score   support

           0       0.68      0.70      0.69    629982
           1       0.69      0.66      0.67    622564

    accuracy                           0.68   1252546
   macro avg       0.68      0.68      0.68   1252546
weighted avg       0.68      0.68      0.68   1252546



In [8]:
# Unthresholded model outputs on the test split; saved and scored with
# log loss in the cells below.
bst_preds = bst.predict(test_X)
#bst2_preds = bst2.predict(test_X)

In [9]:
# Persist the test predictions under the experiment tag
# (file name: '<expt_suffix>_preds.npy').
np.save(expt_suffix + '_preds.npy', bst_preds)
# np.save(expt_suffix + '_preds2.npy', bst2_preds)



# Test-set log loss per experiment:
# gold symmetry - 0.5824847835677649
# gold symmetry no id - 0.5821129501048026

In [10]:
# Log loss of the model's predicted scores against the test labels.
ll1 = sklearn.metrics.log_loss(test_y, bst_preds)
#ll2 = sklearn.metrics.log_loss(test_y, bst2_preds)

print(ll1)
#print(ll2)

0.5821129501048026


In [11]:
# Baseline: log loss of a constant 0.5 prediction for every test row.
# Dividing by ln(2) re-expresses the loss in base 2, where this baseline
# is exactly 1.
import math
sklearn.metrics.log_loss(test_y, np.ones_like(test_y) * .5) / math.log(2)

1.0000000000000013

In [12]:
# Predictions written by a different experiment run.
# NOTE(review): the double underscore suggests that run used
# expt_suffix = 'no_team_diff_' with the same save cell — confirm.
no_diff_preds = np.load('no_team_diff__preds.npy')

In [13]:
# Log loss of the other experiment's predictions against the current test labels.
# Assumes both runs scored the same test rows in the same order — TODO confirm.
sklearn.metrics.log_loss(test_y, no_diff_preds)

0.5830473591163567

In [14]:
def iterate_unseen_states():
    """Iterate over game events, with their game state, from the held-out CSVs.

    Reads events via ``iterate_events_from_csv`` using the
    ``gameevents_0[8-9][0-9].csv`` pattern (the same stem as ``test_base``)
    and delegates to ``iterate_game_events_with_state``.

    Yields:
        Whatever ``iterate_game_events_with_state`` produces; ``compute_seq``
        unpacks each item as ``(game_id, event, game_state, all_game_events)``.
    """
    # presumably iterate_events_from_csv expands the bracket pattern itself
    # — TODO confirm
    held_out_events = iterate_events_from_csv(
        'validated_all_gameevent_partitioned/gameevents_0[8-9][0-9].csv'
    )
    structure_infos = map_structure.MapStructureInfos()

    yield from iterate_game_events_with_state(held_out_events, structure_infos)

def compute_seq(max_samples=1000, log_every=100):
    """Yield the change in model prediction caused by giving Blue one more egg.

    For every held-out game state whose Blue egg count is not 2, a deep copy
    of the state gets one extra Blue egg. Both the original and the modified
    state are vectorized and scored by the module-level ``bst``, and the
    difference (modified minus original) is yielded.

    Args:
        max_samples: Maximum number of deltas to yield. Defaults to 1000,
            the value that used to be hard-coded here.
        log_every: Print the running sample count whenever it is a multiple
            of this value. Defaults to 100, as before; pass 0 or ``None``
            to disable progress output.

    Yields:
        ``preds[1] - preds[0]`` for each sampled state, where ``preds[0]`` is
        the prediction for the original state and ``preds[1]`` the prediction
        for the state with one more Blue egg.
    """
    count = 0

    for game_id, event, game_state, all_game_events in iterate_unseen_states():
        # States where Blue holds exactly 2 eggs are skipped.
        # presumably 2 is the maximum egg count — TODO confirm
        if game_state.get_team(Team.BLUE).eggs == 2:
            continue

        if log_every and count % log_every == 0:
            print(count)
        count += 1
        if count > max_samples:
            break

        # Work on a copy so the state owned by the iterator is never mutated.
        new_gs = copy.deepcopy(game_state)
        new_gs.get_team(Team.BLUE).eggs += 1

        new_encoded = vectorize_game_state(new_gs, event)
        old_encoded = vectorize_game_state(game_state, event)

        # Score both rows in one call: index 0 = original, index 1 = modified.
        preds = bst.predict([old_encoded, new_encoded])
        yield preds[1] - preds[0]

# Materialize the prediction deltas; the numbers printed below are
# compute_seq's progress counter.
deltas = list(compute_seq())

0
100
200
300
400
500
600
700
800
900
1000


In [15]:
# Number of sampled states where the extra Blue egg did not lower the prediction.
sum(d >= 0 for d in deltas)

991