In [1]:
import copy
import lightgbm as lgb
import os
import numpy as np
import sklearn.metrics

from preprocess import *

In [2]:
# Experiment configuration: every input and output of this notebook lives under
# <expts_subdir>/<expt_suffix>/.
expts_subdir = 'model_experiments'
expt_suffix = 'sort_workers_by_power_drop_90'
# NOTE(review): the bracket expressions look like glob character classes, but
# load_vectors() hands these strings straight to np.load, which does not expand
# globs -- presumably the .npy files are literally named after the pattern; confirm.
train_base = f'{expts_subdir}/{expt_suffix}/gameevents_0[0-7][0-9].csv'
test_base =  f'{expts_subdir}/{expt_suffix}/gameevents_0[8-9][0-9].csv'
# Path the trained booster is written to by bst.save_model().
model_filename = f'{expts_subdir}/{expt_suffix}/model.mdl'

In [3]:
def load_vectors(pattern):
    """Load the feature matrix and label vector saved under a common prefix.

    Args:
        pattern: Path prefix. The features are read from
            ``<pattern>_states.npy`` and the labels from
            ``<pattern>_labels.npy``. The string is used verbatim as a file
            name; no glob expansion is performed.

    Returns:
        A ``(states, labels)`` tuple of the two arrays as returned by
        ``np.load``.
    """
    states_file = f'{pattern}_states.npy'
    labels_file = f'{pattern}_labels.npy'
    return np.load(states_file), np.load(labels_file)

# Load the train split first, then the held-out test split; each is a
# (states, labels) pair.
(train_X, train_y), (test_X, test_y) = (
    load_vectors(split_base) for split_base in (train_base, test_base)
)

In [4]:
def train(t_y, num_leaves, num_trees, objective='binary', t_X=None):
    """Train a LightGBM gradient-boosted tree model.

    Args:
        t_y: Training labels, one per row of the feature matrix.
        num_leaves: Value for LightGBM's ``num_leaves`` parameter.
        num_trees: Number of boosting rounds.
        objective: LightGBM objective name. Note that the evaluation metric
            stays ``binary_logloss`` whatever objective is chosen.
        t_X: Training feature matrix. When omitted, the notebook-level
            ``train_X`` is used, which is what the function always did before
            this parameter existed.

    Returns:
        The trained ``lightgbm.Booster``.
    """
    if t_X is None:
        # Backward-compatible default: fall back to the notebook global.
        t_X = train_X
    train_data = lgb.Dataset(t_X, label=t_y)
    param = {
        'num_leaves': num_leaves,
        'objective': objective,
        'metric': 'binary_logloss',
        'boosting': 'gbdt',
    }
    return lgb.train(param, train_data, num_boost_round=num_trees)

In [5]:
# Hyperparameters for this run; both values are also echoed in the summary
# line printed at the end of the notebook.
num_leaves, num_trees = 100, 100
bst = train(train_y, num_leaves=num_leaves, num_trees=num_trees)

[LightGBM] [Info] Number of positive: 394109, number of negative: 400478
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 439
[LightGBM] [Info] Number of data points in the train set: 794587, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.495992 -> initscore=-0.016031
[LightGBM] [Info] Start training from score -0.016031


In [6]:
# Persist the trained booster to model_filename. Left as a bare expression, so
# the cell displays the returned Booster.
bst.save_model(model_filename)

<lightgbm.basic.Booster at 0x7f8ca9b289d0>

In [7]:
# Echo where the model was written, for copy/paste.
print(model_filename)

model_experiments/sort_workers_by_power_drop_90/model.mdl


In [8]:
# Threshold the booster's test-set scores at 0.5 to get hard labels, then
# print per-class precision / recall / F1.
test_pred_labels = bst.predict(test_X) > .5
report_text = sklearn.metrics.classification_report(test_y, test_pred_labels)
print(report_text)

              precision    recall  f1-score   support

           0       0.70      0.71      0.71    573947
           1       0.70      0.70      0.70    566930

    accuracy                           0.70   1140877
   macro avg       0.70      0.70      0.70   1140877
weighted avg       0.70      0.70      0.70   1140877



In [9]:
# Raw (unthresholded) test-set predictions, reused by the summary cell for
# log loss and accuracy.
bst_preds = bst.predict(test_X)


In [15]:
def iterate_unseen_states():
    """Yield game-state records replayed from the held-out event files.

    Reads events via ``iterate_events_from_csv`` and forwards every item
    produced by ``iterate_game_events_with_state`` unchanged. ``compute_seq``
    unpacks each item as ``(game_id, event, game_state, all_game_events)``.
    """
    # NOTE(review): same 08x-09x file range as test_base, but read from a
    # different directory -- presumably the raw events behind the test
    # vectors; confirm.
    unseen_events = iterate_events_from_csv(
        'validated_all_gameevent_partitioned/gameevents_0[8-9][0-9].csv'
    )
    structure_infos = map_structure.MapStructureInfos()

    yield from iterate_game_events_with_state(unseen_events, structure_infos)

def compute_seq(max_samples=2000, report_every=100):
    """Yield the change in model prediction when BLUE is given one extra egg.

    For each held-out game state where BLUE's egg count is not 2, the state is
    deep-copied, BLUE's ``eggs`` is incremented by one on the copy, and both the
    original and the modified state are vectorized and scored by ``bst``. The
    yielded value is ``prediction(modified) - prediction(original)``.

    Args:
        max_samples: Maximum number of deltas to yield before stopping
            (default 2000, the previously hard-coded cap).
        report_every: Print the running sample count every this many samples
            (default 100). A falsy value disables progress output.

    Yields:
        One prediction difference per qualifying game state.
    """
    count = 0

    for game_id, event, game_state, all_game_events in iterate_unseen_states():
        # States where BLUE has exactly 2 eggs are skipped.
        # NOTE(review): presumably 2 is the egg maximum, so adding one more
        # would produce an impossible state -- confirm.
        if game_state.get_team(Team.BLUE).eggs == 2:
            continue

        if report_every and count % report_every == 0:
            print(count)
        count += 1
        if count > max_samples:
            break

        # Work on a copy so the state owned by the upstream iterator is not
        # mutated.
        new_gs = copy.deepcopy(game_state)
        new_gs.get_team(Team.BLUE).eggs += 1

        new_encoded = vectorize_game_state(new_gs, event)
        old_encoded = vectorize_game_state(game_state, event)

        # Score both variants in a single call: index 0 is the original state,
        # index 1 is the state with the extra egg.
        preds = bst.predict([old_encoded, new_encoded])
        yield preds[1] - preds[0]

# Materialize the generator; the summary cell needs both len(deltas) and a
# pass over the values.
deltas = list(compute_seq())

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000


In [16]:
# Test-set quality of the booster: log loss on the raw scores, accuracy at a
# 0.5 threshold.
ll1 = sklearn.metrics.log_loss(test_y, bst_preds)
acc = sklearn.metrics.accuracy_score(test_y, bst_preds > .5)

# "Egg inversion" rate: the fraction of sampled states where giving BLUE one
# more egg lowered the model's prediction (negative delta).
inversions = sum(delta < 0 for delta in deltas)
egginv = float(inversions) / len(deltas)

# One-line summary, wrapped in backticks so it can be pasted into the
# markdown results log.
print(f'`{expt_suffix:25}_l{num_leaves}_t{num_trees} ll:{ll1:,.3f} acc:{acc:,.3f} egginv:{egginv}`')

`sort_workers_by_power_drop_90_l100_t100 ll:0.556 acc:0.704 egginv:0.0055`


`fresh_baseline            ll:0.572 acc:0.696 egginv:0.042`

`no_food_dep               ll:0.572 acc:0.695 egginv:0.02`

`ema_halflife_2_seconds_50_20    ll:0.588 acc:0.690 egginv:0.021`

`ema_halflife_2_seconds_100_50    ll:0.565 acc:0.699 egginv:0.021`

`no_food_dep_100_50               ll:0.561 acc:0.702 egginv:0.005`

`sort_workers_by_power     ll:0.559 acc:0.703 egginv:0.012`

`sort_workers_by_power_more_data ll:0.559 acc:0.703 egginv:0.002`

`sort_workers_by_power_less_data ll:0.559 acc:0.703 egginv:0.013`

`sort_workers_by_power_full_data ll:0.559 acc:0.703 egginv:0.004`

`sort_workers_by_power_strided_20 ll:0.559 acc:0.702 egginv:0.014`

`sort_workers_by_power_strided_100 ll:0.562 acc:0.702 egginv:0.026`

`sort_workers_by_power_full_200_leaves_100_trees ll:0.555 acc:0.704 egginv:0.004`

`sort_workers_by_power_full_400_leaves_200_trees ll:0.558 acc:0.702 egginv:0.013`

`sort_workers_by_power_full_200_leaves_200_trees ll:0.556 acc:0.703 egginv:0.006`

`sort_workers_by_power_drop_90_200_leaves_100_trees ll:0.557 acc:0.703 egginv:0.001`

`sort_workers_by_power_drop_90_l100_t100 ll:0.556 acc:0.704 egginv:0.0055`
















