In [1]:
%load_ext autoreload
%autoreload 2

In [18]:
import os
from target_encoding import TargetEncoderClassifier, TargetEncoder
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from functools import reduce
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import cross_val_score
import json
from functools import partial

from dsb2019.data.validation import InstallationFold, cross_validate, quad_kappa
from dsb2019.visualization import session_browser
from dsb2019.data import DATA_DIR
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm
tqdm.pandas()
pd.options.display.max_rows=999

In [3]:
# Load the raw CSV tables from DATA_DIR/raw. `train` and `train_labels` are the
# inputs expected by process_installations below (event log and labelled
# assessments respectively); `test` and `submission` are not used in the cells
# shown in this part of the notebook.
train = pd.read_csv(DATA_DIR / 'raw/train.csv')
test = pd.read_csv(DATA_DIR / 'raw/test.csv')
train_labels = pd.read_csv(DATA_DIR / 'raw/train_labels.csv')
submission = pd.read_csv(DATA_DIR / 'raw/sample_submission.csv')

In [22]:
# Titles for which process_log builds move statistics. The order is significant:
# process_log encodes the assessment title as its position in this list.
games = [
    'Scrub-A-Dub',
    'All Star Sorting',
    'Mushroom Sorter (Assessment)',
    'Air Show',
    'Crystals Rule',
    'Bird Measurer (Assessment)',
    'Dino Drink',
    'Bubble Bath',
    'Dino Dive',
    'Chow Time',
    'Cauldron Filler (Assessment)',
    'Pan Balance',
    'Happy Camel',
    'Cart Balancer (Assessment)',
    'Chest Sorter (Assessment)',
    'Leaf Leader',
]


def unwrap_event_data(df):
    """Expand the JSON stored in ``event_data`` into columns next to the original ones.

    Every ``event_data`` string is parsed with ``json.loads``; the parsed dicts
    form a frame with one column per key (keys missing from an event are left
    empty). That frame is placed column-wise in front of ``df`` after both
    have had their index reset, so rows are aligned by position.

    Note that both halves contribute an ``index`` column from ``reset_index()``,
    and a JSON key that matches an existing column name of ``df`` shows up
    twice in the result.
    """
    payloads = [json.loads(raw_event) for raw_event in df.event_data]
    payload_frame = pd.DataFrame(data=payloads).reset_index()
    original_frame = df.reset_index()
    return pd.concat([payload_frame, original_frame], axis=1)


def process_installations(train_labels, train, process_log):
    """Build one feature row per labelled assessment from the player's earlier events.

    Parameters
    ----------
    train_labels : DataFrame
        Needs the columns ``game_session``, ``title``, ``installation_id`` and
        ``accuracy_group``; one output row is produced per input row.
    train : DataFrame
        Event log with at least ``timestamp``, ``installation_id``,
        ``game_session`` and ``title``. It is sorted by ``timestamp`` on a
        copy; the caller's frame is not modified.
    process_log : callable
        Receives the player's log truncated right after the first event of the
        target assessment session (with ``accuracy_group`` and
        ``target_game_session`` columns added) and returns a dict of features.

    Returns
    -------
    DataFrame
        The feature dicts, each extended with ``installation_id`` and
        ``accuracy_group``.

    Raises
    ------
    IndexError
        If no event of the installation matches the labelled session and title.
    """
    result = []
    train = train.sort_values("timestamp")
    installations = train.groupby("installation_id")
    label_rows = train_labels[["game_session", "title", "installation_id", "accuracy_group"]]
    for _, game_session, title, installation_id, accuracy_group in tqdm(label_rows.itertuples(),
                                                                        total=len(train_labels)):
        # reset_index() makes the index positional, so the label found below
        # can be used directly as an iloc bound.
        player_log = installations.get_group(installation_id).reset_index()
        is_target = (player_log.game_session == game_session) & (player_log.title == title)
        log_length = player_log[is_target].index[0]
        # Keep everything up to and including the first event of the target
        # session. The explicit copy makes the column assignments below write
        # to an independent frame instead of a slice of the group, which
        # avoids pandas' SettingWithCopyWarning.
        player_log = player_log.iloc[:(log_length + 1)].copy()
        player_log["accuracy_group"] = accuracy_group
        player_log["target_game_session"] = game_session
        features = process_log(player_log)
        features["installation_id"] = installation_id
        features["accuracy_group"] = accuracy_group
        result.append(features)
    return pd.DataFrame(data=result)
 
    
def _calculate_ratios(df):
    """Return ``(n_correct, n_incorrect, ratio)`` from the boolean move columns of ``df``.

    ``ratio`` is ``n_correct / (n_correct + n_incorrect)``. When there are no
    graded moves at all it is NaN, returned explicitly rather than produced by
    a 0/0 division.
    """
    n_correct = df.correct_move.sum()
    n_incorrect = df.wrong_move.sum()
    n_moves = n_correct + n_incorrect
    ratio = n_correct / n_moves if n_moves else float("nan")
    return n_correct, n_incorrect, ratio


def _make_move_stats(df, title, n_lags=2):
    """Summarise correct/incorrect moves for one title, overall and per recent session.

    Returns a dict with ``n_correct``, ``n_incorrect`` and ``global_ratio``
    for ``title`` plus, for each lag index ``i < n_lags``, the same three
    statistics restricted to one session. The sessions used are the last
    ``n_lags`` distinct ``game_session`` values in order of appearance, so
    index 0 is the oldest of them; when fewer than ``n_lags`` sessions exist
    the remaining indices are filled with ``None``.
    """
    df = df.copy()
    if len(df):
        df = unwrap_event_data(df)
    if "correct" in df.columns:
        # Explicit comparisons: events without a "correct" field hold a
        # missing value and must count as neither correct nor wrong.
        df["correct_move"] = df.correct == True
        df["wrong_move"] = df.correct == False
    else:
        df["correct_move"] = False
        df["wrong_move"] = False

    stats = {}
    stats.update(zip([f"n_correct {title}", f"n_incorrect {title}", f"global_ratio {title}"],
                     _calculate_ratios(df)))
    if n_lags:
        last_sessions = df.game_session.unique()[-n_lags:]
        for i in range(n_lags):
            lag_keys = [f"n_correct {title} {i}", f"n_incorrect {title} {i}", f"ratio {title} {i}"]
            if i < len(last_sessions):
                lag_values = _calculate_ratios(df[df.game_session == last_sessions[i]])
            else:
                lag_values = [None, None, None]
            stats.update(zip(lag_keys, lag_values))
    return stats


def process_log(df):
    """Turn one player's truncated event log into a flat feature dict.

    ``df`` is expected to end with a row of the assessment being predicted;
    that last row supplies the assessment title and is excluded from the
    history. Only rows whose ``type`` is "Game" or "Assessment" are kept.
    The columns read are ``title``, ``type``, ``game_session`` and
    ``event_data``.

    Returns a dict holding ``"title"`` (the position of the assessment title
    in the module-level ``games`` list) followed by the move statistics of
    every title in ``games``.

    Raises ``ValueError`` if the assessment title is not in ``games``.
    """
    assessment_title = df.title.iloc[-1]

    history = df.iloc[:-1]
    history = history[history.type.isin(["Game", "Assessment"])].copy()

    result = {"title": games.index(assessment_title)}
    for game in games:
        result.update(_make_move_stats(history[history.title == game], game))
    return result


# Build the feature table (one row per labelled assessment). Without this call
# train_features is undefined on a fresh kernel and every later cell fails.
# The loop visits every row of train_labels, so it can take a while.
train_features = process_installations(train_labels, train, process_log)

In [21]:
# Persist the feature table to the interim data directory.
train_features.to_csv(DATA_DIR / "interim/train_features_game_counts.csv")

In [10]:
# Replace every missing value with the sentinel -1 (e.g. the per-lag statistics
# that process_log leaves as None when a title has fewer sessions than lags).
train_features=train_features.fillna(-1)

In [12]:
# Show the column names of the feature table.
train_features.columns

Index(['title', 'n_correct Scrub-A-Dub', 'n_incorrect Scrub-A-Dub',
       'global_ratio Scrub-A-Dub', 'n_correct Scrub-A-Dub 0',
       'n_incorrect Scrub-A-Dub 0', 'ratio Scrub-A-Dub 0',
       'n_correct Scrub-A-Dub 1', 'n_incorrect Scrub-A-Dub 1',
       'ratio Scrub-A-Dub 1',
       ...
       'n_incorrect Leaf Leader', 'global_ratio Leaf Leader',
       'n_correct Leaf Leader 0', 'n_incorrect Leaf Leader 0',
       'ratio Leaf Leader 0', 'n_correct Leaf Leader 1',
       'n_incorrect Leaf Leader 1', 'ratio Leaf Leader 1', 'installation_id',
       'accuracy_group'],
      dtype='object', length=147)

In [15]:
def train_baseline(x_train,y_train):
    """Fit the 4-class LightGBM baseline with a 15% hold-out for early stopping.

    The inputs are split once (``test_size=0.15``, ``random_state=2019``).
    Training runs for at most 10000 rounds with ``early_stopping_rounds=300``,
    evaluates on both the fitting set and the hold-out set, and logs the
    metric every 100 rounds.

    Returns the trained model object produced by ``lgb.train``.

    NOTE(review): LightGBM documents that ``bagging_fraction`` only takes
    effect together with ``bagging_freq`` > 0, which is not set here. Confirm
    whether bagging was intended.
    NOTE(review): ``early_stopping_rounds`` / ``verbose_eval`` are keyword
    arguments of older ``lgb.train`` releases. Check the installed LightGBM
    version before upgrading.
    """
    params = {
        'learning_rate': 0.01,
        'bagging_fraction': 0.9,
        'feature_fraction': 0.9,
        'num_leaves': 14,
        'lambda_l1': 0.1,
        'lambda_l2': 1,
        'metric': 'multiclass',
        'objective': 'multiclass',
        'num_classes': 4,
        'random_state': 2019
    }

    x_fit, x_holdout, y_fit, y_holdout = train_test_split(
        x_train, y_train, test_size=0.15, random_state=2019
    )
    fit_set = lgb.Dataset(x_fit, y_fit)
    holdout_set = lgb.Dataset(x_holdout, y_holdout)

    booster = lgb.train(
        params,
        fit_set,
        num_boost_round=10000,
        early_stopping_rounds=300,
        valid_sets=[fit_set, holdout_set],
        verbose_eval=100,
    )
    return booster


def make_features_wrapper(train, test):
    """Split the train and test frames into ``(features, target)`` pairs.

    For each frame the features are every column except ``installation_id``
    and ``accuracy_group``, and the target is the ``accuracy_group`` column as
    an array. Returns ``((x_train, y_train), (x_test, y_test))``; the input
    frames are not modified.
    """
    non_feature_columns = ["installation_id", "accuracy_group"]

    def split_features_and_target(frame):
        features = frame.drop(non_feature_columns, axis=1)
        target = frame.accuracy_group.values
        return features, target

    train_pair = split_features_and_target(train)
    test_pair = split_features_and_target(test)
    return train_pair, test_pair


def make_predictions(model,x_test_all,y_test):
    """Predict a class per row and hand it back together with the true labels.

    ``model.predict(x_test_all)`` must return an array with one row per
    sample and one score per class; the predicted class is the position of
    the highest score in each row. ``y_test`` is returned unchanged.

    Returns ``(predicted_classes, y_test)``.
    """
    class_scores = model.predict(x_test_all)
    predicted_classes = class_scores.argmax(axis=1)
    return predicted_classes, y_test

In [16]:
# Cross-validate the LightGBM baseline with the project's cross_validate helper,
# passing the feature splitter, trainer and predictor defined above. The result
# is consumed below as a sequence of (pred, true) pairs, one per fold.
predictions = cross_validate(train_features, train_features.accuracy_group, make_features_wrapper, train_baseline, make_predictions)

Training until validation scores don't improve for 300 rounds
[100]	training's multi_logloss: 1.07328	valid_1's multi_logloss: 1.07759
[200]	training's multi_logloss: 1.01715	valid_1's multi_logloss: 1.03323
[300]	training's multi_logloss: 0.984614	valid_1's multi_logloss: 1.01267
[400]	training's multi_logloss: 0.96036	valid_1's multi_logloss: 1.00071
[500]	training's multi_logloss: 0.941326	valid_1's multi_logloss: 0.992596
[600]	training's multi_logloss: 0.925133	valid_1's multi_logloss: 0.987378
[700]	training's multi_logloss: 0.911415	valid_1's multi_logloss: 0.983922
[800]	training's multi_logloss: 0.898807	valid_1's multi_logloss: 0.981824
[900]	training's multi_logloss: 0.887318	valid_1's multi_logloss: 0.980442
[1000]	training's multi_logloss: 0.876624	valid_1's multi_logloss: 0.979759
[1100]	training's multi_logloss: 0.866517	valid_1's multi_logloss: 0.979412
[1200]	training's multi_logloss: 0.856904	valid_1's multi_logloss: 0.979122
[1300]	training's multi_logloss: 0.847715	

In [20]:
# Score every fold once and reuse the list for both the mean and the per-fold
# display, instead of computing each kappa twice in the same expression.
fold_kappas = [quad_kappa(true, pred) for pred, true in predictions]
np.mean(fold_kappas), fold_kappas

(0.5157051296362365,
 [0.5472283470243167,
  0.4797202169826231,
  0.47854404188486,
  0.5041272227182371,
  0.5055032393246791,
  0.5232401248520073,
  0.5510630168932551,
  0.5215215852330887,
  0.5147618813899562,
  0.531341620059342])