In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from functools import reduce
from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import json
from functools import partial
from dsb2019.models.coeff import ThresholdClassifier

from dsb2019.models.tracking import track_experiment, track_submission_info
from dsb2019.data.validation import InstallationFold, cross_validate, quad_kappa
from dsb2019.visualization import session_browser
from dsb2019.data import DATA_DIR
from dsb2019.data import adv_validation
from dsb2019.models import MODELS_DIR
from dsb2019.models.nn import NN, make_nn_trainer
from dsb2019.models.lr_finder import LRFinder
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm
import hyperopt
from hyperopt import hp, fmin, Trials, tpe, STATUS_OK
from multiprocessing import cpu_count
from joblib import Parallel, delayed
from pathlib import Path
import os

# Register tqdm's pandas integration (adds the progress_* methods).
tqdm.pandas()
# Let pandas print up to 999 rows before truncating the display.
pd.set_option("display.max_rows", 999)

In [3]:
# Display the absolute path of the nn_regressor.model file under MODELS_DIR.
(MODELS_DIR/ "nn_regressor.model").resolve()

PosixPath('/code/dsb2019/models/nn_regressor.model')

In [4]:
# NOTE(review): these assignments shadow the DATA_DIR / MODELS_DIR names imported
# from the dsb2019 package with machine-specific absolute paths.
DATA_DIR = Path("/home/bfilippov/oss/dsb2019/dsb2019/data")
MODELS_DIR = Path("/home/bfilippov/oss/dsb2019/dsb2019/models")

# Read the four raw CSV tables, in the same order as before.
train, test, train_labels, submission = (
    pd.read_csv(DATA_DIR / "raw" / f"{table}.csv")
    for table in ("train", "test", "train_labels", "sample_submission")
)

In [5]:
# Switch the process working directory to the notebooks folder; any relative
# path used after this cell is resolved against it. The path is machine-specific.
os.chdir("/home/bfilippov/oss/dsb2019/dsb2019/notebooks")

In [6]:
# Game titles for which per-game features are built (process_log iterates this list).
games = [
    'Scrub-A-Dub',
    'All Star Sorting',
    'Air Show',
    'Crystals Rule',
    'Dino Drink',
    'Bubble Bath',
    'Dino Dive',
    'Chow Time',
    'Pan Balance',
    'Happy Camel',
    'Leaf Leader',
]

# Assessment titles; the list position is used as the numeric "title" feature,
# so the order must not change.
assessments = [
    'Mushroom Sorter (Assessment)',
    'Bird Measurer (Assessment)',
    'Cauldron Filler (Assessment)',
    'Cart Balancer (Assessment)',
    'Chest Sorter (Assessment)',
]

# World names; the list position is used as the numeric "world" feature.
worlds = [
    'NONE',
    'MAGMAPEAK',
    'TREETOPCITY',
    'CRYSTALCAVES',
]

def unwrap_event_data(df):
    """Expand the JSON in ``df.event_data`` into columns placed next to the original ones.

    Every ``event_data`` string is parsed with ``json.loads``. The parsed
    ``event_code`` key is dropped (a ``KeyError`` is raised if no record has
    it), then the parsed columns and the original columns are concatenated
    side by side after resetting both indexes.

    Args:
        df: frame with an ``event_data`` column of JSON object strings.

    Returns:
        A new DataFrame: the reset-index column, the parsed keys, then the
        reset-index column and original columns of ``df``.
    """
    parsed_records = [json.loads(raw) for raw in df.event_data]
    payload = pd.DataFrame(data=parsed_records).drop(columns="event_code")
    left = payload.reset_index()
    right = df.reset_index()
    return pd.concat([left, right], axis=1)


# Unwrapped event_data columns summarised by extract_basic_stats.
event_data_features = [
    "duration",
    "round",
    "level",
    "position",
    "total_duration",
    "weight",
    "misses",
]


def extract_basic_stats(df):
    """Compute min / max / mean / std for every column in ``event_data_features``.

    Columns missing from ``df`` are added as all-NaN columns by the reindex,
    so the output always has one entry per (statistic, feature) pair.

    Args:
        df: frame that may contain any subset of ``event_data_features``.

    Returns:
        List of ``("<stat>_<column>", value)`` tuples, grouped by column.
    """
    stat_names = ["min", "max", "mean", "std"]
    present = [name for name in event_data_features if name in df.columns]
    frame = df[present].reindex(columns=event_data_features)
    per_column = frame.agg(stat_names).to_dict()
    return [
        (stat + "_" + column, value)
        for column, column_stats in per_column.items()
        for stat, value in column_stats.items()
    ]


def extract_chow_time(df):
    """Average per-round Chow Time features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``resources`` and ``target_weight``
    are read from the first ``63f13dd7`` event. A greedy pass over the
    resources (largest first) counts how many of them fit the target; the
    number of ``4ef8cdd3`` events is divided by that count. ``56817e2b``
    events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("chowtime_optimum", v), ("chowtime_complete", v)]``. Both values
        are ``None`` when there is no row with ``round > 0``.
    """
    no_data = [("chowtime_optimum", None), ("chowtime_complete", None)]
    cols = ["round", "event_id", "resources", "target_weight", "game_session"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "63f13dd7"]
        if not len(start):
            return pd.Series({"optimum": None, "complete": 0})
        start = start.iloc[0]
        resources = start["resources"]
        target = start["target_weight"]
        # Greedy fit: take resources from largest to smallest while they fit.
        optimum = 0
        cnt = 0
        for v in sorted(resources)[::-1]:
            if v + cnt > target:
                continue
            cnt += v
            optimum += 1
            if cnt == target:
                break
        n_turns = sum(round_events.event_id == "4ef8cdd3")
        complete = sum(round_events.event_id == "56817e2b")
        return pd.Series({"optimum": n_turns / optimum, "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("chowtime_optimum", feature["optimum"]), ("chowtime_complete", feature["complete"])]


def extract_leaf_leader(df):
    """Average per-round Leaf Leader features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``target_weight`` is read from the
    first ``f32856e4`` event. A greedy pass over the fixed weights
    ``[4, 4, 2, 2, 2, 2, 1, 1]`` counts how many fit the target; the number of
    ``262136f4`` events is divided by that count. ``b012cd7f`` events are
    counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("leafleader_optimum", v), ("leafleader_complete", v)]``. Both
        values are ``None`` when there is no row with ``round > 0``.
    """
    no_data = [("leafleader_optimum", None), ("leafleader_complete", None)]
    cols = ["round", "event_id", "target_weight", "game_session"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "f32856e4"]
        if not len(start):
            return pd.Series({"optimum": None, "complete": 0})
        target = start.iloc[0]["target_weight"]
        # Greedy fit over the fixed set of available weights.
        optimum = 0
        cnt = 0
        for v in [4, 4, 2, 2, 2, 2, 1, 1]:
            if v + cnt > target:
                continue
            cnt += v
            optimum += 1
            if cnt == target:
                break
        n_turns = sum(round_events.event_id == "262136f4")
        complete = sum(round_events.event_id == "b012cd7f")
        return pd.Series({"optimum": n_turns / optimum, "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("leafleader_optimum", feature["optimum"]), ("leafleader_complete", feature["complete"])]


def extract_happy_camel(df):
    """Average per-round Happy Camel features over all rounds with ``round > 0``.

    For each (game_session, round) group, the length of ``bowls`` on the first
    ``c2baf0bd`` event is the reference count. The number of ``6bf9e3e1``
    events is divided by it, the ``has_toy`` values of those events are
    summed, and ``36fa3ebe`` events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("happycamel_optimum", v), ("happycamel_detections", v),
        ("happycamel_complete", v)]``. All values are ``None`` when there is
        no row with ``round > 0``.
    """
    no_data = [("happycamel_optimum", None), ("happycamel_detections", None),
               ("happycamel_complete", None)]
    cols = ["round", "event_id", "has_toy", "bowls", "game_session"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "c2baf0bd"]
        if not len(start):
            return pd.Series({'optimum': None, 'n_toy_detected': None, "complete": 0})
        optimum = len(start.iloc[0]["bowls"])
        turns = round_events[round_events.event_id == "6bf9e3e1"]
        n_turns = len(turns)
        n_toy_detected = turns["has_toy"].sum()
        complete = sum(round_events.event_id == "36fa3ebe")
        return pd.Series({'optimum': (n_turns / optimum), 'n_toy_detected': n_toy_detected, "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("happycamel_optimum", feature["optimum"]), ("happycamel_detections", feature["n_toy_detected"]),
            ("happycamel_complete", feature["complete"])]


def extract_pan_balance(df):
    """Average per-round Pan Balance features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``target_weight`` and
    ``starting_weights`` are read from the first ``a592d54e`` event; the
    reference count is ``max(abs(target - start), 1)``. The number of
    ``e7561dd2`` / ``804ee27f`` events is divided by it, and ``1c178d24``
    events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("panbalance_optimum", v), ("panbalance_complete", v)]``. Both
        values are ``None`` when there is no row with ``round > 0``.
    """
    no_data = [("panbalance_optimum", None), ("panbalance_complete", None)]
    cols = ["round", "event_id", "target_weight", "starting_weights", "game_session"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start_event = round_events[round_events.event_id == "a592d54e"]
        if not len(start_event):
            return pd.Series({"optimum": None, "complete": 0})
        start_event = start_event.iloc[0]
        target = start_event["target_weight"]
        start = start_event["starting_weights"]
        # At least one move is always assumed, which also avoids dividing by zero.
        optimum = max(abs(target - start), 1)
        n_turns = sum(round_events.event_id.isin(("e7561dd2", "804ee27f")))
        complete = sum(round_events.event_id == "1c178d24")
        return pd.Series({"optimum": n_turns / optimum, "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("panbalance_optimum", feature["optimum"]), ("panbalance_complete", feature["complete"])]


def extract_scrubadub(df):
    """Average per-round Scrub-A-Dub features over all rounds with ``round > 0``.

    For each (game_session, round) group, the length of ``animals`` on the
    first ``26fd2d99`` event is the reference count. ``5c3d2b2f`` events with
    ``event_code == 4020`` are the attempts: their number and the sum of their
    ``correct`` flags are each divided by the reference count. ``08fd73f3``
    events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("scrubadub_performance", v), ("scrubadub_precision", v),
        ("scrubadub_complete", v)]``. All values are ``None`` when there is no
        row with ``round > 0``.
    """
    no_data = [("scrubadub_performance", None), ("scrubadub_precision", None), ("scrubadub_complete", None)]
    cols = ["round", "event_id", "game_session", "correct", "event_code", "animals"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "26fd2d99"]
        if not len(start):
            return pd.Series({"performance": None, "precision": None, "complete": 0})
        n_animals = len(start.iloc[0]["animals"])
        complete = sum(round_events.event_id == "08fd73f3")
        attempts = round_events[(round_events.event_id == "5c3d2b2f") & (round_events.event_code == 4020)]
        return pd.Series({"performance": len(attempts) / n_animals,
                          "precision": attempts["correct"].sum() / n_animals,
                          "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("scrubadub_performance", feature["performance"]), ("scrubadub_precision", feature["precision"]),
            ("scrubadub_complete", feature["complete"])]


def extract_allstarsorting(df):
    """Average per-round All Star Sorting features over all rounds with ``round > 0``.

    For each (game_session, round) group, the length of ``houses`` on the
    first ``2c4e6db0`` event is the reference count. ``2dc29e21`` events are
    the attempts: their number and the sum of their ``correct`` flags are each
    divided by the reference count. ``ca11f653`` events are counted as
    completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("allstarsorting_performance", v), ("allstarsorting_precision", v),
        ("allstarsorting_complete", v)]``. All values are ``None`` when there
        is no row with ``round > 0``.
    """
    no_data = [("allstarsorting_performance", None), ("allstarsorting_precision", None),
               ("allstarsorting_complete", None)]
    cols = ["round", "event_id", "game_session", "correct", "event_code", "houses"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "2c4e6db0"]
        if not len(start):
            return pd.Series({"performance": None, "precision": None, "complete": 0})
        n_animals = len(start.iloc[0]["houses"])
        complete = sum(round_events.event_id == "ca11f653")
        attempts = round_events[round_events.event_id == "2dc29e21"]
        return pd.Series({"performance": len(attempts) / n_animals,
                          "precision": attempts["correct"].sum() / n_animals,
                          "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("allstarsorting_performance", feature["performance"]), ("allstarsorting_precision", feature["precision"]),
            ("allstarsorting_complete", feature["complete"])]


def extract_dinodrink(df):
    """Average per-round Dino Drink features over all rounds with ``round > 0``.

    For each (game_session, round) group, the length of ``holes`` on the first
    ``f806dc10`` event is the reference count. ``74e5f8a7`` events are the
    attempts: their number and the sum of their ``correct`` flags are each
    divided by the reference count. ``16dffff1`` events are counted as
    completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("dinodrink_performance", v), ("dinodrink_precision", v),
        ("dinodrink_complete", v)]``. All values are ``None`` when there is no
        row with ``round > 0``.
    """
    no_data = [("dinodrink_performance", None), ("dinodrink_precision", None), ("dinodrink_complete", None)]
    cols = ["round", "event_id", "game_session", "correct", "event_code", "holes"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "f806dc10"]
        if not len(start):
            return pd.Series({"performance": None, "precision": None, "complete": 0})
        n_animals = len(start.iloc[0]["holes"])
        complete = sum(round_events.event_id == "16dffff1")
        attempts = round_events[round_events.event_id == "74e5f8a7"]
        return pd.Series({"performance": len(attempts) / n_animals,
                          "precision": attempts["correct"].sum() / n_animals,
                          "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("dinodrink_performance", feature["performance"]), ("dinodrink_precision", feature["precision"]),
            ("dinodrink_complete", feature["complete"])]


def extract_bubblebath(df):
    """Average per-round Bubble Bath features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``target_containers`` is read from
    the first ``1beb320a`` event and compared with ``containers`` on the first
    ``3bb91dda`` event; the absolute difference is the round's performance.
    ``895865f3`` events are counted as completions.

    The previous version tested ``isinstance(feature, pd.Series)``, which is
    true for every result of ``DataFrame.mean()``, so it always returned
    ``None`` values. The check now matches the sibling extractors.
    NOTE(review): feature files produced with the old check presumably hold
    only missing values for these two features - regenerate them before
    comparing against freshly extracted features.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("bubblebath_performance", v), ("bubblebath_complete", v)]``. Both
        values are ``None`` when there is no row with ``round > 0``.
    """
    no_data = [("bubblebath_performance", None), ("bubblebath_complete", None)]
    cols = ["round", "event_id", "game_session", "containers", "target_containers"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "1beb320a"]
        if not len(start):
            return pd.Series({"performance": None, "complete": 0})
        target = start.iloc[0]["target_containers"]
        complete = sum(round_events.event_id == "895865f3")
        answers = round_events[round_events.event_id == "3bb91dda"]
        if len(answers):
            return pd.Series({"performance": abs(target - answers.iloc[0]["containers"]), "complete": complete})
        # No answer event in this round: report it as not completed.
        return pd.Series({"performance": None, "complete": 0})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("bubblebath_performance", feature["performance"]), ("bubblebath_complete", feature["complete"])]


def extract_dinodive(df):
    """Average per-round Dino Dive features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``target_water_level`` is read from
    the first ``7961e599`` event. A greedy pass over the fixed sizes
    ``[6, 6, 3, 3, 3, 3, 1, 1]`` counts how many fit the target; that count is
    the reference. ``c0415e5c`` events are the attempts: their number and the
    sum of their ``correct`` flags are each divided by the reference.
    ``00c73085`` events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("dinodive_performance", v), ("dinodive_precision", v),
        ("dinodive_complete", v)]``. All values are ``None`` when there is no
        row with ``round > 0``.
    """
    no_data = [("dinodive_performance", None), ("dinodive_precision", None), ("dinodive_complete", None)]
    cols = ["round", "event_id", "game_session", "correct", "target_water_level"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        start = round_events[round_events.event_id == "7961e599"]
        if not len(start):
            return pd.Series({"performance": None, "precision": None, "complete": 0})
        target = start.iloc[0]["target_water_level"]
        # Greedy fit over the fixed set of available sizes.
        dinos = [6, 6, 3, 3, 3, 3, 1, 1]
        opt = 0
        n_animals = 0
        for d in dinos:
            if opt + d > target:
                continue
            opt += d
            n_animals += 1
            if opt == target:
                break
        complete = sum(round_events.event_id == "00c73085")
        attempts = round_events[round_events.event_id == "c0415e5c"]
        return pd.Series({"performance": len(attempts) / n_animals,
                          "precision": attempts["correct"].sum() / n_animals,
                          "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("dinodive_performance", feature["performance"]), ("dinodive_precision", feature["precision"]),
            ("dinodive_complete", feature["complete"])]


def extract_airshow(df):
    """Average per-round Air Show features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``28f975ea`` events are the
    attempts: their number is the performance and the sum of their ``correct``
    flags is the precision. ``f5b8c21a`` events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("airshow_performance", v), ("airshow_precision", v),
        ("airshow_complete", v)]``. All values are ``None`` when there is no
        row with ``round > 0``.
    """
    no_data = [("airshow_performance", None), ("airshow_precision", None), ("airshow_complete", None)]
    cols = ["round", "event_id", "game_session", "correct"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        complete = sum(round_events.event_id == "f5b8c21a")
        attempts = round_events[round_events.event_id == "28f975ea"]
        return pd.Series({"performance": len(attempts), "precision": attempts["correct"].sum(), "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("airshow_performance", feature["performance"]), ("airshow_precision", feature["precision"]),
            ("airshow_complete", feature["complete"])]


def extract_crystalsrule(df):
    """Average per-round Crystals Rule features over all rounds with ``round > 0``.

    For each (game_session, round) group, ``86c924c4`` events are the
    attempts: their number is the performance and the sum of their ``correct``
    flags is the precision. ``3323d7e9`` events are counted as completions.

    Args:
        df: event frame (presumably with event_data already unwrapped into
            columns); columns that are missing are treated as all-NaN.

    Returns:
        ``[("crystalsrule_performance", v), ("crystalsrule_precision", v),
        ("crystalsrule_complete", v)]``. All values are ``None`` when there is
        no row with ``round > 0``.
    """
    no_data = [("crystalsrule_performance", None), ("crystalsrule_precision", None),
               ("crystalsrule_complete", None)]
    cols = ["round", "event_id", "game_session", "correct"]
    # Keep the columns that exist, then reindex so absent ones become NaN columns.
    df = df[[c for c in cols if c in df.columns]]
    df = df.reindex(columns=cols)
    df = df[df["round"] > 0]
    # Guard explicitly: the shape of groupby(...).apply(...) on a frame without
    # rows differs between pandas versions, so it must not be relied upon.
    if df.empty:
        return no_data

    def calculate_features(round_events):
        complete = sum(round_events.event_id == "3323d7e9")
        attempts = round_events[round_events.event_id == "86c924c4"]
        return pd.Series({"performance": len(attempts), "precision": attempts["correct"].sum(), "complete": complete})

    feature = df.groupby(["game_session", "round"]).apply(calculate_features).mean()
    if len(feature) == 0:
        return no_data
    return [("crystalsrule_performance", feature["performance"]), ("crystalsrule_precision", feature["precision"]),
            ("crystalsrule_complete", feature["complete"])]


# Dispatch table used by extract_game_stats: game title -> feature extractor.
game_funcs = dict([
    ("Chow Time", extract_chow_time),
    ("Leaf Leader", extract_leaf_leader),
    ("Happy Camel", extract_happy_camel),
    ("Pan Balance", extract_pan_balance),
    ("Scrub-A-Dub", extract_scrubadub),
    ("All Star Sorting", extract_allstarsorting),
    ("Dino Drink", extract_dinodrink),
    ("Bubble Bath", extract_bubblebath),
    ("Dino Dive", extract_dinodive),
    ("Air Show", extract_airshow),
    ("Crystals Rule", extract_crystalsrule),
])


def extract_game_stats(df, title):
    """Run the extractor registered for ``title`` in ``game_funcs`` on ``df``.

    Raises:
        KeyError: if ``title`` has no entry in ``game_funcs``.
    """
    extractor = game_funcs[title]
    return extractor(df)


def make_counters(df, column):
    """Count how often each value of ``df[column]`` occurs.

    Args:
        df: frame to count in.
        column: name of the column whose values are grouped and counted.

    Returns:
        Plain dict mapping each distinct value to its row count.
    """
    occurrences = df.groupby(column)[column].count()
    return occurrences.to_dict()

    
def process_log(df):
    """Build the flat feature dict for one player log that ends at an assessment.

    The last row of ``df`` supplies the assessment ``title`` and ``world``
    being described. All earlier rows of type Game or Assessment form the
    history from which per-game, per-world and global counters are derived.

    Args:
        df: one installation's event rows, ordered so that the final row
            belongs to the target assessment. The columns read here are
            title, world, type, game_session, event_code, event_id and
            event_data.

    Returns:
        dict mapping feature name to value. Most keys are strings; the
        ``event_code`` counters added at the end use the raw event_code values
        as keys.

    Raises:
        ValueError: if the final title or world is not listed in
            ``assessments`` / ``worlds``.
    """
    assessment_title = df.title.iloc[-1]
    world = df.world.iloc[-1]

    # History excludes the final row and keeps only Game / Assessment events.
    history = df.iloc[:-1]
    history = history[history.type.isin(["Game", "Assessment"])].copy()

    def calculate_ratios(df):
        """Return (n_correct, n_incorrect, n_correct / (n_correct + n_incorrect))."""
        n_correct = df.correct_move.sum()
        n_incorrect = df.wrong_move.sum()
        ratio = n_correct / (n_correct + n_incorrect)
        return n_correct, n_incorrect, ratio

    def make_move_stats(df, title, n_lags=2):
        """Correct/incorrect move statistics for one game, overall and for its last sessions."""
        df = df.copy()
        if len(df):
            df = unwrap_event_data(df)
        if "correct" in df.columns:
            df["correct_move"] = df.correct == True
            df["wrong_move"] = df.correct == False
        else:
            df["correct_move"] = False
            df["wrong_move"] = False
        result = []
        result.extend(zip([f"n_correct_{title}", f"n_incorrect_{title}", f"global_ratio_{title}"], calculate_ratios(df)))
        result.extend(extract_game_stats(df, title))
        #result.extend(extract_basic_stats(df))
        if n_lags:
            last_sessions = df.game_session.unique()[-n_lags:]
            for i in range(n_lags):
                if i < len(last_sessions):
                    # NOTE: the middle key uses a space ("{title} {i}") unlike the
                    # else-branch below. It is kept as is because the hard-coded
                    # useful_features list in this notebook refers to these names.
                    result.extend(zip([f"n_correct_{title}_{i}", f"n_incorrect_{title} {i}", f"ratio_{title}_{i}"],
                                      calculate_ratios(df[df.game_session == last_sessions[i]])))
                else:
                    result.extend(zip([f"n_correct_{title}_{i}", f"n_incorrect_{title}_{i}", f"ratio_{title}_{i}"], [None, None, None]))
        return dict(result)

    # Global counters are computed on the full log, including the final row.
    result = {"title": assessments.index(assessment_title),
              "world": worlds.index(world),
              "n_activities": df[df.type == "Activity"].game_session.nunique(),
              "n_games": df[df.type == "Game"].game_session.nunique(),
              "event_code_count": df.event_code.nunique(),
              "event_id_count": df.event_id.nunique(),
              "title_count": df.title.nunique(),
              "n_actions": len(df),
              "world_title_count": df[df.world == world].title.nunique(),
              }

    def make_game_features(game):
        """Features for one game over the whole history."""
        result = {}
        stats = history[history.title == game]
        stats_features = make_move_stats(stats, game)
        stats_features[f"{game}_event_code_count"] = stats.event_code.nunique()
        stats_features[f"{game}_event_id_count"] = stats.event_id.nunique()
        stats_features[f"{game}_session_id_count"] = stats.game_session.nunique()
        stats_features[f"{game}_n_actions"] = len(stats)
        result.update(stats_features)
        result.update({f"{game}_{k}": v for k, v in make_counters(stats, "event_id").items()})
        result.update({f"{game}_{k}": v for k, v in make_counters(stats, "event_code").items()})
        return result

    #for f in Parallel(n_jobs=cpu_count())(delayed(make_game_features)(game) for game in games):
    #    result.update(f)
    for game in games:
        result.update(make_game_features(game))
    world_games = history[history.world == world]

    def make_world_features(game):
        """Same features as make_game_features, restricted to the assessment's world."""
        result = {}
        stats = world_games[world_games.title == game]
        stats_features = make_move_stats(stats, game)
        stats_features = {f"world_{k}": v for k, v in stats_features.items()}
        stats_features[f"world_{game}_event_code_count"] = stats.event_code.nunique()
        stats_features[f"world_{game}_event_id_count"] = stats.event_id.nunique()
        stats_features[f"world_{game}_session_id_count"] = stats.game_session.nunique()
        stats_features[f"world_{game}_n_actions"] = len(stats)
        result.update(stats_features)
        result.update({f"world_{game}_{k}": v for k, v in make_counters(stats, "event_id").items()})
        result.update({f"world_{game}_{k}": v for k, v in make_counters(stats, "event_code").items()})
        return result

    #for f in Parallel(n_jobs=cpu_count())(delayed(make_world_features)(game) for game in games):
    #    result.update(f)
    for game in games:
        result.update(make_world_features(game))
    # Raw event_id / event_code counters over the whole history.
    result.update(make_counters(history, "event_id"))
    result.update(make_counters(history, "event_code"))
    return result

In [7]:
def process_installations(train_labels, train, process_log):
    """Build one feature row per labelled assessment.

    For every row of ``train_labels`` the installation's events are sorted by
    timestamp and cut right after the first event that belongs to the labelled
    ``game_session`` / ``title``. The truncated log is passed to
    ``process_log``.

    Args:
        train_labels: frame with game_session, title, installation_id and
            accuracy_group columns.
        train: event frame with at least installation_id, game_session, title
            and timestamp columns.
        process_log: callable taking the truncated log and returning a dict of
            features.

    Returns:
        DataFrame with one row per label, the extracted features plus
        ``installation_id`` and ``accuracy_group``; missing values are filled
        with -1.

    Raises:
        IndexError: if a labelled session/title has no event in the log.
        KeyError: if a labelled installation_id is absent from ``train``.
    """
    result = []
    train = train.sort_values("timestamp")
    installations = train.groupby("installation_id")
    label_rows = train_labels[["game_session", "title", "installation_id", "accuracy_group"]].itertuples()
    for i, game_session, title, installation_id, accuracy_group in tqdm(label_rows, total=len(train_labels), position=0):
        player_log = installations.get_group(installation_id).reset_index()
        player_log = player_log.sort_values("timestamp")
        # Locate the cut by position. The index labels were assigned before the
        # sort above, so they need not equal row positions if that sort reorders
        # rows with equal timestamps.
        is_target = ((player_log.game_session == game_session) & (player_log.title == title)).to_numpy()
        log_length = np.flatnonzero(is_target)[0]
        player_log = player_log.iloc[:(log_length + 1)]
        #player_log["target_game_session"] = game_session
        features = process_log(player_log)
        features["installation_id"] = installation_id
        features["accuracy_group"] = accuracy_group
        result.append(features)
    return pd.DataFrame(data=result).fillna(-1)

In [8]:
# Turn pandas' chained-assignment warning into an exception.
pd.set_option('mode.chained_assignment', 'raise')
# The extraction call is disabled; the feature table is read from a CSV instead
# (presumably the saved result of this call - confirm it matches the current code).
#train_features = process_installations(train_labels, train, process_log)
train_features = pd.read_csv(DATA_DIR/"processed/train_features_final.csv")

In [9]:
# Feature names that get_duplicate_features always adds to its removal list; a
# feature found highly correlated with one of them is removed as well.
# NOTE(review): membership is tested by exact name, so prefixed columns such as
# "<game>_session_id_count" are not matched by these entries - confirm intent.
bad_features = ["session_id_count", "event_id_count", "mean_round", "n_actions", "n_activities"]

In [10]:
def get_duplicate_features(features, bad_features):
    """Collect features to drop because they nearly duplicate another feature.

    Every ordered pair of feature columns (excluding ``installation_id``,
    ``game_session`` and ``accuracy_group``) that are both still kept is
    compared. When their Pearson correlation exceeds 0.995 the second feature
    is marked for removal, and the first one too if either of the two is
    listed in ``bad_features``. All ``bad_features`` are marked at the end. A
    one-line summary is printed.

    Args:
        features: frame of numeric feature columns.
        bad_features: names that are always removed.

    Returns:
        List of feature names to remove (unordered).
    """
    excluded = ("installation_id", "game_session", "accuracy_group")
    feature_names = [name for name in features.columns if name not in excluded]
    to_remove = set()
    for feat_a in tqdm(feature_names):
        for feat_b in feature_names:
            # Skip self-comparison and anything already marked for removal.
            if feat_a == feat_b or feat_a in to_remove or feat_b in to_remove:
                continue
            corr = np.corrcoef(features[feat_a], features[feat_b])[0][1]
            # `not corr > ...` also skips NaN correlations, as the original `if` did.
            if not corr > 0.995:
                continue
            to_remove.add(feat_b)
            if feat_b in bad_features or feat_a in bad_features:
                to_remove.add(feat_a)
    to_remove.update(bad_features)
    share = round(len(to_remove) / len(feature_names) * 100, 2)
    print(f"{len(to_remove)} features were removed ({share}% of all features)")
    return list(to_remove)
    
# Disabled feature-selection step. Presumably the hard-coded useful_features
# list below was produced by it (TODO confirm); uncomment to recompute.
# duplicate_features = get_duplicate_features(train_features, bad_features)
# useful_features = [f for f in train_features.columns if f not in duplicate_features]
# useful_features
useful_features=['title', 'world', 'n_games', 'event_code_count', 'title_count', 'world_title_count', 'n_correct_Scrub-A-Dub', 'n_incorrect_Scrub-A-Dub', 'global_ratio_Scrub-A-Dub', 'scrubadub_performance', 'scrubadub_complete', 'n_correct_Scrub-A-Dub_0', 'n_incorrect_Scrub-A-Dub 0', 'ratio_Scrub-A-Dub_0', 'n_correct_Scrub-A-Dub_1', 'n_incorrect_Scrub-A-Dub_1', 'ratio_Scrub-A-Dub_1', 'Scrub-A-Dub_session_id_count', 'Scrub-A-Dub_n_actions', 'Scrub-A-Dub_4a09ace1', 'Scrub-A-Dub_5a848010', 'Scrub-A-Dub_5c3d2b2f', 'Scrub-A-Dub_6d90d394', 'Scrub-A-Dub_7040c096', 'Scrub-A-Dub_c1cac9a2', 'Scrub-A-Dub_cf82af56', 'Scrub-A-Dub_f71c4741', 'n_correct_All Star Sorting', 'n_incorrect_All Star Sorting', 'global_ratio_All Star Sorting', 'allstarsorting_performance', 'allstarsorting_precision', 'n_correct_All Star Sorting_0', 'n_incorrect_All Star Sorting 0', 'ratio_All Star Sorting_0', 'n_correct_All Star Sorting_1', 'n_incorrect_All Star Sorting 1', 'ratio_All Star Sorting_1', 'All Star Sorting_event_code_count', 'All Star Sorting_session_id_count', 'All Star Sorting_n_actions', 'All Star Sorting_1cc7cfca', 'All Star Sorting_2c4e6db0', 'All Star Sorting_2dc29e21', 'All Star Sorting_363d3849', 'All Star Sorting_4b5efe37', 'All Star Sorting_587b5989', 'All Star Sorting_6043a2b4', 'All Star Sorting_b120f2ac', 'All Star Sorting_d02b7a8e', 'n_correct_Air Show', 'n_incorrect_Air Show', 'global_ratio_Air Show', 'airshow_performance', 'airshow_precision', 'airshow_complete', 'n_correct_Air Show_0', 'n_incorrect_Air Show 0', 'ratio_Air Show_0', 'n_correct_Air Show_1', 'n_incorrect_Air Show_1', 'ratio_Air Show_1', 'Air Show_event_code_count', 'Air Show_session_id_count', 'Air Show_n_actions', 'Air Show_06372577', 'Air Show_14de4c5d', 'Air Show_1575e76c', 'Air Show_15ba1109', 'Air Show_28f975ea', 'Air Show_58a0de5c', 'Air Show_65abac75', 'Air Show_7423acbc', 'Air Show_a1bbe385', 'Air Show_bcceccc6', 'Air Show_d88ca108', 'Air Show_dcb55a27', 'n_correct_Crystals Rule', 'n_incorrect_Crystals 
Rule', 'global_ratio_Crystals Rule', 'crystalsrule_performance', 'crystalsrule_precision', 'n_correct_Crystals Rule_0', 'n_incorrect_Crystals Rule 0', 'ratio_Crystals Rule_0', 'n_correct_Crystals Rule_1', 'n_incorrect_Crystals Rule_1', 'ratio_Crystals Rule_1', 'Crystals Rule_event_code_count', 'Crystals Rule_session_id_count', 'Crystals Rule_n_actions', 'Crystals Rule_44cb4907', 'Crystals Rule_48349b14', 'Crystals Rule_5e3ea25a', 'Crystals Rule_86c924c4', 'Crystals Rule_cc5087a3', 'n_correct_Dino Drink', 'n_incorrect_Dino Drink', 'global_ratio_Dino Drink', 'dinodrink_performance', 'dinodrink_precision', 'dinodrink_complete', 'n_correct_Dino Drink_0', 'n_incorrect_Dino Drink_0', 'ratio_Dino Drink_0', 'n_correct_Dino Drink_1', 'n_incorrect_Dino Drink_1', 'ratio_Dino Drink_1', 'Dino Drink_event_code_count', 'Dino Drink_session_id_count', 'Dino Drink_n_actions', 'n_correct_Bubble Bath', 'n_incorrect_Bubble Bath', 'global_ratio_Bubble Bath', 'bubblebath_performance', 'bubblebath_complete', 'n_correct_Bubble Bath_0', 'n_incorrect_Bubble Bath_0', 'ratio_Bubble Bath_0', 'n_correct_Bubble Bath_1', 'n_incorrect_Bubble Bath_1', 'ratio_Bubble Bath_1', 'Bubble Bath_event_code_count', 'Bubble Bath_session_id_count', 'Bubble Bath_n_actions', 'n_correct_Dino Dive', 'n_incorrect_Dino Dive', 'global_ratio_Dino Dive', 'dinodive_performance', 'dinodive_precision', 'dinodive_complete', 'n_correct_Dino Dive_0', 'n_incorrect_Dino Dive_0', 'ratio_Dino Dive_0', 'n_correct_Dino Dive_1', 'n_incorrect_Dino Dive_1', 'ratio_Dino Dive_1', 'Dino Dive_event_code_count', 'Dino Dive_session_id_count', 'Dino Dive_n_actions', 'n_correct_Chow Time', 'n_incorrect_Chow Time', 'global_ratio_Chow Time', 'chowtime_optimum', 'chowtime_complete', 'n_correct_Chow Time_0', 'n_incorrect_Chow Time_0', 'ratio_Chow Time_0', 'n_correct_Chow Time_1', 'n_incorrect_Chow Time_1', 'ratio_Chow Time_1', 'Chow Time_event_code_count', 'Chow Time_session_id_count', 'Chow Time_n_actions', 'n_correct_Pan Balance', 
'n_incorrect_Pan Balance', 'global_ratio_Pan Balance', 'panbalance_optimum', 'panbalance_complete', 'n_correct_Pan Balance_0', 'n_incorrect_Pan Balance_0', 'ratio_Pan Balance_0', 'n_correct_Pan Balance_1', 'n_incorrect_Pan Balance_1', 'ratio_Pan Balance_1', 'Pan Balance_event_code_count', 'Pan Balance_session_id_count', 'Pan Balance_n_actions', 'n_correct_Happy Camel', 'n_incorrect_Happy Camel', 'global_ratio_Happy Camel', 'happycamel_optimum', 'happycamel_detections', 'happycamel_complete', 'n_correct_Happy Camel_0', 'n_incorrect_Happy Camel_0', 'ratio_Happy Camel_0', 'n_correct_Happy Camel_1', 'n_incorrect_Happy Camel_1', 'ratio_Happy Camel_1', 'Happy Camel_event_code_count', 'Happy Camel_session_id_count', 'Happy Camel_n_actions', 'n_correct_Leaf Leader', 'n_incorrect_Leaf Leader', 'global_ratio_Leaf Leader', 'leafleader_optimum', 'leafleader_complete', 'n_correct_Leaf Leader_0', 'n_incorrect_Leaf Leader_0', 'ratio_Leaf Leader_0', 'n_correct_Leaf Leader_1', 'n_incorrect_Leaf Leader_1', 'ratio_Leaf Leader_1', 'Leaf Leader_event_code_count', 'Leaf Leader_session_id_count', 'Leaf Leader_n_actions', 'world_n_correct_Scrub-A-Dub', 'world_n_incorrect_Scrub-A-Dub', 'world_global_ratio_Scrub-A-Dub', 'world_scrubadub_performance', 'world_n_correct_Scrub-A-Dub_0', 'world_n_incorrect_Scrub-A-Dub_0', 'world_ratio_Scrub-A-Dub_0', 'world_n_correct_Scrub-A-Dub_1', 'world_n_incorrect_Scrub-A-Dub_1', 'world_ratio_Scrub-A-Dub_1', 'world_Scrub-A-Dub_event_code_count', 'world_Scrub-A-Dub_session_id_count', 'world_Scrub-A-Dub_n_actions', 'world_n_correct_All Star Sorting', 'world_n_incorrect_All Star Sorting', 'world_global_ratio_All Star Sorting', 'world_allstarsorting_performance', 'world_allstarsorting_precision', 'world_n_correct_All Star Sorting_0', 'world_n_incorrect_All Star Sorting 0', 'world_ratio_All Star Sorting_0', 'world_n_correct_All Star Sorting_1', 'world_n_incorrect_All Star Sorting 1', 'world_ratio_All Star Sorting_1', 'world_All Star Sorting_event_code_count', 
'world_All Star Sorting_session_id_count', 'world_All Star Sorting_n_actions', 'world_All Star Sorting_1cc7cfca', 'world_All Star Sorting_2c4e6db0', 'world_All Star Sorting_2dc29e21', 'world_All Star Sorting_363d3849', 'world_All Star Sorting_4b5efe37', 'world_All Star Sorting_587b5989', 'world_All Star Sorting_6043a2b4', 'world_All Star Sorting_b120f2ac', 'world_All Star Sorting_d02b7a8e', 'world_n_correct_Air Show', 'world_n_incorrect_Air Show', 'world_global_ratio_Air Show', 'world_airshow_performance', 'world_airshow_precision', 'world_airshow_complete', 'world_n_correct_Air Show_0', 'world_n_incorrect_Air Show 0', 'world_ratio_Air Show_0', 'world_n_correct_Air Show_1', 'world_n_incorrect_Air Show_1', 'world_ratio_Air Show_1', 'world_Air Show_event_code_count', 'world_Air Show_session_id_count', 'world_Air Show_n_actions', 'world_Air Show_06372577', 'world_Air Show_14de4c5d', 'world_Air Show_1575e76c', 'world_Air Show_15ba1109', 'world_Air Show_28f975ea', 'world_Air Show_58a0de5c', 'world_Air Show_65abac75', 'world_Air Show_7423acbc', 'world_Air Show_a1bbe385', 'world_Air Show_bcceccc6', 'world_Air Show_d88ca108', 'world_Air Show_dcb55a27', 'world_n_correct_Crystals Rule', 'world_n_incorrect_Crystals Rule', 'world_global_ratio_Crystals Rule', 'world_crystalsrule_performance', 'world_crystalsrule_precision', 'world_n_correct_Crystals Rule_0', 'world_n_incorrect_Crystals Rule 0', 'world_ratio_Crystals Rule_0', 'world_n_correct_Crystals Rule_1', 'world_n_incorrect_Crystals Rule_1', 'world_ratio_Crystals Rule_1', 'world_Crystals Rule_event_code_count', 'world_Crystals Rule_session_id_count', 'world_Crystals Rule_n_actions', 'world_Crystals Rule_44cb4907', 'world_Crystals Rule_48349b14', 'world_Crystals Rule_5e3ea25a', 'world_Crystals Rule_86c924c4', 'world_Crystals Rule_cc5087a3', 'world_n_correct_Dino Drink', 'world_n_incorrect_Dino Drink', 'world_global_ratio_Dino Drink', 'world_dinodrink_performance', 'world_dinodrink_precision', 'world_n_correct_Dino Drink_0', 
'world_n_incorrect_Dino Drink_0', 'world_ratio_Dino Drink_0', 'world_n_correct_Dino Drink_1', 'world_n_incorrect_Dino Drink_1', 'world_ratio_Dino Drink_1', 'world_Dino Drink_event_code_count', 'world_Dino Drink_session_id_count', 'world_Dino Drink_n_actions', 'world_n_correct_Bubble Bath', 'world_n_incorrect_Bubble Bath', 'world_global_ratio_Bubble Bath', 'world_bubblebath_performance', 'world_bubblebath_complete', 'world_n_correct_Bubble Bath_0', 'world_n_incorrect_Bubble Bath_0', 'world_ratio_Bubble Bath_0', 'world_n_correct_Bubble Bath_1', 'world_n_incorrect_Bubble Bath_1', 'world_ratio_Bubble Bath_1', 'world_Bubble Bath_event_code_count', 'world_Bubble Bath_session_id_count', 'world_Bubble Bath_n_actions', 'world_n_correct_Dino Dive', 'world_n_incorrect_Dino Dive', 'world_global_ratio_Dino Dive', 'world_dinodive_performance', 'world_dinodive_precision', 'world_dinodive_complete', 'world_n_correct_Dino Dive_0', 'world_n_incorrect_Dino Dive_0', 'world_ratio_Dino Dive_0', 'world_n_correct_Dino Dive_1', 'world_n_incorrect_Dino Dive_1', 'world_ratio_Dino Dive_1', 'world_Dino Dive_event_code_count', 'world_Dino Dive_session_id_count', 'world_Dino Dive_n_actions', 'world_n_correct_Chow Time', 'world_n_incorrect_Chow Time', 'world_global_ratio_Chow Time', 'world_chowtime_optimum', 'world_chowtime_complete', 'world_n_correct_Chow Time_0', 'world_n_incorrect_Chow Time_0', 'world_ratio_Chow Time_0', 'world_n_correct_Chow Time_1', 'world_n_incorrect_Chow Time_1', 'world_ratio_Chow Time_1', 'world_Chow Time_event_code_count', 'world_Chow Time_session_id_count', 'world_Chow Time_n_actions', 'world_n_correct_Pan Balance', 'world_n_incorrect_Pan Balance', 'world_global_ratio_Pan Balance', 'world_panbalance_optimum', 'world_panbalance_complete', 'world_n_correct_Pan Balance_0', 'world_n_incorrect_Pan Balance_0', 'world_ratio_Pan Balance_0', 'world_n_correct_Pan Balance_1', 'world_n_incorrect_Pan Balance_1', 'world_ratio_Pan Balance_1', 'world_Pan Balance_event_code_count', 
'world_Pan Balance_session_id_count', 'world_Pan Balance_n_actions', 'world_n_correct_Happy Camel', 'world_n_incorrect_Happy Camel', 'world_global_ratio_Happy Camel', 'world_happycamel_optimum', 'world_happycamel_detections', 'world_happycamel_complete', 'world_n_correct_Happy Camel_0', 'world_n_incorrect_Happy Camel_0', 'world_ratio_Happy Camel_0', 'world_n_correct_Happy Camel_1', 'world_n_incorrect_Happy Camel_1', 'world_ratio_Happy Camel_1', 'world_Happy Camel_event_code_count', 'world_Happy Camel_session_id_count', 'world_Happy Camel_n_actions', 'world_n_correct_Leaf Leader', 'world_n_incorrect_Leaf Leader', 'world_global_ratio_Leaf Leader', 'world_leafleader_optimum', 'world_leafleader_complete', 'world_n_correct_Leaf Leader_0', 'world_n_incorrect_Leaf Leader_0', 'world_ratio_Leaf Leader_0', 'world_n_correct_Leaf Leader_1', 'world_n_incorrect_Leaf Leader_1', 'world_ratio_Leaf Leader_1', 'world_Leaf Leader_event_code_count', 'world_Leaf Leader_session_id_count', 'world_Leaf Leader_n_actions', '1375ccb7', '17113b36', '25fa8af4', '28ed704e', '3bfd1a65', '4a4c3d21', '51102b85', '5f0eb72c', '6c930e6e', '7da34a02', 'a16a373e', 'a1e4395d', 'a5be6304', 'c7128948', 'ec138c1c', 'f56e0afc', 'fbaf3456', 2000, 2010, 2020, 2025, 2035, 2060, 2070, 2080, 2081, 2083, 3010, 3020, 3021, 4010, 4020, 4025, 4030, 4035, 4040, 4070, 4090, 4100, 4110, 'installation_id', 'accuracy_group', 'n_incorrect_Air Show_0', 'n_incorrect_Crystals Rule_0', 'world_n_incorrect_Air Show_0', 'world_n_incorrect_Crystals Rule_0', 'n_incorrect_Scrub-A-Dub 1', 'n_incorrect_Dino Drink 0', 'Dino Drink_1996c610', 'Dino Drink_4d6737eb', 'Dino Drink_51311d7a', 'Dino Drink_5be391b5', 'Dino Drink_6c517a88', 'Dino Drink_74e5f8a7', 'Dino Drink_792530f8', 'Dino Drink_7f0836bf', 'Dino Drink_c6971acf', 'Dino Drink_f806dc10', 'n_incorrect_Bubble Bath 0', 'Bubble Bath_0413e89d', 'Bubble Bath_1340b8d7', 'Bubble Bath_1beb320a', 'Bubble Bath_1cf54632', 'Bubble Bath_3bb91dda', 'Bubble Bath_55115cbd', 'Bubble 
Bath_5859dfb6', 'Bubble Bath_857f21c0', 'Bubble Bath_8d84fa81', 'Bubble Bath_99abe2bb', 'Bubble Bath_99ea62f3', 'Bubble Bath_a0faea5d', 'Bubble Bath_ecc36b7f', 4045, 4095, 'n_incorrect_Air Show 1', 'Air Show_d2659ab4', 'n_incorrect_Crystals Rule 1', 'world_n_incorrect_Air Show 1', 'world_Air Show_d2659ab4', 'world_n_incorrect_Crystals Rule 1', '160654fd', 2075, 'n_incorrect_All Star Sorting_0', 'n_incorrect_All Star Sorting_1', 'Bubble Bath_85de926c', 'n_incorrect_Dino Dive 0', 'Dino Dive_00c73085', 'Dino Dive_28a4eb9a', 'Dino Dive_29bdd9ba', 'Dino Dive_6088b756', 'Dino Dive_709b1251', 'Dino Dive_76babcde', 'Dino Dive_7d5c30a2', 'Dino Dive_832735e1', 'Dino Dive_87d743c1', 'Dino Dive_c0415e5c', 'n_incorrect_Chow Time 0', 'Chow Time_0330ab6a', 'Chow Time_0d1da71f', 'Chow Time_47026d5f', 'Chow Time_4ef8cdd3', 'Chow Time_63f13dd7', 'Chow Time_7372e1a5', 'Chow Time_7ec0c298', 'Chow Time_cfbd47c8', 'Chow Time_d185d3ea', 'Chow Time_f93fc684', 'world_n_incorrect_Scrub-A-Dub 0', 'world_Scrub-A-Dub_2b9272f4', 'world_Scrub-A-Dub_4a09ace1', 'world_Scrub-A-Dub_5a848010', 'world_Scrub-A-Dub_5c3d2b2f', 'world_Scrub-A-Dub_6d90d394', 'world_Scrub-A-Dub_7040c096', 'world_Scrub-A-Dub_ac92046e', 'world_Scrub-A-Dub_c1cac9a2', 'world_Scrub-A-Dub_cf82af56', 'world_Scrub-A-Dub_f71c4741', 'world_n_incorrect_All Star Sorting_0', 'world_n_incorrect_All Star Sorting_1', 'world_n_incorrect_Bubble Bath 0', 'world_Bubble Bath_0413e89d', 'world_Bubble Bath_1340b8d7', 'world_Bubble Bath_1beb320a', 'world_Bubble Bath_1cf54632', 'world_Bubble Bath_3bb91dda', 'world_Bubble Bath_55115cbd', 'world_Bubble Bath_857f21c0', 'world_Bubble Bath_85de926c', 'world_Bubble Bath_8d84fa81', 'world_Bubble Bath_99abe2bb', 'world_Bubble Bath_99ea62f3', 'world_Bubble Bath_a0faea5d', 'world_Bubble Bath_ecc36b7f', 'world_n_incorrect_Dino Dive 0', 'world_Dino Dive_00c73085', 'world_Dino Dive_28a4eb9a', 'world_Dino Dive_29bdd9ba', 'world_Dino Dive_6088b756', 'world_Dino Dive_709b1251', 'world_Dino Dive_76babcde', 
'world_Dino Dive_7d5c30a2', 'world_Dino Dive_832735e1', 'world_Dino Dive_87d743c1', 'world_Dino Dive_c0415e5c', 'n_incorrect_Pan Balance 0', 'Pan Balance_0086365d', 'Pan Balance_6cf7d25c', 'Pan Balance_9c5ef70c', 'Pan Balance_a592d54e', '28520915', '2dcad279', '30614231', '392e14df', '3ee399c3', '532a2afb', '90d848e0', 'n_incorrect_Scrub-A-Dub_0', '0d18d96c', 'Pan Balance_2a444e03', 'Pan Balance_804ee27f', 'Pan Balance_907a054b', 'Pan Balance_a5e9da97', 'Pan Balance_bc8f2793', 'Pan Balance_e7561dd2', 'Pan Balance_f3cd5473', 'n_incorrect_Happy Camel 0', 'Happy Camel_1af8be29', 'Happy Camel_3bb91ced', 'Happy Camel_3d8c61b0', 'Happy Camel_69fdac0a', 'Happy Camel_6bf9e3e1', 'Happy Camel_8af75982', 'Happy Camel_a7640a16', 'Happy Camel_a8a78786', 'Happy Camel_abc5811c', 'Happy Camel_c2baf0bd', 'Happy Camel_d51b1749', 'Happy Camel_d9c005dd', 'world_n_incorrect_Chow Time 0', 'world_Chow Time_0330ab6a', 'world_Chow Time_0d1da71f', 'world_Chow Time_4ef8cdd3', 'world_Chow Time_63f13dd7', 'world_Chow Time_7372e1a5', 'world_Chow Time_7d093bf9', 'world_Chow Time_7ec0c298', 'world_Chow Time_cfbd47c8', 'world_Chow Time_d185d3ea', 'world_Chow Time_f93fc684', 'world_n_incorrect_Pan Balance 0', 'world_Pan Balance_0086365d', 'world_Pan Balance_15f99afc', 'world_Pan Balance_2a444e03', 'world_Pan Balance_907a054b', 'world_Pan Balance_9c5ef70c', 'world_Pan Balance_a592d54e', 'world_Pan Balance_a5e9da97', 'world_Pan Balance_bc8f2793', 'world_Pan Balance_e7561dd2', 'world_Pan Balance_f3cd5473', 'world_n_incorrect_Happy Camel 0', 'world_Happy Camel_1af8be29', 'world_Happy Camel_3bb91ced', 'world_Happy Camel_3d8c61b0', 'world_Happy Camel_69fdac0a', 'world_Happy Camel_6bf9e3e1', 'world_Happy Camel_8af75982', 'world_Happy Camel_a7640a16', 'world_Happy Camel_a8a78786', 'world_Happy Camel_abc5811c', 'world_Happy Camel_c2baf0bd', 'world_Happy Camel_d51b1749', 'world_Happy Camel_d9c005dd', '5c2f29ca', '5e109ec3', '65a38bf7', '795e4a37', '828e68f9', 'a8876db3', 'b2e5b0f1', 'd122731b', 'All Star 
Sorting_b1d5101d', 'Crystals Rule_a1192f43', 'world_All Star Sorting_b1d5101d', 'world_Crystals Rule_a1192f43', 'Air Show_6f4bd64e', 'n_incorrect_Chow Time 1', 'Chow Time_9e6b7fb5', 'Happy Camel_37db1c2f', 'Happy Camel_c189aaf2', 'n_incorrect_Leaf Leader 0', 'Leaf Leader_262136f4', 'Leaf Leader_29f54413', 'Leaf Leader_2a512369', 'Leaf Leader_3afde5dd', 'Leaf Leader_67aa2ada', 'Leaf Leader_763fc34e', 'Leaf Leader_7dfe6d8a', 'Leaf Leader_86ba578b', 'Leaf Leader_8ac7cce4', 'Leaf Leader_f32856e4', 'Leaf Leader_fd20ea40', 'world_n_incorrect_Chow Time 1', 'world_Chow Time_47026d5f', 'world_Chow Time_9e6b7fb5', 'world_Happy Camel_37db1c2f', 'world_Happy Camel_c189aaf2', 'world_n_incorrect_Leaf Leader 0', 'world_Leaf Leader_262136f4', 'world_Leaf Leader_29f54413', 'world_Leaf Leader_2a512369', 'world_Leaf Leader_3afde5dd', 'world_Leaf Leader_67aa2ada', 'world_Leaf Leader_763fc34e', 'world_Leaf Leader_7dfe6d8a', 'world_Leaf Leader_86ba578b', 'world_Leaf Leader_8ac7cce4', 'world_Leaf Leader_f32856e4', 'world_Leaf Leader_fd20ea40', 'Pan Balance_e080a381', '0db6d71d', '155f62a4', '3ccd3f02', '3d0b9317', '562cec5f', '93b353f2', 'a8efe47b', 'bd612267', 'df4fe8b6', 'world_Pan Balance_e080a381', '070a5291', '3393b68b', '45d01abe', '8fee50e2', 'f6947f54', '2b058fe3', '91561152', '9ce586dd', '9d4e7b25', 'acf5c23f', 'cb1178ad', 'world_n_incorrect_Dino Drink 0', 'world_Dino Drink_1996c610', 'world_Dino Drink_4d6737eb', 'world_Dino Drink_51311d7a', 'world_Dino Drink_5be391b5', 'world_Dino Drink_6c517a88', 'world_Dino Drink_74e5f8a7', 'world_Dino Drink_792530f8', 'world_Dino Drink_7f0836bf', 'world_Dino Drink_c6971acf', 'world_Dino Drink_f806dc10', 'world_Bubble Bath_5859dfb6', 'n_incorrect_Dino Dive 1', '3d63345e', 'Leaf Leader_53c6e11a', 'Scrub-A-Dub_92687c59', 'n_incorrect_Dino Drink 1', 'Dino Drink_6f8106d9', 'Dino Drink_9ed8f6da', 'n_incorrect_Happy Camel 1', 'Happy Camel_a2df0760', '04df9b66', '3edf6747', 'a76029ee', 'world_n_incorrect_Dino Drink 1', 'world_Dino Drink_6f8106d9', 
'world_Dino Drink_9ed8f6da', 'world_n_incorrect_Happy Camel 1', 'world_Happy Camel_a2df0760', 'world_n_incorrect_Scrub-A-Dub 1', '31973d56', '4e5fc6f5', 'n_incorrect_Leaf Leader 1', 'Leaf Leader_3b2048ee', 'world_n_incorrect_Leaf Leader 1', 'world_Leaf Leader_3b2048ee', 'world_Leaf Leader_53c6e11a', '222660ff', '3afb49e6', '5348fd84', 'd38c2fd7', '731c0cbe', 'n_incorrect_Pan Balance 1', 'world_n_incorrect_Pan Balance 1', 'n_incorrect_Bubble Bath 1', 'world_n_incorrect_Bubble Bath 1', 'world_n_incorrect_Dino Dive 1', 'Happy Camel_46b50ba8', 'Chow Time_19967db1', 'world_Chow Time_19967db1', 'Happy Camel_05ad839b', 'world_Happy Camel_05ad839b', 'world_Happy Camel_46b50ba8', 'world_Air Show_6f4bd64e', 'eb2c19cd', 'Crystals Rule_93edfe2e', 'world_Crystals Rule_93edfe2e', 'Dino Dive_d3640339', 'world_Dino Dive_d3640339', '77c76bc5', 'Bubble Bath_29a42aea', 4080, 'Chow Time_6f445b57', 'world_Chow Time_6f445b57', 'All Star Sorting_26a5a3dd', 'world_All Star Sorting_26a5a3dd', 'Bubble Bath_6aeafed4', 'world_Bubble Bath_6aeafed4', '13f56524', 'world_Scrub-A-Dub_92687c59', 'bfc77bd6', 'world_Bubble Bath_29a42aea', 'Dino Drink_ab4ec3a4', 'world_Dino Drink_ab4ec3a4', 'Happy Camel_0ce40006', '9554a50b', 'Dino Dive_119b5b02', 'world_Dino Dive_119b5b02', 'ecc6157f', '6077cc36', 'Pan Balance_e4d32835', 'world_Pan Balance_e4d32835', 'Leaf Leader_01ca3a3c', 'world_Leaf Leader_01ca3a3c', 'world_Happy Camel_0ce40006']
useful_features

In [11]:
# useful_features mixes str names with int entries (e.g. 2000), so both the
# selection list and the frame's column labels are converted to str before
# they are matched against each other.
useful_features = list(map(str, useful_features))
train_features.columns = list(map(str, train_features.columns))

In [12]:
# Keep only the selected columns; .copy() makes the result an independent frame.
train_features = train_features.loc[:, useful_features].copy()

In [13]:
def lgb_quad_kappa(preds, true):
    """Score regression outputs with ``quad_kappa`` after snapping them to classes.

    ``true`` must expose ``get_label()``; ``preds`` are raw numeric predictions.
    Predictions are rounded to the nearest integer and clamped to the range 0..3
    before scoring.

    Returns the tuple ``("quad_kappa", score, True)``.
    NOTE(review): this looks like the (name, value, is_higher_better) shape of a
    LightGBM custom eval function — confirm against the caller.
    """
    labels = true.get_label()
    classes = np.minimum(3, np.maximum(0, np.rint(preds)))
    return "quad_kappa", quad_kappa(labels, classes), True


def make_features(df):
    """Split a feature frame into model inputs and the target.

    Returns ``(X, y)`` where ``X`` is ``df`` without the ``installation_id`` and
    ``accuracy_group`` columns and ``y`` is the ``accuracy_group`` column as an
    array. ``df`` itself is not modified.
    """
    predictors = df.drop(columns=["installation_id", "accuracy_group"])
    target = df.accuracy_group.values
    return predictors, target

def make_features_wrapper(train, test):
    """Apply the same X/y split to a train frame and a test frame.

    Returns ``((X_train, y_train), (X_test, y_test))``. Each ``X`` is the frame
    without ``installation_id`` and ``accuracy_group``; each ``y`` is the
    ``accuracy_group`` column as an array.
    """
    non_feature_columns = ["installation_id", "accuracy_group"]

    def split_xy(frame):
        return frame.drop(non_feature_columns, axis=1), frame.accuracy_group.values

    train_split = split_xy(train)
    test_split = split_xy(test)
    return train_split, test_split


def make_predictions(model, x_test_all, y_test):
    """Preprocess test features with the model and predict on them.

    ``model.preprocess_features`` is called with the notebook-level
    ``useful_features``, ``assessments`` and ``worlds``. Any
    ``installation_id`` / ``accuracy_group`` columns still present afterwards
    are dropped before ``model.predict`` is called.

    Returns ``(preds, y_test)``: the predictions exactly as the model produced
    them (no rounding or clipping is applied here) and ``y_test`` unchanged.
    """
    processed = model.preprocess_features(
        x_test_all,
        useful_features=useful_features,
        assessments=assessments,
        worlds=worlds,
    )
    leftover = [col for col in ("installation_id", "accuracy_group") if col in processed.columns]
    model_input = processed.drop(columns=leftover)
    return model.predict(model_input), y_test

In [14]:
# Build the training callable that is later bound with params via partial()
# and handed to cross_validate.
# NOTE(review): the first argument looks like a weights/checkpoint path —
# confirm against make_nn_trainer in dsb2019.models.nn.
train_nn = make_nn_trainer(
    DATA_DIR / "nn_regression.w8",
    useful_features,
    assessments,
    worlds,
)

In [15]:
# frac=1. keeps every installation id (shuffled with a fixed seed), so the
# subset built here currently contains all rows of train_features.
subtrain_installations = (
    pd.Series(train_features.installation_id.unique())
    .sample(frac=1., random_state=2019)
)
subtrain_features = train_features.loc[
    train_features.installation_id.isin(subtrain_installations.values)
].copy()
def check_hyperparams(params):
    """Hyperopt objective: cross-validated MSE of the NN for one parameter set.

    The parameters are printed, ``max_depth`` / ``num_leaves`` are cast to int
    when present, and the network is trained with ``train_nn`` inside a 3-split
    ``InstallationFold`` cross-validation over ``subtrain_features``.

    Parameters
    ----------
    params : dict
        Candidate hyper-parameters. The dict passed in is not modified.

    Returns
    -------
    dict
        ``{"loss": mean per-fold MSE, "status": STATUS_OK, "params": params}``
        where ``params`` is the (int-cast) copy that was actually used.
    """
    print(params)
    # Work on a copy so the caller's dict is left untouched.
    params = dict(params)
    # These may arrive as floats from the sampler; the cast is a no-op for ints.
    for int_key in ("max_depth", "num_leaves"):
        if int_key in params:
            params[int_key] = int(params[int_key])

    train_baseline_with_params = partial(train_nn, params=params)
    cv = InstallationFold(n_splits=3)
    # cross_validate returns (predictions, models), as the final cross-validation
    # cell of this notebook unpacks it; only the (pred, true) pairs are scored.
    predictions, _models = cross_validate(
        subtrain_features,
        subtrain_features.accuracy_group,
        make_features_wrapper,
        train_baseline_with_params,
        make_predictions,
        cv=cv,
    )
    fold_losses = [mean_squared_error(true, pred) for pred, true in predictions]
    return {
        "loss": np.mean(fold_losses),
        "status": STATUS_OK,
        "params": params,
    }


def tune(check_params, learning_rate, param_space, n_tries=25):
    """Run a TPE search with hyperopt and return the best trial's parameters.

    ``param_space`` is copied and extended with a fixed ``learning_rate`` entry
    before being passed to ``fmin``; the caller's mapping is left unchanged.
    ``check_params`` is the objective and must return a dict that contains a
    ``"params"`` entry, which is what this function returns for the best trial.
    ``n_tries`` is the number of evaluations.
    """
    search_space = param_space.copy()
    search_space["learning_rate"] = learning_rate
    history = Trials()
    fmin(
        fn=check_params,
        space=search_space,
        algo=tpe.suggest,
        max_evals=n_tries,
        trials=history,
    )
    return history.best_trial["result"]["params"]


def find_learning_rate(best_params, train_features):
    """Run a short learning-rate sweep for the network described by ``best_params``.

    A copy of ``best_params`` is extended with a fresh ``StandardScaler`` and an
    empty ``file_path`` and used to build ``NN``. The features are scaled, the
    underlying model is compiled with MSE loss and plain SGD, and it is fitted
    for 2 epochs (batch size 128) with an ``LRFinder(min_lr=1e-4, max_lr=0.2)``
    callback attached.

    Parameters
    ----------
    best_params : dict
        Keyword arguments for ``NN``; not modified.
    train_features : pandas.DataFrame
        Frame accepted by ``make_features``.

    Returns
    -------
    LRFinder
        The callback instance that was attached to the fit, so the caller can
        inspect whatever it recorded (previously it was discarded and the
        function returned ``None``).
    """
    lr_finder = LRFinder(min_lr=1e-4, max_lr=0.2)

    # Copy first: the scaler and file_path entries must not leak into the caller's dict.
    nn_kwargs = best_params.copy()
    nn_kwargs["scaler"] = StandardScaler()
    nn_kwargs["file_path"] = ""

    nn = NN(**nn_kwargs)
    X, y = make_features(train_features)
    X = nn.scaler.fit_transform(X.values.astype(np.float64))

    nn.model.compile(loss='mse', optimizer='sgd')
    nn.model.fit(X, y, batch_size=128, callbacks=[lr_finder], epochs=2)
    return lr_finder

In [16]:
# Search space for the hyperopt run, kept for reference only: the tune() call
# at the bottom of this cell is disabled.
# param_space = {
#     "input_size": input_size,
#     "dense_size": hp.choice("dense_size", [50, 100, 150, 200, 250, 300, 350, 400]),
#     "dropout_prob": hp.uniform("dropout_prob", 1e-10, 1),
#     "n_layers": hp.choice("n_layers", [1, 2, 3, 4, 5, 6]),
# }
# Read the hyper-parameters from a JSON file instead of re-running the search.
# The relative path is resolved against the current working directory.
with open("../dsb2019/models/nn_params.json", "r") as f:
    best_params=json.load(f)
#best_params=tune(check_hyperparams, 0.055, param_space, n_tries=100)

In [17]:
# Show the hyper-parameters that were just read from the JSON file.
print(best_params)

{'dense_size': 350, 'dropout_prob': 0.4548952418902598, 'input_size': 751, 'learning_rate': 0.055, 'n_layers': 2}


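Hyper-parameters that were selected when tuning on 100% of the data (note: these are LightGBM parameters, not the NN parameters printed above):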

```
{'feature_fraction': 0.53,
 'lambda_l1': 0.922950554822482,
 'lambda_l2': 0.835047934936944,
 'learning_rate': 0.006,
 'max_depth': 11,
 'metric': 'rmse',
 'n_estimators': 10000,
 'num_leaves': 31,
 'objective': 'rmse',
 'random_state': 2019,
 'subsample': 0.9500000000000001}

```

In [18]:
# Override the loaded value (the parameters printed above show n_layers=2)
# before the cross-validation run below.
best_params["n_layers"]=3

In [19]:
# Cross-validate the network with the current best_params; cross_validate
# hands back the (pred, true) pairs and the fitted models.
predictions, models = cross_validate(
    train_features,
    train_features.accuracy_group,
    make_features_wrapper,
    partial(train_nn, params=best_params),
    make_predictions,
)
# Print the mean MSE followed by the individual values it was averaged from.
fold_mse = [mean_squared_error(true, pred) for pred, true in predictions]
print(np.mean(fold_mse), fold_mse)

Epoch 0 loss=4.052499771118164 val_loss=5.033443450927734
Epoch 50 loss=1.5126101970672607 val_loss=1.6504101753234863
Epoch 100 loss=1.161517858505249 val_loss=1.2475923299789429
Epoch 150 loss=1.0683653354644775 val_loss=1.1087310314178467
Epoch 200 loss=0.9962726831436157 val_loss=1.0952494144439697
Epoch 250 loss=0.9360323548316956 val_loss=1.0842304229736328
Epoch 300 loss=0.8883640170097351 val_loss=1.0817553997039795
Epoch 350 loss=0.845758318901062 val_loss=1.075546145439148
Epoch 400 loss=0.8190929293632507 val_loss=1.0651012659072876
Epoch 450 loss=0.7721925973892212 val_loss=1.0931164026260376
Epoch 500 loss=0.7568361759185791 val_loss=1.110696792602539
Epoch 550 loss=0.7198644876480103 val_loss=1.1431987285614014
Epoch 600 loss=0.6956369280815125 val_loss=1.142138123512268
Epoch 650 loss=0.6752492189407349 val_loss=1.1582838296890259
Epoch 700 loss=0.6252625584602356 val_loss=1.1467633247375488
Epoch 750 loss=0.6182378530502319 val_loss=1.1452360153198242
Epoch 800 loss=0.6

1.1123123577656406 [1.0691445438249711, 1.1142983185564332, 1.0937423490417977, 1.1182921561607022, 1.166084421244299] 3 450

1.0981274257994507 [1.048533588143293, 1.1020374621872964, 1.0528247220253835, 1.1244143435231289, 1.1628270131181515] 3

1.1097180246970066 [1.0694056513256083, 1.1316065740652044, 1.076980375272876, 1.112707761878452, 1.1578897609428926] 2

In [22]:
# Save every model returned by cross_validate under MODELS_DIR, one file per
# position in the list (suffix _fold_<index>).
for i, model in enumerate(models):
    model.save_model(
        str(MODELS_DIR.joinpath(f"nn_regressor.model_fold_{i}"))
    )