- use information from discussion (last assess title, last assess accuracy, first assess or not)

In [1]:
import pandas as pd
import numpy as np
import warnings
import datetime
from time import time
from tqdm import tqdm_notebook as tqdm
from collections import Counter
from sklearn import preprocessing
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score, f1_score, confusion_matrix, cohen_kappa_score
import lightgbm as lgb
from functools import partial
import json
import copy
import time
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows",1000)
np.set_printoptions(precision=8)
warnings.filterwarnings("ignore")
import random

In [2]:
def qwk(a1, a2):
    max_rat = 3
    a1 = np.asarray(a1, dtype=int)
    a2 = np.asarray(a2, dtype=int)
    hist1 = np.zeros((max_rat + 1, ))
    hist2 = np.zeros((max_rat + 1, ))
    o = 0
    for k in range(a1.shape[0]):
        i, j = a1[k], a2[k]
        hist1[i] += 1
        hist2[j] += 1
        o +=  (i - j) * (i - j)
    e = 0
    for i in range(max_rat + 1):
        for j in range(max_rat + 1):
            e += hist1[i] * hist2[j] * (i - j) * (i - j)
    e = e / a1.shape[0]
    return np.round(1 - o / e, 8)

In [3]:
class OptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y):
        X_p = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3])
        return -qwk(y, X_p)
    
    def fit(self, X, y, random_flg = False):
        loss_partial = partial(self._kappa_loss, X=X, y=y)
        if random_flg:
            initial_coef = [np.random.uniform(0.4,0.5), np.random.uniform(0.5,0.6), np.random.uniform(0.6,0.7)]
        else:
            initial_coef = [0.5, 1.5, 2.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead') #Powell
        
    def predict(self, X, coef):
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3])

    def coefficients(self):
        return self.coef_['x']

# install

In [4]:
%%time
train = pd.read_csv('../input/data-science-bowl-2019/train.csv')
train_labels = pd.read_csv('../input/data-science-bowl-2019/train_labels.csv')
test = pd.read_csv('../input/data-science-bowl-2019/test.csv')
#specs = pd.read_csv('../input/data-science-bowl-2019/specs.csv')
sample_submission = pd.read_csv('../input/data-science-bowl-2019/sample_submission.csv')

CPU times: user 1min 12s, sys: 10.3 s, total: 1min 23s
Wall time: 1min 23s


In [5]:
keep_id = train[train.type == "Assessment"][['installation_id']].drop_duplicates()
train = pd.merge(train, keep_id, on="installation_id", how="inner")
train = train[train.installation_id.isin(train_labels.installation_id.unique())]

In [6]:
assess_title = ['Mushroom Sorter (Assessment)', 'Bird Measurer (Assessment)',
       'Cauldron Filler (Assessment)', 'Cart Balancer (Assessment)', 'Chest Sorter (Assessment)']
def remove_index_calc(df):
    additional_remove_index = []
    for i, session in df.groupby('installation_id', sort=False):
        last_row = session.index[-1]
        session = session[session.title.isin(assess_title)]
        first_row = session.index[-1] + 1
        for j in range(first_row, last_row+1):
            additional_remove_index.append(j)                
    return additional_remove_index
additional_remove_index = remove_index_calc(train)
train = train[~train.index.isin(additional_remove_index)]

# Preprocess and Feature engineering

In [7]:
%%time
def encode_title(train, test):
    train['title_event_code'] = list(map(lambda x, y: str(x) + '_' + str(y), train['title'], train['event_code']))
    test['title_event_code'] = list(map(lambda x, y: str(x) + '_' + str(y), test['title'], test['event_code']))
    list_of_title_eventcode = sorted(list(set(train['title_event_code'].unique()).union(set(test['title_event_code'].unique()))))

    list_of_user_activities = sorted(list(set(train['title'].unique()).union(set(test['title'].unique()))))
    list_of_event_code = sorted(list(set(train['event_code'].unique()).union(set(test['event_code'].unique()))))
    list_of_worlds = sorted(list(set(train['world'].unique()).union(set(test['world'].unique()))))
    activities_map = dict(zip(list_of_user_activities, np.arange(len(list_of_user_activities))))
    activities_labels = dict(zip(np.arange(len(list_of_user_activities)), list_of_user_activities))
    activities_world = dict(zip(list_of_worlds, np.arange(len(list_of_worlds))))
    assess_titles = sorted(list(set(train[train['type'] == 'Assessment']['title'].value_counts().index).union(set(test[test['type'] == 'Assessment']['title'].value_counts().index))))

    train['title'] = train['title'].map(activities_map)
    test['title'] = test['title'].map(activities_map)
    train['world'] = train['world'].map(activities_world)
    test['world'] = test['world'].map(activities_world)

    win_code = dict(zip(activities_map.values(), (4100*np.ones(len(activities_map))).astype('int')))
    win_code[activities_map['Bird Measurer (Assessment)']] = 4110
    
    train['timestamp'] = pd.to_datetime(train['timestamp'])
    test['timestamp'] = pd.to_datetime(test['timestamp'])
    
    train["misses"] = train["event_data"].apply(lambda x: json.loads(x)["misses"] if "\"misses\"" in x else np.nan)
    test["misses"] = test["event_data"].apply(lambda x: json.loads(x)["misses"] if "\"misses\"" in x else np.nan)
    
    #train["level"] = train["event_data"].apply(lambda x: json.loads(x)["level"] if "\"level\"" in x else np.nan)
    #test["level"] = test["event_data"].apply(lambda x: json.loads(x)["level"] if "\"level\"" in x else np.nan)
    
    #train["round"] = train["event_data"].apply(lambda x: json.loads(x)["round"] if "\"round\"" in x else np.nan)
    #test["round"] = test["event_data"].apply(lambda x: json.loads(x)["round"] if "\"round\"" in x else np.nan)
    
    train["true"] = train["event_data"].apply(lambda x: 1 if "true" in x and "correct" in x else 0)
    test["true"] = test["event_data"].apply(lambda x: 1 if "true" in x and "correct" in x else 0)

    train["false"] = train["event_data"].apply(lambda x: 1 if "false" in x and "correct" in x else 0)
    test["false"] = test["event_data"].apply(lambda x: 1 if "false" in x and "correct" in x else 0)
    
    train['hour'] = train['timestamp'].dt.hour
    test['hour'] = test['timestamp'].dt.hour    
    train["morning"] = train["hour"].apply(lambda x: 1 if x>=5 and x <=10 else 0)
    test["morning"] = test["hour"].apply(lambda x: 1 if x>=5 and x <=10 else 0)
                
    return train, test, win_code, list_of_user_activities, list_of_event_code, activities_labels, assess_titles, activities_world, list_of_title_eventcode

train, test, win_code, list_of_user_activities, list_of_event_code, activities_labels, assess_titles, activities_world, list_of_title_eventcode = encode_title(train, test)

CPU times: user 59 s, sys: 3.88 s, total: 1min 2s
Wall time: 1min 2s


In [8]:
def get_data(user_sample, test_set=False):
    last_activity = 0
    user_activities_count = {'Clip':0, 'Activity': 0, 'Assessment': 0, 'Game':0}
    title_eventcode_count = {str(ele): 0 for ele in list_of_title_eventcode}
    user_world_count = {"world_"+str(wor) : 0 for wor in activities_world.values()}
    event_code_count = {str(ev): 0 for ev in list_of_event_code}
    title_count = {actv: 0 for actv in list_of_user_activities}
    morning_play = 0
    
    last_session_time_sec = 0
    all_assessments = []
    accuracy_groups = {"0":0, "1":0, "2":0, "3":0}
    accumulated_accuracy_group = 0
    accumulated_correct_attempts = 0 
    accumulated_uncorrect_attempts = 0 
    accumulated_actions = 0
    counter = 0
    time_first_activity = float(user_sample['timestamp'].values[0])
    miss = 0
    crys_game_true = 0; crys_game_false = 0; crys_game_true_bet = 0; crys_game_false_bet = 0
    tree_game_true = 0; tree_game_false = 0; tree_game_true_bet = 0; tree_game_false_bet = 0
    magma_game_true = 0; magma_game_false = 0; magma_game_true_bet = 0; magma_game_false_bet = 0
    crys_game_acc = []; tree_game_acc = []; magma_game_acc = []
    durations = []
    prev_assess_title = -999
    assess_count = 1
    last_accuracy = -999
    
    for i, session in user_sample.groupby('game_session', sort=False):      
        session_type = session['type'].iloc[0]
        session_title = session['title'].iloc[0]
        session_title_text = activities_labels[session_title]
        session_world = session["world"].iloc[0]
        game_session = session["game_session"].iloc[0]
        
        if session_type != 'Assessment':
            if session_type == "Game":
                true = session['true'].sum()
                false = session['false'].sum() 
                if session_world == activities_world["CRYSTALCAVES"]:
                    crys_game_true += true
                    crys_game_false += false
                    crys_game_acc.append(true / (true + false) if (true + false) != 0 else 0)
                elif session_world == activities_world["TREETOPCITY"]:
                    tree_game_true += true
                    tree_game_false += false
                    tree_game_acc.append(true / (true + false) if (true + false) != 0 else 0)
                elif session_world == activities_world["MAGMAPEAK"]:
                    magma_game_true += true
                    magma_game_false += false
                    magma_game_acc.append(true / (true + false) if (true + false) != 0 else 0)
                else:
                    pass
                
        if (session_type == 'Assessment') & (test_set or len(session)>1): # test set or session in train_label
            all_attempts = session.query(f'event_code == {win_code[session_title]}')
            true_attempts = all_attempts['event_data'].str.contains('true').sum() # true in target assess
            false_attempts = all_attempts['event_data'].str.contains('false').sum() # false in target assessment
            
            # from start of installation_id to the start of target assessment ------------------------
            features = user_activities_count.copy() # appearance of each type without duplicates
            features.update(title_eventcode_count.copy()) # apperance of combi of title and event_code
            features.update(user_world_count.copy()) # appearance of world with duplicates
            features.update(event_code_count.copy())
            features.update(title_count.copy())
            features['accumulated_correct_attempts'] = accumulated_correct_attempts
            features['accumulated_uncorrect_attempts'] = accumulated_uncorrect_attempts
            accumulated_correct_attempts += true_attempts 
            accumulated_uncorrect_attempts += false_attempts
            features["misses"] = miss
            features['accumulated_actions'] = accumulated_actions
            features["morning_play"] = morning_play

            if session_world == activities_world["CRYSTALCAVES"]:
                features["game_true"] = crys_game_true
                features["game_false"] = crys_game_false
                features['game_accuracy'] = crys_game_true / (crys_game_true + crys_game_false) if (crys_game_true + crys_game_false) != 0 else 0
                features["game_accuracy_std"] = np.std(crys_game_acc) if len(crys_game_acc) >=1 else 0
                features["last_game_acc"] = crys_game_acc[-1] if len(crys_game_acc) >=1 else 0
            elif session_world == activities_world["TREETOPCITY"]:
                features["game_true"] = tree_game_true
                features["game_false"] = tree_game_false
                features['game_accuracy'] = tree_game_true / (tree_game_true + tree_game_false) if (tree_game_true + tree_game_false) != 0 else 0
                features["game_accuracy_std"] = np.std(tree_game_acc) if len(tree_game_acc) >=1 else 0
                features["last_game_acc"] = tree_game_acc[-1] if len(tree_game_acc) >=1 else 0
            elif session_world == activities_world["MAGMAPEAK"]:
                features["game_true"] = magma_game_true
                features["game_false"] = magma_game_false
                features['game_accuracy'] = magma_game_true / (magma_game_true + magma_game_false) if (magma_game_true + magma_game_false) != 0 else 0
                features["game_accuracy_std"] = np.std(magma_game_acc) if len(magma_game_acc) >=1 else 0
                features["last_game_acc"] = magma_game_acc[-1] if len(magma_game_acc) >=1 else 0
            
            features['installation_id'] = session['installation_id'].iloc[-1]
            features['session_title'] = session_title
            features['game_session'] = game_session
            features["prev_assess_title"] = prev_assess_title
            prev_assess_title = session_title
            features["first_assessment"] = 1 if assess_count == 1 else 0
            assess_count += 1
            
            if durations == []: #span of timestamp in target assessment
                features['duration_mean'] = 0
                features['duration_std'] = 0
                features['duration_max'] = 0
            else:
                features['duration_mean'] = np.mean(durations)
                features['duration_std'] = np.std(durations)
                features['duration_max'] = np.max(durations)
            durations.append((session.iloc[-1, 2] - session.iloc[0, 2]).seconds) 
            
            accuracy = true_attempts/(true_attempts+false_attempts) if (true_attempts+false_attempts) != 0 else 0
            features['last_assess_acc'] = last_accuracy
            last_accuracy = accuracy
            if accuracy == 0:
                features['accuracy_group'] = 0
            elif accuracy == 1:
                features['accuracy_group'] = 3
            elif accuracy == 0.5:
                features['accuracy_group'] = 2
            else:
                features['accuracy_group'] = 1
            features.update(accuracy_groups)
            accuracy_groups[str(features['accuracy_group'])] += 1
            features['accumulated_accuracy_group'] = accumulated_accuracy_group/counter if counter > 0 else 0
            accumulated_accuracy_group += features['accuracy_group']
            
            if test_set:
                all_assessments.append(features)
            elif true_attempts+false_attempts > 0:
                all_assessments.append(features)
                
            counter += 1
            
        n_of_title = Counter(session['title']) 
        for key in n_of_title.keys():
            title_count[activities_labels[key]] += n_of_title[key]
            
        n_of_eventcode = Counter(session['event_code']) 
        for key in n_of_eventcode.keys():
            event_code_count[str(key)] += n_of_eventcode[key]
                        
        n_of_title_eventcode = Counter(session['title_event_code']) 
        for key in n_of_title_eventcode.keys():
            title_eventcode_count[str(key)] += n_of_title_eventcode[key]
        miss += np.sum(session["misses"])
        morning_play += np.sum(session["morning"])  
        user_world_count["world_"+str(session_world)] += session.shape[0]

        accumulated_actions += len(session)
        if last_activity != session_type:
            user_activities_count[session_type] += 1
            last_activitiy = session_type
    if test_set:
        return all_assessments[-1], all_assessments[:-1]
    return all_assessments

In [9]:
def get_train_and_test(train, test):
    compiled_train = []
    compiled_test = []
    compiled_val = []

    for i, (ins_id, user_sample) in tqdm(enumerate(train.groupby('installation_id', sort=False)), total=train.installation_id.nunique(), desc='Installation_id', position=0):
        compiled_train += get_data(user_sample)
    del train
    for ins_id, user_sample in tqdm(test.groupby('installation_id', sort=False), total=test.installation_id.nunique(), desc='Installation_id', position=0):
        test_data, val_data = get_data(user_sample, test_set=True)

        compiled_test.append(test_data)
        compiled_val += val_data
    del test
    reduce_train = pd.DataFrame(compiled_train)
    reduce_test = pd.DataFrame(compiled_test)
    reduce_val = pd.DataFrame(compiled_val)

    categoricals = ['session_title']
    return reduce_train, reduce_test, reduce_val, categoricals

In [10]:
new_train, new_test, new_val, categoricals = get_train_and_test(train, test)

HBox(children=(IntProgress(value=0, description='Installation_id', max=3614, style=ProgressStyle(description_w…




HBox(children=(IntProgress(value=0, description='Installation_id', max=1000, style=ProgressStyle(description_w…




In [11]:
new_train.head(10)

Unnamed: 0,Clip,Activity,Assessment,Game,12 Monkeys_2000,Air Show_2000,Air Show_2020,Air Show_2030,Air Show_2060,Air Show_2070,Air Show_2075,Air Show_3010,Air Show_3020,Air Show_3021,Air Show_3110,Air Show_3120,Air Show_3121,Air Show_4010,Air Show_4020,Air Show_4070,Air Show_4090,Air Show_4100,Air Show_4110,All Star Sorting_2000,All Star Sorting_2020,All Star Sorting_2025,All Star Sorting_2030,All Star Sorting_3010,All Star Sorting_3020,All Star Sorting_3021,All Star Sorting_3110,All Star Sorting_3120,All Star Sorting_3121,All Star Sorting_4010,All Star Sorting_4020,All Star Sorting_4030,All Star Sorting_4035,All Star Sorting_4070,All Star Sorting_4080,All Star Sorting_4090,All Star Sorting_4095,Balancing Act_2000,Bird Measurer (Assessment)_2000,Bird Measurer (Assessment)_2010,Bird Measurer (Assessment)_2020,Bird Measurer (Assessment)_2030,Bird Measurer (Assessment)_3010,Bird Measurer (Assessment)_3020,Bird Measurer (Assessment)_3021,Bird Measurer (Assessment)_3110,Bird Measurer (Assessment)_3120,Bird Measurer (Assessment)_3121,Bird Measurer (Assessment)_4020,Bird Measurer (Assessment)_4025,Bird Measurer (Assessment)_4030,Bird Measurer (Assessment)_4035,Bird Measurer (Assessment)_4040,Bird Measurer (Assessment)_4070,Bird Measurer (Assessment)_4080,Bird Measurer (Assessment)_4090,Bird Measurer (Assessment)_4100,Bird Measurer (Assessment)_4110,Bottle Filler (Activity)_2000,Bottle Filler (Activity)_2020,Bottle Filler (Activity)_2030,Bottle Filler (Activity)_3010,Bottle Filler (Activity)_3110,Bottle Filler (Activity)_4020,Bottle Filler (Activity)_4030,Bottle Filler (Activity)_4035,Bottle Filler (Activity)_4070,Bottle Filler (Activity)_4080,Bottle Filler (Activity)_4090,Bubble Bath_2000,Bubble Bath_2020,Bubble Bath_2025,Bubble Bath_2030,Bubble Bath_2035,Bubble Bath_2080,Bubble Bath_2083,Bubble Bath_3010,Bubble Bath_3020,Bubble Bath_3021,Bubble Bath_3110,Bubble Bath_3120,Bubble Bath_3121,Bubble Bath_4010,Bubble Bath_4020,Bubble Bath_4040,Bubble Bath_4045,Bubble Bath_4070,Bubble Bath_4080,Bubble Bath_4090,Bubble Bath_4095,Bubble Bath_4220,Bubble Bath_4230,Bubble Bath_4235,Bug Measurer (Activity)_2000,Bug Measurer (Activity)_3010,Bug Measurer (Activity)_3110,Bug Measurer (Activity)_4025,Bug Measurer (Activity)_4030,Bug Measurer (Activity)_4035,Bug Measurer (Activity)_4070,Bug Measurer (Activity)_4080,Bug Measurer (Activity)_4090,Cart Balancer (Assessment)_2000,Cart Balancer (Assessment)_2010,Cart Balancer (Assessment)_2020,Cart Balancer (Assessment)_2030,Cart Balancer (Assessment)_3010,Cart Balancer (Assessment)_3020,Cart Balancer (Assessment)_3021,Cart Balancer (Assessment)_3110,Cart Balancer (Assessment)_3120,Cart Balancer (Assessment)_3121,Cart Balancer (Assessment)_4020,Cart Balancer (Assessment)_4030,Cart Balancer (Assessment)_4035,Cart Balancer (Assessment)_4040,Cart Balancer (Assessment)_4070,Cart Balancer (Assessment)_4080,Cart Balancer (Assessment)_4090,Cart Balancer (Assessment)_4100,Cauldron Filler (Assessment)_2000,Cauldron Filler (Assessment)_2010,Cauldron Filler (Assessment)_2020,Cauldron Filler (Assessment)_2030,Cauldron Filler (Assessment)_3010,Cauldron Filler (Assessment)_3020,Cauldron Filler (Assessment)_3021,Cauldron Filler (Assessment)_3110,Cauldron Filler (Assessment)_3120,Cauldron Filler (Assessment)_3121,Cauldron Filler (Assessment)_4020,Cauldron Filler (Assessment)_4025,Cauldron Filler (Assessment)_4030,Cauldron Filler (Assessment)_4035,Cauldron Filler (Assessment)_4040,Cauldron Filler (Assessment)_4070,Cauldron Filler (Assessment)_4080,Cauldron Filler (Assessment)_4090,Cauldron Filler (Assessment)_4100,Chest Sorter (Assessment)_2000,Chest Sorter (Assessment)_2010,Chest Sorter (Assessment)_2020,Chest Sorter (Assessment)_2030,Chest Sorter (Assessment)_3010,Chest Sorter (Assessment)_3020,Chest Sorter (Assessment)_3021,Chest Sorter (Assessment)_3110,Chest Sorter (Assessment)_3120,Chest Sorter (Assessment)_3121,Chest Sorter (Assessment)_4020,Chest Sorter (Assessment)_4025,Chest Sorter (Assessment)_4030,Chest Sorter (Assessment)_4035,Chest Sorter (Assessment)_4040,Chest Sorter (Assessment)_4070,Chest Sorter (Assessment)_4080,Chest Sorter (Assessment)_4090,Chest Sorter (Assessment)_4100,Chicken Balancer (Activity)_2000,Chicken Balancer (Activity)_3010,Chicken Balancer (Activity)_3110,Chicken Balancer (Activity)_4020,Chicken Balancer (Activity)_4022,Chicken Balancer (Activity)_4030,Chicken Balancer (Activity)_4035,Chicken Balancer (Activity)_4070,Chicken Balancer (Activity)_4080,Chicken Balancer (Activity)_4090,Chow Time_2000,Chow Time_2020,Chow Time_2030,Chow Time_3010,Chow Time_3020,Chow Time_3021,Chow Time_3110,Chow Time_3120,Chow Time_3121,Chow Time_4010,Chow Time_4020,Chow Time_4030,Chow Time_4035,Chow Time_4070,Chow Time_4080,Chow Time_4090,Chow Time_4095,Costume Box_2000,Crystal Caves - Level 1_2000,Crystal Caves - Level 2_2000,Crystal Caves - Level 3_2000,Crystals Rule_2000,Crystals Rule_2020,Crystals Rule_2030,Crystals Rule_3010,Crystals Rule_3020,Crystals Rule_3021,Crystals Rule_3110,Crystals Rule_3120,Crystals Rule_3121,Crystals Rule_4010,Crystals Rule_4020,Crystals Rule_4050,Crystals Rule_4070,Crystals Rule_4090,Dino Dive_2000,Dino Dive_2020,Dino Dive_2030,Dino Dive_2060,Dino Dive_2070,Dino Dive_3010,Dino Dive_3020,Dino Dive_3021,Dino Dive_3110,Dino Dive_3120,Dino Dive_3121,Dino Dive_4010,Dino Dive_4020,Dino Dive_4070,Dino Dive_4080,Dino Dive_4090,Dino Drink_2000,Dino Drink_2020,Dino Drink_2030,Dino Drink_2060,Dino Drink_2070,Dino Drink_2075,Dino Drink_3010,Dino Drink_3020,Dino Drink_3021,Dino Drink_3110,Dino Drink_3120,Dino Drink_3121,Dino Drink_4010,Dino Drink_4020,Dino Drink_4030,Dino Drink_4031,Dino Drink_4070,Dino Drink_4080,Dino Drink_4090,Egg Dropper (Activity)_2000,Egg Dropper (Activity)_2020,Egg Dropper (Activity)_3010,Egg Dropper (Activity)_3110,Egg Dropper (Activity)_4020,Egg Dropper (Activity)_4025,Egg Dropper (Activity)_4070,Egg Dropper (Activity)_4080,Egg Dropper (Activity)_4090,Fireworks (Activity)_2000,Fireworks (Activity)_3010,Fireworks (Activity)_3110,Fireworks (Activity)_4020,Fireworks (Activity)_4030,Fireworks (Activity)_4070,Fireworks (Activity)_4080,Fireworks (Activity)_4090,Flower Waterer (Activity)_2000,Flower Waterer (Activity)_3010,Flower Waterer (Activity)_3110,Flower Waterer (Activity)_4020,Flower Waterer (Activity)_4022,Flower Waterer (Activity)_4025,Flower Waterer (Activity)_4030,Flower Waterer (Activity)_4070,Flower Waterer (Activity)_4080,Flower Waterer (Activity)_4090,Happy Camel_2000,Happy Camel_2020,Happy Camel_2030,Happy Camel_2080,Happy Camel_2081,Happy Camel_2083,Happy Camel_3010,Happy Camel_3020,Happy Camel_3021,Happy Camel_3110,Happy Camel_3120,Happy Camel_3121,Happy Camel_4010,Happy Camel_4020,Happy Camel_4030,Happy Camel_4035,Happy Camel_4040,Happy Camel_4045,Happy Camel_4070,Happy Camel_4080,Happy Camel_4090,Happy Camel_4095,"Heavy, Heavier, Heaviest_2000",Honey Cake_2000,Leaf Leader_2000,Leaf Leader_2020,Leaf Leader_2030,Leaf Leader_2060,Leaf Leader_2070,Leaf Leader_2075,Leaf Leader_3010,Leaf Leader_3020,Leaf Leader_3021,Leaf Leader_3110,Leaf Leader_3120,Leaf Leader_3121,Leaf Leader_4010,Leaf Leader_4020,Leaf Leader_4070,Leaf Leader_4080,Leaf Leader_4090,Leaf Leader_4095,Lifting Heavy Things_2000,Magma Peak - Level 1_2000,Magma Peak - Level 2_2000,Mushroom Sorter (Assessment)_2000,Mushroom Sorter (Assessment)_2010,Mushroom Sorter (Assessment)_2020,Mushroom Sorter (Assessment)_2025,Mushroom Sorter (Assessment)_2030,Mushroom Sorter (Assessment)_2035,Mushroom Sorter (Assessment)_3010,Mushroom Sorter (Assessment)_3020,Mushroom Sorter (Assessment)_3021,Mushroom Sorter (Assessment)_3110,Mushroom Sorter (Assessment)_3120,Mushroom Sorter (Assessment)_3121,Mushroom Sorter (Assessment)_4020,Mushroom Sorter (Assessment)_4025,Mushroom Sorter (Assessment)_4030,Mushroom Sorter (Assessment)_4035,Mushroom Sorter (Assessment)_4040,Mushroom Sorter (Assessment)_4070,Mushroom Sorter (Assessment)_4080,Mushroom Sorter (Assessment)_4090,Mushroom Sorter (Assessment)_4100,Ordering Spheres_2000,Pan Balance_2000,Pan Balance_2020,Pan Balance_2030,Pan Balance_3010,Pan Balance_3020,Pan Balance_3021,Pan Balance_3110,Pan Balance_3120,Pan Balance_3121,Pan Balance_4010,Pan Balance_4020,Pan Balance_4025,Pan Balance_4030,Pan Balance_4035,Pan Balance_4070,Pan Balance_4080,Pan Balance_4090,Pan Balance_4100,Pirate's Tale_2000,Rulers_2000,Sandcastle Builder (Activity)_2000,Sandcastle Builder (Activity)_3010,Sandcastle Builder (Activity)_3110,Sandcastle Builder (Activity)_4020,Sandcastle Builder (Activity)_4021,Sandcastle Builder (Activity)_4030,Sandcastle Builder (Activity)_4035,Sandcastle Builder (Activity)_4070,Sandcastle Builder (Activity)_4080,Sandcastle Builder (Activity)_4090,Scrub-A-Dub_2000,Scrub-A-Dub_2020,Scrub-A-Dub_2030,Scrub-A-Dub_2040,Scrub-A-Dub_2050,Scrub-A-Dub_2080,Scrub-A-Dub_2081,Scrub-A-Dub_2083,Scrub-A-Dub_3010,Scrub-A-Dub_3020,Scrub-A-Dub_3021,Scrub-A-Dub_3110,Scrub-A-Dub_3120,Scrub-A-Dub_3121,Scrub-A-Dub_4010,Scrub-A-Dub_4020,Scrub-A-Dub_4070,Scrub-A-Dub_4090,Slop Problem_2000,Treasure Map_2000,Tree Top City - Level 1_2000,Tree Top City - Level 2_2000,Tree Top City - Level 3_2000,Watering Hole (Activity)_2000,Watering Hole (Activity)_2010,Watering Hole (Activity)_3010,Watering Hole (Activity)_3110,Watering Hole (Activity)_4020,Watering Hole (Activity)_4021,Watering Hole (Activity)_4025,Watering Hole (Activity)_4070,Watering Hole (Activity)_4090,Watering Hole (Activity)_5000,Watering Hole (Activity)_5010,Welcome to Lost Lagoon!_2000,world_0,world_1,world_2,world_3,2000,2010,2020,2025,2030,2035,2040,2050,2060,2070,2075,2080,2081,2083,3010,3020,3021,3110,3120,3121,4010,4020,4021,4022,4025,4030,4031,4035,4040,4045,4050,4070,4080,4090,4095,4100,4110,4220,4230,4235,5000,5010,12 Monkeys,Air Show,All Star Sorting,Balancing Act,Bird Measurer (Assessment),Bottle Filler (Activity),Bubble Bath,Bug Measurer (Activity),Cart Balancer (Assessment),Cauldron Filler (Assessment),Chest Sorter (Assessment),Chicken Balancer (Activity),Chow Time,Costume Box,Crystal Caves - Level 1,Crystal Caves - Level 2,Crystal Caves - Level 3,Crystals Rule,Dino Dive,Dino Drink,Egg Dropper (Activity),Fireworks (Activity),Flower Waterer (Activity),Happy Camel,"Heavy, Heavier, Heaviest",Honey Cake,Leaf Leader,Lifting Heavy Things,Magma Peak - Level 1,Magma Peak - Level 2,Mushroom Sorter (Assessment),Ordering Spheres,Pan Balance,Pirate's Tale,Rulers,Sandcastle Builder (Activity),Scrub-A-Dub,Slop Problem,Treasure Map,Tree Top City - Level 1,Tree Top City - Level 2,Tree Top City - Level 3,Watering Hole (Activity),Welcome to Lost Lagoon!,accumulated_correct_attempts,accumulated_uncorrect_attempts,misses,accumulated_actions,morning_play,game_true,game_false,game_accuracy,game_accuracy_std,last_game_acc,installation_id,session_title,game_session,prev_assess_title,first_assessment,duration_mean,duration_std,duration_max,last_assess_acc,accuracy_group,0,1,2,3,accumulated_accuracy_group
0,11,3,0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,5,4,3,12,4,3,11,4,3,3,26,27,1,42,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15,15,23,23,7,0,0,1,17,17,18,31,19,50,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,20,19,7,14,21,0,18,0,2,1,15,15,6,6,4,1,2,15,3,6,15,3,6,1,18,14,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,235,2,410,18,0,20,4,18,0,6,6,0,0,0,4,1,2,79,7,9,77,7,9,4,92,14,31,19,121,0,1,0,0,0,94,0,4,0,0,0,0,0,0,0,0,1,0,153,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,84,166,0,0,0,0,0,1,0,0,1,0,1,0,102,131,1,0,1,1,0,0,2,0,0,15.0,647,543,13,13,0.5,0.234362,0.571429,0006a69f,30,901acc108f55a5a1,-999,1,0.0,0.0,0,-999.0,3,0,0,0,0,0.0
1,14,4,1,6,1,1,2,1,1,1,0,52,3,1,51,3,1,1,26,30,0,4,2,3,5,4,3,12,4,3,11,4,3,3,26,27,1,42,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,77,77,15,20,5,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,3,2,14,1,2,14,1,2,1,3,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15,15,23,23,7,0,0,1,17,17,18,31,19,50,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,4,0,4,4,0,4,6,3,8,0,2,6,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,20,19,7,14,21,0,18,0,2,1,15,15,6,6,4,1,2,15,3,6,15,3,6,1,18,14,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,2,0,235,2,906,25,1,26,5,22,1,6,6,1,1,0,4,1,2,226,11,16,223,11,16,6,127,14,31,37,149,0,6,2,0,0,156,0,4,0,5,2,0,0,0,0,0,1,180,153,0,0,0,0,204,0,0,0,0,0,2,0,0,0,61,0,0,0,84,166,0,0,0,0,0,1,0,48,1,0,1,1,102,131,1,1,1,1,1,0,2,1,0,16.0,1143,1039,21,38,0.355932,0.242691,0.666667,0006a69f,4,77b8ee947eb84b4e,30,0,39.0,0.0,39,1.0,0,0,0,0,1,3.0
2,14,4,2,6,1,1,2,1,1,1,0,52,3,1,51,3,1,1,26,30,0,4,2,3,5,4,3,12,4,3,11,4,3,3,26,27,1,42,0,2,0,0,1,0,1,0,2,11,0,2,11,0,0,22,22,0,0,4,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,77,77,15,20,5,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,3,2,14,1,2,14,1,2,1,3,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15,15,23,23,7,0,0,1,17,17,18,31,19,50,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,4,0,4,4,0,4,6,3,8,0,2,6,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,20,19,7,14,21,0,18,0,2,1,15,15,6,6,4,1,2,15,3,6,15,3,6,1,18,14,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,2,0,235,2,993,26,1,27,5,22,1,6,6,1,1,0,4,1,2,228,22,16,225,22,16,6,127,14,31,59,171,0,6,2,0,0,160,0,4,0,5,13,0,0,0,0,0,1,180,153,0,87,0,0,204,0,0,0,0,0,2,0,0,0,61,0,0,0,84,166,0,0,0,0,0,1,0,48,1,0,1,1,102,131,1,1,1,1,1,0,2,1,11,16.0,1230,1126,21,38,0.355932,0.242691,0.666667,0006a69f,30,6bdf9623adc94d89,4,0,65.5,26.5,92,0.0,3,1,0,0,1,1.5
3,24,9,4,10,2,1,2,1,1,1,0,52,3,1,51,3,1,1,26,30,0,4,2,4,8,4,6,16,4,6,15,4,6,4,38,39,1,46,0,2,0,0,1,0,1,0,2,11,0,2,11,0,0,22,22,0,0,4,0,0,0,11,1,8,7,15,15,21,28,7,8,0,0,1,2,2,2,2,1,1,14,2,6,14,2,6,1,2,7,2,38,0,0,1,9,0,0,1,77,77,15,20,5,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,3,2,14,1,2,14,1,2,1,3,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,2,1,1,0,7,1,5,6,1,5,1,6,8,6,37,0,0,0,0,0,0,0,0,0,0,0,2,47,47,57,70,76,0,0,2,26,26,37,45,38,83,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,3,2,3,3,2,3,9,0,11,9,0,11,9,9,11,0,2,11,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,34,33,11,21,33,1,24,0,2,2,22,21,10,9,7,2,4,23,3,9,22,3,9,2,24,23,0,2,1,2,2,1,1,0,5,5,9,8,9,4,0,5,5,3,0,728,3,1428,47,2,52,9,43,5,10,9,2,2,0,8,2,5,341,25,40,336,25,40,10,243,29,45,93,314,6,14,9,2,0,348,0,4,1,6,13,9,0,0,5,5,2,180,203,0,87,110,115,204,0,0,0,0,0,3,0,0,0,61,0,91,0,299,278,0,0,0,0,0,2,1,100,2,0,2,1,161,195,2,1,2,2,1,51,3,2,11,19.0,2159,1654,33,38,0.464789,0.327096,1.0,0006a69f,30,9501794defd84e4d,30,0,41.25,31.29996,92,0.0,2,2,0,0,2,1.5
4,28,10,5,13,2,2,5,3,2,1,1,81,5,3,79,5,3,2,45,48,0,8,2,4,8,4,6,16,4,6,15,4,6,4,38,39,1,46,0,2,0,0,1,0,1,0,2,11,0,2,11,0,0,22,22,0,0,4,0,0,0,11,1,8,7,15,15,21,28,7,8,0,0,1,2,2,2,2,1,1,14,2,6,14,2,6,1,2,7,2,38,0,0,1,9,0,0,2,121,121,24,30,6,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,11,9,60,3,9,60,3,9,2,12,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,2,1,1,0,7,1,5,6,1,5,1,6,8,6,37,0,0,0,0,0,0,0,0,0,0,0,2,47,47,57,70,76,0,0,2,26,26,37,45,38,83,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,4,3,4,4,3,4,12,1,15,12,1,15,15,12,18,0,3,12,0,0,4,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,34,33,11,21,33,1,24,0,2,2,22,21,10,9,7,2,4,23,3,9,22,3,9,2,24,23,0,2,3,2,2,2,1,0,5,5,9,8,9,4,0,5,5,3,0,728,3,1855,56,3,64,10,53,6,10,9,3,2,1,8,2,5,463,30,53,457,30,53,12,277,29,45,105,331,6,15,10,2,0,387,0,4,1,12,13,9,0,0,5,5,2,295,203,0,87,110,115,319,0,0,0,0,0,3,0,0,0,212,0,91,0,299,278,0,0,0,0,0,2,1,142,2,0,2,2,161,195,2,3,2,2,2,51,3,3,12,22.0,2586,1654,55,48,0.533981,0.331841,0.777778,0006a69f,4,a9ef3ecb3d1acc6a,30,0,39.2,28.294169,92,0.5,3,2,0,1,2,1.6
5,15,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25,24,31,31,46,61,15,16,0,0,1,2,1,1,1,1,1,10,0,2,10,0,2,2,1,8,1,83,0,0,1,5,6,6,1,45,44,8,10,2,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,3,3,9,4,3,9,4,1,9,13,4,16,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,3,1,1,14,4,5,14,4,4,1,11,119,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,79,79,32,65,108,9,90,0,2,1,13,12,6,5,3,1,2,15,10,5,14,10,5,1,22,96,0,0,0,3,0,0,1,0,2,2,0,0,0,38,0,0,0,3,83,1315,3,141,25,0,46,1,43,1,6,5,1,1,0,4,1,3,199,23,16,197,23,15,5,121,65,0,8,192,0,30,8,1,0,482,0,2,1,0,0,5,6,6,0,0,0,0,0,0,0,250,145,134,0,0,0,0,82,0,1,0,0,0,185,0,0,0,0,0,0,0,0,0,2,2,0,1,0,0,3,467,221,0,0,3,0,0,43,3,0,0,28.0,1542,0,17,17,0.5,0.267629,0.363636,0006c192,9,197a373a77101924,-999,1,0.0,0.0,0,-999.0,3,0,0,0,0,0.0
6,28,8,2,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25,24,31,31,46,61,15,16,0,0,1,2,1,1,1,1,1,10,0,2,10,0,2,2,1,8,1,83,0,0,1,5,6,6,1,45,44,8,10,2,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,1,3,0,2,2,0,1,3,0,3,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,10,10,24,2,41,15,7,0,1,1,3,3,3,9,4,3,9,4,1,9,13,4,16,0,0,0,0,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,3,1,1,14,4,5,14,4,4,1,11,119,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,27,26,20,35,16,51,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,3,3,79,79,32,65,108,9,90,0,2,1,13,12,6,5,3,1,2,15,10,5,14,10,5,1,22,96,0,0,0,3,1,0,1,0,2,2,0,0,0,38,0,0,0,4,206,1343,4,345,43,0,49,1,44,1,6,5,1,1,0,4,1,3,240,23,18,235,23,16,6,168,65,37,24,287,0,45,8,1,0,521,0,3,1,1,0,5,6,6,0,0,1,0,0,2,0,250,145,134,0,27,0,111,82,0,3,2,1,0,185,0,0,0,201,0,0,0,0,1,2,3,0,1,4,1,3,467,221,0,0,3,1,0,43,4,1,0,28.0,1898,0,0,0,0.0,0.0,0.0,0006c192,30,b2297d292892745a,9,0,7.0,7.0,14,0.0,0,1,0,0,1,1.5
7,30,8,3,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25,24,31,31,46,61,15,16,0,0,1,2,1,1,1,1,1,10,0,2,10,0,2,2,1,8,1,83,0,0,1,5,6,6,1,45,44,8,10,2,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,1,3,0,2,2,0,1,3,0,3,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,10,10,24,2,41,15,7,0,1,1,3,3,3,9,4,3,9,4,1,9,13,4,16,0,0,0,0,3,2,1,1,0,0,1,0,0,0,0,0,1,0,0,3,0,1,3,3,1,1,14,4,5,14,4,4,1,11,119,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,27,26,20,35,16,51,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,1,0,1,1,0,1,5,4,3,5,4,3,8,3,14,0,6,53,0,0,4,1,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,3,3,79,79,32,65,108,9,90,0,2,1,13,12,6,5,3,1,2,15,10,5,14,10,5,1,22,96,0,0,1,3,1,1,1,0,2,2,0,0,0,38,0,0,0,4,206,1343,4,469,47,0,50,2,44,2,6,5,1,1,0,4,1,3,246,27,21,240,27,19,7,176,65,37,27,301,0,45,14,1,0,577,0,3,1,5,0,5,6,6,0,0,1,0,0,2,0,250,145,134,0,27,0,111,82,0,3,2,1,6,185,0,0,0,201,0,0,0,0,1,2,3,116,1,4,1,3,467,221,0,1,3,1,1,43,4,1,4,28.0,2022,0,0,0,0.0,0.0,0.0,0006c192,4,957406a905d59afd,30,0,35.0,40.008332,91,0.0,2,2,0,0,1,1.0
8,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,3,0,0,0,0.0,0.0,0.0,00129856,4,ae691ec5ad5652cf,30,0,0.0,0.0,0,0.0,3,1,0,0,0,0.0
9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,5,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0.0,6,0,0,0,0.0,0.0,0.0,001d0ed0,30,c046a858e7c8bf03,-999,1,0.0,0.0,0,-999.0,2,0,0,0,0,0.0


# Feature selection

In [12]:
X_train = new_train.drop(['accuracy_group', 'game_session'],axis=1) 
lbl = preprocessing.LabelEncoder()
lbl.fit(list(X_train["installation_id"]))
X_train["installation_id"] = lbl.transform(list(X_train["installation_id"]))
remove_features = [i for i in X_train.columns if "_4235" in i or i == "world_"+str(activities_world["NONE"])]
for i in X_train.columns:
    if X_train[i].std() == 0 and i not in remove_features:
        remove_features.append(i)
X_train = X_train.drop(remove_features, axis=1)
X_train = X_train[sorted(X_train.columns.tolist())]
y_train = new_train.accuracy_group
print(X_train.shape)

X_test = new_test.drop(["installation_id","accuracy_group", "game_session"], axis=1)
X_test = X_test.drop(remove_features, axis=1)
X_test = X_test[sorted(X_test.columns.tolist())]

X_val = new_val.drop(["installation_id", "accuracy_group", "game_session"], axis=1)
X_val = X_val.drop(remove_features, axis=1)
X_val = X_val[sorted(X_val.columns.tolist())]
y_val = new_val["accuracy_group"]

(17690, 512)


# modelling and prediction

## oof approach

In [13]:
def run_model(X_train, y_train, X_test):
    n_folds=5
    skf=GroupKFold(n_splits = n_folds)
    models = []
    train_qwk_scores = []
    test_qwk_scores = []
    scores = []

    #lgbm_params = {
    #'objective': 'binary','metric': 'auc',"tree_learner": "serial", 
    #"max_depth" : 5, "boosting": 'gbdt', "num_leaves" : 13, "learning_rate" : 0.01,
#}
    
    lgbm_params = {
    'objective': 'regression','metric': 'rmse',"tree_learner": "serial", 
    "max_depth" : 5, "boosting": 'gbdt', "num_leaves" : 13, "learning_rate" : 0.01,
}

    valid = pd.DataFrame(np.zeros([X_train.shape[0]]))
    features_list = [i for i in X_train.columns if i != "installation_id"]
    feature_importance_df = pd.DataFrame(features_list, columns=["Feature"])
    for i , (train_index, test_index) in enumerate(skf.split(X_train, y_train, X_train["installation_id"])):
        print("Fold "+str(i+1))
        optR = OptimizedRounder()
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]
        X_train2 = X_train2.drop(['installation_id'],axis=1)
    
        X_test2 = X_train.iloc[test_index,:]
        y_test2 = y_train.iloc[test_index]
        X_test2 = X_test2.drop(['installation_id'],axis=1)
    
        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_test2, y_test2, reference=lgb_train)
    
        clf = lgb.train(
        lgbm_params, lgb_train,valid_sets=[lgb_train, lgb_eval],
        num_boost_round=10000,early_stopping_rounds=10,verbose_eval = 500,
    )
        
        train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
        test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
        valid.iloc[test_index] = clf.predict(X_test2, num_iteration = clf.best_iteration).reshape(X_test2.shape[0], 1)
        
        models.append(clf)
        feature_importance_df["Fold_"+str(i+1)] = clf.feature_importance()

    optR = OptimizedRounder()    
    optR.fit(np.array(valid).reshape(-1, ), y_train)
    coefficients = optR.coefficients()
    opt_preds = optR.predict(np.array(valid).reshape(-1, ), coefficients)
    qwk_score = qwk(y_train, opt_preds)
    print(qwk_score, coefficients)
    
    pred_value = np.zeros([X_test.shape[0]])
    for model in models:
        pred_value += model.predict(X_test, num_iteration = model.best_iteration) / len(models)
        
    feature_importance_df["Average"] = np.mean(feature_importance_df.iloc[:,1:n_folds+1], axis=1)
    feature_importance_df["Std"] = np.std(feature_importance_df.iloc[:,1:n_folds+1], axis=1)
    feature_importance_df["Cv"] = feature_importance_df["Std"] / feature_importance_df["Average"]
    
    return pred_value, coefficients, feature_importance_df

#y_train.loc[y_train<=1]=0
#y_train.loc[y_train>=2]=1
#pred_value = np.zeros([X_test.shape[0]])
#pred, coef, f_df = run_model(X_train, y_train, X_test)
#pred_value += pred 

#test_pred_class = pd.cut(pred_value, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3])
#sample_submission["accuracy_group"] = test_pred_class.astype(int)
#sample_submission.to_csv('submission.csv', index=False)
#sample_submission["accuracy_group"].value_counts(normalize = True)

## average of regression output by CV truncation 

In [14]:
def run_model(X_train, y_train, X_test):
    n_folds=5
    skf=GroupKFold(n_splits = n_folds)
    coefficients = []
    models = []
    train_qwk_scores = []
    test_qwk_scores = []
    scores = []

    lgbm_params = {
    'objective': 'regression','metric': 'rmse',"tree_learner": "serial", 
    "max_depth" : 5, "boosting": 'gbdt', "num_leaves" : 13, "learning_rate" : 0.01,
}

    features_list = [i for i in X_train.columns if i != "installation_id"]
    feature_importance_df = pd.DataFrame(features_list, columns=["Feature"])
    for i , (train_index, test_index) in enumerate(skf.split(X_train, y_train, X_train["installation_id"])):
        print("Fold "+str(i+1))
        optR = OptimizedRounder()
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]
        X_train2 = X_train2.drop(['installation_id'],axis=1)
    
        X_test2 = X_train.iloc[test_index,:]
        y_test2 = y_train.iloc[test_index]
        
        test2 = pd.concat([X_test2, y_test2], axis=1)
        #second_last_index = list(set(test2.groupby('installation_id').tail(2).index)- set(test2.groupby('installation_id').tail(1).index))
        #third_last_index = list(set(test2.groupby('installation_id').tail(3).index)- set(test2.groupby('installation_id').tail(2).index))
        #test_last2 = test2[test2.index.isin(second_last_index)]
        #test_last3 = test2[test2.index.isin(third_last_index)]
        test2 = test2.groupby('installation_id').tail(1)
        X_test2 = test2.drop(["accuracy_group", "installation_id"], axis=1)
        y_test2 = test2["accuracy_group"]
    
        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_test2, y_test2, reference=lgb_train)
    
        clf = lgb.train(
        lgbm_params, lgb_train,valid_sets=[lgb_train, lgb_eval],
        num_boost_round=10000,early_stopping_rounds=10,verbose_eval = 500,
    )
        
        train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
        test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
                
        optR.fit(train_predict.reshape(-1,), y_train2)
        tmp_coefficients = optR.coefficients()
        opt_train_preds = optR.predict(train_predict.reshape(-1, ), tmp_coefficients)
        train_qwk_score = qwk(y_train2, opt_train_preds)
        opt_test_preds = optR.predict(test_predict.reshape(-1, ), tmp_coefficients)
        test_qwk_score = qwk(y_test2, opt_test_preds)
        train_qwk_scores.append(train_qwk_score)
        test_qwk_scores.append(test_qwk_score)
        coefficients.append(tmp_coefficients)
        
        models.append(clf)
        feature_importance_df["Fold_"+str(i+1)] = clf.feature_importance()
    
    print("-----------------------------")
    print('train qwk list: ', train_qwk_scores, np.mean(train_qwk_scores))
    print('valid qwk list: ', test_qwk_scores, np.mean(test_qwk_scores))
    
    pred_value = np.zeros([X_test.shape[0]])
    avg_coefficients = np.mean(coefficients, axis=0)
    for model in models:
        pred_value += model.predict(X_test, num_iteration = model.best_iteration) / len(models)
        
    feature_importance_df["Average"] = np.mean(feature_importance_df.iloc[:,1:n_folds+1], axis=1)
    feature_importance_df["Std"] = np.std(feature_importance_df.iloc[:,1:n_folds+1], axis=1)
    feature_importance_df["Cv"] = feature_importance_df["Std"] / feature_importance_df["Average"]
    
    return pred_value, np.mean(train_qwk_scores), np.mean(test_qwk_scores), avg_coefficients, feature_importance_df

# regression
pred_value = np.zeros([X_test.shape[0]])
tr_score = []; va_score = []; coefficients = []
num = 1
for i in range(num):
    pred, train_score, valid_score, coef, f_df = run_model(X_train, y_train, X_test)
    pred_value += pred / num
    tr_score.append(train_score)
    va_score.append(valid_score)
    coefficients.append(coef)
print(np.mean(tr_score), np.mean(va_score))

test_coefficients = np.mean(coefficients, axis=0)
test_pred_class = pd.cut(pred_value, [-np.inf] + list(np.sort(test_coefficients)) + [np.inf], labels = [0, 1, 2, 3])
sample_submission["accuracy_group"] = test_pred_class.astype(int)
sample_submission.to_csv('submission.csv', index=False)
sample_submission["accuracy_group"].value_counts(normalize = True)

Fold 1
Training until validation scores don't improve for 10 rounds
[500]	training's rmse: 0.964455	valid_1's rmse: 1.07203
Early stopping, best iteration is:
[928]	training's rmse: 0.93035	valid_1's rmse: 1.06224
Fold 2
Training until validation scores don't improve for 10 rounds
[500]	training's rmse: 0.962328	valid_1's rmse: 1.03471
Early stopping, best iteration is:
[888]	training's rmse: 0.931365	valid_1's rmse: 1.02515
Fold 3
Training until validation scores don't improve for 10 rounds
[500]	training's rmse: 0.962729	valid_1's rmse: 1.01018
Early stopping, best iteration is:
[603]	training's rmse: 0.953066	valid_1's rmse: 1.00781
Fold 4
Training until validation scores don't improve for 10 rounds
[500]	training's rmse: 0.955698	valid_1's rmse: 1.09355
Early stopping, best iteration is:
[902]	training's rmse: 0.923934	valid_1's rmse: 1.08523
Fold 5
Training until validation scores don't improve for 10 rounds
[500]	training's rmse: 0.953263	valid_1's rmse: 1.06483
Early stopping, b

3    0.464
2    0.273
0    0.150
1    0.113
Name: accuracy_group, dtype: float64

## classification of correct or not

In [15]:
def classification1(new_train, new_test):
    X_train = new_train.drop(['accuracy_group'],axis=1) 
    X_train = pd.merge(X_train, train_labels[["game_session", "num_correct", "num_incorrect"]], on ="game_session")
    y_train = X_train.num_correct
    X_train = X_train.drop(['game_session', "num_correct", "num_incorrect"],axis=1) 
    lbl = preprocessing.LabelEncoder()
    lbl.fit(list(X_train["installation_id"]))
    X_train["installation_id"] = lbl.transform(list(X_train["installation_id"]))
    remove_features = [i for i in X_train.columns if "_4235" in i or i == "world_"+str(activities_world["NONE"]) or "bet_" in i]
    for i in X_train.columns:
        if X_train[i].std() == 0 and i not in remove_features:
            remove_features.append(i)
    X_train = X_train.drop(remove_features, axis=1)
    X_train = X_train[sorted(X_train.columns.tolist())]
    print(X_train.shape)

    X_test = new_test.drop(["installation_id","accuracy_group", "game_session"], axis=1)
    X_test = X_test.drop(remove_features, axis=1)
    X_test = X_test[sorted(X_test.columns.tolist())]

    n_folds=5
    skf=GroupKFold(n_splits = n_folds)
    coefficients = []
    models = []
    lgbm_params = {
    'objective': 'binary','eval_metric': 'auc', 
    "max_depth" : 5, "boosting": 'gbdt', "num_leaves" : 20, "learning_rate" : 0.01,
}

    valid_correct_num = pd.DataFrame(np.zeros([X_train.shape[0]]))
    for i , (train_index, test_index) in enumerate(skf.split(X_train, y_train, X_train["installation_id"])):
        print("Fold "+str(i+1))
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]
        X_train2 = X_train2.drop(['installation_id'],axis=1)
    
        X_test2 = X_train.iloc[test_index,:]
        y_test2 = y_train.iloc[test_index]
        
        test2 = pd.concat([X_test2, y_test2], axis=1)
        X_test2 = test2.drop(["num_correct", "installation_id"], axis=1)
        y_test2 = test2["num_correct"]
    
        lgb_train = lgb.Dataset(X_train2, y_train2)
        lgb_eval = lgb.Dataset(X_test2, y_test2, reference=lgb_train)
        clf = lgb.train(lgbm_params, lgb_train,valid_sets=[lgb_train, lgb_eval],
            num_boost_round=10000,early_stopping_rounds=100,verbose_eval = 500,)
        train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
        test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
    
        train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
        test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
        
        models.append(clf)
        valid_correct_num.iloc[test_index] = test_predict.reshape(X_test2.shape[0], 1)
        
    print('Accuracy score = \t {}'.format(accuracy_score(y_train, np.round(valid_correct_num))))
    print('Precision score = \t {}'.format(precision_score(y_train, np.round(valid_correct_num))))
    print('Recall score =   \t {}'.format(recall_score(y_train, np.round(valid_correct_num))))
    print('F1 score =      \t {}'.format(f1_score(y_train, np.round(valid_correct_num))))
    print(confusion_matrix(y_train, np.round(valid_correct_num)))
    pred_value = np.zeros([X_test.shape[0]])
    for model in models:
        pred_value += model.predict(X_test, num_iteration = model.best_iteration) / len(models)
    return pred_value, valid_correct_num
#pred_value, valid_correct_num = classification1(new_train, new_test)

In [16]:
def regression2(new_train, new_test):
    X_train = new_train.drop(['accuracy_group'],axis=1) 
    X_train = pd.merge(X_train, train_labels[["game_session", "num_correct", "num_incorrect"]], on ="game_session")
    y_train = X_train.num_incorrect
    X_train = X_train.drop(['game_session', "num_correct", "num_incorrect"],axis=1) 
    lbl = preprocessing.LabelEncoder()
    lbl.fit(list(X_train["installation_id"]))
    X_train["installation_id"] = lbl.transform(list(X_train["installation_id"]))
    remove_features = [i for i in X_train.columns if "_4235" in i or i == "world_"+str(activities_world["NONE"]) or "bet_" in i]
    for i in X_train.columns:
        if X_train[i].std() == 0 and i not in remove_features:
            remove_features.append(i)
    X_train = X_train.drop(remove_features, axis=1)
    X_train = X_train[sorted(X_train.columns.tolist())]
    print(X_train.shape)

    X_test = new_test.drop(["installation_id","accuracy_group", "game_session"], axis=1)
    X_test = X_test.drop(remove_features, axis=1)
    X_test = X_test[sorted(X_test.columns.tolist())]

    n_folds=5
    skf=GroupKFold(n_splits = n_folds)
    coefficients = []
    models = []
    lgbm_params = {
        'objective': 'regression','metric': 'rmse',"tree_learner": "serial", 
    "max_depth" : 5, "boosting": 'gbdt', "num_leaves" : 13, "learning_rate" : 0.01,
    }

    valid_incorrect_num = pd.DataFrame(np.zeros([X_train.shape[0]]))
    for i , (train_index, test_index) in enumerate(skf.split(X_train, y_train, X_train["installation_id"])):
        print("Fold "+str(i+1))
        X_train2 = X_train.iloc[train_index,:]
        y_train2 = y_train.iloc[train_index]
    X_train2 = X_train2.drop(['installation_id'],axis=1)
    
    X_test2 = X_train.iloc[test_index,:]
    y_test2 = y_train.iloc[test_index]
        
    test2 = pd.concat([X_test2, y_test2], axis=1)
    X_test2 = test2.drop(["num_incorrect", "installation_id"], axis=1)
    y_test2 = test2["num_incorrect"]
    
    lgb_train = lgb.Dataset(X_train2, y_train2)
    lgb_eval = lgb.Dataset(X_test2, y_test2, reference=lgb_train)
    clf = lgb.train(lgbm_params, lgb_train,valid_sets=[lgb_train, lgb_eval],
        num_boost_round=10000,early_stopping_rounds=100,verbose_eval = 500,)
    train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
    test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
    
    train_predict = clf.predict(X_train2, num_iteration = clf.best_iteration)
    test_predict = clf.predict(X_test2, num_iteration = clf.best_iteration)
        
    models.append(clf)
    valid_incorrect_num.iloc[test_index] = test_predict.reshape(X_test2.shape[0], 1)
    
    pred_value_incorrect = np.zeros([X_test.shape[0]])
    for model in models:
        pred_value_incorrect += model.predict(X_test, num_iteration = model.best_iteration) / len(models)
    return pred_value_incorrect, valid_incorrect_num
#pred_value_incorrect, valid_incorrect_num = regression2(new_train, new_test)

In [17]:
#train_exp_accuracy = np.round(valid_correct_num) / (np.round(valid_correct_num) + valid_incorrect_num)
#test_exp_accuracy = np.round(pred_value) / (np.round(pred_value) + pred_value_incorrect)
#best_score = 0
#for i in range(10):
#    optR = OptimizedRounder()
#    optR.fit(np.array(train_exp_accuracy).reshape(-1,), new_train.accuracy_group, random_flg=True)
#    coefficients = optR.coefficients()
#    final_valid_pred = optR.predict(np.array(train_exp_accuracy).reshape(-1,), coefficients)
#    score = qwk(new_train.accuracy_group, final_valid_pred)
#    print(coefficients, score)
#    if score > best_score:
#        best_score = score
#        best_coefficients = coefficients
#final_test_pred = pd.cut(np.array(test_exp_accuracy).reshape(-1,), [-np.inf] + list(np.sort(best_coefficients)) + [np.inf], labels = [0, 1, 2, 3])
#sample_submission["accuracy_group"] = final_test_pred.astype(int)
#sample_submission.to_csv('submission.csv', index=False)
#sample_submission["accuracy_group"].value_counts(normalize = True)