Agenda
1. preparation
2. Model

In [1]:
import warnings
warnings.simplefilter('ignore')

import os
import gc
import re
from collections import Counter

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)
from tqdm.auto import tqdm
tqdm.pandas()

from gensim.models import Word2Vec
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error

import lightgbm as lgb
from xgboost import XGBRegressor
from sklearn.ensemble import HistGradientBoostingRegressor

# 1-1. load data

In [2]:
train_logs = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/train_logs.csv')
display(train_logs)
train_scores = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/train_scores.csv')
display(train_scores)
test_logs = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/test_logs.csv')
display(test_logs)

Unnamed: 0,id,event_id,down_time,up_time,action_time,activity,down_event,up_event,text_change,cursor_position,word_count
0,001519c8,1,4526,4557,31,Nonproduction,Leftclick,Leftclick,NoChange,0,0
1,001519c8,2,4558,4962,404,Nonproduction,Leftclick,Leftclick,NoChange,0,0
2,001519c8,3,106571,106571,0,Nonproduction,Shift,Shift,NoChange,0,0
3,001519c8,4,106686,106777,91,Input,q,q,q,1,1
4,001519c8,5,107196,107323,127,Input,q,q,q,2,1
...,...,...,...,...,...,...,...,...,...,...,...
8405893,fff05981,3615,2063944,2064440,496,Nonproduction,Leftclick,Leftclick,NoChange,1031,240
8405894,fff05981,3616,2064497,2064497,0,Nonproduction,Shift,Shift,NoChange,1031,240
8405895,fff05981,3617,2064657,2064765,108,Replace,q,q,q => q,1031,240
8405896,fff05981,3618,2069186,2069259,73,Nonproduction,Leftclick,Leftclick,NoChange,1028,240


Unnamed: 0,id,score
0,001519c8,3.5
1,0022f953,3.5
2,0042269b,6.0
3,0059420b,2.0
4,0075873a,4.0
...,...,...
2466,ffb8c745,3.5
2467,ffbef7e5,4.0
2468,ffccd6fd,1.5
2469,ffec5b38,5.0


Unnamed: 0,id,event_id,down_time,up_time,action_time,activity,down_event,up_event,text_change,cursor_position,word_count
0,0000aaaa,1,338433,338518,85,Input,Space,Space,,0,0
1,0000aaaa,2,760073,760160,87,Input,Space,Space,,1,0
2,2222bbbb,1,711956,712023,67,Input,q,q,q,0,1
3,2222bbbb,2,290502,290548,46,Input,q,q,q,1,1
4,4444cccc,1,635547,635641,94,Input,Space,Space,,0,0
5,4444cccc,2,184996,185052,56,Input,q,q,q,1,1


## 1-2. helper functions

### 1-2-1. const definition

In [3]:
# variables category
exp_key_column = "id"
obj_column = "score"

In [4]:
# count_elements
target_activities = ['Input', 'Remove/Cut', 'Nonproduction', 'Replace', 'Paste']
target_events = ['q', 'Space', 'Backspace', 'Shift', 'ArrowRight', 'Leftclick', 'ArrowLeft', '.', ',', 
          'ArrowDown', 'ArrowUp', 'Enter', 'CapsLock', "'", 'Delete', 'Unidentified']
target_text_changes = ['q', ' ', 'NoChange', '.', ',', '\n', "'", '"', '-', '?', ';', '=', '/', '\\', ':']
target_punctuations = ['"', '.', ',', "'", '-', ';', ':', '?', '!', '<', '>', '/',
                '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', '+']

In [5]:
def element_counts(df: pd.DataFrame, key_colname: str, count_colname: str, count_elements: list, match: str):
    """
    This function counts each elements
    Parameters
    ----------
    df: DataFrame
    key_colname: aggregator
    count_colname: count columns
    count_elements: count value in this list
    match: exact or partital
        
    Returns
    -------
    d : pd.DataFrame
    """
    print(f"======================element_counts start({count_colname})!!======================")
    tmp_df = df.groupby(key_colname).agg({count_colname: list}).reset_index()
    display(tmp_df)
    ret = list()
    if match == "exact":
        for li in tqdm(tmp_df[count_colname].values):
            items = list(Counter(li).items())
            di = dict()
            for k in count_elements:
                di[k] = 0
            for item in items:
                k,v = item[0], item[1]
                if k in di: # this part counts disgnated part
                    di[k] = v
            ret.append(di)
        ret = pd.DataFrame(ret)
        display(ret)
        cols = [f"{count_colname}_{i}_count" for i in range(len(ret.columns))]
        ret.columns = cols
    elif match == "partitial":
        for li in tqdm(tmp_df['down_event'].values):
            cnt = 0
            items = list(Counter(li).items())
            for item in items:
                k, v = item[0], item[1]
                if k in count_elements:
                    cnt += v
            ret.append(cnt)
        ret = pd.DataFrame({'punct_cnt': ret})
        
    return ret
    
def get_input_words(df: pd.DataFrame):
    print("======================get_input_words start!!======================")
    tmp_df = df[(~df["text_change"].str.contains("=>"))&(df["text_change"] != "Nochange")].reset_index(drop= True)
    tmp_df = tmp_df.groupby(exp_key_column).agg({"text_change": list}).reset_index()
    # concat part
    tmp_df["text_change"] = tmp_df["text_change"].apply(lambda x: "".join(x))
    tmp_df['text_change'] = tmp_df['text_change'].apply(lambda x: re.findall(r'q+', x))
    
    tmp_df['input_word_count'] = tmp_df['text_change'].apply(len)
    display(tmp_df[["id","text_change"]])
    
    # calc part
    tmp_df['input_word_length_mean'] = tmp_df['text_change'].apply(lambda x: np.mean([len(i) for i in x] if len(x) > 0 else 0))
    tmp_df['input_word_length_max'] = tmp_df['text_change'].apply(lambda x: np.max([len(i) for i in x] if len(x) > 0 else 0))
    tmp_df['input_word_length_std'] = tmp_df['text_change'].apply(lambda x: np.std([len(i) for i in x] if len(x) > 0 else 0))
    tmp_df.drop(['text_change'], axis=1, inplace=True)
    return tmp_df

## 1-3. make features

In [6]:
def make_feats(df: pd.DataFrame):
    print("======================make_feats start!!======================")
    feats = pd.DataFrame({exp_key_column: df[exp_key_column].unique().tolist()})
    # time shift
    target_col = "up_time"
    df[f'{target_col}_shift1'] = df.groupby(exp_key_column)[target_col].shift(1) #(going under ↓)
    df['action_time_gap'] = df['down_time'] - df[f'{target_col}_shift1']
    df.drop(f'{target_col}_shift1', axis=1, inplace=True)
    
    # cursor position shift
    target_col = "cursor_position"
    df[f'{target_col}_shift1'] = df.groupby(exp_key_column)[target_col].shift(1)
    df[f'{target_col}_change'] = np.abs(df[target_col] - df[f'{target_col}_shift1'])
    df.drop(f'{target_col}_shift1', axis=1, inplace=True)
    
    # word count shift
    target_col = "word_count"
    df[f'{target_col}_shift1'] = df.groupby(exp_key_column)[target_col].shift(1)
    df[f'{target_col}_change'] = np.abs(df[target_col] - df[f'{target_col}_shift1'])
    df.drop(f'{target_col}_shift1', axis=1, inplace=True)
    
    for item in tqdm([
        ('event_id', ['max']),
        ('up_time', ['max']),
        ('action_time', ['sum', 'max', 'mean', 'std']),
        ('activity', ['nunique']),
        ('down_event', ['nunique']),
        ('up_event', ['nunique']),
        ('text_change', ['nunique']),
        ('cursor_position', ['nunique', 'max', 'mean']),
        ('word_count', ['nunique', 'max', 'mean']),
        ('action_time_gap', ['max', 'min', 'mean', 'std', 'sum']),
        ('cursor_position_change', ['max', 'mean', 'std', 'sum']),
        ('word_count_change', ['max', 'mean', 'std', 'sum'])
    ]):
        colname, methods = item[0], item[1]
        for method in methods:
            print(f"calc: {method} of {colname}")
            tmp_df = df.groupby([exp_key_column]).agg({colname: method}).reset_index().rename(columns = {colname: f"{colname}_{method}"})
            feats = feats.merge(tmp_df, on = exp_key_column, how= "left")
    
    # counts
#     element_counts(df: pd.DataFrame, key_colname = exp_key_column: str, count_colname: str, count_elements: list, match: str):
    # tmp_df = activity_counts(df)
    tmp_df = element_counts(df, exp_key_column, "activity", target_activities, "exact")
    feats = pd.concat([feats, tmp_df], axis=1)
    
#     tmp_df = event_counts(df, 'down_event')
    tmp_df = element_counts(df, exp_key_column, "down_event", target_events, "exact")
    feats = pd.concat([feats, tmp_df], axis=1)
    
#     tmp_df = event_counts(df, 'up_event')
    tmp_df = element_counts(df, exp_key_column, "up_event", target_events, "exact")
    feats = pd.concat([feats, tmp_df], axis=1)
    
#     tmp_df = text_change_counts(df)
    tmp_df = element_counts(df, exp_key_column, "text_change", target_text_changes, "exact")
    feats = pd.concat([feats, tmp_df], axis=1)
    
#     tmp_df = match_punctuations(df)
    tmp_df = element_counts(df, exp_key_column, "down_event", target_punctuations, "partitial")
    feats = pd.concat([feats, tmp_df], axis=1)
    
    
    # input words
    tmp_df = get_input_words(df)
    feats = pd.merge(feats, tmp_df, on='id', how='left')
    
    # compare feats
    feats['word_time_ratio'] = feats['word_count_max'] / feats['up_time_max']
    feats['word_event_ratio'] = feats['word_count_max'] / feats['event_id_max']
    feats['event_time_ratio'] = feats['event_id_max']  / feats['up_time_max']
    feats['idle_time_ratio'] = feats['action_time_gap_sum'] / feats['up_time_max']
    
    return feats
    

In [7]:
train_feats = make_feats(train_logs)



  0%|          | 0/12 [00:00<?, ?it/s]

calc: max of event_id
calc: max of up_time
calc: sum of action_time
calc: max of action_time
calc: mean of action_time
calc: std of action_time
calc: nunique of activity
calc: nunique of down_event
calc: nunique of up_event
calc: nunique of text_change
calc: nunique of cursor_position
calc: max of cursor_position
calc: mean of cursor_position
calc: nunique of word_count
calc: max of word_count
calc: mean of word_count
calc: max of action_time_gap
calc: min of action_time_gap
calc: mean of action_time_gap
calc: std of action_time_gap
calc: sum of action_time_gap
calc: max of cursor_position_change
calc: mean of cursor_position_change
calc: std of cursor_position_change
calc: sum of cursor_position_change
calc: max of word_count_change
calc: mean of word_count_change
calc: std of word_count_change
calc: sum of word_count_change


Unnamed: 0,id,activity
0,001519c8,"[Nonproduction, Nonproduction, Nonproduction, ..."
1,0022f953,"[Nonproduction, Nonproduction, Input, Input, I..."
2,0042269b,"[Nonproduction, Nonproduction, Input, Input, I..."
3,0059420b,"[Nonproduction, Nonproduction, Nonproduction, ..."
4,0075873a,"[Nonproduction, Nonproduction, Input, Input, I..."
...,...,...
2466,ffb8c745,"[Nonproduction, Nonproduction, Nonproduction, ..."
2467,ffbef7e5,"[Nonproduction, Nonproduction, Nonproduction, ..."
2468,ffccd6fd,"[Nonproduction, Nonproduction, Input, Input, I..."
2469,ffec5b38,"[Nonproduction, Nonproduction, Input, Input, I..."


  0%|          | 0/2471 [00:00<?, ?it/s]

Unnamed: 0,Input,Remove/Cut,Nonproduction,Replace,Paste
0,2010,417,120,7,0
1,1938,260,254,1,1
2,3515,439,175,7,0
3,1304,151,99,1,1
4,1942,517,72,0,0
...,...,...,...,...,...
2466,3588,960,189,2,0
2467,2395,60,148,1,0
2468,2849,88,126,0,0
2469,2895,276,71,0,0




Unnamed: 0,id,down_event
0,001519c8,"[Leftclick, Leftclick, Shift, q, q, q, q, q, q..."
1,0022f953,"[Leftclick, Shift, q, q, q, q, Space, q, q, Sp..."
2,0042269b,"[Leftclick, Shift, q, q, q, q, q, q, q, Space,..."
3,0059420b,"[Leftclick, Leftclick, Shift, Shift, Shift, Sh..."
4,0075873a,"[Leftclick, Shift, q, q, q, q, q, q, q, q, q, ..."
...,...,...
2466,ffb8c745,"[Leftclick, Tab, Leftclick, Space, Space, Spac..."
2467,ffbef7e5,"[Leftclick, Leftclick, Shift, q, q, q, q, Spac..."
2468,ffccd6fd,"[Leftclick, Leftclick, q, q, q, q, q, q, Space..."
2469,ffec5b38,"[Leftclick, Shift, q, q, q, q, q, q, q, q, Bac..."


  0%|          | 0/2471 [00:00<?, ?it/s]

Unnamed: 0,q,Space,Backspace,Shift,ArrowRight,Leftclick,ArrowLeft,.,",",ArrowDown,ArrowUp,Enter,CapsLock,',Delete,Unidentified
0,1619,357,417,27,2,92,2,21,12,0,0,4,0,3,0,0
1,1490,391,260,97,46,56,49,15,21,3,2,6,0,3,0,0
2,2904,552,439,39,6,129,0,21,23,0,0,17,0,0,0,0
3,1038,243,152,68,0,18,0,13,3,0,0,3,2,2,0,0
4,1541,324,517,39,0,33,0,23,24,0,0,10,0,17,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,2844,651,960,164,0,24,0,43,32,0,0,7,0,7,0,0
2467,1874,447,60,106,0,36,0,31,24,0,0,12,0,8,0,0
2468,969,1861,88,0,29,9,53,5,2,29,0,12,6,0,0,0
2469,2361,457,276,52,0,14,0,31,27,0,0,6,2,4,0,0




Unnamed: 0,id,up_event
0,001519c8,"[Leftclick, Leftclick, Shift, q, q, q, q, q, q..."
1,0022f953,"[Leftclick, Shift, q, q, q, q, Space, q, q, Sp..."
2,0042269b,"[Leftclick, Shift, q, q, q, q, q, q, q, Space,..."
3,0059420b,"[Leftclick, Leftclick, Shift, Shift, Shift, Sh..."
4,0075873a,"[Leftclick, Shift, q, q, q, q, q, q, q, q, q, ..."
...,...,...
2466,ffb8c745,"[Leftclick, Tab, Leftclick, Space, Space, Spac..."
2467,ffbef7e5,"[Leftclick, Leftclick, Shift, q, q, q, q, Spac..."
2468,ffccd6fd,"[Leftclick, Leftclick, q, q, q, q, q, q, Space..."
2469,ffec5b38,"[Leftclick, Shift, q, q, q, q, q, q, q, q, Bac..."


  0%|          | 0/2471 [00:00<?, ?it/s]

Unnamed: 0,q,Space,Backspace,Shift,ArrowRight,Leftclick,ArrowLeft,.,",",ArrowDown,ArrowUp,Enter,CapsLock,',Delete,Unidentified
0,1619,357,417,27,2,92,2,21,12,0,0,4,0,3,0,0
1,1490,391,260,97,46,56,49,15,21,3,2,6,0,3,0,0
2,2899,552,439,39,6,129,0,21,23,0,0,17,0,0,0,0
3,1038,243,152,68,0,18,0,13,3,0,0,3,2,2,0,0
4,1541,324,517,39,0,33,0,23,24,0,0,10,0,17,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,2844,651,960,164,0,24,0,43,32,0,0,7,0,7,0,0
2467,1874,447,60,106,0,36,0,31,24,0,0,12,0,8,0,0
2468,969,1861,88,0,29,9,53,5,2,29,0,12,6,0,0,0
2469,2361,457,276,52,0,14,0,31,27,0,0,6,2,4,0,0




Unnamed: 0,id,text_change
0,001519c8,"[NoChange, NoChange, NoChange, q, q, q, q, q, ..."
1,0022f953,"[NoChange, NoChange, q, q, q, q, , q, q, , N..."
2,0042269b,"[NoChange, NoChange, q, q, q, q, q, q, q, , q..."
3,0059420b,"[NoChange, NoChange, NoChange, NoChange, NoCha..."
4,0075873a,"[NoChange, NoChange, q, q, q, q, q, q, q, q, q..."
...,...,...
2466,ffb8c745,"[NoChange, NoChange, NoChange, , , , , , ..."
2467,ffbef7e5,"[NoChange, NoChange, NoChange, q, q, q, q, , ..."
2468,ffccd6fd,"[NoChange, NoChange, q, q, q, q, q, q, , q, q..."
2469,ffec5b38,"[NoChange, NoChange, q, q, q, q, q, q, q, q, q..."


  0%|          | 0/2471 [00:00<?, ?it/s]

Unnamed: 0,q,Unnamed: 2,NoChange,.,",",\n,',"""",-,?,;,=,/,\,:
0,1940,436,120,28,14,4,5,0,0,0,1,0,0,0,0
1,1698,432,254,18,24,7,4,6,6,3,0,0,0,0,0
2,3257,615,175,23,26,23,0,2,1,0,0,4,0,0,0
3,1146,281,99,13,3,4,3,0,0,0,0,5,0,0,0
4,1964,397,72,32,25,12,25,0,0,2,0,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,3605,813,189,59,42,11,8,6,1,0,0,0,0,0,2
2467,1920,457,148,33,24,12,9,0,0,0,0,0,0,0,0
2468,1031,1879,126,6,3,18,0,0,0,0,0,0,0,0,0
2469,2593,490,71,34,29,8,4,6,1,0,2,0,0,0,0




Unnamed: 0,id,down_event
0,001519c8,"[Leftclick, Leftclick, Shift, q, q, q, q, q, q..."
1,0022f953,"[Leftclick, Shift, q, q, q, q, Space, q, q, Sp..."
2,0042269b,"[Leftclick, Shift, q, q, q, q, q, q, q, Space,..."
3,0059420b,"[Leftclick, Leftclick, Shift, Shift, Shift, Sh..."
4,0075873a,"[Leftclick, Shift, q, q, q, q, q, q, q, q, q, ..."
...,...,...
2466,ffb8c745,"[Leftclick, Tab, Leftclick, Space, Space, Spac..."
2467,ffbef7e5,"[Leftclick, Leftclick, Shift, q, q, q, q, Spac..."
2468,ffccd6fd,"[Leftclick, Leftclick, q, q, q, q, q, q, Space..."
2469,ffec5b38,"[Leftclick, Shift, q, q, q, q, q, q, q, q, Bac..."


  0%|          | 0/2471 [00:00<?, ?it/s]



Unnamed: 0,id,text_change
0,001519c8,"[qqqqqq, qqq, qqqqqqq, qqqqqq, qq, qqqq, qqqqq..."
1,0022f953,"[qqqq, qq, qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq, ..."
2,0042269b,"[qqqqqqq, qqq, qqqq, qqqq, qqqq, qq, qqqqqqqq,..."
3,0059420b,"[qqqq, qq, qqqqqqq, qqqqqq, q, q, qqq, qqqqqqq..."
4,0075873a,"[qqqqqqqqqqq, qq, q, qq, qqqqq, qq, qqqqqqqqqq..."
...,...,...
2466,ffb8c745,"[qq, qqqqq, q, qqqqqqq, qqq, q, qqqqq, q, qqqq..."
2467,ffbef7e5,"[qqqq, qqqqqq, qqqqq, qq, qqqqq, qqqqq, qq, qq..."
2468,ffccd6fd,"[qqqqqq, qqqq, q, qqqqqqq, qqqqqqqq, q, qq, qq..."
2469,ffec5b38,"[qqqqqqqqqqqq, qqqqqqq, qqqqqq, qqqq, qqqqq, q..."


In [8]:
test_feats = make_feats(test_logs)



  0%|          | 0/12 [00:00<?, ?it/s]

calc: max of event_id
calc: max of up_time
calc: sum of action_time
calc: max of action_time
calc: mean of action_time
calc: std of action_time
calc: nunique of activity
calc: nunique of down_event
calc: nunique of up_event
calc: nunique of text_change
calc: nunique of cursor_position
calc: max of cursor_position
calc: mean of cursor_position
calc: nunique of word_count
calc: max of word_count
calc: mean of word_count
calc: max of action_time_gap
calc: min of action_time_gap
calc: mean of action_time_gap
calc: std of action_time_gap
calc: sum of action_time_gap
calc: max of cursor_position_change
calc: mean of cursor_position_change
calc: std of cursor_position_change
calc: sum of cursor_position_change
calc: max of word_count_change
calc: mean of word_count_change
calc: std of word_count_change
calc: sum of word_count_change


Unnamed: 0,id,activity
0,0000aaaa,"[Input, Input]"
1,2222bbbb,"[Input, Input]"
2,4444cccc,"[Input, Input]"


  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,Input,Remove/Cut,Nonproduction,Replace,Paste
0,2,0,0,0,0
1,2,0,0,0,0
2,2,0,0,0,0




Unnamed: 0,id,down_event
0,0000aaaa,"[Space, Space]"
1,2222bbbb,"[q, q]"
2,4444cccc,"[Space, q]"


  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,q,Space,Backspace,Shift,ArrowRight,Leftclick,ArrowLeft,.,",",ArrowDown,ArrowUp,Enter,CapsLock,',Delete,Unidentified
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0




Unnamed: 0,id,up_event
0,0000aaaa,"[Space, Space]"
1,2222bbbb,"[q, q]"
2,4444cccc,"[Space, q]"


  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,q,Space,Backspace,Shift,ArrowRight,Leftclick,ArrowLeft,.,",",ArrowDown,ArrowUp,Enter,CapsLock,',Delete,Unidentified
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0




Unnamed: 0,id,text_change
0,0000aaaa,"[ , ]"
1,2222bbbb,"[q, q]"
2,4444cccc,"[ , q]"


  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,q,Unnamed: 2,NoChange,.,",",\n,',"""",-,?,;,=,/,\,:
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0




Unnamed: 0,id,down_event
0,0000aaaa,"[Space, Space]"
1,2222bbbb,"[q, q]"
2,4444cccc,"[Space, q]"


  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,id,text_change
0,0000aaaa,[]
1,2222bbbb,[qq]
2,4444cccc,[q]


In [9]:
train_feats = train_feats.merge(train_scores, on='id', how='left') # join object variance

In [10]:
display(train_feats)
display(test_feats)

Unnamed: 0,id,event_id_max,up_time_max,action_time_sum,action_time_max,action_time_mean,action_time_std,activity_nunique,down_event_nunique,up_event_nunique,text_change_nunique,cursor_position_nunique,cursor_position_max,cursor_position_mean,word_count_nunique,word_count_max,word_count_mean,action_time_gap_max,action_time_gap_min,action_time_gap_mean,action_time_gap_std,action_time_gap_sum,cursor_position_change_max,cursor_position_change_mean,cursor_position_change_std,cursor_position_change_sum,word_count_change_max,word_count_change_mean,word_count_change_std,word_count_change_sum,activity_0_count,activity_1_count,activity_2_count,activity_3_count,activity_4_count,down_event_0_count,down_event_1_count,down_event_2_count,down_event_3_count,down_event_4_count,down_event_5_count,down_event_6_count,down_event_7_count,down_event_8_count,down_event_9_count,down_event_10_count,down_event_11_count,down_event_12_count,down_event_13_count,down_event_14_count,down_event_15_count,up_event_0_count,up_event_1_count,up_event_2_count,up_event_3_count,up_event_4_count,up_event_5_count,up_event_6_count,up_event_7_count,up_event_8_count,up_event_9_count,up_event_10_count,up_event_11_count,up_event_12_count,up_event_13_count,up_event_14_count,up_event_15_count,text_change_0_count,text_change_1_count,text_change_2_count,text_change_3_count,text_change_4_count,text_change_5_count,text_change_6_count,text_change_7_count,text_change_8_count,text_change_9_count,text_change_10_count,text_change_11_count,text_change_12_count,text_change_13_count,text_change_14_count,punct_cnt,input_word_count,input_word_length_mean,input_word_length_max,input_word_length_std,word_time_ratio,word_event_ratio,event_time_ratio,idle_time_ratio,score
0,001519c8,2557,1801969,297243,2259,116.246774,91.797374,7,12,12,17,1469,1539,711.163473,257,256,128.116152,154136.0,-142.0,586.932707,4294.022274,1500200.0,1350.0,4.159624,43.180116,10632.0,2.0,0.172535,0.381013,441.0,2010,417,120,7,0,1619,357,417,27,2,92,2,21,12,0,0,4,0,3,0,0,1619,357,417,27,2,92,2,21,12,0,0,4,0,3,0,0,1940,436,120,28,14,4,5,0,0,0,1,0,0,0,0,37,377,5.169761,20,3.346931,0.000142,0.100117,0.001419,0.832534,3.5
1,0022f953,2454,1788969,275391,1758,112.221271,55.431189,5,17,17,12,1416,1676,776.205786,324,323,182.714751,145899.0,-166.0,604.547493,4897.303641,1482955.0,1581.0,9.819405,84.785626,24087.0,1.0,0.170404,0.376064,418.0,1938,260,254,1,1,1490,391,260,97,46,56,49,15,21,3,2,6,0,3,0,0,1490,391,260,97,46,56,49,15,21,3,2,6,0,3,0,0,1698,432,254,18,24,7,4,6,6,3,0,0,0,0,0,53,401,4.234414,33,3.062917,0.000181,0.131622,0.001372,0.828944,3.5
2,0042269b,4136,1771669,421201,3005,101.837766,82.383766,4,13,18,19,1649,2291,731.611702,405,404,194.772727,153886.0,-250.0,325.520435,3937.359025,1346027.0,1862.0,6.531318,71.786451,27007.0,28.0,0.167836,0.644564,694.0,3515,439,175,7,0,2904,552,439,39,6,129,0,21,23,0,0,17,0,0,0,0,2899,552,439,39,6,129,0,21,23,0,0,17,0,0,0,0,3257,615,175,23,26,23,0,2,1,0,0,4,0,0,0,47,639,5.344288,25,3.372135,0.000228,0.097679,0.002335,0.759751,6.0
3,0059420b,1556,1404469,189596,806,121.848329,113.768226,5,15,15,10,1048,1047,542.537275,207,206,103.618895,101690.0,-516.0,754.648232,4242.152639,1173478.0,357.0,1.457878,9.920533,2267.0,1.0,0.181350,0.385432,282.0,1304,151,99,1,1,1038,243,152,68,0,18,0,13,3,0,0,3,2,2,0,0,1038,243,152,68,0,18,0,13,3,0,0,3,2,2,0,0,1146,281,99,13,3,4,3,0,0,0,0,5,0,0,0,18,255,4.537255,15,2.867940,0.000147,0.132391,0.001108,0.835531,2.0
4,0075873a,2531,1662472,313702,701,123.943896,62.082013,3,11,11,9,1197,1402,600.050968,253,252,125.082971,110688.0,-158.0,502.094862,3896.209237,1270300.0,643.0,2.803953,24.251326,7094.0,1.0,0.168379,0.374277,426.0,1942,517,72,0,0,1541,324,517,39,0,33,0,23,24,0,0,10,0,17,0,0,1541,324,517,39,0,33,0,23,24,0,0,10,0,17,0,0,1964,397,72,32,25,12,25,0,0,2,0,2,0,0,0,66,431,4.556845,14,2.783927,0.000152,0.099565,0.001522,0.764103,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,ffb8c745,4739,1791649,499670,3323,105.437856,63.622575,4,13,13,13,1484,1634,774.000633,462,461,256.353661,128570.0,-117.0,267.942592,3458.823757,1269512.0,1262.0,1.990713,27.213406,9432.0,170.0,0.204095,2.495495,967.0,3588,960,189,2,0,2844,651,960,164,0,24,0,43,32,0,0,7,0,7,0,0,2844,651,960,164,0,24,0,43,32,0,0,7,0,7,0,0,3605,813,189,59,42,11,8,6,1,0,0,0,0,0,2,88,741,4.869096,20,2.977718,0.000257,0.097278,0.002645,0.708572,3.5
2467,ffbef7e5,2604,1799174,214221,1144,82.266129,36.178818,4,11,11,8,1808,1877,1022.078725,439,438,223.013057,267869.0,-64.0,600.545909,5630.628933,1563221.0,1124.0,2.820207,35.614696,7341.0,1.0,0.177488,0.382154,462.0,2395,60,148,1,0,1874,447,60,106,0,36,0,31,24,0,0,12,0,8,0,0,1874,447,60,106,0,36,0,31,24,0,0,12,0,8,0,0,1920,457,148,33,24,12,9,0,0,0,0,0,0,0,0,63,473,4.059197,13,2.221528,0.000243,0.168203,0.001447,0.868855,4.0
2468,ffccd6fd,3063,1959363,231580,564,75.605615,63.494975,3,11,11,6,2759,2761,1309.085537,202,201,157.589292,229804.0,-87.0,556.597322,5398.118769,1704301.0,427.0,1.344546,8.509608,4117.0,1.0,0.073481,0.260968,225.0,2849,88,126,0,0,969,1861,88,0,29,9,53,5,2,29,0,12,6,0,0,0,969,1861,88,0,29,9,53,5,2,29,0,12,6,0,0,0,1031,1879,126,6,3,18,0,0,0,0,0,0,0,0,0,7,232,4.443966,15,2.693600,0.000103,0.065622,0.001563,0.869824,1.5
2469,ffec5b38,3242,1508504,289439,1388,89.277915,54.515788,3,15,15,13,2106,2133,1192.640962,414,413,205.917027,127733.0,-132.0,370.003085,3462.066161,1199180.0,563.0,1.814563,16.147617,5881.0,9.0,0.163838,0.401387,531.0,2895,276,71,0,0,2361,457,276,52,0,14,0,31,27,0,0,6,2,4,0,0,2361,457,276,52,0,14,0,31,27,0,0,6,2,4,0,0,2593,490,71,34,29,8,4,6,1,0,2,0,0,0,0,70,512,5.169922,24,3.419903,0.000274,0.127390,0.002149,0.794947,5.0


Unnamed: 0,id,event_id_max,up_time_max,action_time_sum,action_time_max,action_time_mean,action_time_std,activity_nunique,down_event_nunique,up_event_nunique,text_change_nunique,cursor_position_nunique,cursor_position_max,cursor_position_mean,word_count_nunique,word_count_max,word_count_mean,action_time_gap_max,action_time_gap_min,action_time_gap_mean,action_time_gap_std,action_time_gap_sum,cursor_position_change_max,cursor_position_change_mean,cursor_position_change_std,cursor_position_change_sum,word_count_change_max,word_count_change_mean,word_count_change_std,word_count_change_sum,activity_0_count,activity_1_count,activity_2_count,activity_3_count,activity_4_count,down_event_0_count,down_event_1_count,down_event_2_count,down_event_3_count,down_event_4_count,down_event_5_count,down_event_6_count,down_event_7_count,down_event_8_count,down_event_9_count,down_event_10_count,down_event_11_count,down_event_12_count,down_event_13_count,down_event_14_count,down_event_15_count,up_event_0_count,up_event_1_count,up_event_2_count,up_event_3_count,up_event_4_count,up_event_5_count,up_event_6_count,up_event_7_count,up_event_8_count,up_event_9_count,up_event_10_count,up_event_11_count,up_event_12_count,up_event_13_count,up_event_14_count,up_event_15_count,text_change_0_count,text_change_1_count,text_change_2_count,text_change_3_count,text_change_4_count,text_change_5_count,text_change_6_count,text_change_7_count,text_change_8_count,text_change_9_count,text_change_10_count,text_change_11_count,text_change_12_count,text_change_13_count,text_change_14_count,punct_cnt,input_word_count,input_word_length_mean,input_word_length_max,input_word_length_std,word_time_ratio,word_event_ratio,event_time_ratio,idle_time_ratio
0,0000aaaa,2,760160,172,87,86.0,1.414214,1,1,1,1,2,1,0.5,1,0,0.0,421555.0,421555.0,421555.0,,421555.0,1.0,1.0,,1.0,0.0,0.0,,0.0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0.0,3e-06,0.554561
1,2222bbbb,2,712023,113,67,56.5,14.849242,1,1,1,1,2,1,0.5,1,1,1.0,-421521.0,-421521.0,-421521.0,,-421521.0,1.0,1.0,,1.0,0.0,0.0,,0.0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2.0,2,0.0,1e-06,0.5,3e-06,-0.592005
2,4444cccc,2,635641,150,94,75.0,26.870058,1,2,2,2,2,1,0.5,2,1,0.5,-450645.0,-450645.0,-450645.0,,-450645.0,1.0,1.0,,1.0,1.0,1.0,,1.0,2,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.0,1,0.0,2e-06,0.5,3e-06,-0.708962


# 2. Model

## 2-1. prepartation

### 2-1-1. package import

In [11]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

### 2-1-2 . data split into X and Y

In [12]:
Y = train_feats[obj_column]
X = train_feats.drop([exp_key_column , obj_column] , axis=1)

### 2-1-3. metric function& predict function

In [13]:
metric = "rsme"

In [14]:
def classify_predict(predict):
    res = 0
    if predict <= 0.25:
        res = 0
    elif 0.25 < predict and predict <= 0.75:
        res = 0.5
    elif 0.75 < predict and predict <= 1.25:
        res = 1.0
    elif 1.25 < predict and predict <= 1.75:
        res = 1.5
    elif 1.75 < predict and predict <= 2.25:
        res = 2.0
    elif 2.25 < predict and predict <= 2.75:
        res = 2.5
    elif 2.75 < predict and predict <= 3.25:
        res = 3.0
    elif 3.25 < predict and predict <= 3.75:
        res = 3.5
    elif 3.75 < predict and predict <= 4.25:
        res = 4.0
    elif 4.25 < predict and predict <= 4.75:
        res = 4.5
    elif 4.75 < predict and predict <= 5.25:
        res = 5.0
    elif 5.25 < predict and predict <= 5.75:
        res = 5.5
    else:
        res = 6.0
    return res
        

In [15]:
classify_predict(4.8)

5.0

In [16]:
def classify_predict_vectorize(lis):
    res = np.vectorize(classify_predict)(lis)
    return res

## 2-2. AutoML

### 2-2-1. TPOT

In [17]:
# !pip install TPOT

In [18]:
# from tpot import TPOTRegressor

In [19]:
# tpot = TPOTRegressor(scoring='neg_mean_absolute_error',
#                      max_time_mins = 100,
#                      generations=10,
#                      population_size=2,
#                      random_state=42,
#                      verbosity=2,
#                      n_jobs=-1,
#                      memory = "auto"
#                     )
# tpot.fit(X,Y)
# tpot.fitted_pipeline_

In [20]:
# tpot

In [21]:
# def calculate_scores(true, pred):
#     """全ての評価指標を計算する

#     Parameters
#     ----------
#     true (np.array)       : 実測値
#     pred (np.array)       : 予測値

#     Returns
#     -------
#     scores (pd.DataFrame) : 各評価指標を纏めた結果

#     """
#     scores = {}
#     scores = np.sqrt(mean_squared_error(true, pred))
#     return scores

# scores = calculate_scores(Y, tpot.predict(X))
# print(scores)

In [22]:
# tpot_dict = {}
# tpot_dict["model"] = tpot.fitted_pipeline_

### 2-2-2. pycaret

In [23]:
# !pip install pycaret==2.0
# # !pip install pycaret--no-deps

In [24]:
# from pycaret.regression import *

In [25]:
# data_pycaret = X.join(Y)
# exp = setup(data = data_pycaret, target = obj_column,train_size=0.7,data_split_shuffle=True,session_id=2)
# compare_models()

## 2-2. study by Optuna
find best parameters of each models

In [26]:
models = {}
n_trials = 10
n_splits = 10

### 2-2-1. lgb

In [27]:
def lgb_objective(trial,data=X,target=Y):
    
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2,random_state=42)
    param = {
        'metric': 'rmse', 
        'random_state': 42,
        'n_estimators': trial.suggest_int('n_estimators', 1000,30000),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 10.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 10.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.006,0.008,0.01,0.014,0.017,0.02]),
        'max_depth': trial.suggest_int('max_depth', 1 , 100),
        'num_leaves' : trial.suggest_int('num_leaves', 1, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
        'cat_smooth' : trial.suggest_int('min_data_per_groups', 1, 100)
    }
    model = lgb.LGBMRegressor(**param)  
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=100,verbose=False)
    
    preds = model.predict(test_x)
    preds = classify_predict_vectorize(preds)
    
    rmse = mean_squared_error(test_y, preds,squared=False)
    
    return rmse

In [28]:
lgb_dict = {}
lgb_study = optuna.create_study(direction='minimize')
lgb_study.optimize(lgb_objective, n_trials=n_trials)
print('Number of finished trials:', len(lgb_study.trials))
print('Best trial:', lgb_study.best_trial.params)
lgb_param = lgb_study.best_params 
lgb_dict["model"] = lgb.LGBMRegressor(**lgb_param)  

[I 2023-10-10 13:53:28,212] A new study created in memory with name: no-name-b9a22511-d966-407a-b5e7-ac011bacbfc7
[I 2023-10-10 13:53:32,171] Trial 0 finished with value: 0.5916079783099616 and parameters: {'n_estimators': 29279, 'reg_alpha': 1.0315755694877324, 'reg_lambda': 0.016092426233074962, 'colsample_bytree': 0.3, 'subsample': 0.5, 'learning_rate': 0.017, 'max_depth': 83, 'num_leaves': 200, 'min_child_samples': 56, 'min_data_per_groups': 26}. Best is trial 0 with value: 0.5916079783099616.
[I 2023-10-10 13:53:40,786] Trial 1 finished with value: 0.6026037778933294 and parameters: {'n_estimators': 22914, 'reg_alpha': 0.2922090388471696, 'reg_lambda': 0.0020700899576693767, 'colsample_bytree': 0.7, 'subsample': 0.5, 'learning_rate': 0.01, 'max_depth': 53, 'num_leaves': 775, 'min_child_samples': 42, 'min_data_per_groups': 99}. Best is trial 0 with value: 0.5916079783099616.
[I 2023-10-10 13:53:48,556] Trial 2 finished with value: 0.5881833013267174 and parameters: {'n_estimators':

Number of finished trials: 10
Best trial: {'n_estimators': 11152, 'reg_alpha': 0.07001259284375175, 'reg_lambda': 0.03933650706001696, 'colsample_bytree': 0.6, 'subsample': 0.5, 'learning_rate': 0.006, 'max_depth': 76, 'num_leaves': 236, 'min_child_samples': 69, 'min_data_per_groups': 54}


### 2-2-2. xgb

In [29]:
def xgb_objective(trial,data=X,target=Y):
    
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2,random_state=42)
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)
    }
    model = XGBRegressor(**param)  
    
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=100,verbose=False)
    
    preds = model.predict(test_x)
    preds = classify_predict_vectorize(preds)
    
    rmse = mean_squared_error(test_y, preds,squared=False)
    
    return rmse

In [30]:
xgb_dict = {}
xgb_study = optuna.create_study(direction='minimize')
xgb_study.optimize(xgb_objective, n_trials=n_trials)
print('Number of finished trials:', len(xgb_study.trials))
print('Best trial:', xgb_study.best_trial.params)
xgb_params=xgb_study.best_params 
xgb_dict["model"] = XGBRegressor(**xgb_params)

[I 2023-10-10 13:55:19,438] A new study created in memory with name: no-name-dd2139f0-5f8a-4795-ad21-869f5bd76687
[I 2023-10-10 13:55:20,339] Trial 0 finished with value: 0.6893856143353481 and parameters: {'max_depth': 5, 'learning_rate': 0.6818916163931741, 'n_estimators': 321, 'min_child_weight': 5, 'gamma': 0.40897570161104946, 'subsample': 0.34758771074618183, 'colsample_bytree': 0.4821843783148029, 'reg_alpha': 0.9043121154405482, 'reg_lambda': 0.6135075185035503, 'random_state': 999}. Best is trial 0 with value: 0.6893856143353481.
[I 2023-10-10 13:55:22,685] Trial 1 finished with value: 0.7296048466966433 and parameters: {'max_depth': 7, 'learning_rate': 0.8034409817778724, 'n_estimators': 394, 'min_child_weight': 5, 'gamma': 0.6089035755171794, 'subsample': 0.8499965527734015, 'colsample_bytree': 0.7524661412050635, 'reg_alpha': 0.8992934863431707, 'reg_lambda': 0.21919543151439053, 'random_state': 738}. Best is trial 0 with value: 0.6893856143353481.
[I 2023-10-10 13:55:24,85

Number of finished trials: 10
Best trial: {'max_depth': 4, 'learning_rate': 0.10257756742628865, 'n_estimators': 719, 'min_child_weight': 2, 'gamma': 0.659607317162864, 'subsample': 0.22685248332586147, 'colsample_bytree': 0.9065331915879229, 'reg_alpha': 0.4197354597645597, 'reg_lambda': 0.8926325347455212, 'random_state': 327}


### 2-2-3. randomforest

In [31]:
# def rf_objective(trial,data=X,target=Y):
    
#     train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2,random_state=42)
#     param = {
#         "max_depth": trial.suggest_int('max_depth', 2, 1000),
#         "max_leaf_nodes": trial.suggest_int('max_leaf_nodes', 2,1000),
#     }
#     model = HistGradientBoostingRegressor(**param)  
    
#     model.fit(train_x,train_y)
    
#     preds = model.predict(test_x)
#     preds = classify_predict_vectorize(preds)
    
#     rmse = mean_squared_error(test_y, preds,squared=False)
    
#     return rmse

In [32]:
# rf_dict = {}
# rf_study = optuna.create_study(direction='minimize')
# rf_study.optimize(rf_objective, n_trials=n_trials)
# print('Number of finished trials:', len(rf_study.trials))
# print('Best trial:', rf_study.best_trial.params)
# rf_params=rf_study.best_params 
# rf_dict["model"] = HistGradientBoostingRegressor(**rf_params)

In [33]:
models["lgb"] = lgb_dict
models["xgb"] = xgb_dict
# models["tpot"] = tpot_dict
# models["rf"] = rf_dict

## 2-3. train lGBM

In [34]:
feature_names = list(filter(lambda x: x not in [obj_column, exp_key_column], train_feats.columns))

In [35]:
oof = []
prediction = test_feats[[exp_key_column]]
prediction[obj_column] = 0
df_importance_list = []

kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
for fold_id, (trn_idx, val_idx) in enumerate(kfold.split(train_feats[feature_names])):
    X_train = train_feats.iloc[trn_idx][feature_names]
    Y_train = train_feats.iloc[trn_idx][obj_column]

    X_val = train_feats.iloc[val_idx][feature_names]
    Y_val = train_feats.iloc[val_idx][obj_column]

    print('\nFold_{} Training ================================\n'.format(fold_id+1))
    for key, value in models.items():
        print(f"***********{key}***************")
        best_model = value["model"]
        
        ## we have to change because each models have different params
        if key == "xgb":
            model = best_model.fit(X_train,
                                  Y_train,
                                  # eval_names=['train', 'valid'],
                                  eval_set=[(X_train, Y_train), (X_val, Y_val)],
                                  verbose=20,
                                  # eval_metric= [metric],
                                  early_stopping_rounds=50)
            pred_val = model.predict(X_val)
            pred_test = model.predict(test_feats[feature_names])
            prediction[obj_column] += pred_test / kfold.n_splits
            df_oof = train_feats.iloc[val_idx][[exp_key_column, obj_column]].copy()
            df_oof['pred'] = pred_val
            oof.append(df_oof)
            df_oof["pred_range"] = classify_predict_vectorize(pred_val)
            display(df_oof)
        elif key == "lgb":
            model = best_model.fit(X_train,
                      Y_train,
                      eval_names=['train', 'valid'],
                      eval_set=[(X_train, Y_train), (X_val, Y_val)],
                      verbose=20,
                      eval_metric= metric,
                      early_stopping_rounds=50)
            pred_val = model.predict(X_val, num_iteration=model.best_iteration_)
            pred_test = model.predict(test_feats[feature_names], num_iteration=model.best_iteration_)
            prediction[obj_column] += pred_test / kfold.n_splits
            df_oof = train_feats.iloc[val_idx][[exp_key_column, obj_column]].copy()
            df_oof['pred'] = pred_val
            oof.append(df_oof)
            df_oof["pred_range"] = classify_predict_vectorize(pred_val)
            display(df_oof)
        
        elif key == "rf":
            model = best_model.fit(X_train,Y_train)
            pred_val = model.predict(X_val)
            pred_test = model.predict(test_feats[feature_names])
            prediction[obj_column] += pred_test / kfold.n_splits
            df_oof = train_feats.iloc[val_idx][[exp_key_column, obj_column]].copy()
            df_oof['pred'] = pred_val
            oof.append(df_oof)
            display(df_oof)
        elif key == "tpot":
            model = best_model.fit(X_train,
                      Y_train,
                      eval_names=['train', 'valid'],
                      eval_set=[(X_train, Y_train), (X_val, Y_val)],
                      verbose=20,
                      eval_metric= metric,
                      early_stopping_rounds=50)
            pred_val = model.predict(X_val, num_iteration=model.best_iteration_)
            pred_test = model.predict(test_feats[feature_names], num_iteration=model.best_iteration_)
            prediction[obj_column] += pred_test / kfold.n_splits
            df_oof = train_feats.iloc[val_idx][[exp_key_column, obj_column]].copy()
            df_oof['pred'] = pred_val
            oof.append(df_oof)
            df_oof["pred_range"] = classify_predict_vectorize(pred_val)
            display(oof)
            
        models[key]["prediction"] = prediction
        if key in ["lgb","xgb"]:
            df_importance = pd.DataFrame({
                'column': feature_names,
                'importance': model.feature_importances_,
            })
            df_importance_list.append(df_importance)
        else:
            df_importance = pd.DataFrame([])
        del model, pred_val, pred_test
        gc.collect()
        df_oof = pd.concat(oof)
        rmse = mean_squared_error(df_oof[obj_column], np.clip(df_oof['pred'], a_min=0.5, a_max=6.0), squared=False)
        print('rmse:', rmse)
        models[key]["score"] = rmse



***********lgb***************
[20]	train's l2: 0.917797	valid's l2: 0.915429
[40]	train's l2: 0.810976	valid's l2: 0.817293
[60]	train's l2: 0.725302	valid's l2: 0.738422
[80]	train's l2: 0.656122	valid's l2: 0.674819
[100]	train's l2: 0.599962	valid's l2: 0.62437
[120]	train's l2: 0.554208	valid's l2: 0.583371
[140]	train's l2: 0.51665	valid's l2: 0.551473
[160]	train's l2: 0.485385	valid's l2: 0.52488
[180]	train's l2: 0.458799	valid's l2: 0.5022
[200]	train's l2: 0.436145	valid's l2: 0.482911
[220]	train's l2: 0.417089	valid's l2: 0.466664
[240]	train's l2: 0.400631	valid's l2: 0.453594
[260]	train's l2: 0.386417	valid's l2: 0.442222
[280]	train's l2: 0.373576	valid's l2: 0.432674
[300]	train's l2: 0.362256	valid's l2: 0.425209
[320]	train's l2: 0.352293	valid's l2: 0.418318
[340]	train's l2: 0.343307	valid's l2: 0.412667
[360]	train's l2: 0.334799	valid's l2: 0.407144
[380]	train's l2: 0.327261	valid's l2: 0.402339
[400]	train's l2: 0.319698	valid's l2: 0.397877
[420]	train's l2:

Unnamed: 0,id,score,pred,pred_range
43,0445b534,3.5,3.026874,3.0
44,044b274d,6.0,4.87245,5.0
56,05624ab0,3.5,3.219176,3.0
93,098c4b58,4.0,4.289935,4.5
96,09bf7971,5.0,4.437649,4.5
100,0a21bbd6,4.5,4.593528,4.5
111,0bdda3d0,4.0,4.334118,4.5
134,0e45ff80,2.5,3.427068,3.5
170,1201aaf5,3.5,3.939088,4.0
173,12552ba4,4.5,3.636896,3.5


rmse: 0.6117261445379939
***********xgb***************
[0]	validation_0-rmse:3.04568	validation_1-rmse:3.08550
[20]	validation_0-rmse:0.70483	validation_1-rmse:0.73680
[40]	validation_0-rmse:0.56847	validation_1-rmse:0.61927
[60]	validation_0-rmse:0.53421	validation_1-rmse:0.62317
[80]	validation_0-rmse:0.51286	validation_1-rmse:0.63019
[93]	validation_0-rmse:0.49531	validation_1-rmse:0.62859


Unnamed: 0,id,score,pred,pred_range
43,0445b534,3.5,3.010334,3.0
44,044b274d,6.0,4.789495,5.0
56,05624ab0,3.5,2.983991,3.0
93,098c4b58,4.0,4.192138,4.0
96,09bf7971,5.0,4.532926,4.5
100,0a21bbd6,4.5,4.428116,4.5
111,0bdda3d0,4.0,4.517224,4.5
134,0e45ff80,2.5,3.272555,3.5
170,1201aaf5,3.5,3.732631,3.5
173,12552ba4,4.5,3.365804,3.5


rmse: 0.6143708844190321


***********lgb***************
[20]	train's l2: 0.931858	valid's l2: 0.789066
[40]	train's l2: 0.824952	valid's l2: 0.691365
[60]	train's l2: 0.738837	valid's l2: 0.614121
[80]	train's l2: 0.669277	valid's l2: 0.553714
[100]	train's l2: 0.612695	valid's l2: 0.505059
[120]	train's l2: 0.566238	valid's l2: 0.46635
[140]	train's l2: 0.528301	valid's l2: 0.435294
[160]	train's l2: 0.496609	valid's l2: 0.410301
[180]	train's l2: 0.469587	valid's l2: 0.389725
[200]	train's l2: 0.44636	valid's l2: 0.373237
[220]	train's l2: 0.426616	valid's l2: 0.359496
[240]	train's l2: 0.409708	valid's l2: 0.348286
[260]	train's l2: 0.394267	valid's l2: 0.33934
[280]	train's l2: 0.380952	valid's l2: 0.33204
[300]	train's l2: 0.368911	valid's l2: 0.32622
[320]	train's l2: 0.358565	valid's l2: 0.321385
[340]	train's l2: 0.348961	valid's l2: 0.317601
[360]	train's l2: 0.340529	valid's l2: 0.314147
[380]	train's l2: 0.332361	valid's l2: 0.310907
[400]	train's l2: 0.324466	valid's l2: 0

Unnamed: 0,id,score,pred,pred_range
18,0182aa1c,4.0,4.053625,4.0
25,01d602a7,4.5,4.124225,4.0
29,0294b4f5,2.5,2.811412,3.0
48,04c5f885,3.0,2.260647,2.5
49,04e37b6b,3.0,3.738868,3.5
51,04f83ccb,5.0,4.250518,4.5
67,06e59db8,6.0,4.346559,4.5
69,073b3535,4.0,4.059882,4.0
70,075f92a5,4.5,4.152678,4.0
73,07bb2245,3.5,3.042907,3.0


rmse: 0.5927288937464711
***********xgb***************
[0]	validation_0-rmse:3.04906	validation_1-rmse:3.00522
[20]	validation_0-rmse:0.71028	validation_1-rmse:0.68947
[40]	validation_0-rmse:0.57600	validation_1-rmse:0.58757
[60]	validation_0-rmse:0.54031	validation_1-rmse:0.58540
[80]	validation_0-rmse:0.51636	validation_1-rmse:0.59553
[100]	validation_0-rmse:0.49388	validation_1-rmse:0.58807


Unnamed: 0,id,score,pred,pred_range
18,0182aa1c,4.0,4.127906,4.0
25,01d602a7,4.5,3.79501,4.0
29,0294b4f5,2.5,2.919432,3.0
48,04c5f885,3.0,2.244446,2.0
49,04e37b6b,3.0,3.787778,4.0
51,04f83ccb,5.0,3.93547,4.0
67,06e59db8,6.0,4.416129,4.5
69,073b3535,4.0,3.920374,4.0
70,075f92a5,4.5,4.049087,4.0
73,07bb2245,3.5,3.042585,3.0


rmse: 0.5895320977816126


***********lgb***************
[20]	train's l2: 0.912599	valid's l2: 0.95691
[40]	train's l2: 0.805053	valid's l2: 0.8583
[60]	train's l2: 0.71859	valid's l2: 0.782345
[80]	train's l2: 0.648717	valid's l2: 0.723141
[100]	train's l2: 0.592115	valid's l2: 0.676591
[120]	train's l2: 0.545613	valid's l2: 0.640709
[140]	train's l2: 0.5076	valid's l2: 0.612098
[160]	train's l2: 0.475948	valid's l2: 0.589469
[180]	train's l2: 0.449466	valid's l2: 0.572429
[200]	train's l2: 0.427146	valid's l2: 0.558938
[220]	train's l2: 0.407844	valid's l2: 0.547066
[240]	train's l2: 0.391616	valid's l2: 0.538225
[260]	train's l2: 0.377376	valid's l2: 0.530308
[280]	train's l2: 0.364732	valid's l2: 0.525498
[300]	train's l2: 0.353448	valid's l2: 0.520654
[320]	train's l2: 0.342994	valid's l2: 0.517113
[340]	train's l2: 0.33393	valid's l2: 0.513794
[360]	train's l2: 0.325231	valid's l2: 0.511455
[380]	train's l2: 0.317343	valid's l2: 0.509596
[400]	train's l2: 0.310162	valid's l2: 0.5

Unnamed: 0,id,score,pred,pred_range
8,00e048f1,3.5,2.655821,2.5
13,00fc9a6a,3.5,3.236798,3.0
23,01c359fc,3.5,3.833141,4.0
65,069d668b,4.0,4.196046,4.0
87,08ed1a7f,4.0,4.059108,4.0
99,0a1be27b,2.5,2.951042,3.0
105,0a59a757,4.5,3.810912,4.0
115,0c3c0e97,3.0,3.280579,3.5
118,0c6ae36d,2.0,3.033649,3.0
120,0c81d872,3.0,2.587669,2.5


rmse: 0.6155503607192688
***********xgb***************
[0]	validation_0-rmse:3.04913	validation_1-rmse:3.01284
[20]	validation_0-rmse:0.69554	validation_1-rmse:0.76696
[40]	validation_0-rmse:0.55587	validation_1-rmse:0.71572
[60]	validation_0-rmse:0.52694	validation_1-rmse:0.72027
[79]	validation_0-rmse:0.50102	validation_1-rmse:0.72456


Unnamed: 0,id,score,pred,pred_range
8,00e048f1,3.5,2.645078,2.5
13,00fc9a6a,3.5,3.100194,3.0
23,01c359fc,3.5,3.687359,3.5
65,069d668b,4.0,4.213599,4.0
87,08ed1a7f,4.0,3.841613,4.0
99,0a1be27b,2.5,2.817733,3.0
105,0a59a757,4.5,3.94113,4.0
115,0c3c0e97,3.0,2.979975,3.0
118,0c6ae36d,2.0,2.804018,3.0
120,0c81d872,3.0,3.081848,3.0


rmse: 0.6327149259087133


***********lgb***************
[20]	train's l2: 0.905227	valid's l2: 1.03604
[40]	train's l2: 0.800414	valid's l2: 0.924253
[60]	train's l2: 0.715874	valid's l2: 0.834933
[80]	train's l2: 0.647355	valid's l2: 0.763546
[100]	train's l2: 0.59183	valid's l2: 0.706645
[120]	train's l2: 0.546352	valid's l2: 0.660896
[140]	train's l2: 0.508887	valid's l2: 0.623951
[160]	train's l2: 0.477809	valid's l2: 0.592912
[180]	train's l2: 0.451688	valid's l2: 0.568296
[200]	train's l2: 0.429882	valid's l2: 0.547493
[220]	train's l2: 0.411451	valid's l2: 0.531466
[240]	train's l2: 0.395231	valid's l2: 0.518551
[260]	train's l2: 0.381093	valid's l2: 0.507744
[280]	train's l2: 0.368887	valid's l2: 0.499007
[300]	train's l2: 0.357951	valid's l2: 0.491838
[320]	train's l2: 0.347985	valid's l2: 0.486565
[340]	train's l2: 0.339404	valid's l2: 0.481606
[360]	train's l2: 0.331568	valid's l2: 0.477482
[380]	train's l2: 0.323789	valid's l2: 0.472083
[400]	train's l2: 0.316704	valid's l2

Unnamed: 0,id,score,pred,pred_range
20,01963e20,4.5,4.37984,4.5
30,02a41d1a,3.0,3.625449,3.5
32,02d3c9fc,4.0,3.885741,4.0
37,036cfd42,4.5,4.48544,4.5
39,03971ddf,4.5,3.826485,4.0
45,044c5c54,1.0,2.083444,2.0
59,05f425a4,3.5,3.707965,3.5
63,0657fcde,4.5,4.333757,4.5
71,077ee03e,4.0,4.270375,4.5
78,081d84f4,4.0,3.974976,4.0


rmse: 0.636017173399257
***********xgb***************
[0]	validation_0-rmse:3.04186	validation_1-rmse:3.07311
[20]	validation_0-rmse:0.69958	validation_1-rmse:0.75003
[40]	validation_0-rmse:0.55712	validation_1-rmse:0.67061
[60]	validation_0-rmse:0.52793	validation_1-rmse:0.68181
[80]	validation_0-rmse:0.50314	validation_1-rmse:0.68724
[87]	validation_0-rmse:0.49607	validation_1-rmse:0.68430


Unnamed: 0,id,score,pred,pred_range
20,01963e20,4.5,4.307832,4.5
30,02a41d1a,3.0,3.679602,3.5
32,02d3c9fc,4.0,3.634757,3.5
37,036cfd42,4.5,4.677821,4.5
39,03971ddf,4.5,3.621072,3.5
45,044c5c54,1.0,2.200943,2.0
59,05f425a4,3.5,3.616615,3.5
63,0657fcde,4.5,4.084046,4.0
71,077ee03e,4.0,4.154851,4.0
78,081d84f4,4.0,4.13932,4.0


rmse: 0.6400328062546045


***********lgb***************
[20]	train's l2: 0.925534	valid's l2: 0.832153
[40]	train's l2: 0.81737	valid's l2: 0.742645
[60]	train's l2: 0.730212	valid's l2: 0.672504
[80]	train's l2: 0.659994	valid's l2: 0.616773
[100]	train's l2: 0.602931	valid's l2: 0.57315
[120]	train's l2: 0.556235	valid's l2: 0.538678
[140]	train's l2: 0.517718	valid's l2: 0.511327
[160]	train's l2: 0.485904	valid's l2: 0.489397
[180]	train's l2: 0.459034	valid's l2: 0.47205
[200]	train's l2: 0.436517	valid's l2: 0.458408
[220]	train's l2: 0.41725	valid's l2: 0.447986
[240]	train's l2: 0.400901	valid's l2: 0.439424
[260]	train's l2: 0.386213	valid's l2: 0.43239
[280]	train's l2: 0.373416	valid's l2: 0.426812
[300]	train's l2: 0.361725	valid's l2: 0.42225
[320]	train's l2: 0.351247	valid's l2: 0.418449
[340]	train's l2: 0.341877	valid's l2: 0.415427
[360]	train's l2: 0.333512	valid's l2: 0.412983
[380]	train's l2: 0.325396	valid's l2: 0.409549
[400]	train's l2: 0.317744	valid's l2: 0.

Unnamed: 0,id,score,pred,pred_range
2,0042269b,6.0,4.360865,4.5
15,014e7ae9,5.0,4.478709,4.5
26,0249a095,5.0,4.363481,4.5
41,0417d421,3.0,3.510743,3.5
53,052b25e1,5.0,4.911378,5.0
58,05b35fa0,3.5,2.970182,3.0
72,078a6196,4.5,4.616388,4.5
101,0a248634,4.5,4.163417,4.0
107,0ad15907,3.5,3.618447,3.5
136,0e6b2690,3.0,3.750058,4.0


rmse: 0.6375530407086419
***********xgb***************
[0]	validation_0-rmse:3.03727	validation_1-rmse:3.07298
[20]	validation_0-rmse:0.71124	validation_1-rmse:0.76449
[40]	validation_0-rmse:0.57200	validation_1-rmse:0.65542
[60]	validation_0-rmse:0.53837	validation_1-rmse:0.64064
[80]	validation_0-rmse:0.51494	validation_1-rmse:0.64687
[100]	validation_0-rmse:0.49426	validation_1-rmse:0.65409
[107]	validation_0-rmse:0.48657	validation_1-rmse:0.65431


Unnamed: 0,id,score,pred,pred_range
2,0042269b,6.0,4.179821,4.0
15,014e7ae9,5.0,4.721081,4.5
26,0249a095,5.0,4.672086,4.5
41,0417d421,3.0,3.258693,3.5
53,052b25e1,5.0,4.632513,4.5
58,05b35fa0,3.5,3.10104,3.0
72,078a6196,4.5,4.25059,4.5
101,0a248634,4.5,4.285218,4.5
107,0ad15907,3.5,3.224972,3.0
136,0e6b2690,3.0,3.680245,3.5


rmse: 0.6377697980987005


***********lgb***************
[20]	train's l2: 0.911265	valid's l2: 0.988355
[40]	train's l2: 0.805923	valid's l2: 0.888446
[60]	train's l2: 0.721264	valid's l2: 0.809926
[80]	train's l2: 0.652557	valid's l2: 0.746712
[100]	train's l2: 0.596771	valid's l2: 0.695017
[120]	train's l2: 0.550852	valid's l2: 0.653394
[140]	train's l2: 0.51261	valid's l2: 0.620196
[160]	train's l2: 0.48097	valid's l2: 0.592489
[180]	train's l2: 0.454107	valid's l2: 0.569354
[200]	train's l2: 0.431567	valid's l2: 0.550036
[220]	train's l2: 0.412515	valid's l2: 0.534862
[240]	train's l2: 0.39616	valid's l2: 0.521318
[260]	train's l2: 0.381851	valid's l2: 0.509105
[280]	train's l2: 0.369017	valid's l2: 0.498253
[300]	train's l2: 0.357706	valid's l2: 0.488463
[320]	train's l2: 0.347855	valid's l2: 0.480774
[340]	train's l2: 0.338907	valid's l2: 0.474052
[360]	train's l2: 0.33083	valid's l2: 0.46766
[380]	train's l2: 0.323575	valid's l2: 0.463846
[400]	train's l2: 0.31648	valid's l2: 0.

Unnamed: 0,id,score,pred,pred_range
6,0093f095,4.5,3.104226,3.0
10,00e713bd,4.5,4.776859,5.0
14,0144e4d5,2.5,2.357591,2.5
17,0178a105,3.0,3.227272,3.0
22,01992d32,4.0,3.956997,4.0
31,02cf6a52,3.5,3.660979,3.5
54,052cf3a6,2.5,2.822205,3.0
81,08390d49,4.0,3.311955,3.5
86,08eb331a,2.5,2.332756,2.5
88,08f461ac,5.0,4.130848,4.0


rmse: 0.6386678657494432
***********xgb***************
[0]	validation_0-rmse:3.04137	validation_1-rmse:3.09050
[20]	validation_0-rmse:0.69985	validation_1-rmse:0.75685
[40]	validation_0-rmse:0.56336	validation_1-rmse:0.64834
[60]	validation_0-rmse:0.53291	validation_1-rmse:0.64835
[80]	validation_0-rmse:0.50798	validation_1-rmse:0.65613
[99]	validation_0-rmse:0.48605	validation_1-rmse:0.66828


Unnamed: 0,id,score,pred,pred_range
6,0093f095,4.5,3.273965,3.5
10,00e713bd,4.5,4.813362,5.0
14,0144e4d5,2.5,2.012282,2.0
17,0178a105,3.0,2.77375,3.0
22,01992d32,4.0,4.151284,4.0
31,02cf6a52,3.5,3.795185,4.0
54,052cf3a6,2.5,2.711379,2.5
81,08390d49,4.0,3.350066,3.5
86,08eb331a,2.5,2.74491,2.5
88,08f461ac,5.0,4.282442,4.5


rmse: 0.63899132275747


***********lgb***************
[20]	train's l2: 0.90381	valid's l2: 1.05577
[40]	train's l2: 0.79957	valid's l2: 0.945018
[60]	train's l2: 0.715139	valid's l2: 0.856249
[80]	train's l2: 0.646866	valid's l2: 0.785796
[100]	train's l2: 0.591407	valid's l2: 0.728777
[120]	train's l2: 0.546159	valid's l2: 0.683417
[140]	train's l2: 0.509068	valid's l2: 0.647046
[160]	train's l2: 0.47822	valid's l2: 0.616679
[180]	train's l2: 0.452374	valid's l2: 0.59226
[200]	train's l2: 0.430369	valid's l2: 0.571165
[220]	train's l2: 0.411331	valid's l2: 0.554823
[240]	train's l2: 0.395177	valid's l2: 0.540585
[260]	train's l2: 0.380472	valid's l2: 0.528186
[280]	train's l2: 0.366921	valid's l2: 0.516312
[300]	train's l2: 0.355121	valid's l2: 0.507349
[320]	train's l2: 0.344775	valid's l2: 0.500928
[340]	train's l2: 0.335433	valid's l2: 0.496092
[360]	train's l2: 0.327082	valid's l2: 0.492945
[380]	train's l2: 0.319377	valid's l2: 0.489823
[400]	train's l2: 0.312064	valid's l2: 0.4

Unnamed: 0,id,score,pred,pred_range
3,0059420b,2.0,2.555517,2.5
5,0081af50,2.0,3.075774,3.0
7,009e23ab,4.0,3.625871,3.5
12,00f8e84c,4.5,4.225407,4.0
24,01d0ba4b,1.5,2.13502,2.0
27,0262bf61,4.5,4.600379,4.5
42,0432f117,4.0,4.24103,4.0
47,04b88b3e,4.0,4.277736,4.5
52,052a7811,3.5,3.487365,3.5
55,053be4e5,3.0,4.458996,4.5


rmse: 0.6428285860038476
***********xgb***************
[0]	validation_0-rmse:3.04280	validation_1-rmse:3.01421
[20]	validation_0-rmse:0.70455	validation_1-rmse:0.76067
[40]	validation_0-rmse:0.56769	validation_1-rmse:0.67432
[60]	validation_0-rmse:0.53277	validation_1-rmse:0.67311
[80]	validation_0-rmse:0.50639	validation_1-rmse:0.68062
[100]	validation_0-rmse:0.48152	validation_1-rmse:0.68149
[101]	validation_0-rmse:0.48079	validation_1-rmse:0.68104


Unnamed: 0,id,score,pred,pred_range
3,0059420b,2.0,2.40132,2.5
5,0081af50,2.0,3.084847,3.0
7,009e23ab,4.0,3.839427,4.0
12,00f8e84c,4.5,4.312397,4.5
24,01d0ba4b,1.5,2.102405,2.0
27,0262bf61,4.5,4.614049,4.5
42,0432f117,4.0,4.330359,4.5
47,04b88b3e,4.0,4.390491,4.5
52,052a7811,3.5,3.665408,3.5
55,053be4e5,3.0,4.28504,4.5


rmse: 0.6446577220269365


***********lgb***************
[20]	train's l2: 0.921816	valid's l2: 0.867581
[40]	train's l2: 0.813628	valid's l2: 0.778304
[60]	train's l2: 0.726919	valid's l2: 0.708727
[80]	train's l2: 0.657049	valid's l2: 0.653498
[100]	train's l2: 0.600087	valid's l2: 0.610015
[120]	train's l2: 0.553713	valid's l2: 0.575756
[140]	train's l2: 0.515682	valid's l2: 0.547973
[160]	train's l2: 0.484265	valid's l2: 0.525431
[180]	train's l2: 0.458121	valid's l2: 0.508116
[200]	train's l2: 0.436029	valid's l2: 0.494108
[220]	train's l2: 0.417261	valid's l2: 0.481313
[240]	train's l2: 0.400934	valid's l2: 0.471743
[260]	train's l2: 0.3864	valid's l2: 0.463343
[280]	train's l2: 0.373274	valid's l2: 0.457029
[300]	train's l2: 0.361561	valid's l2: 0.451876
[320]	train's l2: 0.351125	valid's l2: 0.447418
[340]	train's l2: 0.341981	valid's l2: 0.443279
[360]	train's l2: 0.333679	valid's l2: 0.439867
[380]	train's l2: 0.325408	valid's l2: 0.436868
[400]	train's l2: 0.317682	valid's l2

Unnamed: 0,id,score,pred,pred_range
0,001519c8,3.5,3.367651,3.5
9,00e1f05a,4.5,4.930566,5.0
28,026be946,5.0,4.262998,4.5
33,02e86b6a,4.0,4.634783,4.5
38,0395b217,5.5,4.491128,4.5
40,040c429b,5.0,4.2153,4.0
57,056c41fc,2.5,2.47928,2.5
62,064a82e9,4.5,4.616456,4.5
74,07bb7765,3.5,3.5402,3.5
79,08276ea7,3.0,3.457879,3.5


rmse: 0.6452881334560454
***********xgb***************
[0]	validation_0-rmse:3.04006	validation_1-rmse:3.10787
[20]	validation_0-rmse:0.70246	validation_1-rmse:0.75625
[40]	validation_0-rmse:0.56720	validation_1-rmse:0.64716
[60]	validation_0-rmse:0.53658	validation_1-rmse:0.65047
[80]	validation_0-rmse:0.51083	validation_1-rmse:0.65126
[100]	validation_0-rmse:0.48811	validation_1-rmse:0.65404


Unnamed: 0,id,score,pred,pred_range
0,001519c8,3.5,3.566235,3.5
9,00e1f05a,4.5,4.771795,5.0
28,026be946,5.0,4.56176,4.5
33,02e86b6a,4.0,4.656575,4.5
38,0395b217,5.5,4.594956,4.5
40,040c429b,5.0,4.373893,4.5
57,056c41fc,2.5,2.429937,2.5
62,064a82e9,4.5,4.546958,4.5
74,07bb7765,3.5,3.388005,3.5
79,08276ea7,3.0,3.445287,3.5


rmse: 0.6451537675003448


***********lgb***************
[20]	train's l2: 0.910162	valid's l2: 0.991156
[40]	train's l2: 0.804391	valid's l2: 0.885634
[60]	train's l2: 0.719052	valid's l2: 0.802251
[80]	train's l2: 0.650078	valid's l2: 0.736958
[100]	train's l2: 0.594258	valid's l2: 0.685727
[120]	train's l2: 0.548544	valid's l2: 0.643208
[140]	train's l2: 0.510787	valid's l2: 0.609155
[160]	train's l2: 0.479026	valid's l2: 0.582362
[180]	train's l2: 0.4525	valid's l2: 0.561796
[200]	train's l2: 0.430083	valid's l2: 0.544763
[220]	train's l2: 0.411154	valid's l2: 0.530397
[240]	train's l2: 0.394643	valid's l2: 0.519102
[260]	train's l2: 0.38063	valid's l2: 0.510106
[280]	train's l2: 0.367453	valid's l2: 0.499609
[300]	train's l2: 0.356136	valid's l2: 0.491741
[320]	train's l2: 0.346432	valid's l2: 0.485959
[340]	train's l2: 0.337609	valid's l2: 0.481076
[360]	train's l2: 0.329547	valid's l2: 0.476942
[380]	train's l2: 0.322055	valid's l2: 0.472328
[400]	train's l2: 0.315193	valid's l2:

Unnamed: 0,id,score,pred,pred_range
4,0075873a,4.0,3.161544,3.0
11,00f0737e,2.5,3.065552,3.0
16,015aa732,5.0,4.161009,4.0
19,0190ff4c,4.0,3.334648,3.5
35,034d61db,4.5,4.80595,5.0
36,035f09fc,3.5,4.163752,4.0
46,048fd254,4.0,3.703451,3.5
50,04e63aec,4.0,4.254338,4.5
61,06336d82,6.0,4.657293,4.5
75,07c36ae1,3.5,3.497669,3.5


rmse: 0.6459245010854625
***********xgb***************
[0]	validation_0-rmse:3.05303	validation_1-rmse:2.94760
[20]	validation_0-rmse:0.70048	validation_1-rmse:0.75205
[40]	validation_0-rmse:0.56021	validation_1-rmse:0.67205
[60]	validation_0-rmse:0.52680	validation_1-rmse:0.67306
[80]	validation_0-rmse:0.50740	validation_1-rmse:0.67046
[100]	validation_0-rmse:0.48700	validation_1-rmse:0.67289
[120]	validation_0-rmse:0.46817	validation_1-rmse:0.67312
[140]	validation_0-rmse:0.44993	validation_1-rmse:0.68389
[157]	validation_0-rmse:0.43659	validation_1-rmse:0.68956


Unnamed: 0,id,score,pred,pred_range
4,0075873a,4.0,3.399668,3.5
11,00f0737e,2.5,3.009774,3.0
16,015aa732,5.0,4.425796,4.5
19,0190ff4c,4.0,3.023177,3.0
35,034d61db,4.5,5.174715,5.0
36,035f09fc,3.5,4.282932,4.5
46,048fd254,4.0,3.759248,4.0
50,04e63aec,4.0,4.207324,4.0
61,06336d82,6.0,4.694901,4.5
75,07c36ae1,3.5,3.505358,3.5


rmse: 0.6470998646968893


***********lgb***************
[20]	train's l2: 0.927493	valid's l2: 0.816379
[40]	train's l2: 0.819397	valid's l2: 0.722328
[60]	train's l2: 0.73276	valid's l2: 0.64884
[80]	train's l2: 0.662618	valid's l2: 0.591257
[100]	train's l2: 0.605998	valid's l2: 0.545057
[120]	train's l2: 0.559562	valid's l2: 0.509348
[140]	train's l2: 0.52177	valid's l2: 0.48041
[160]	train's l2: 0.490172	valid's l2: 0.45876
[180]	train's l2: 0.463763	valid's l2: 0.440863
[200]	train's l2: 0.441399	valid's l2: 0.426208
[220]	train's l2: 0.422427	valid's l2: 0.415203
[240]	train's l2: 0.405787	valid's l2: 0.406108
[260]	train's l2: 0.391228	valid's l2: 0.398323
[280]	train's l2: 0.378011	valid's l2: 0.392271
[300]	train's l2: 0.366045	valid's l2: 0.387746
[320]	train's l2: 0.355707	valid's l2: 0.383558
[340]	train's l2: 0.346419	valid's l2: 0.38105
[360]	train's l2: 0.337856	valid's l2: 0.378799
[380]	train's l2: 0.329514	valid's l2: 0.377266
[400]	train's l2: 0.321793	valid's l2: 0.

Unnamed: 0,id,score,pred,pred_range
1,0022f953,3.5,3.558038,3.5
21,019737b6,3.5,4.67947,4.5
34,031c0c58,4.0,3.816024,4.0
64,0666fb4e,4.5,5.038876,5.0
95,09a67581,4.5,4.888101,5.0
98,09eb3ce5,3.0,2.634339,2.5
103,0a4e3aec,3.5,3.45823,3.5
122,0ca05fe3,1.5,3.07299,3.0
130,0d71be2a,4.5,4.185044,4.0
146,0f2b0127,3.5,3.220995,3.0


rmse: 0.6451254643665283
***********xgb***************
[0]	validation_0-rmse:3.05245	validation_1-rmse:3.04449
[20]	validation_0-rmse:0.70546	validation_1-rmse:0.71489
[40]	validation_0-rmse:0.56379	validation_1-rmse:0.62195
[60]	validation_0-rmse:0.53856	validation_1-rmse:0.63652
[80]	validation_0-rmse:0.51525	validation_1-rmse:0.63557
[92]	validation_0-rmse:0.50099	validation_1-rmse:0.64816


Unnamed: 0,id,score,pred,pred_range
1,0022f953,3.5,3.598878,3.5
21,019737b6,3.5,4.464058,4.5
34,031c0c58,4.0,3.791131,4.0
64,0666fb4e,4.5,4.994617,5.0
95,09a67581,4.5,5.057966,5.0
98,09eb3ce5,3.0,2.229911,2.0
103,0a4e3aec,3.5,3.355551,3.5
122,0ca05fe3,1.5,2.784628,3.0
130,0d71be2a,4.5,4.243341,4.0
146,0f2b0127,3.5,3.393975,3.5


rmse: 0.6439666913517806


In [36]:
for key, value in models.items():
    print(key)
#     print(value.values())
#     print(value["model"])
    print(value["score"])

lgb
0.6451254643665283
xgb
0.6439666913517806


# 3. submission

In [37]:
prediction

Unnamed: 0,id,score
0,0000aaaa,2.549888
1,2222bbbb,2.548401
2,4444cccc,2.51133


In [38]:
# prediction['score'] = np.clip(prediction['score'], a_min=0.5, a_max=6.0)
display(models["lgb"]["prediction"])
prediction['score'] = classify_predict_vectorize(models["lgb"]["prediction"]["score"])
display(prediction)
prediction.to_csv('submission.csv', index=False)

Unnamed: 0,id,score
0,0000aaaa,2.549888
1,2222bbbb,2.548401
2,4444cccc,2.51133


Unnamed: 0,id,score
0,0000aaaa,2.5
1,2222bbbb,2.5
2,4444cccc,2.5
