In [59]:
import numpy as np
import pandas as pd
from IPython.display import Markdown

# Import the functions used in this project
#import modeling_danial.awesome_functions as af
#import modeling_danial.decode_utils as du
#from modeling_danial.feature_engineering import *  
import matplotlib.pyplot as plt

# Pretty display for notebooks
%matplotlib inline

# Ignore the warnings
import warnings
warnings.filterwarnings('ignore')

import lightgbm
from scipy.sparse import csr_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# train, test데이터 메모리 줄이기.

In [60]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

In [61]:
train = import_data("train.csv")
test = import_data("test.csv")

Memory usage of dataframe is 34.56 MB
Memory usage after optimization is: 9.26 MB
Decreased by 73.2%
Memory usage of dataframe is 29.92 MB
Memory usage after optimization is: 8.11 MB
Decreased by 72.9%


# 숫자를 스트링 등 변환하기 편하게 만들어준다

In [62]:
def float_to_str(obj):
    """
    Convert Upc code from float to string
    Use this function by applying lambda
    :param obj: "Upc" column of DataFrame
    :return: string converted Upc removing dot.
    """
    while obj != "nan":
        obj = str(obj).split(".")[0]
        return obj

In [63]:

def company(x):
    """
    Return company code from given Upc code.
    :param x: "Upc" column of DataFrame
    :return: company code
    """
    try:
        p = x[:6]
        if p == "000000":
            return x[-5]
        return p
    except:
        return -9999

In [64]:
train["Upc"] = train.Upc.apply(float_to_str)
test["Upc"] = test.Upc.apply(float_to_str)

train["company"] = train.Upc.apply(company) 
test["company"] = test.Upc.apply(company)

In [65]:
train.head()

Unnamed: 0,TripType,VisitNumber,Weekday,Upc,ScanCount,DepartmentDescription,FinelineNumber,company
0,999,5,Friday,68113154048,-1,FINANCIAL SERVICES,1000.0,681131
1,30,7,Friday,60538814464,1,SHOES,8928.0,605388
2,30,7,Friday,7410810880,1,PERSONAL CARE,4504.0,741081
3,26,8,Friday,2238403584,2,PAINT AND ACCESSORIES,3564.0,223840
4,26,8,Friday,2006613760,2,PAINT AND ACCESSORIES,1017.0,200661


# MENSWEAR 와 MENS WEAR는 같은 값이므로 MENSWEAR로 합쳐준다.

In [66]:
len(train["DepartmentDescription"].unique().tolist())

69

In [67]:
train = train.replace('MENSWEAR', 'MENS WEAR')
test = test.replace('MENSWEAR', 'MENS WEAR')# tsst에 MENSWEAR가 띄워쓰기가 되어있어서
len(train["DepartmentDescription"].unique().tolist()) # 컬럼이 하나 줄어든 것을 확인할 수 있다.

68

In [68]:
traincolumn = train['DepartmentDescription'].unique().tolist()

In [69]:
testcolumn = test['DepartmentDescription'].unique().tolist()

In [70]:
[x for x in traincolumn if x not in testcolumn] 

['HEALTH AND BEAUTY AIDS']

# 중분류를 해준다

In [71]:

WEARls = [x for x in traincolumn if 'WEAR' in str(x) or 'SOCKS' in str(x) or 'SHOES' in str(x)\
         or 'MATERNITY' in str(x) ]
FOODls = [x for x in traincolumn if 'FOOD' in str(x) or 'MEAT' in str(x) or 'DAIRY' in str(x) or 'GROCERY' in str(x)\
         or 'PRODUCE' in str(x) or "BREAD" in str(x) or 'BAKERY' in str(x) or 'WINE' in str(x) or "DELI" in str(x)\
         or 'COOKIE' in str(x) or 'COOK' in str(x)]
ACCls = [x for x in traincolumn if 'SUNGLASS' in str(x) or 'OPTICAL' in str(x)]
ELECls = [x for x in traincolumn if 'ELECTRONICS' in str(x) or 'CAMERAS' in str(x) or 'MEDIA' in str(x)\
         or 'WIRELESS' in str(x) or 'HARDWARE' in str(x)]
COSls = [x for x in traincolumn if 'PERSONAL' in str(x) or 'BEAUTY' in str(x) or 'PHARMACY' in str(x)\
        or 'BATH' in str(x)]
HOUSEls = [x for x in traincolumn if 'PAPER' in str(x) or 'HOME' in str(x) or "BEDDING" in str(x)\
           or 'HOUSE' in str(x) or 'CELE' in str(x) or 'OFFICE' in str(x)]
GARDENls =  [x for x in traincolumn if 'GARDEN' in str(x) or 'HORTI' in str(x)]
INFANls = [x for x in traincolumn if 'INFANT' in str(x)]



In [72]:
traindf = train.copy()

testdf = test.copy()

In [73]:
traindf.loc[traindf['DepartmentDescription'].isin(INFANls), "DD_big"] = "INFAN"
traindf.loc[traindf['DepartmentDescription'].isin(GARDENls), "DD_big"] = "GARDEN"
traindf.loc[traindf['DepartmentDescription'].isin(HOUSEls), "DD_big"] = "HOUSE"
traindf.loc[traindf['DepartmentDescription'].isin(COSls), "DD_big"] = "COS"
traindf.loc[traindf['DepartmentDescription'].isin(ELECls), "DD_big"] = "ELEC"
traindf.loc[traindf['DepartmentDescription'].isin(ACCls), "DD_big"] = "ACC" 
traindf.loc[traindf['DepartmentDescription'].isin(FOODls), "DD_big"] = "FOOD"
traindf.loc[traindf['DepartmentDescription'].isin(WEARls), "DD_big"] = "WEAR"

testdf.loc[testdf['DepartmentDescription'].isin(INFANls), "DD_big"] = "INFAN"
testdf.loc[testdf['DepartmentDescription'].isin(GARDENls), "DD_big"] = "GARDEN"
testdf.loc[testdf['DepartmentDescription'].isin(HOUSEls), "DD_big"] = "HOUSE"
testdf.loc[testdf['DepartmentDescription'].isin(COSls), "DD_big"] = "COS"
testdf.loc[testdf['DepartmentDescription'].isin(ELECls), "DD_big"] = "ELEC"
testdf.loc[testdf['DepartmentDescription'].isin(ACCls), "DD_big"] = "ACC" 
testdf.loc[testdf['DepartmentDescription'].isin(FOODls), "DD_big"] = "FOOD"
testdf.loc[testdf['DepartmentDescription'].isin(WEARls), "DD_big"] = "WEAR"



# 리턴한것과 아닌것으로 분류한다. 반품이라면 1, 반품이 아니라면 0을 나타내는 컬럼을 하나 더 만든다.

In [74]:
# 스캔카운트가 마이너스이면 리턴에 1을 두고, 아니면 0을 두는 리턴 칼럼을 만든다
train.loc[train["ScanCount"] < 0, "Return"] = 1 # 와 이런식으로 칼럼을 만든다고?
train.loc[train["Return"] != 1, "Return"] = 0 # 위에거 만들어놓고, 아닌건 0으로 채워준다

test.loc[test["ScanCount"] < 0, "Return"] = 1
test.loc[test["Return"] != 1, "Return"] = 0

In [75]:
train.head()

Unnamed: 0,TripType,VisitNumber,Weekday,Upc,ScanCount,DepartmentDescription,FinelineNumber,company,Return
0,999,5,Friday,68113154048,-1,FINANCIAL SERVICES,1000.0,681131,1.0
1,30,7,Friday,60538814464,1,SHOES,8928.0,605388,0.0
2,30,7,Friday,7410810880,1,PERSONAL CARE,4504.0,741081,0.0
3,26,8,Friday,2238403584,2,PAINT AND ACCESSORIES,3564.0,223840,0.0
4,26,8,Friday,2006613760,2,PAINT AND ACCESSORIES,1017.0,200661,0.0


In [76]:
# 반대로 리턴을 한게 없으면 갯수를 센다. 몇개를 반품하든 반품을 한번 했다는 자체로,,
train["Pos_Sum"] = train["ScanCount"]
test["Pos_Sum"] = test["ScanCount"]

train.loc[train["Pos_Sum"] < 0, "Pos_Sum"] = 0 # 반환한 손님은 그냥 0으로 둠. 그러면 0은 그냥 전부 반품한손님(갯수제외)
test.loc[test["Pos_Sum"] < 0, "Pos_Sum"] = 0

In [77]:
train.head()

Unnamed: 0,TripType,VisitNumber,Weekday,Upc,ScanCount,DepartmentDescription,FinelineNumber,company,Return,Pos_Sum
0,999,5,Friday,68113154048,-1,FINANCIAL SERVICES,1000.0,681131,1.0,0
1,30,7,Friday,60538814464,1,SHOES,8928.0,605388,0.0,1
2,30,7,Friday,7410810880,1,PERSONAL CARE,4504.0,741081,0.0,1
3,26,8,Friday,2238403584,2,PAINT AND ACCESSORIES,3564.0,223840,0.0,2
4,26,8,Friday,2006613760,2,PAINT AND ACCESSORIES,1017.0,200661,0.0,2


In [78]:
train["Neg_Sum"] = train["ScanCount"]
test["Neg_Sum"] = test["ScanCount"]

train.loc[train["Neg_Sum"] > 0, "Neg_Sum"] = 0
test.loc[test["Neg_Sum"] > 0, "Neg_Sum"] = 0 # negsum은 숫자가 있으면 반품, 없으면 그냥 구매

In [79]:
train.head()

Unnamed: 0,TripType,VisitNumber,Weekday,Upc,ScanCount,DepartmentDescription,FinelineNumber,company,Return,Pos_Sum,Neg_Sum
0,999,5,Friday,68113154048,-1,FINANCIAL SERVICES,1000.0,681131,1.0,0,-1
1,30,7,Friday,60538814464,1,SHOES,8928.0,605388,0.0,1,0
2,30,7,Friday,7410810880,1,PERSONAL CARE,4504.0,741081,0.0,1,0
3,26,8,Friday,2238403584,2,PAINT AND ACCESSORIES,3564.0,223840,0.0,2,0
4,26,8,Friday,2006613760,2,PAINT AND ACCESSORIES,1017.0,200661,0.0,2,0


# UPC의 길이를 나타내는 컬럼을 만들어준다.

In [80]:

train_upc_fine = train[["VisitNumber", "Upc"]]
test_upc_fine = test[["VisitNumber", "Upc"]]


train_upc_fine["len_of_UPC"] = train_upc_fine["Upc"].apply(lambda x: len(x))
test_upc_fine["len_of_UPC"] = test_upc_fine["Upc"].apply(lambda x: len(x))
train_UPC = pd.get_dummies(train_upc_fine["len_of_UPC"])

train_UPC = pd.concat([train[["VisitNumber"]], train_UPC], axis = 1)
train_UPC = train_UPC.groupby("VisitNumber", as_index=False).sum()

In [81]:
train_UPC.tail()

Unnamed: 0,VisitNumber,3,4,5,7,8,9,10,11,12
95669,191343,0,0,0,0,0,0,2,5,0
95670,191344,0,0,0,0,0,0,3,2,0
95671,191345,0,0,0,0,0,0,11,2,0
95672,191346,0,1,0,0,0,0,15,1,0
95673,191347,0,0,0,0,0,0,2,0,0


In [82]:
test_UPC = pd.get_dummies(test_upc_fine["len_of_UPC"])
test_UPC = pd.concat([test[["VisitNumber"]], test_UPC], axis = 1)
test_UPC = test_UPC.groupby("VisitNumber", as_index=False).sum()

#  길이가 10, 11인 컬럼만 제외하고 get_dummies를 사용한다.

In [86]:
train_upc_fine["len_of_UPC_new"] = train_upc_fine["Upc"].apply(lambda x: x[:6] if len(x)== 11 or len(x)== 10 else x)

In [87]:
train_upc_fine.tail(10)

Unnamed: 0,VisitNumber,Upc,len_of_UPC,len_of_UPC_new
647044,191346,5100019712,10,510001
647045,191346,7874204160,10,787420
647046,191346,3120020224,10,312002
647047,191346,3120033024,10,312003
647048,191346,3700091136,10,370009
647049,191346,32390002688,11,323900
647050,191346,7874205184,10,787420
647051,191346,4072,4,4072
647052,191347,4190007552,10,419000
647053,191347,3800059648,10,380005


### Unique한 밸류 확인. 그냥 전부 get_dummies했을땐 20000개였음.

In [89]:
len(train_upc_fine["len_of_UPC_new"].unique())

9526

In [90]:
train_UPC_dummy = pd.get_dummies(train_upc_fine["len_of_UPC_new"])
#test_UPC_dummy = pd.get_dummies(test["Upc"])

train_UPC_dummy = pd.concat([train[["VisitNumber"]], train_UPC_dummy], axis=1)
#test_UPC_dummy = pd.concat([test[["VisitNumber"]], test_UPC_dummy], axis=1)

train_UPC_dummy = train_UPC_dummy.groupby("VisitNumber", as_index=False).sum() # sum을 함
#test_UPC_dummy = test_UPC_dummy.groupby("VisitNumber", as_index=False).sum()
train_UPC_dummy.tail()

Unnamed: 0,VisitNumber,100866,101644,101645,101810,101811,101990,101991,102735,102791,...,9899,993389,995000,995550,995551,996062,998000,999239,999880,nan
95669,191343,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95670,191344,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95671,191345,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95672,191346,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95673,191347,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Fineline을 전부 get dummies 한 다음, sum을 한다. (손님별 산 Fineline을 count가능)

In [91]:
train_FL = pd.get_dummies(train["FinelineNumber"])
test_FL = pd.get_dummies(test["FinelineNumber"])

train_FL = pd.concat([train[["VisitNumber"]], train_FL], axis=1)
test_FL = pd.concat([test[["VisitNumber"]], test_FL], axis=1)

train_FL = train_FL.groupby("VisitNumber", as_index=False).sum() # sum을 함
test_FL = test_FL.groupby("VisitNumber", as_index=False).sum()
train_FL.tail()

Unnamed: 0,VisitNumber,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,...,9912.0,9920.0,9928.0,9936.0,9944.0,9960.0,9968.0,9976.0,9992.0,10000.0
95669,191343,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95670,191344,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95671,191345,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95672,191346,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95673,191347,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Department Description으로 get dummies 한 다음 sum을 한다.

In [92]:
# Departmentdescription으로 더미변수 생성. 비짓 넘버와 결합한것이 train_dd
train_dd = pd.get_dummies(train["DepartmentDescription"])
test_dd = pd.get_dummies(test["DepartmentDescription"])

train_dd = pd.concat([train[["VisitNumber"]], train_dd], axis=1)
test_dd = pd.concat([test[["VisitNumber"]], test_dd], axis=1)

train_dd = train_dd.groupby("VisitNumber", as_index=False).sum() # sum을 함
test_dd = test_dd.groupby("VisitNumber", as_index=False).sum()

In [93]:

train_dd.tail()

Unnamed: 0,VisitNumber,1-HR PHOTO,ACCESSORIES,AUTOMOTIVE,BAKERY,BATH AND SHOWER,BEAUTY,BEDDING,BOOKS AND MAGAZINES,BOYS WEAR,...,SEAFOOD,SEASONAL,SERVICE DELI,SHEER HOSIERY,SHOES,SLEEPWEAR/FOUNDATIONS,SPORTING GOODS,SWIMWEAR/OUTERWEAR,TOYS,WIRELESS
95669,191343,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95670,191344,0,0,0,0,0,4,0,0,0,...,0,0,0,0,0,0,0,0,0,1
95671,191345,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95672,191346,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
95673,191347,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# 중분류한것을 바탕으로 DataFrame을 만든다

In [94]:
train_dd_big = pd.get_dummies(traindf["DD_big"])
test_dd_big = pd.get_dummies(testdf["DepartmentDescription"])

train_dd_big = pd.concat([traindf[["VisitNumber"]], train_dd_big], axis=1)
test_dd_big = pd.concat([testdf[["VisitNumber"]], test_dd_big], axis=1)

train_dd_big = train_dd_big.groupby("VisitNumber", as_index=False).sum()
test_dd_big = test_dd_big.groupby("VisitNumber", as_index=False).sum()

# scancount, pos_sum, neg_sum을 합친다.

In [95]:
train_by_sum = train[["VisitNumber", "ScanCount", "Pos_Sum", "Neg_Sum"]]
test_by_sum = test[["VisitNumber", "ScanCount", "Pos_Sum", "Neg_Sum"]]

train_by_sum = train_by_sum.groupby("VisitNumber", as_index=False).sum()
test_by_sum = test_by_sum.groupby("VisitNumber", as_index=False).sum()

train_by_max = train[["TripType", "VisitNumber", "Weekday", "Return"]]
test_by_max = test[["VisitNumber", "Weekday", "Return"]]

train_by_max = train_by_max.groupby("VisitNumber", as_index=False).max()
test_by_max = test_by_max.groupby("VisitNumber", as_index=False).max()

In [96]:
train_by_sum.tail() # 다 더함

Unnamed: 0,VisitNumber,ScanCount,Pos_Sum,Neg_Sum
95669,191343,9,9,0
95670,191344,5,5,0
95671,191345,17,17,0
95672,191346,17,17,0
95673,191347,2,2,0


In [97]:
train_by_max.tail() 

Unnamed: 0,VisitNumber,TripType,Weekday,Return
95669,191343,25,Sunday,0.0
95670,191344,22,Sunday,0.0
95671,191345,39,Sunday,0.0
95672,191346,39,Sunday,0.0
95673,191347,8,Sunday,0.0


# 위에서 만든 DataFrame을 합친다

In [102]:

train = train_by_sum.merge(train_by_max, on=["VisitNumber"])
train = train.merge(train_dd, on=["VisitNumber"])
train = train.merge(train_UPC, on=["VisitNumber"])
train = train.merge(train_dd_big, on=["VisitNumber"])
train = train.merge(train_FL, on=["VisitNumber"])
train = train.merge(train_UPC_dummy, on=["VisitNumber"])

test = test_by_sum.merge(test_by_max, on=["VisitNumber"])
test = test.merge(test_dd, on=["VisitNumber"])
test = test.merge(test_UPC, on=["VisitNumber"])
test = test.merge(test_dd_big, on=["VisitNumber"])
test = test.merge(test_FL, on=["VisitNumber"])
#test = test.merge(test_UPC_dummy, on=["VisitNumber"])

# 요일을 더미변수화 시켜준다.

In [103]:
train_weekday = pd.get_dummies(train["Weekday"])

In [104]:
test_weekday = pd.get_dummies(test["Weekday"])

In [105]:
train = train.drop(columns="Weekday")
test = test.drop(columns="Weekday")

In [106]:
train = pd.concat([train, train_weekday],axis = 1 )
test = pd.concat([test, test_weekday], axis = 1)

In [108]:
train.tail()

Unnamed: 0,VisitNumber,ScanCount,Pos_Sum,Neg_Sum,TripType,Return,1-HR PHOTO,ACCESSORIES,AUTOMOTIVE,BAKERY,...,999239,999880,nan,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
95669,191343,9,9,0,25,0.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
95670,191344,5,5,0,22,0.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
95671,191345,17,17,0,39,0.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
95672,191346,17,17,0,39,0.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
95673,191347,2,2,0,8,0.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [109]:
train.drop("VisitNumber", axis=1, inplace=True)
test.drop("VisitNumber", axis=1, inplace=True)

In [110]:

train.replace(np.inf, 0, inplace=True)
train.fillna(value=0, inplace=True)

test.replace(np.inf, 0, inplace=True)
test.fillna(value=0, inplace=True)

In [111]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

df = import_data("train.csv")

Memory usage of dataframe is 34.56 MB
Memory usage after optimization is: 9.26 MB
Decreased by 73.2%


# X는 DepartmentDescription 관련 비율을 측정한 테이블

In [112]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# 아래 코드
df_d_sum = df.groupby(['VisitNumber', 'DepartmentDescription'], as_index=False)['ScanCount'].agg('sum').sort_values(['VisitNumber', 'ScanCount', 'DepartmentDescription'], ascending=[1, 0, 1])
df_d_sum['abs_sum'] = df_d_sum.assign(abs=df_d_sum['ScanCount'].abs()).groupby(['VisitNumber'])['abs'].transform('sum')

# change 'abs_sum' for div
criteria = df_d_sum['abs_sum'] == 0
df_d_sum.loc[criteria, 'abs_sum'] = 999

# create ratio
df_d_sum['ratio'] = df_d_sum['ScanCount'] / df_d_sum['abs_sum']

# abs_sum 원복
criteria = df_d_sum['abs_sum'] == 999
df_d_sum.loc[criteria, 'abs_sum'] = 0

# Dept Na 였던 VisitNumber append
diff = set(df.VisitNumber) - set(df_d_sum.VisitNumber)
df_d_sum = df_d_sum.append(pd.DataFrame({'VisitNumber': list(diff)},)).fillna({'DepartmentDescription': 'Na',
                                                                          'ScanCount': 0,
                                                                         'ratio':0 })
# create pivot table
df_pivot = df_d_sum.pivot(index='VisitNumber', columns='DepartmentDescription', values='ratio').fillna(0)
df_pivot = pd.merge(df_pivot, df[['VisitNumber', 'TripType']].drop_duplicates(), on='VisitNumber')

# make training data
X = df_pivot.iloc[:, :-1]
y = df_pivot.iloc[:, -1]

# X1은 Fineline관련해서 비율을 측정한 테이블

In [113]:
df["FinelineNumber"] = df['FinelineNumber'].astype("str")
df_d_sum1 = df.groupby(['VisitNumber', 'FinelineNumber'], as_index=False)['ScanCount'].agg('sum').sort_values(['VisitNumber', 'ScanCount', 'FinelineNumber'], ascending=[1, 0, 1])
df_d_sum1['abs_sum'] = df_d_sum1.assign(abs=df_d_sum1['ScanCount'].abs()).groupby(['VisitNumber'])['abs'].transform('sum')

# change 'abs_sum' for div
criteria1 = df_d_sum1['abs_sum'] == 0
df_d_sum1.loc[criteria1, 'abs_sum'] = 999

# create ratio
df_d_sum1['ratio'] = df_d_sum1['ScanCount'] / df_d_sum1['abs_sum']

# abs_sum 원복
criteria1 = df_d_sum1['abs_sum'] == 999
df_d_sum1.loc[criteria1, 'abs_sum'] = 0

# Dept Na 였던 VisitNumber append
diff1 = set(df.VisitNumber) - set(df_d_sum1.VisitNumber)
df_d_sum1 = df_d_sum1.append(pd.DataFrame({'VisitNumber': list(diff)},)).fillna({'FinelineNumber': 'Na',
                                                                          'ScanCount': 0,
                                                                         'ratio':0 })
# create pivot table
df_pivot1 = df_d_sum1.pivot(index='VisitNumber', columns='FinelineNumber', values='ratio').fillna(0)
df_pivot1 = pd.merge(df_pivot1, df[['VisitNumber', 'TripType']].drop_duplicates(), on='VisitNumber')

# make training data
X1 = df_pivot1.iloc[:, :-1]
y1 = df_pivot1.iloc[:, -1]

In [114]:
X1.head()

Unnamed: 0,VisitNumber,0.0,1.0,10.0,100.0,1000.0,10000.0,1001.0,1002.0,1003.0,...,9930.0,9940.0,9944.0,995.0,9960.0,9970.0,9976.0,998.0,9990.0,nan
0,5.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714
3,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [115]:
X.head()

Unnamed: 0,VisitNumber,1-HR PHOTO,ACCESSORIES,AUTOMOTIVE,BAKERY,BATH AND SHOWER,BEAUTY,BEDDING,BOOKS AND MAGAZINES,BOYS WEAR,...,SEAFOOD,SEASONAL,SERVICE DELI,SHEER HOSIERY,SHOES,SLEEPWEAR/FOUNDATIONS,SPORTING GOODS,SWIMWEAR/OUTERWEAR,TOYS,WIRELESS
0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0
2,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# train과 X, X1을 합쳐준다

In [120]:
newtrain = X1.merge(X, on=["VisitNumber"])
newtrain.drop(columns="VisitNumber")
newtrain = pd.concat([newtrain, train],axis = 1 )
newtrain= newtrain.drop(columns="VisitNumber")

In [121]:
newtrain.head()

Unnamed: 0,0.0,1.0,10.0,100.0,1000.0,10000.0,1001.0,1002.0,1003.0,1004.0,...,999239,999880,nan,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,0.0,...,0,0,1,1,0,0,0,0,0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0


# lightgbm으로 성능측정

In [122]:
from sklearn.preprocessing import LabelEncoder

In [123]:
label_enc = LabelEncoder().fit(y)
y_labeled = label_enc.transform(y)


In [124]:

X_train, X_test, y_train, y_test = train_test_split(
    newtrain, y_labeled, random_state=0)

In [125]:
import lightgbm

In [126]:
dtrain = lightgbm.Dataset(X_train, label=y_train)

In [127]:
dtest = lightgbm.Dataset(X_test, label=y_test)

In [128]:
num_boost_round = 4000
learning_rate=0.02

params = {'objective':'multiclass',
          'boosting_type': 'gbdt',
          'max_depth' : -1,
          'nthread': 4,
          'metric': 'multi_logloss',
          'num_class':38,
          'learning_rate':learning_rate,
          }


In [129]:
lightgbm_model = lightgbm.train(params = params,
                                train_set = dtrain, # traindata로 학습시킴
                                valid_sets = [dtrain, dtest],
                                num_boost_round = num_boost_round,
                                early_stopping_rounds=10)

[1]	training's multi_logloss: 2.73248	valid_1's multi_logloss: 2.73301
Training until validation scores don't improve for 10 rounds.
[2]	training's multi_logloss: 2.55459	valid_1's multi_logloss: 2.55776
[3]	training's multi_logloss: 2.41597	valid_1's multi_logloss: 2.42017
[4]	training's multi_logloss: 2.2986	valid_1's multi_logloss: 2.30345
[5]	training's multi_logloss: 2.19622	valid_1's multi_logloss: 2.20164
[6]	training's multi_logloss: 2.10503	valid_1's multi_logloss: 2.11088
[7]	training's multi_logloss: 2.0227	valid_1's multi_logloss: 2.02898
[8]	training's multi_logloss: 1.94696	valid_1's multi_logloss: 1.95344
[9]	training's multi_logloss: 1.87738	valid_1's multi_logloss: 1.88418
[10]	training's multi_logloss: 1.81287	valid_1's multi_logloss: 1.8199
[11]	training's multi_logloss: 1.75256	valid_1's multi_logloss: 1.75974
[12]	training's multi_logloss: 1.69611	valid_1's multi_logloss: 1.70338
[13]	training's multi_logloss: 1.64301	valid_1's multi_logloss: 1.65034
[14]	training'

[112]	training's multi_logloss: 0.182822	valid_1's multi_logloss: 0.189158
[113]	training's multi_logloss: 0.179172	valid_1's multi_logloss: 0.185478
[114]	training's multi_logloss: 0.175595	valid_1's multi_logloss: 0.181871
[115]	training's multi_logloss: 0.172091	valid_1's multi_logloss: 0.178338
[116]	training's multi_logloss: 0.168658	valid_1's multi_logloss: 0.174877
[117]	training's multi_logloss: 0.165294	valid_1's multi_logloss: 0.171485
[118]	training's multi_logloss: 0.161999	valid_1's multi_logloss: 0.168162
[119]	training's multi_logloss: 0.158769	valid_1's multi_logloss: 0.164906
[120]	training's multi_logloss: 0.155605	valid_1's multi_logloss: 0.161712
[121]	training's multi_logloss: 0.152504	valid_1's multi_logloss: 0.158586
[122]	training's multi_logloss: 0.149466	valid_1's multi_logloss: 0.155521
[123]	training's multi_logloss: 0.14649	valid_1's multi_logloss: 0.152517
[124]	training's multi_logloss: 0.143573	valid_1's multi_logloss: 0.149574
[125]	training's multi_log

[220]	training's multi_logloss: 0.0210499	valid_1's multi_logloss: 0.0249893
[221]	training's multi_logloss: 0.020634	valid_1's multi_logloss: 0.0245602
[222]	training's multi_logloss: 0.0202264	valid_1's multi_logloss: 0.0241376
[223]	training's multi_logloss: 0.0198268	valid_1's multi_logloss: 0.0237244
[224]	training's multi_logloss: 0.0194349	valid_1's multi_logloss: 0.0233191
[225]	training's multi_logloss: 0.0190509	valid_1's multi_logloss: 0.022922
[226]	training's multi_logloss: 0.0186746	valid_1's multi_logloss: 0.0225304
[227]	training's multi_logloss: 0.0183057	valid_1's multi_logloss: 0.0221486
[228]	training's multi_logloss: 0.0179439	valid_1's multi_logloss: 0.021773
[229]	training's multi_logloss: 0.0175894	valid_1's multi_logloss: 0.0214043
[230]	training's multi_logloss: 0.017242	valid_1's multi_logloss: 0.0210448
[231]	training's multi_logloss: 0.0169013	valid_1's multi_logloss: 0.0206912
[232]	training's multi_logloss: 0.0165675	valid_1's multi_logloss: 0.020346
[233

[326]	training's multi_logloss: 0.00254381	valid_1's multi_logloss: 0.00534912
[327]	training's multi_logloss: 0.0024935	valid_1's multi_logloss: 0.00529131
[328]	training's multi_logloss: 0.00244422	valid_1's multi_logloss: 0.00523488
[329]	training's multi_logloss: 0.00239589	valid_1's multi_logloss: 0.00517883
[330]	training's multi_logloss: 0.00234851	valid_1's multi_logloss: 0.00512438
[331]	training's multi_logloss: 0.00230209	valid_1's multi_logloss: 0.00507003
[332]	training's multi_logloss: 0.00225657	valid_1's multi_logloss: 0.00501765
[333]	training's multi_logloss: 0.00221194	valid_1's multi_logloss: 0.00496554
[334]	training's multi_logloss: 0.00216823	valid_1's multi_logloss: 0.00491475
[335]	training's multi_logloss: 0.00212536	valid_1's multi_logloss: 0.00486462
[336]	training's multi_logloss: 0.00208334	valid_1's multi_logloss: 0.00481602
[337]	training's multi_logloss: 0.00204216	valid_1's multi_logloss: 0.0047673
[338]	training's multi_logloss: 0.00200178	valid_1's m

[430]	training's multi_logloss: 0.000319621	valid_1's multi_logloss: 0.00261529
[431]	training's multi_logloss: 0.000313314	valid_1's multi_logloss: 0.0026045
[432]	training's multi_logloss: 0.000307125	valid_1's multi_logloss: 0.0025951
[433]	training's multi_logloss: 0.000301058	valid_1's multi_logloss: 0.00258451
[434]	training's multi_logloss: 0.00029512	valid_1's multi_logloss: 0.00257729
[435]	training's multi_logloss: 0.00028929	valid_1's multi_logloss: 0.00256699
[436]	training's multi_logloss: 0.000283577	valid_1's multi_logloss: 0.00255822
[437]	training's multi_logloss: 0.00027798	valid_1's multi_logloss: 0.00255063
[438]	training's multi_logloss: 0.00027249	valid_1's multi_logloss: 0.00254215
[439]	training's multi_logloss: 0.000267111	valid_1's multi_logloss: 0.00253247
[440]	training's multi_logloss: 0.000261838	valid_1's multi_logloss: 0.00252381
[441]	training's multi_logloss: 0.000256667	valid_1's multi_logloss: 0.00251439
[442]	training's multi_logloss: 0.000251601	va

[533]	training's multi_logloss: 4.11732e-05	valid_1's multi_logloss: 0.0020688
[534]	training's multi_logloss: 4.03651e-05	valid_1's multi_logloss: 0.00206275
[535]	training's multi_logloss: 3.95725e-05	valid_1's multi_logloss: 0.00205666
[536]	training's multi_logloss: 3.87962e-05	valid_1's multi_logloss: 0.00205071
[537]	training's multi_logloss: 3.8036e-05	valid_1's multi_logloss: 0.00204472
[538]	training's multi_logloss: 3.72904e-05	valid_1's multi_logloss: 0.00203884
[539]	training's multi_logloss: 3.65587e-05	valid_1's multi_logloss: 0.00203294
[540]	training's multi_logloss: 3.58418e-05	valid_1's multi_logloss: 0.0020284
[541]	training's multi_logloss: 3.5139e-05	valid_1's multi_logloss: 0.00202264
[542]	training's multi_logloss: 3.44506e-05	valid_1's multi_logloss: 0.00201743
[543]	training's multi_logloss: 3.37754e-05	valid_1's multi_logloss: 0.00201176
[544]	training's multi_logloss: 3.31132e-05	valid_1's multi_logloss: 0.00200604
[545]	training's multi_logloss: 3.24642e-05	

[636]	training's multi_logloss: 5.46632e-06	valid_1's multi_logloss: 0.00165539
[637]	training's multi_logloss: 5.36194e-06	valid_1's multi_logloss: 0.00165316
[638]	training's multi_logloss: 5.25993e-06	valid_1's multi_logloss: 0.00165043
[639]	training's multi_logloss: 5.15963e-06	valid_1's multi_logloss: 0.00164825
[640]	training's multi_logloss: 5.06129e-06	valid_1's multi_logloss: 0.00164609
[641]	training's multi_logloss: 4.96514e-06	valid_1's multi_logloss: 0.00164339
[642]	training's multi_logloss: 4.87065e-06	valid_1's multi_logloss: 0.00164129
[643]	training's multi_logloss: 4.77803e-06	valid_1's multi_logloss: 0.00163922
[644]	training's multi_logloss: 4.68739e-06	valid_1's multi_logloss: 0.00163664
[645]	training's multi_logloss: 4.59837e-06	valid_1's multi_logloss: 0.00163463
[646]	training's multi_logloss: 4.51124e-06	valid_1's multi_logloss: 0.00163204
[647]	training's multi_logloss: 4.42565e-06	valid_1's multi_logloss: 0.00163008
[648]	training's multi_logloss: 4.34172e

[739]	training's multi_logloss: 8.61484e-07	valid_1's multi_logloss: 0.00152377
[740]	training's multi_logloss: 8.48502e-07	valid_1's multi_logloss: 0.00152317
[741]	training's multi_logloss: 8.35858e-07	valid_1's multi_logloss: 0.00152264
[742]	training's multi_logloss: 8.2345e-07	valid_1's multi_logloss: 0.00152208
[743]	training's multi_logloss: 8.11261e-07	valid_1's multi_logloss: 0.00152157
[744]	training's multi_logloss: 7.99294e-07	valid_1's multi_logloss: 0.00152103
[745]	training's multi_logloss: 7.87539e-07	valid_1's multi_logloss: 0.00152054
[746]	training's multi_logloss: 7.76014e-07	valid_1's multi_logloss: 0.00151978
[747]	training's multi_logloss: 7.64677e-07	valid_1's multi_logloss: 0.00151928
[748]	training's multi_logloss: 7.53728e-07	valid_1's multi_logloss: 0.0015189
[749]	training's multi_logloss: 7.42959e-07	valid_1's multi_logloss: 0.0015188
[750]	training's multi_logloss: 7.324e-07	valid_1's multi_logloss: 0.00151879
[751]	training's multi_logloss: 7.22168e-07	v

[842]	training's multi_logloss: 3.08059e-07	valid_1's multi_logloss: 0.00149982
[843]	training's multi_logloss: 3.06391e-07	valid_1's multi_logloss: 0.00149952
[844]	training's multi_logloss: 3.04747e-07	valid_1's multi_logloss: 0.00149946
[845]	training's multi_logloss: 3.03125e-07	valid_1's multi_logloss: 0.00149915
[846]	training's multi_logloss: 3.01546e-07	valid_1's multi_logloss: 0.00149876
[847]	training's multi_logloss: 2.99969e-07	valid_1's multi_logloss: 0.00149855
[848]	training's multi_logloss: 2.98513e-07	valid_1's multi_logloss: 0.0014985
[849]	training's multi_logloss: 2.9713e-07	valid_1's multi_logloss: 0.00149823
[850]	training's multi_logloss: 2.95767e-07	valid_1's multi_logloss: 0.00149819
[851]	training's multi_logloss: 2.94423e-07	valid_1's multi_logloss: 0.001498
[852]	training's multi_logloss: 2.93116e-07	valid_1's multi_logloss: 0.00149765
[853]	training's multi_logloss: 2.91809e-07	valid_1's multi_logloss: 0.00149762
[854]	training's multi_logloss: 2.9052e-07	v

[945]	training's multi_logloss: 2.42413e-07	valid_1's multi_logloss: 0.0014862
[946]	training's multi_logloss: 2.42304e-07	valid_1's multi_logloss: 0.00148599
[947]	training's multi_logloss: 2.42198e-07	valid_1's multi_logloss: 0.00148596
[948]	training's multi_logloss: 2.42091e-07	valid_1's multi_logloss: 0.00148601
[949]	training's multi_logloss: 2.41988e-07	valid_1's multi_logloss: 0.00148581
[950]	training's multi_logloss: 2.41885e-07	valid_1's multi_logloss: 0.00148558
[951]	training's multi_logloss: 2.41784e-07	valid_1's multi_logloss: 0.00148542
[952]	training's multi_logloss: 2.41682e-07	valid_1's multi_logloss: 0.00148547
[953]	training's multi_logloss: 2.41584e-07	valid_1's multi_logloss: 0.00148541
[954]	training's multi_logloss: 2.41486e-07	valid_1's multi_logloss: 0.00148525
[955]	training's multi_logloss: 2.41388e-07	valid_1's multi_logloss: 0.00148502
[956]	training's multi_logloss: 2.41293e-07	valid_1's multi_logloss: 0.00148484
[957]	training's multi_logloss: 2.41197e-

[1048]	training's multi_logloss: 2.3546e-07	valid_1's multi_logloss: 0.00147512
[1049]	training's multi_logloss: 2.35422e-07	valid_1's multi_logloss: 0.00147511
[1050]	training's multi_logloss: 2.35384e-07	valid_1's multi_logloss: 0.00147518
[1051]	training's multi_logloss: 2.35344e-07	valid_1's multi_logloss: 0.00147504
[1052]	training's multi_logloss: 2.35303e-07	valid_1's multi_logloss: 0.00147501
[1053]	training's multi_logloss: 2.35266e-07	valid_1's multi_logloss: 0.00147508
[1054]	training's multi_logloss: 2.35228e-07	valid_1's multi_logloss: 0.00147493
[1055]	training's multi_logloss: 2.35191e-07	valid_1's multi_logloss: 0.00147501
[1056]	training's multi_logloss: 2.3515e-07	valid_1's multi_logloss: 0.00147486
[1057]	training's multi_logloss: 2.35114e-07	valid_1's multi_logloss: 0.00147485
[1058]	training's multi_logloss: 2.35079e-07	valid_1's multi_logloss: 0.00147491
[1059]	training's multi_logloss: 2.35041e-07	valid_1's multi_logloss: 0.00147477
[1060]	training's multi_loglos

[1150]	training's multi_logloss: 2.32598e-07	valid_1's multi_logloss: 0.00147376
[1151]	training's multi_logloss: 2.32578e-07	valid_1's multi_logloss: 0.00147366
[1152]	training's multi_logloss: 2.32559e-07	valid_1's multi_logloss: 0.00147372
[1153]	training's multi_logloss: 2.32538e-07	valid_1's multi_logloss: 0.00147371
[1154]	training's multi_logloss: 2.32519e-07	valid_1's multi_logloss: 0.00147361
[1155]	training's multi_logloss: 2.325e-07	valid_1's multi_logloss: 0.00147368
[1156]	training's multi_logloss: 2.3248e-07	valid_1's multi_logloss: 0.00147366
[1157]	training's multi_logloss: 2.32461e-07	valid_1's multi_logloss: 0.00147374
[1158]	training's multi_logloss: 2.32442e-07	valid_1's multi_logloss: 0.00147363
[1159]	training's multi_logloss: 2.32424e-07	valid_1's multi_logloss: 0.00147362
[1160]	training's multi_logloss: 2.32405e-07	valid_1's multi_logloss: 0.00147369
[1161]	training's multi_logloss: 2.32386e-07	valid_1's multi_logloss: 0.00147359
[1162]	training's multi_logloss

[1252]	training's multi_logloss: 2.31037e-07	valid_1's multi_logloss: 0.0014724
[1253]	training's multi_logloss: 2.31025e-07	valid_1's multi_logloss: 0.00147244
[1254]	training's multi_logloss: 2.31014e-07	valid_1's multi_logloss: 0.00147236
[1255]	training's multi_logloss: 2.31002e-07	valid_1's multi_logloss: 0.00147229
[1256]	training's multi_logloss: 2.3099e-07	valid_1's multi_logloss: 0.00147235
[1257]	training's multi_logloss: 2.30978e-07	valid_1's multi_logloss: 0.00147226
[1258]	training's multi_logloss: 2.30967e-07	valid_1's multi_logloss: 0.00147218
[1259]	training's multi_logloss: 2.30956e-07	valid_1's multi_logloss: 0.00147224
[1260]	training's multi_logloss: 2.30944e-07	valid_1's multi_logloss: 0.00147216
[1261]	training's multi_logloss: 2.30932e-07	valid_1's multi_logloss: 0.00147208
[1262]	training's multi_logloss: 2.30922e-07	valid_1's multi_logloss: 0.00147201
[1263]	training's multi_logloss: 2.3091e-07	valid_1's multi_logloss: 0.00147195
[1264]	training's multi_logloss

[1354]	training's multi_logloss: 2.30067e-07	valid_1's multi_logloss: 0.00146946
[1355]	training's multi_logloss: 2.3006e-07	valid_1's multi_logloss: 0.00146951
[1356]	training's multi_logloss: 2.30052e-07	valid_1's multi_logloss: 0.00146951
[1357]	training's multi_logloss: 2.30045e-07	valid_1's multi_logloss: 0.00146944
[1358]	training's multi_logloss: 2.30036e-07	valid_1's multi_logloss: 0.0014695
[1359]	training's multi_logloss: 2.30029e-07	valid_1's multi_logloss: 0.00146944
[1360]	training's multi_logloss: 2.30021e-07	valid_1's multi_logloss: 0.00146938
[1361]	training's multi_logloss: 2.30014e-07	valid_1's multi_logloss: 0.00146942
[1362]	training's multi_logloss: 2.30007e-07	valid_1's multi_logloss: 0.00146936
[1363]	training's multi_logloss: 2.3e-07	valid_1's multi_logloss: 0.0014693
[1364]	training's multi_logloss: 2.29993e-07	valid_1's multi_logloss: 0.00146934
[1365]	training's multi_logloss: 2.29985e-07	valid_1's multi_logloss: 0.00146928
[1366]	training's multi_logloss: 2.

[1456]	training's multi_logloss: 2.29377e-07	valid_1's multi_logloss: 0.00146734
[1457]	training's multi_logloss: 2.29372e-07	valid_1's multi_logloss: 0.00146729
[1458]	training's multi_logloss: 2.29367e-07	valid_1's multi_logloss: 0.00146723
[1459]	training's multi_logloss: 2.29362e-07	valid_1's multi_logloss: 0.00146728
[1460]	training's multi_logloss: 2.29357e-07	valid_1's multi_logloss: 0.00146722
[1461]	training's multi_logloss: 2.29352e-07	valid_1's multi_logloss: 0.00146717
[1462]	training's multi_logloss: 2.29347e-07	valid_1's multi_logloss: 0.00146721
[1463]	training's multi_logloss: 2.29341e-07	valid_1's multi_logloss: 0.00146716
[1464]	training's multi_logloss: 2.29336e-07	valid_1's multi_logloss: 0.0014671
[1465]	training's multi_logloss: 2.29331e-07	valid_1's multi_logloss: 0.00146711
[1466]	training's multi_logloss: 2.29326e-07	valid_1's multi_logloss: 0.00146714
[1467]	training's multi_logloss: 2.29321e-07	valid_1's multi_logloss: 0.00146709
[1468]	training's multi_loglo

[1558]	training's multi_logloss: 2.28868e-07	valid_1's multi_logloss: 0.00146537
[1559]	training's multi_logloss: 2.28865e-07	valid_1's multi_logloss: 0.0014654
[1560]	training's multi_logloss: 2.28861e-07	valid_1's multi_logloss: 0.00146536
[1561]	training's multi_logloss: 2.28857e-07	valid_1's multi_logloss: 0.00146531
[1562]	training's multi_logloss: 2.28854e-07	valid_1's multi_logloss: 0.00146526
[1563]	training's multi_logloss: 2.2885e-07	valid_1's multi_logloss: 0.0014653
[1564]	training's multi_logloss: 2.28846e-07	valid_1's multi_logloss: 0.00146526
[1565]	training's multi_logloss: 2.28843e-07	valid_1's multi_logloss: 0.00146521
[1566]	training's multi_logloss: 2.28839e-07	valid_1's multi_logloss: 0.00146525
[1567]	training's multi_logloss: 2.28836e-07	valid_1's multi_logloss: 0.00146525
[1568]	training's multi_logloss: 2.28831e-07	valid_1's multi_logloss: 0.0014652
[1569]	training's multi_logloss: 2.28828e-07	valid_1's multi_logloss: 0.00146515
[1570]	training's multi_logloss:

[1660]	training's multi_logloss: 2.28508e-07	valid_1's multi_logloss: 0.00146382
[1661]	training's multi_logloss: 2.28505e-07	valid_1's multi_logloss: 0.00146382
[1662]	training's multi_logloss: 2.28503e-07	valid_1's multi_logloss: 0.00146379
[1663]	training's multi_logloss: 2.285e-07	valid_1's multi_logloss: 0.00146378
[1664]	training's multi_logloss: 2.28498e-07	valid_1's multi_logloss: 0.00146374
[1665]	training's multi_logloss: 2.28495e-07	valid_1's multi_logloss: 0.00146374
[1666]	training's multi_logloss: 2.28493e-07	valid_1's multi_logloss: 0.00146371
[1667]	training's multi_logloss: 2.2849e-07	valid_1's multi_logloss: 0.0014637
[1668]	training's multi_logloss: 2.28488e-07	valid_1's multi_logloss: 0.00146374
[1669]	training's multi_logloss: 2.28485e-07	valid_1's multi_logloss: 0.0014637
[1670]	training's multi_logloss: 2.28483e-07	valid_1's multi_logloss: 0.00146366
[1671]	training's multi_logloss: 2.2848e-07	valid_1's multi_logloss: 0.00146366
[1672]	training's multi_logloss: 2

[1762]	training's multi_logloss: 2.28281e-07	valid_1's multi_logloss: 0.00146216
[1763]	training's multi_logloss: 2.28279e-07	valid_1's multi_logloss: 0.00146213
[1764]	training's multi_logloss: 2.28277e-07	valid_1's multi_logloss: 0.00146213
[1765]	training's multi_logloss: 2.28276e-07	valid_1's multi_logloss: 0.00146216
[1766]	training's multi_logloss: 2.28274e-07	valid_1's multi_logloss: 0.00146212
[1767]	training's multi_logloss: 2.28272e-07	valid_1's multi_logloss: 0.00146209
[1768]	training's multi_logloss: 2.2827e-07	valid_1's multi_logloss: 0.00146209
[1769]	training's multi_logloss: 2.28268e-07	valid_1's multi_logloss: 0.00146205
[1770]	training's multi_logloss: 2.28266e-07	valid_1's multi_logloss: 0.00146204
[1771]	training's multi_logloss: 2.28264e-07	valid_1's multi_logloss: 0.00146201
[1772]	training's multi_logloss: 2.28262e-07	valid_1's multi_logloss: 0.00146201
[1773]	training's multi_logloss: 2.2826e-07	valid_1's multi_logloss: 0.00146197
[1774]	training's multi_loglos

[1864]	training's multi_logloss: 2.28069e-07	valid_1's multi_logloss: 0.00146063
[1865]	training's multi_logloss: 2.28068e-07	valid_1's multi_logloss: 0.00146063
[1866]	training's multi_logloss: 2.28066e-07	valid_1's multi_logloss: 0.00146063
[1867]	training's multi_logloss: 2.28065e-07	valid_1's multi_logloss: 0.0014606
[1868]	training's multi_logloss: 2.28063e-07	valid_1's multi_logloss: 0.0014606
[1869]	training's multi_logloss: 2.28062e-07	valid_1's multi_logloss: 0.00146056
[1870]	training's multi_logloss: 2.2806e-07	valid_1's multi_logloss: 0.00146056
[1871]	training's multi_logloss: 2.28059e-07	valid_1's multi_logloss: 0.00146059
[1872]	training's multi_logloss: 2.28057e-07	valid_1's multi_logloss: 0.00146055
[1873]	training's multi_logloss: 2.28056e-07	valid_1's multi_logloss: 0.00146052
[1874]	training's multi_logloss: 2.28054e-07	valid_1's multi_logloss: 0.00146052
[1875]	training's multi_logloss: 2.28053e-07	valid_1's multi_logloss: 0.00146049
[1876]	training's multi_logloss

[1966]	training's multi_logloss: 2.27935e-07	valid_1's multi_logloss: 0.00145922
[1967]	training's multi_logloss: 2.27934e-07	valid_1's multi_logloss: 0.00145919
[1968]	training's multi_logloss: 2.27933e-07	valid_1's multi_logloss: 0.00145919
[1969]	training's multi_logloss: 2.27932e-07	valid_1's multi_logloss: 0.00145915
[1970]	training's multi_logloss: 2.2793e-07	valid_1's multi_logloss: 0.00145915
[1971]	training's multi_logloss: 2.27929e-07	valid_1's multi_logloss: 0.00145912
[1972]	training's multi_logloss: 2.27928e-07	valid_1's multi_logloss: 0.00145911
[1973]	training's multi_logloss: 2.27927e-07	valid_1's multi_logloss: 0.00145908
[1974]	training's multi_logloss: 2.27926e-07	valid_1's multi_logloss: 0.00145911
[1975]	training's multi_logloss: 2.27925e-07	valid_1's multi_logloss: 0.00145908
[1976]	training's multi_logloss: 2.27924e-07	valid_1's multi_logloss: 0.00145908
[1977]	training's multi_logloss: 2.27922e-07	valid_1's multi_logloss: 0.00145905
[1978]	training's multi_loglo

[2068]	training's multi_logloss: 2.27826e-07	valid_1's multi_logloss: 0.00145784
[2069]	training's multi_logloss: 2.27825e-07	valid_1's multi_logloss: 0.00145784
[2070]	training's multi_logloss: 2.27824e-07	valid_1's multi_logloss: 0.00145781
[2071]	training's multi_logloss: 2.27823e-07	valid_1's multi_logloss: 0.0014578
[2072]	training's multi_logloss: 2.27822e-07	valid_1's multi_logloss: 0.00145777
[2073]	training's multi_logloss: 2.27821e-07	valid_1's multi_logloss: 0.00145778
[2074]	training's multi_logloss: 2.2782e-07	valid_1's multi_logloss: 0.00145778
[2075]	training's multi_logloss: 2.27819e-07	valid_1's multi_logloss: 0.00145774
[2076]	training's multi_logloss: 2.27818e-07	valid_1's multi_logloss: 0.00145774
[2077]	training's multi_logloss: 2.27817e-07	valid_1's multi_logloss: 0.00145777
[2078]	training's multi_logloss: 2.27816e-07	valid_1's multi_logloss: 0.00145774
[2079]	training's multi_logloss: 2.27815e-07	valid_1's multi_logloss: 0.00145774
[2080]	training's multi_loglos

[2170]	training's multi_logloss: 2.27736e-07	valid_1's multi_logloss: 0.00145653
[2171]	training's multi_logloss: 2.27735e-07	valid_1's multi_logloss: 0.00145653
[2172]	training's multi_logloss: 2.27734e-07	valid_1's multi_logloss: 0.0014565
[2173]	training's multi_logloss: 2.27734e-07	valid_1's multi_logloss: 0.00145647
[2174]	training's multi_logloss: 2.27733e-07	valid_1's multi_logloss: 0.00145649
[2175]	training's multi_logloss: 2.27732e-07	valid_1's multi_logloss: 0.00145649
[2176]	training's multi_logloss: 2.27731e-07	valid_1's multi_logloss: 0.00145646
[2177]	training's multi_logloss: 2.2773e-07	valid_1's multi_logloss: 0.00145646
[2178]	training's multi_logloss: 2.2773e-07	valid_1's multi_logloss: 0.00145644
[2179]	training's multi_logloss: 2.27729e-07	valid_1's multi_logloss: 0.00145643
[2180]	training's multi_logloss: 2.27728e-07	valid_1's multi_logloss: 0.0014564
[2181]	training's multi_logloss: 2.27727e-07	valid_1's multi_logloss: 0.0014564
[2182]	training's multi_logloss: 

[2272]	training's multi_logloss: 2.27661e-07	valid_1's multi_logloss: 0.00145528
[2273]	training's multi_logloss: 2.2766e-07	valid_1's multi_logloss: 0.00145525
[2274]	training's multi_logloss: 2.2766e-07	valid_1's multi_logloss: 0.00145525
[2275]	training's multi_logloss: 2.27659e-07	valid_1's multi_logloss: 0.00145525
[2276]	training's multi_logloss: 2.27658e-07	valid_1's multi_logloss: 0.00145522
[2277]	training's multi_logloss: 2.27658e-07	valid_1's multi_logloss: 0.00145524
[2278]	training's multi_logloss: 2.27657e-07	valid_1's multi_logloss: 0.00145522
[2279]	training's multi_logloss: 2.27656e-07	valid_1's multi_logloss: 0.00145521
[2280]	training's multi_logloss: 2.27656e-07	valid_1's multi_logloss: 0.00145519
[2281]	training's multi_logloss: 2.27655e-07	valid_1's multi_logloss: 0.00145518
[2282]	training's multi_logloss: 2.27654e-07	valid_1's multi_logloss: 0.00145516
[2283]	training's multi_logloss: 2.27654e-07	valid_1's multi_logloss: 0.00145516
[2284]	training's multi_loglos

[2374]	training's multi_logloss: 2.27598e-07	valid_1's multi_logloss: 0.00145406
[2375]	training's multi_logloss: 2.27598e-07	valid_1's multi_logloss: 0.00145406
[2376]	training's multi_logloss: 2.27597e-07	valid_1's multi_logloss: 0.00145408
[2377]	training's multi_logloss: 2.27597e-07	valid_1's multi_logloss: 0.00145406
[2378]	training's multi_logloss: 2.27596e-07	valid_1's multi_logloss: 0.00145403
[2379]	training's multi_logloss: 2.27595e-07	valid_1's multi_logloss: 0.00145403
[2380]	training's multi_logloss: 2.27595e-07	valid_1's multi_logloss: 0.001454
[2381]	training's multi_logloss: 2.27594e-07	valid_1's multi_logloss: 0.001454
[2382]	training's multi_logloss: 2.27594e-07	valid_1's multi_logloss: 0.00145397
[2383]	training's multi_logloss: 2.27593e-07	valid_1's multi_logloss: 0.00145397
[2384]	training's multi_logloss: 2.27593e-07	valid_1's multi_logloss: 0.00145395
[2385]	training's multi_logloss: 2.27592e-07	valid_1's multi_logloss: 0.00145395
[2386]	training's multi_logloss:

[2476]	training's multi_logloss: 2.27503e-07	valid_1's multi_logloss: 0.00145296
[2477]	training's multi_logloss: 2.27502e-07	valid_1's multi_logloss: 0.00145294
[2478]	training's multi_logloss: 2.27502e-07	valid_1's multi_logloss: 0.00145294
[2479]	training's multi_logloss: 2.27502e-07	valid_1's multi_logloss: 0.00145291
[2480]	training's multi_logloss: 2.27501e-07	valid_1's multi_logloss: 0.00145291
[2481]	training's multi_logloss: 2.275e-07	valid_1's multi_logloss: 0.00145289
[2482]	training's multi_logloss: 2.275e-07	valid_1's multi_logloss: 0.00145286
[2483]	training's multi_logloss: 2.275e-07	valid_1's multi_logloss: 0.00145286
[2484]	training's multi_logloss: 2.27499e-07	valid_1's multi_logloss: 0.00145288
[2485]	training's multi_logloss: 2.27499e-07	valid_1's multi_logloss: 0.00145286
[2486]	training's multi_logloss: 2.27498e-07	valid_1's multi_logloss: 0.00145286
[2487]	training's multi_logloss: 2.27498e-07	valid_1's multi_logloss: 0.00145283
[2488]	training's multi_logloss: 2

[2578]	training's multi_logloss: 2.27458e-07	valid_1's multi_logloss: 0.00145187
[2579]	training's multi_logloss: 2.27457e-07	valid_1's multi_logloss: 0.00145184
[2580]	training's multi_logloss: 2.27457e-07	valid_1's multi_logloss: 0.00145184
[2581]	training's multi_logloss: 2.27457e-07	valid_1's multi_logloss: 0.00145182
[2582]	training's multi_logloss: 2.27456e-07	valid_1's multi_logloss: 0.00145182
[2583]	training's multi_logloss: 2.27456e-07	valid_1's multi_logloss: 0.00145184
[2584]	training's multi_logloss: 2.27455e-07	valid_1's multi_logloss: 0.00145181
[2585]	training's multi_logloss: 2.27455e-07	valid_1's multi_logloss: 0.00145179
[2586]	training's multi_logloss: 2.27455e-07	valid_1's multi_logloss: 0.00145179
[2587]	training's multi_logloss: 2.27454e-07	valid_1's multi_logloss: 0.00145176
[2588]	training's multi_logloss: 2.27454e-07	valid_1's multi_logloss: 0.00145176
[2589]	training's multi_logloss: 2.27453e-07	valid_1's multi_logloss: 0.00145174
[2590]	training's multi_logl

[2680]	training's multi_logloss: 2.27419e-07	valid_1's multi_logloss: 0.00145079
[2681]	training's multi_logloss: 2.27419e-07	valid_1's multi_logloss: 0.00145079
[2682]	training's multi_logloss: 2.27419e-07	valid_1's multi_logloss: 0.00145077
[2683]	training's multi_logloss: 2.27418e-07	valid_1's multi_logloss: 0.00145079
[2684]	training's multi_logloss: 2.27418e-07	valid_1's multi_logloss: 0.00145078
[2685]	training's multi_logloss: 2.27418e-07	valid_1's multi_logloss: 0.00145076
[2686]	training's multi_logloss: 2.27417e-07	valid_1's multi_logloss: 0.00145074
[2687]	training's multi_logloss: 2.27417e-07	valid_1's multi_logloss: 0.00145074
[2688]	training's multi_logloss: 2.27416e-07	valid_1's multi_logloss: 0.00145074
[2689]	training's multi_logloss: 2.27416e-07	valid_1's multi_logloss: 0.00145072
[2690]	training's multi_logloss: 2.27416e-07	valid_1's multi_logloss: 0.00145072
[2691]	training's multi_logloss: 2.27415e-07	valid_1's multi_logloss: 0.00145069
[2692]	training's multi_logl

[2782]	training's multi_logloss: 2.27386e-07	valid_1's multi_logloss: 0.0014498
[2783]	training's multi_logloss: 2.27386e-07	valid_1's multi_logloss: 0.0014498
[2784]	training's multi_logloss: 2.27386e-07	valid_1's multi_logloss: 0.0014498
[2785]	training's multi_logloss: 2.27385e-07	valid_1's multi_logloss: 0.00144978
[2786]	training's multi_logloss: 2.27385e-07	valid_1's multi_logloss: 0.00144978
[2787]	training's multi_logloss: 2.27385e-07	valid_1's multi_logloss: 0.00144976
[2788]	training's multi_logloss: 2.27384e-07	valid_1's multi_logloss: 0.00144975
[2789]	training's multi_logloss: 2.27384e-07	valid_1's multi_logloss: 0.00144973
[2790]	training's multi_logloss: 2.27384e-07	valid_1's multi_logloss: 0.00144973
[2791]	training's multi_logloss: 2.27383e-07	valid_1's multi_logloss: 0.00144971
[2792]	training's multi_logloss: 2.27383e-07	valid_1's multi_logloss: 0.00144971
[2793]	training's multi_logloss: 2.27383e-07	valid_1's multi_logloss: 0.00144969
[2794]	training's multi_logloss

[2884]	training's multi_logloss: 2.27357e-07	valid_1's multi_logloss: 0.00144886
[2885]	training's multi_logloss: 2.27357e-07	valid_1's multi_logloss: 0.00144886
[2886]	training's multi_logloss: 2.27357e-07	valid_1's multi_logloss: 0.00144884
[2887]	training's multi_logloss: 2.27356e-07	valid_1's multi_logloss: 0.00144883
[2888]	training's multi_logloss: 2.27356e-07	valid_1's multi_logloss: 0.00144881
[2889]	training's multi_logloss: 2.27356e-07	valid_1's multi_logloss: 0.00144881
[2890]	training's multi_logloss: 2.27356e-07	valid_1's multi_logloss: 0.00144879
[2891]	training's multi_logloss: 2.27355e-07	valid_1's multi_logloss: 0.00144877
[2892]	training's multi_logloss: 2.27355e-07	valid_1's multi_logloss: 0.00144877
[2893]	training's multi_logloss: 2.27355e-07	valid_1's multi_logloss: 0.00144875
[2894]	training's multi_logloss: 2.27355e-07	valid_1's multi_logloss: 0.00144877
[2895]	training's multi_logloss: 2.27354e-07	valid_1's multi_logloss: 0.00144877
[2896]	training's multi_logl

[2986]	training's multi_logloss: 2.27332e-07	valid_1's multi_logloss: 0.00144796
[2987]	training's multi_logloss: 2.27332e-07	valid_1's multi_logloss: 0.00144794
[2988]	training's multi_logloss: 2.27331e-07	valid_1's multi_logloss: 0.00144792
[2989]	training's multi_logloss: 2.27331e-07	valid_1's multi_logloss: 0.00144792
[2990]	training's multi_logloss: 2.27331e-07	valid_1's multi_logloss: 0.0014479
[2991]	training's multi_logloss: 2.27331e-07	valid_1's multi_logloss: 0.0014479
[2992]	training's multi_logloss: 2.2733e-07	valid_1's multi_logloss: 0.00144791
[2993]	training's multi_logloss: 2.2733e-07	valid_1's multi_logloss: 0.00144789
[2994]	training's multi_logloss: 2.2733e-07	valid_1's multi_logloss: 0.00144789
[2995]	training's multi_logloss: 2.2733e-07	valid_1's multi_logloss: 0.00144787
[2996]	training's multi_logloss: 2.2733e-07	valid_1's multi_logloss: 0.00144787
[2997]	training's multi_logloss: 2.27329e-07	valid_1's multi_logloss: 0.00144785
[2998]	training's multi_logloss: 2.

[3088]	training's multi_logloss: 2.27309e-07	valid_1's multi_logloss: 0.00144705
[3089]	training's multi_logloss: 2.27309e-07	valid_1's multi_logloss: 0.00144705
[3090]	training's multi_logloss: 2.27309e-07	valid_1's multi_logloss: 0.00144703
[3091]	training's multi_logloss: 2.27309e-07	valid_1's multi_logloss: 0.00144703
[3092]	training's multi_logloss: 2.27309e-07	valid_1's multi_logloss: 0.00144701
[3093]	training's multi_logloss: 2.27308e-07	valid_1's multi_logloss: 0.00144699
[3094]	training's multi_logloss: 2.27308e-07	valid_1's multi_logloss: 0.00144701
[3095]	training's multi_logloss: 2.27308e-07	valid_1's multi_logloss: 0.00144699
[3096]	training's multi_logloss: 2.27308e-07	valid_1's multi_logloss: 0.00144699
[3097]	training's multi_logloss: 2.27308e-07	valid_1's multi_logloss: 0.00144697
[3098]	training's multi_logloss: 2.27307e-07	valid_1's multi_logloss: 0.00144697
[3099]	training's multi_logloss: 2.27307e-07	valid_1's multi_logloss: 0.00144697
[3100]	training's multi_logl

[3190]	training's multi_logloss: 2.27289e-07	valid_1's multi_logloss: 0.00144618
[3191]	training's multi_logloss: 2.27289e-07	valid_1's multi_logloss: 0.00144618
[3192]	training's multi_logloss: 2.27289e-07	valid_1's multi_logloss: 0.00144619
[3193]	training's multi_logloss: 2.27289e-07	valid_1's multi_logloss: 0.00144618
[3194]	training's multi_logloss: 2.27289e-07	valid_1's multi_logloss: 0.00144617
[3195]	training's multi_logloss: 2.27288e-07	valid_1's multi_logloss: 0.00144616
[3196]	training's multi_logloss: 2.27288e-07	valid_1's multi_logloss: 0.00144615
[3197]	training's multi_logloss: 2.27288e-07	valid_1's multi_logloss: 0.00144615
[3198]	training's multi_logloss: 2.27288e-07	valid_1's multi_logloss: 0.00144615
[3199]	training's multi_logloss: 2.27288e-07	valid_1's multi_logloss: 0.00144614
[3200]	training's multi_logloss: 2.27287e-07	valid_1's multi_logloss: 0.00144612
[3201]	training's multi_logloss: 2.27287e-07	valid_1's multi_logloss: 0.00144613
[3202]	training's multi_logl

[3292]	training's multi_logloss: 2.27271e-07	valid_1's multi_logloss: 0.0014454
[3293]	training's multi_logloss: 2.27271e-07	valid_1's multi_logloss: 0.00144538
[3294]	training's multi_logloss: 2.2727e-07	valid_1's multi_logloss: 0.00144538
[3295]	training's multi_logloss: 2.2727e-07	valid_1's multi_logloss: 0.00144536
[3296]	training's multi_logloss: 2.2727e-07	valid_1's multi_logloss: 0.00144536
[3297]	training's multi_logloss: 2.2727e-07	valid_1's multi_logloss: 0.00144534
[3298]	training's multi_logloss: 2.2727e-07	valid_1's multi_logloss: 0.00144534
[3299]	training's multi_logloss: 2.27269e-07	valid_1's multi_logloss: 0.00144532
[3300]	training's multi_logloss: 2.27269e-07	valid_1's multi_logloss: 0.00144532
[3301]	training's multi_logloss: 2.27269e-07	valid_1's multi_logloss: 0.0014453
[3302]	training's multi_logloss: 2.27269e-07	valid_1's multi_logloss: 0.0014453
[3303]	training's multi_logloss: 2.27269e-07	valid_1's multi_logloss: 0.00144532
[3304]	training's multi_logloss: 2.2

[3394]	training's multi_logloss: 2.27254e-07	valid_1's multi_logloss: 0.00144461
[3395]	training's multi_logloss: 2.27254e-07	valid_1's multi_logloss: 0.0014446
[3396]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144459
[3397]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.0014446
[3398]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144459
[3399]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144458
[3400]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144459
[3401]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144457
[3402]	training's multi_logloss: 2.27253e-07	valid_1's multi_logloss: 0.00144457
[3403]	training's multi_logloss: 2.27252e-07	valid_1's multi_logloss: 0.00144456
[3404]	training's multi_logloss: 2.27252e-07	valid_1's multi_logloss: 0.00144456
[3405]	training's multi_logloss: 2.27252e-07	valid_1's multi_logloss: 0.00144454
[3406]	training's multi_loglos

[3496]	training's multi_logloss: 2.27238e-07	valid_1's multi_logloss: 0.00144387
[3497]	training's multi_logloss: 2.27238e-07	valid_1's multi_logloss: 0.00144387
[3498]	training's multi_logloss: 2.27238e-07	valid_1's multi_logloss: 0.00144385
[3499]	training's multi_logloss: 2.27238e-07	valid_1's multi_logloss: 0.00144385
[3500]	training's multi_logloss: 2.27238e-07	valid_1's multi_logloss: 0.00144385
[3501]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.00144383
[3502]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.00144382
[3503]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.00144383
[3504]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.00144381
[3505]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.00144381
[3506]	training's multi_logloss: 2.27237e-07	valid_1's multi_logloss: 0.0014438
[3507]	training's multi_logloss: 2.27236e-07	valid_1's multi_logloss: 0.0014438
[3508]	training's multi_loglos

[3598]	training's multi_logloss: 2.27224e-07	valid_1's multi_logloss: 0.00144315
[3599]	training's multi_logloss: 2.27224e-07	valid_1's multi_logloss: 0.00144314
[3600]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144314
[3601]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144312
[3602]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144312
[3603]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144314
[3604]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144312
[3605]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.00144312
[3606]	training's multi_logloss: 2.27223e-07	valid_1's multi_logloss: 0.0014431
[3607]	training's multi_logloss: 2.27222e-07	valid_1's multi_logloss: 0.0014431
[3608]	training's multi_logloss: 2.27222e-07	valid_1's multi_logloss: 0.00144309
[3609]	training's multi_logloss: 2.27222e-07	valid_1's multi_logloss: 0.00144309
[3610]	training's multi_loglos

[3700]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144247
[3701]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144247
[3702]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144246
[3703]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144246
[3704]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144247
[3705]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144245
[3706]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144244
[3707]	training's multi_logloss: 2.2721e-07	valid_1's multi_logloss: 0.00144244
[3708]	training's multi_logloss: 2.27209e-07	valid_1's multi_logloss: 0.00144242
[3709]	training's multi_logloss: 2.27209e-07	valid_1's multi_logloss: 0.00144242
[3710]	training's multi_logloss: 2.27209e-07	valid_1's multi_logloss: 0.00144241
[3711]	training's multi_logloss: 2.27209e-07	valid_1's multi_logloss: 0.00144241
[3712]	training's multi_logloss: 2.2

[3802]	training's multi_logloss: 2.27198e-07	valid_1's multi_logloss: 0.00144182
[3803]	training's multi_logloss: 2.27198e-07	valid_1's multi_logloss: 0.00144181
[3804]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144181
[3805]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.0014418
[3806]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144178
[3807]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144178
[3808]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144176
[3809]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144176
[3810]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144175
[3811]	training's multi_logloss: 2.27197e-07	valid_1's multi_logloss: 0.00144175
[3812]	training's multi_logloss: 2.27196e-07	valid_1's multi_logloss: 0.00144173
[3813]	training's multi_logloss: 2.27196e-07	valid_1's multi_logloss: 0.00144173
[3814]	training's multi_loglo

[3904]	training's multi_logloss: 2.27186e-07	valid_1's multi_logloss: 0.00144115
[3905]	training's multi_logloss: 2.27186e-07	valid_1's multi_logloss: 0.00144117
[3906]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144115
[3907]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144115
[3908]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144114
[3909]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144114
[3910]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144112
[3911]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144112
[3912]	training's multi_logloss: 2.27185e-07	valid_1's multi_logloss: 0.00144111
[3913]	training's multi_logloss: 2.27184e-07	valid_1's multi_logloss: 0.00144111
[3914]	training's multi_logloss: 2.27184e-07	valid_1's multi_logloss: 0.00144109
[3915]	training's multi_logloss: 2.27184e-07	valid_1's multi_logloss: 0.00144108
[3916]	training's multi_logl

# classification report

In [130]:
y_pred_proba2 = lightgbm_model.predict(X_test)

In [131]:
y_pred2 = [np.argmax(line) for line in y_pred_proba2]

In [132]:
label_enc2 = LabelEncoder().fit(y_test)
y_labeled2 = label_enc2.transform(y_test)

In [133]:
print(classification_report(y_labeled2, y_pred2))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       931
           1       1.00      1.00      1.00        87
           2       1.00      1.00      1.00      1167
           3       1.00      1.00      1.00       309
           4       1.00      1.00      1.00      1494
           5       1.00      1.00      1.00      3017
           6       1.00      1.00      1.00      2307
           7       1.00      1.00      1.00        62
           8       0.00      0.00      0.00         2
           9       0.99      1.00      1.00       237
          10       1.00      1.00      1.00       131
          11       1.00      1.00      1.00        80
          12       1.00      0.99      1.00       173
          13       1.00      1.00      1.00       158
          14       1.00      1.00      1.00       207
          15       1.00      1.00      1.00        43
          16       1.00      1.00      1.00       652
          17       1.00    

In [134]:

lightgbm_model.best_iteration

4000

In [135]:
lightgbm_model.best_score

defaultdict(dict,
            {'training': {'multi_logloss': 2.2717442869098275e-07},
             'valid_1': {'multi_logloss': 0.0014405784111352438}})

In [136]:

lightgbm_model.feature_importance()

array([1264,  791,  106, ..., 1908,  848, 1329])

In [142]:
np.shape(y_pred2)
y_pred2 = np.array(y_pred2)
y_pred2 = y_pred2.reshape(23919,1)
np.shape(y_pred2)

(23919, 1)

In [144]:
confusion_matrix(y_test, y_pred2)


array([[ 931,    0,    0, ...,    0,    0,    0],
       [   0,   87,    0, ...,    0,    0,    0],
       [   0,    0, 1167, ...,    0,    0,    0],
       ...,
       [   0,    0,    0, ...,  188,    0,    0],
       [   0,    0,    0, ...,    0,  295,    0],
       [   0,    0,    0, ...,    0,    0, 2100]])