これをベースに実験をしていこう

## 目次

### 1. [Utils ツール](#utils)
### 2. [Read data データの読み込み](#read)
### 3. [Preprocessors 前処理](#preprocess)
### 4. [Features 特徴抽出](#features)
### 5. [Metrics 評価指標](#metrics)
### 6. [Classifiers 分類器](#classifiers)
### 7. [Integration 統合](#integration)
### 8. [Result 結果](#result)

In [1]:
#訓練データの正解データ
from PIL import Image
import pandas as pd
import numpy as np
import os,cv2
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
basepath = "../../first_retailing/"

## <a name="utils"></a>Utils

In [3]:
#utils
# Build the pixel array of an image
def get_img_array(path,resize):
    """Return the image at ``path`` as a grayscale array of ``resize`` x ``resize`` pixels.

    The image is opened with PIL, converted to grayscale, resized to a square
    and handed to ``np.asarray``.
    """
    from PIL import ImageOps
    grayscale = ImageOps.grayscale(Image.open(path))
    resized = grayscale.resize([resize, resize])
    return np.asarray(resized)

# Display an image
def show_img(path):
    """Open the image at ``path``, resize it to 128x128 and call its ``show()`` method."""
    Image.open(path).resize([128, 128]).show()

# Accuracy (mean of the per-class accuracies)
def M_accuracy(true, pred, n_classes=24):
    """Return the mean per-class accuracy.

    Parameters
    ----------
    true : object indexable by ``"category_id"`` whose ``.values`` holds the
        ground-truth class ids (e.g. a DataFrame).
    pred : array-like of predicted class ids, aligned with ``true``.
    n_classes : number of class ids (``0 .. n_classes-1``) to evaluate.
        Defaults to 24, the value that was previously hard-coded.

    Returns
    -------
    The unweighted mean, over the classes that occur in ``true``, of the
    fraction of that class's samples predicted correctly. Classes with no
    ground-truth samples are skipped so that a single absent class does not
    turn the whole score into NaN. NaN is returned only when no class occurs.
    """
    labels = np.asarray(true["category_id"].values)
    pred = np.asarray(pred)
    per_class = []
    for i in range(n_classes):
        mask = labels == i
        if mask.any():
            per_class.append(np.mean(pred[mask] == i))
    if not per_class:
        return float("nan")
    return np.mean(per_class)


#pickle書き出し(<4GB)
import pickle
def write_pickle(pickle_path,v):
    """Pickle ``v`` into the file at ``pickle_path`` and print a completion message."""
    with open(pickle_path, "wb") as out_file:
        pickle.Pickler(out_file).dump(v)
    print("Finished Writing pickle")

# Load a pickle
def read_pickle(pickle_path):
    """Return the object unpickled from the file at ``pickle_path``."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(pickle_path, "rb") as in_file:
        return pickle.load(in_file)

def show_acc(clf):
    """Print the mean per-class accuracy of ``clf`` on the train and test splits.

    Reads the module-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    and scores the predictions with ``M_accuracy``.
    """
    predictions = [clf.predict(X_train), clf.predict(X_test)]
    scores = tuple(
        M_accuracy(true=labels, pred=predicted)
        for labels, predicted in zip((y_train, y_test), predictions)
    )
    print("Train Accuracy: %s\n Test Accuracy: %s"%scores)
    
# Plot a Keras training history
def plot_history(history):
    """Plot how accuracy and loss evolved while the model was being trained.

    Parameters
    ----------
    history : object with a ``history`` mapping (as returned by Keras
        ``fit``) containing the accuracy, validation accuracy, ``loss`` and
        ``val_loss`` series.

    Two figures are shown one after the other: accuracy per epoch, then loss
    per epoch. Raises ``KeyError`` if an expected series is missing.
    """
    records = history.history
    # Older Keras logs the metric as "acc"/"val_acc"; newer releases use the
    # full name "accuracy"/"val_accuracy". Accept either spelling.
    acc_key = "acc" if "acc" in records else "accuracy"

    # Accuracy history
    plt.plot(records[acc_key],"o-",label="accuracy")
    plt.plot(records["val_" + acc_key],"o-",label="val_acc")
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(loc="lower right")
    plt.show()

    # Loss history
    plt.plot(records['loss'],"o-",label="loss",)
    plt.plot(records['val_loss'],"o-",label="val_loss")
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='lower right')
    plt.show()

## <a name="read"></a>Read data

In [4]:
# Labels for the training images, read from a tab-separated file under basepath.
y = pd.read_csv(basepath+"train_master.tsv",sep="\t")
# Pre-computed pixel arrays saved as .npy files.
X = np.load("../X_20.npy")
test = np.load("../test_20.npy") # ordered test_0 ~ test_len()

# presumably the side length (in pixels) of the images stored in X_20.npy -- TODO confirm
resize = 20

In [5]:
# sklearn.cross_validation no longer exists in current scikit-learn; the same
# helper lives in sklearn.model_selection. Fall back to the old module only on
# installations that predate model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Hold out half of the labelled data for evaluation; random_state fixes the shuffle.
X_train,X_test,y_train,y_test= train_test_split(X, y ,test_size=0.5, random_state=0)



## <a name="preprocess"></a>Preprocessors

In [None]:
from sklearn.decomposition import PCA, IncrementalPCA,KernelPCA,FactorAnalysis,FastICA,TruncatedSVD,NMF
from sklearn.manifold import TSNE, MDS

# Subtract, from every image, the per-pixel mean taken over all training images
def pp_mean_subtraction_per_pixel(X_train, X_test):
    """Centre both sets with the per-pixel mean of the training set.

    Parameters
    ----------
    X_train, X_test : arrays whose first axis indexes images, e.g.
        ``(row, resize, resize, 3)``. Any number of trailing axes is accepted
        as long as both arrays agree on them.

    Returns
    -------
    ``(X_train_after, X_test_after)`` as float64 arrays with the same shapes
    as the inputs. The mean is computed from ``X_train`` only and applied to
    both arrays.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    # One mean per pixel position/channel, computed across the image axis.
    mean = np.mean(X_train, axis=0, dtype=np.float64)
    return X_train - mean, X_test - mean


# Standardise every pixel position using statistics taken over all training images
def pp_normalization_per_pixel(X_train, X_test):
    """Standardise both sets with the per-pixel mean and std of the training set.

    Parameters
    ----------
    X_train, X_test : arrays whose first axis indexes images, e.g.
        ``(row, resize, resize, 3)``. Any number of trailing axes is accepted
        as long as both arrays agree on them.

    Returns
    -------
    ``(X_train_after, X_test_after)`` as float64 arrays with the same shapes
    as the inputs, computed as ``(X - mean) / std`` with ``mean`` and ``std``
    taken from ``X_train`` only.

    Pixel positions that are constant across the training set have a standard
    deviation of zero. They are divided by 1 instead, so the training values
    become 0 rather than NaN/inf.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    mean = np.mean(X_train, axis=0, dtype=np.float64)
    std = np.std(X_train, axis=0, dtype=np.float64)
    # Guard against division by zero on constant pixels (e.g. a uniform background).
    std = np.where(std == 0, 1.0, std)
    return (X_train - mean) / std, (X_test - mean) / std


# Reduce the image size
def pp_random_crop(X_train,X_test,size):
    """Shrink images by keeping ``size`` randomly chosen rows and columns.

    Despite the name this is not a contiguous crop: ``size`` distinct indices
    are drawn along axis 1 and, independently, along axis 2. They are sorted so
    the spatial order is preserved, and the same indices are applied to both
    arrays.

    Parameters
    ----------
    X_train, X_test : arrays of shape ``(row, resize, resize, ...)``.
    size : number of rows and columns to keep; must not exceed the extent of
        axis 1 / axis 2 of ``X_train`` (``choice`` raises ``ValueError``
        otherwise).

    Returns
    -------
    ``(X_train_after, X_test_after)`` with shape ``(row, size, size, ...)``.

    The selection is deterministic (seeds 0 and 1, as before) but uses private
    ``RandomState`` objects, so the global NumPy random state is no longer
    reset as a side effect of calling this function.
    """
    first = np.sort(np.random.RandomState(0).choice(X_train.shape[1], size, replace=False))
    second = np.sort(np.random.RandomState(1).choice(X_train.shape[2], size, replace=False))
    X_train_after = X_train[:, first][:, :, second]
    X_test_after = X_test[:, first][:, :, second]
    return X_train_after,X_test_after

# Dimensionality reduction
def pp_decompotion(types, X_train, X_test, n_comp):
    """Fit a decomposition on ``X_train`` and project both sets with it.

    Parameters
    ----------
    types : key selecting the estimator: one of ``"pca"``, ``"ipca"``,
        ``"kpca"``, ``"fa"``, ``"fica"``, ``"svd"``, ``"nmf"``, ``"tsne"``,
        ``"mds"``.
    X_train, X_test : 2-D feature arrays accepted by the chosen estimator.
    n_comp : value passed as ``n_components``.

    Returns
    -------
    ``(X_train_dec, X_test_dec)``: ``fit_transform`` of the training set and
    ``transform`` of the test set.

    Raises
    ------
    ValueError
        If ``types`` is not a known key, or if the selected estimator has no
        ``transform`` method and therefore cannot project ``X_test``.
        Errors raised by the estimator itself propagate unchanged. Previously
        every failure was caught by a bare ``except`` whose handler then
        crashed with an unrelated ``AttributeError``.
    """
    decomposer = {"pca":PCA, "ipca":IncrementalPCA, "kpca":KernelPCA, "fa":FactorAnalysis,
                  "fica":FastICA, "svd":TruncatedSVD, "nmf":NMF, "tsne":TSNE, "mds":MDS}
    if types not in decomposer:
        raise ValueError("unknown decomposition type %r; available: %s"
                         % (types, sorted(decomposer)))

    dec = decomposer[types](n_components=n_comp)
    # Check before the (possibly expensive) fit: an embedding-only estimator
    # cannot be applied to data it was not fitted on.
    if not hasattr(dec, "transform"):
        raise ValueError("%r has no transform method, so it cannot be applied to X_test"
                         % (types,))

    X_train_dec = dec.fit_transform(X_train)
    X_test_dec = dec.transform(X_test)
    return X_train_dec, X_test_dec
        
#resample
#パラメータを変更しても良い http://contrib.scikit-learn.org/imbalanced-learn/api.html

from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
def over_sampling(types, X_train, y_train):
    """Over-sample the training set with the selected imbalanced-learn sampler.

    Parameters
    ----------
    types : ``'ADASYN'``, ``'ROS'`` (RandomOverSampler) or ``'SMOTE'``.
    X_train, y_train : features and labels passed to the sampler.

    Returns
    -------
    ``(X_resampled, y_resampled)`` as returned by the sampler.

    Raises
    ------
    KeyError
        If ``types`` is not one of the keys above.
    """
    RANDOM_STATE = 0
    # Map to classes and build only the requested sampler.
    samplers = {'ADASYN':ADASYN,
                'ROS':RandomOverSampler,
                'SMOTE':SMOTE}
    method = samplers[types](random_state=RANDOM_STATE)
    # imbalanced-learn renamed fit_sample to fit_resample; use whichever the
    # installed release provides.
    resample = getattr(method, "fit_resample", None) or method.fit_sample
    X_resampled, y_resampled = resample(X_train, y_train)

    return  X_resampled, y_resampled


from imblearn.under_sampling import CondensedNearestNeighbour,AllKNN, EditedNearestNeighbours,RepeatedEditedNearestNeighbours
from imblearn.under_sampling import NearMiss, NeighbourhoodCleaningRule, OneSidedSelection, TomekLinks
def under_sampling(types, X_train, y_train):
    """Under-sample the training set with the selected imbalanced-learn sampler.

    Parameters
    ----------
    types : one of ``'CNN'``, ``'ENN'``, ``'AKNN'``, ``'RENN'``, ``'NM'``,
        ``'NCR'``, ``'OSS'``, ``'TL'``.
    X_train, y_train : features and labels passed to the sampler.

    Returns
    -------
    ``(X_resampled, y_resampled)`` as returned by the sampler.

    Raises
    ------
    KeyError
        If ``types`` is not one of the keys above.
    """
    RANDOM_STATE = 0
    # Map to classes and build only the requested sampler, so one sampler
    # rejecting an argument does not break every other choice.
    samplers = {'CNN':CondensedNearestNeighbour,
                'ENN':EditedNearestNeighbours,
                'AKNN':AllKNN,
                'RENN':RepeatedEditedNearestNeighbours,
                'NM':NearMiss,
                'NCR':NeighbourhoodCleaningRule,
                'OSS':OneSidedSelection,
                'TL':TomekLinks,
               }
    sampler_cls = samplers[types]
    try:
        method = sampler_cls(random_state=RANDOM_STATE)
    except TypeError:
        # Newer imbalanced-learn releases dropped random_state from the
        # samplers that do not use randomness; build those without it.
        method = sampler_cls()
    # imbalanced-learn renamed fit_sample to fit_resample; use whichever the
    # installed release provides.
    resample = getattr(method, "fit_resample", None) or method.fit_sample
    X_resampled, y_resampled = resample(X_train, y_train)

    return  X_resampled, y_resampled
    
    
from imblearn.combine import SMOTEENN, SMOTETomek
def hybrid_sampling(types, X_train, y_train):
    """Resample the training set with a combined over/under-sampling method.

    Parameters
    ----------
    types : ``'SMOTEENN'`` or ``'SMOTETomek'``.
    X_train, y_train : features and labels passed to the sampler.

    Returns
    -------
    ``(X_resampled, y_resampled)`` as returned by the sampler.

    Raises
    ------
    KeyError
        If ``types`` is not one of the keys above.
    """
    RANDOM_STATE = 0
    # Map to classes and build only the requested sampler.
    samplers = {'SMOTEENN':SMOTEENN,
                'SMOTETomek':SMOTETomek}
    method = samplers[types](random_state=RANDOM_STATE)
    # imbalanced-learn renamed fit_sample to fit_resample; use whichever the
    # installed release provides.
    resample = getattr(method, "fit_resample", None) or method.fit_sample
    X_resampled, y_resampled = resample(X_train, y_train)

    return  X_resampled, y_resampled

# Convert labels to binary: "is this category" or not
def to_binary(y=y, target=0):
    """Return a one-column DataFrame holding 1 where ``y["category_id"]`` equals
    ``target`` and 0 elsewhere, indexed like ``y``.

    ``y`` defaults to the notebook-level label frame as it exists when this
    function is defined.
    """
    flags = [int(bool(value == target)) for value in y["category_id"].values]
    return pd.DataFrame(flags, index=y.index)

#### 使い方

In [None]:
# Remove the mean at every pixel position
X_train_pp, X_test_pp = pp_mean_subtraction_per_pixel(X_train,X_test)


# Alternative: normalise at every pixel position
#X_train_pp, X_test_pp = pp_normalization_per_pixel(X_train,X_test)


# Randomly drop rows/columns until the images are size*size
X_train_pp,X_test_pp = pp_random_crop(X_train_pp, X_test_pp, size=18)


# Plain dimensionality reduction ("fa" looks promising)
# (arguments written positionally: a positional argument may not follow types="fa")
#X_train_pp, X_test_pp = pp_decompotion("fa", X_train, X_test, n_comp=10)

# Convert the labels to "label 1 or not"
#to_binary(y_train, target=1)

In [None]:
# Display the shape of the preprocessed training array
X_train_pp.shape

#### resample

In [None]:
#oversample: 'ADASYN', 'ROS', 'SMOTE'
#X_train_resampled, y_train_resampled = over_sampling('ADASYN', X_train, y_train["category_id"])

#undersample: 'CNN', 'ENN', 'AKNN', 'RENN', 'NM', 'NCR', 'OSS', 'TL'
#X_train_resampled, y_train_resampled = under_sampling('ENN', X_train, y_train["category_id"])

#hybridsample: 'SMOTEENN', 'SMOTETomek'
#X_train_resampled, y_train_resampled = hybrid_sampling("SMOTEENN", X_train, y_train["category_id"])

## <a name="features"></a>Features

In [None]:
#サブモジュール
from scipy import stats

# Percentile helpers: pK returns the (10*K)-th percentile of ``array``.

# Minimum
def p0(array):
    """Return the 0th percentile (minimum) of ``array``."""
    return stats.scoreatpercentile(array, per=0)

def p1(array):
    """Return the 10th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=10)

def p2(array):
    """Return the 20th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=20)

def p3(array):
    """Return the 30th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=30)

def p4(array):
    """Return the 40th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=40)

# Median
def p5(array):
    """Return the 50th percentile (median) of ``array``."""
    return stats.scoreatpercentile(array, per=50)

def p6(array):
    """Return the 60th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=60)

def p7(array):
    """Return the 70th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=70)

def p8(array):
    """Return the 80th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=80)

def p9(array):
    """Return the 90th percentile of ``array``."""
    return stats.scoreatpercentile(array, per=90)

# Maximum
def p10(array):
    """Return the 100th percentile (maximum) of ``array``."""
    return stats.scoreatpercentile(array, per=100)


def return_rgb_features(function, r_array, g_array, b_array):
    """Apply ``function`` to seven channel combinations and return the results.

    The result order is R, G, B, R+G, G+B, B+R, R+G+B. Combinations are formed
    with ``+``: for Python lists (what the extractors in this file pass) that
    concatenates the values, whereas for NumPy arrays it adds them
    elementwise.
    """
    combinations = (
        r_array,
        g_array,
        b_array,
        r_array + g_array,
        g_array + b_array,
        b_array + r_array,
        r_array + g_array + b_array,
    )
    return [function(values) for values in combinations]

In [None]:
# Display the shape of a single training image
X_train[0].shape

In [None]:
# Building blocks shared by the three histogram feature extractors below.
_HIST_CHANNELS = ("R", "G", "B", "RG", "GB", "BR", "RGB")
_HIST_STATS = ("mean", "median", "std", "max", "min",
               "1", "2", "3", "4", "6", "7", "8", "9")
_WHITE_SUM = 765  # 255 * 3: channel sum of a pure white pixel


def _histogram_columns():
    """Return the 91 column names: 13 statistics x 7 channel combinations."""
    return ["%s_%s" % (channel, stat) for stat in _HIST_STATS for channel in _HIST_CHANNELS]


def _non_white_channels(img_array):
    """Return channel 0, 1 and 2 values (as lists) of the non-white pixels.

    A pixel counts as white when its channel values sum to 765. The sum is
    taken with ``ndarray.sum``, which accumulates small integer dtypes in a
    wider integer, so uint8 channel values cannot wrap around. When every
    pixel is white each channel becomes ``[0.0]`` so that all statistics
    evaluate to 0.
    """
    pixels = np.asarray(img_array)
    keep = pixels.sum(axis=-1) != _WHITE_SUM
    # Boolean indexing keeps row-major order, like the former nested loops.
    channels = [pixels[..., c][keep].tolist() for c in range(3)]
    if not channels[0]:
        channels = [[0.0], [0.0], [0.0]]
    return channels


def _histogram_features(r_array, g_array, b_array):
    """Return the 91 features: each statistic applied via ``return_rgb_features``."""
    statistics = (np.mean, np.median, np.std, np.max, np.min,
                  p1, p2, p3, p4, p6, p7, p8, p9)
    features = []
    for statistic in statistics:
        features.extend(return_rgb_features(statistic, r_array=r_array,
                                            g_array=g_array, b_array=b_array))
    return features


# Representative values of the R, G, B, RG, GB, BR, RGB histograms (read straight from a folder)
def ft_extract_histogram_from_path(resize=100, imgpath= basepath +"train/train", num_of_img=len(y)):
    """Compute histogram features for images ``<imgpath>_<n>.jpg``, n = 0 .. num_of_img-1.

    Parameters
    ----------
    resize : each image is resized to ``resize`` x ``resize`` before use.
    imgpath : path prefix; ``"_%s.jpg" % n`` is appended for image ``n``.
    num_of_img : number of images to read.

    Returns
    -------
    DataFrame with one row per image and 91 columns (``R_mean`` ... ``RGB_9``),
    indexed 0 .. num_of_img-1. White pixels are excluded; an all-white image
    yields a row of zeros.

    Raises
    ------
    IOError
        If an image cannot be read.

    NOTE(review): ``cv2.imread`` returns channels in BGR order, so the values
    labelled "R" here come from channel 0 = blue. Confirm this matches how the
    arrays given to ``ft_extract_histogram_from_object`` were produced.
    """
    rows = []
    for fn in range(num_of_img):
        path = imgpath+"_%s.jpg"%(fn)
        raw = cv2.imread(path)
        if raw is None:  # cv2.imread signals failure by returning None
            raise IOError("could not read image: %s" % path)
        img_array = cv2.resize(raw,(resize,resize))
        r_array, g_array, b_array = _non_white_channels(img_array)
        rows.append(_histogram_features(r_array, g_array, b_array))
    # Build the frame once; growing a DataFrame row by row is quadratic and
    # DataFrame.append is gone from recent pandas.
    return pd.DataFrame(rows, columns=_histogram_columns())

# Histogram features for images that are already loaded (e.g. X)
def ft_extract_histogram_from_object(X_array,index):
    """Compute histogram features for every image in ``X_array``.

    Parameters
    ----------
    X_array : iterable of images indexable as ``img[i, j, channel]`` with at
        least three channels.
    index : index assigned to the returned frame (one entry per image).

    Returns
    -------
    DataFrame with one row per image and 91 columns (``R_mean`` ... ``RGB_9``).
    White pixels are excluded; an all-white image yields a row of zeros.
    """
    rows = []
    for img_array in X_array:
        r_array, g_array, b_array = _non_white_channels(img_array)
        rows.append(_histogram_features(r_array, g_array, b_array))
    img_feature = pd.DataFrame(rows, columns=_histogram_columns())
    img_feature.index = index
    return img_feature


# Split each image into windows and take the histogram features of every window
def ft_extract_partial_histogram(X_array=X_train,size=5,stride=5,index=y_train.index):
    """Compute histogram features on a grid of ``size`` x ``size`` windows.

    Parameters
    ----------
    X_array : array of images with shape ``(n, height, width, channels)``.
    size : side length of each window.
    stride : step between window origins along both axes.
    index : index assigned to the returned frame (one entry per image).

    Returns
    -------
    DataFrame with one row per image and ``windows * 91`` columns named
    ``f0``, ``f1``, ...; the 91 features of each window are concatenated in
    row-major window order. Windows containing only white pixels contribute
    zeros. The output size is printed before processing starts.
    """
    c_max = X_array.shape[1]
    r_max = X_array.shape[2]
    # Window origins; the range bounds keep every window inside the image.
    row_starts = range(0,c_max-size+1,stride)
    col_starts = range(0,r_max-size+1,stride)
    total_size = len(row_starts)*len(col_starts)
    print("output size: (%s*%s)"%(len(X_array),total_size*91))

    img_feature_cols = ["f%s"%(f) for f in range(total_size*91)]

    rows = []
    for xt in X_array:
        features = []  # features of all windows of this image, side by side
        for i in row_starts:
            for j in col_starts:
                r_array, g_array, b_array = _non_white_channels(xt[i:i+size,j:j+size])
                features.extend(_histogram_features(r_array, g_array, b_array))
        rows.append(features)

    img_feature = pd.DataFrame(rows, columns=img_feature_cols)
    img_feature.index = index
    return img_feature


# Raw pixels: only reshape
def ft_raw(rgb_array, cnn, resize):
    """Reshape raw pixel data for a model.

    When ``cnn`` is truthy the result has shape ``(n, resize, resize, 3)``;
    otherwise each image is flattened, giving shape ``(n, -1)``.
    """
    n_images = len(rgb_array)
    target_shape = (n_images, resize, resize, 3) if cnn else (n_images, -1)
    return rgb_array.reshape(target_shape)

In [None]:
#X_train_his = ft_extract_histogram_from_object(X_train,index=y_train.index)
#X_test_his  = ft_extract_histogram_from_object(X_test, index=y_test.index)

In [None]:
# Window-wise histogram features (10x10 windows, stride 10) for both splits,
# indexed like the corresponding label frames.
X_train_his = ft_extract_partial_histogram(X_train,size=10,stride=10,index=y_train.index)
X_test_his  = ft_extract_partial_histogram(X_test ,size=10,stride=10,index=y_test.index)

## <a name="metrics"></a>Metrics

In [None]:
from sklearn.metrics import f1_score, roc_auc_score, classification_report, roc_curve

# ROC AUC
# Relationship between true-positive and false-positive rate as the threshold varies (binary class only)
# http://qiita.com/HirofumiYashima/items/3f089f266f0404122cc9
def mt_auc(clf, X_train, y_train):
    """Return the ROC AUC of ``clf`` on ``(X_train, y_train)``.

    The score passed to ``roc_auc_score`` is column 1 of
    ``clf.predict_proba(X_train)``.

    The former ``roc_curve(..., pos_label=2)`` call was removed: its outputs
    were never used and did not affect the returned value.
    """
    y_train_proba = clf.predict_proba(X_train)[:,1]
    return roc_auc_score(y_true=y_train, y_score=y_train_proba)

# Accuracy (mean of the per-class accuracies)
def mt_M_accuracy(y_true, y_pred, n_classes=24):
    """Return the mean per-class accuracy and the per-class breakdown.

    Parameters
    ----------
    y_true, y_pred : aligned array-likes of class ids.
    n_classes : number of class ids (``0 .. n_classes-1``) to evaluate.
        Defaults to 24, the value that was previously hard-coded.

    Returns
    -------
    ``(average, M_acc)`` where ``M_acc`` maps every class id to the fraction
    of its samples predicted correctly (NaN for a class with no samples in
    ``y_true``), and ``average`` is the unweighted mean over the classes that
    do occur. A class absent from ``y_true`` therefore no longer turns the
    average into NaN.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    M_acc = {}
    for i in range(n_classes):
        mask = y_true == i
        M_acc[i] = np.mean(y_pred[mask] == i) if mask.any() else float("nan")
    present = [acc for acc in M_acc.values() if not np.isnan(acc)]
    average = np.mean(present) if present else float("nan")
    return  average, M_acc

# Report the accuracy of an already-trained model
def mt_show_accs(clf,keras,X_train, X_test, y_train, y_test):
    """Print classification reports and mean per-class accuracy for both splits.

    Parameters
    ----------
    clf : trained model.
    keras : truthy when ``clf`` is a Keras model, in which case class ids are
        obtained with ``predict_classes`` when the model has it and with
        ``argmax`` over ``predict`` otherwise. Falsy models use ``predict``.
    X_train, X_test : features of the two splits.
    y_train, y_test : true class ids of the two splits.

    Returns
    -------
    ``(train_M_acc, test_M_acc)``: the per-class accuracy dictionaries
    produced by ``mt_M_accuracy``.
    """
    def _predict_labels(features):
        if not keras:
            return clf.predict(features)
        if hasattr(clf, "predict_classes"):
            return clf.predict_classes(features)
        # Newer Keras models no longer offer predict_classes. Taking the
        # argmax of the class scores is the equivalent for multi-class output.
        return np.argmax(clf.predict(features), axis=-1)

    y_train_pred = _predict_labels(X_train)
    y_test_pred  = _predict_labels(X_test)
    print("<------- Train ------->")
    print(classification_report(y_true=y_train, y_pred=y_train_pred))
    print("<------- Test ------->")
    print(classification_report(y_true=y_test,  y_pred=y_test_pred))
    train_accuracy, train_M_acc = mt_M_accuracy(y_true=y_train, y_pred=y_train_pred)
    test_accuracy, test_M_acc  = mt_M_accuracy(y_true=y_test, y_pred=y_test_pred)
    print("\nTrain Accuracy: %s\n Test Accuracy: %s"%(train_accuracy, test_accuracy))
    return train_M_acc, test_M_acc

## <a name="classifiers"></a>Classifiers

In [None]:
np.random.seed(0)
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier,ExtraTreesClassifier,IsolationForest
from sklearn.naive_bayes import GaussianNB,MultinomialNB, BernoulliNB
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import cross_val_score
import xgboost as xgb

# Candidate estimators to compare.
# NOTE(review): IsolationForest is an outlier detector rather than a
# multi-class classifier -- confirm that it belongs in this list.
classifiers = [
    IsolationForest(random_state=0),
    ExtraTreesClassifier(),
    SVC(class_weight=None),
    # class_weight='auto' was replaced by 'balanced' in scikit-learn and is no
    # longer accepted by current releases.
    SVC(class_weight='balanced'),
    LinearSVC(),
    DecisionTreeClassifier( random_state=0 ),
    RandomForestClassifier( random_state=0 ),
    AdaBoostClassifier( random_state=0 ),
    GradientBoostingClassifier( random_state=0 ),
    KNeighborsClassifier(),
    xgb.XGBClassifier(),
    GaussianNB(),
    MultinomialNB(),
    BernoulliNB()
]

In [None]:
# Fit an XGBoost classifier (n_estimators=1000) on the window-wise histogram
# features, using the category_id column as the target.
xtc = xgb.XGBClassifier(n_estimators=1000)
xtc.fit(X_train_his,y_train["category_id"])

In [None]:
# Print the reports for the fitted XGBoost model and keep the per-class
# accuracy dictionaries of both splits.
train_result,test_result = mt_show_accs(xtc, 
                                        keras=False, 
                                        X_train=X_train_his, 
                                        X_test=X_test_his, 
                                        y_train=y_train["category_id"].values, 
                                        y_test=y_test["category_id"].values)

## <a name="integration"></a>Integration

In [None]:
# Integrate the 24 models
def integrate24models():
    """Placeholder for combining the 24 models; currently always returns 1."""
    placeholder = 1
    return placeholder

## <a name="result"></a>Result