In [1]:
import pandas as pd
import numpy as np
import os
import pickle
from glob import glob
import warnings
warnings.filterwarnings('ignore')

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, log_loss

from sklearn.neural_network import MLPClassifier
from catboost import Pool, CatBoostClassifier

from bert_sklearn import BertClassifier

In [2]:
train = pd.read_csv("../data/train.csv", index_col="id")
test = pd.read_csv("../data/test.csv", index_col="id")
submission = pd.read_csv("../data/sample_submission.csv")

In [3]:
def create_dir(dir):
    if not os.path.exists(dir):
        os.makedirs(dir)
        print("Created Directory :", dir)
    else:
        print("Directory already existed :", dir)
create_dir("../pickle")
create_dir("../model")
create_dir("../submission")

Directory already existed : ../pickle
Directory already existed : ../model
Directory already existed : ../submission


In [4]:
train_x = train['text']
train_y = train['target']
test_x = test['text']

In [5]:
rows_train = train.shape[0] # 주어진 train data의 row 수
rows_test = test.shape[0] # 주어진 test data의 row 수
num_classes = len(train_y.unique())
num_trial = 100 # 파라미터 튜닝을 몇 번 진행하는지의 수
splits_hp = 5 # 파라미터 튜닝을 진행할 때의 kfold 수
splits_tr = 15 # 모델 트레이닝을 진행할 때의 kfold 수
basic_seed = 42 # default seed
num_seed_tr = 5 # 트레이닝 seed 개수
sel_seed = 3 # 선택할 seed 개수

In [6]:
pred_dict = {}
pred_test_dict = {}

In [7]:
lucky_seeds = np.random.randint(0, 1000, num_seed_tr)

for i, seed in enumerate(lucky_seeds):

    kfold = StratifiedKFold(n_splits=splits_tr, random_state=seed, shuffle=True)
    cv = np.zeros((rows_train, num_classes))
    pred_test = np.zeros((rows_test, num_classes))

    for n, (train_idx, val_idx) in enumerate(kfold.split(train_x, train_y)):
        
        x_train, x_val = train_x.iloc[train_idx], train_x.iloc[val_idx]
        y_train, y_val = train_y.iloc[train_idx].values.ravel(), train_y.iloc[val_idx].values.ravel()
        
#         print(f'fold {n+1} start')
        
        BERTModel = BertClassifier(bert_model="bert-base-cased", random_state=basic_seed,
                                   epochs=4, validation_fraction=0, train_batch_size=8, eval_batch_size=2)
        BERTModel.fit(x_train, y_train)
        
        cv[val_idx, :] = BERTModel.predict_proba(x_val)
        pred_test += BERTModel.predict_proba(test_x) / splits_tr
        
        print(f'fold {n+1}', 'log_loss :', log_loss(y_val, cv[val_idx]))
        print(f'fold {n+1}', 'accuracy_score :', accuracy_score(y_val, np.argmax(cv[val_idx], axis=1)))
        
    pred_dict['bert'+str(seed)] = cv
    pred_test_dict['bert'+str(seed)] = pred_test
    print(f'seed {seed}', 'log_loss :', log_loss(train_y, cv))
    print(f'seed {seed}', 'accuracy_score :', accuracy_score(train_y, np.argmax(cv, axis=1)))

Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [08:35<00:00,  2.09it/s, loss=1.55]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.731]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.407]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.235]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 17.06it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.77it/s]


fold 1 log_loss : 1.0057300674007976
fold 1 accuracy_score : 0.7337662337662337
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.67it/s, loss=1.55]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.734]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.407]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:47<00:00,  2.65it/s, loss=0.236]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.95it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.75it/s]


fold 2 log_loss : 1.0477646129171647
fold 2 accuracy_score : 0.724025974025974
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.729]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.418]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.244]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 17.03it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.69it/s]


fold 3 log_loss : 1.0058677466020995
fold 3 accuracy_score : 0.724025974025974
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=1.53]
Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.72]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.392]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.229]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.93it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.65it/s]


fold 4 log_loss : 1.182348691795586
fold 4 accuracy_score : 0.6801948051948052
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.727]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.405]
Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.24]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.96it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.64it/s]


fold 5 log_loss : 0.8792634128709832
fold 5 accuracy_score : 0.7532467532467533
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.717]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.398]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.228]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.92it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.62it/s]


fold 6 log_loss : 0.961283171279737
fold 6 accuracy_score : 0.7402597402597403
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.722]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.398]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:45<00:00,  2.66it/s, loss=0.235]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.53it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.64it/s]


fold 7 log_loss : 0.9017799040638326
fold 7 accuracy_score : 0.7288961038961039
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8617, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.727]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.411]
Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.24]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 17.02it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:03<00:00, 37.53it/s]


fold 8 log_loss : 0.9164883734317969
fold 8 accuracy_score : 0.7483766233766234
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.723]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:45<00:00,  2.66it/s, loss=0.398]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.227]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.93it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.58it/s]


fold 9 log_loss : 0.9783276064052373
fold 9 accuracy_score : 0.7430894308943089
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.721]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.394]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.226]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.84it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.62it/s]


fold 10 log_loss : 0.9699831385037273
fold 10 accuracy_score : 0.7252032520325203
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.721]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.397]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.229]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 17.02it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.58it/s]


fold 11 log_loss : 0.965720923263639
fold 11 accuracy_score : 0.7154471544715447
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=1.55]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.67it/s, loss=0.736]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.419]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:44<00:00,  2.66it/s, loss=0.238]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.87it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.70it/s]


fold 12 log_loss : 0.8927514750458407
fold 12 accuracy_score : 0.7414634146341463
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:41<00:00,  2.68it/s, loss=1.55]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:41<00:00,  2.68it/s, loss=0.722]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.395]
Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:41<00:00,  2.68it/s, loss=0.23]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:17<00:00, 17.16it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:01<00:00, 37.98it/s]


fold 13 log_loss : 1.037271712600715
fold 13 accuracy_score : 0.7219512195121951
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:41<00:00,  2.68it/s, loss=0.728]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.398]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.229]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:17<00:00, 17.19it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.77it/s]


fold 14 log_loss : 1.0132728217006193
fold 14 accuracy_score : 0.7382113821138211
Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 8618, validation data size: 0


Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=1.54]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:43<00:00,  2.67it/s, loss=0.735]
Training  : 100%|██████████████████████████████████████████████████████| 1078/1078 [06:41<00:00,  2.68it/s, loss=0.395]
Training  : 100%|███████████████████████████████████████████████████████| 1078/1078 [06:42<00:00,  2.68it/s, loss=0.23]
Predicting: 100%|████████████████████████████████████████████████████████████████████| 308/308 [00:18<00:00, 16.40it/s]
Predicting: 100%|██████████████████████████████████████████████████████████████████| 4617/4617 [02:02<00:00, 37.76it/s]

fold 15 log_loss : 0.9823714066750221
fold 15 accuracy_score : 0.734959349593496
seed 0 log_loss : 0.9826859028141225
seed 0 accuracy_score : 0.7302068666738871





In [8]:
pred_dict

{'bert0': array([[1.00496174e-04, 1.24635262e-04, 2.01725619e-04, ...,
         5.10851794e-04, 5.70034936e-05, 1.59385076e-04],
        [3.60107457e-04, 1.12816738e-03, 5.04482188e-04, ...,
         6.21390645e-04, 1.92494903e-04, 1.22765472e-04],
        [1.78482354e-01, 1.39642181e-03, 3.91481211e-04, ...,
         2.64126738e-03, 3.76746524e-03, 7.22015619e-01],
        ...,
        [1.00949011e-03, 2.22144707e-04, 4.13263944e-04, ...,
         9.92110312e-01, 8.57076084e-04, 4.63111181e-04],
        [1.50617445e-04, 2.20812717e-03, 4.76216432e-03, ...,
         2.20376067e-04, 4.76652145e-04, 1.00054033e-03],
        [3.43922037e-03, 3.89210327e-04, 2.00755894e-03, ...,
         1.68654369e-03, 6.16816461e-01, 6.88752579e-03]])}

In [9]:
pred2 = pred_dict['bert0']
pred_test2 = pred_test_dict['bert0']

In [10]:
def load_dict(model):
    with open('../pickle/pred_dict_'+model+'.pickle', 'rb') as fw:
        pred_dict_new_local = pickle.load(fw)
    with open('../pickle/pred_test_dict_'+model+'.pickle', 'rb') as fw:
        pred_test_dict_new_local = pickle.load(fw)
    return pred_dict_new_local, pred_test_dict_new_local

In [11]:
pred_dict_mlp, pred_test_dict_mlp = load_dict('mlp_cv15')

In [12]:
pred = np.zeros((rows_train, num_classes))
for _, value in pred_dict_mlp.items():
    pred += value
pred /= len(pred_dict_mlp)

In [13]:
pred_test = np.zeros((rows_test, num_classes))
for _, value in pred_test_dict_mlp.items():
    pred_test += value
pred_test /= len(pred_test_dict_mlp)

In [21]:
print(f'accuracy_score: {accuracy_score(train_y, np.argmax(pred*0.86+pred2*0.14, axis=1)):.6f}')

accuracy_score: 0.796274


In [22]:
pred_test = np.argmax(pred_test*0.86+pred_test2*0.14, axis=1)

In [None]:
pred_dict['bert0'][0]

In [None]:
model = BertClassifier(bert_model="bert-base-multilingual-uncased", epochs=3, learning_rate=4e-05, validation_fraction=0)

In [None]:
lucky_seeds = np.random.randint(0, 1000, num_seed_tr)

for i, seed in enumerate(lucky_seeds):

    kfold = StratifiedKFold(n_splits=splits_tr, random_state=seed, shuffle=True)
    cv = np.zeros((rows_train, num_classes))
    pred_test = np.zeros((rows_test, num_classes))

    for n, (train_idx, val_idx) in enumerate(kfold.split(train_x, train_y)):
        
        x_train, x_val = train_x.iloc[train_idx], train_x.iloc[val_idx]
        y_train, y_val = train_y.iloc[train_idx].values.ravel(), train_y.iloc[val_idx].values.ravel()
        
        vectorizer = TfidfVectorizer(ngram_range=(1, 2))
        vectorizer.fit(x_train)
        x_train = vectorizer.transform(x_train)
        x_val = vectorizer.transform(x_val)
        x_test = vectorizer.transform(test_x)
        
        print(f'fold {n+1} start')
        
        MLPModel = MLPClassifier(max_iter=12, random_state=basic_seed, verbose=False)
        MLPModel.fit(x_train, y_train)
        
        cv[val_idx, :] = MLPModel.predict_proba(x_val)

#         cat_best_hyperparams = {"iterations": 10000, "learning_rate": 0.3}
#         catmodel = CatBoostClassifier(**cat_best_hyperparams)
#         catmodel.fit(x_train, y_train, eval_set=[(x_val, y_val)], early_stopping_rounds=50, verbose=10)
        
#         cv[val_idx] = catmodel.predict(x_val)
        pred_test += MLPModel.predict_proba(x_test) / splits_tr
        
        print(f'fold {n+1}', 'log_loss :', log_loss(y_val, cv[val_idx]))
        print(f'fold {n+1}', 'accuracy_score :', accuracy_score(y_val, np.argmax(cv[val_idx], axis=1)))
        
    pred_dict['mlp'+str(seed)] = cv
    pred_test_dict['mlp'+str(seed)] = pred_test
    print(f'seed {seed}', 'log_loss :', log_loss(train_y, cv))
    print(f'seed {seed}', 'accuracy_score :', accuracy_score(train_y, np.argmax(cv, axis=1)))

In [None]:
def sort_dict(model, pred_dict, pred_test_dict):
    pred_dict_local = {}
    for key, value in pred_dict.items():
        if model in key:
            pred_dict_local[key]=value

    pred_test_dict_local = {}
    for key, value in pred_test_dict.items():
        if model in key:
            pred_test_dict_local[key]=value

    pred_dict_new_local = dict(sorted(
        pred_dict_local.items(), 
        key=lambda x:accuracy_score((train_y), np.argmax(list(x[1]), axis=1)), reverse=False)[:5])
    pred_test_dict_new_local = {}
    for key, value in pred_dict_new_local.items():
        pred_test_dict_new_local[key]=pred_test_dict_local[key]
        
    return pred_dict_new_local, pred_test_dict_new_local

In [None]:
def save_dict(model, pred_dict, pred_test_dict):
    with open('../pickle/pred_dict_'+model+'.pickle', 'wb') as fw:
        pickle.dump(pred_dict, fw)
    with open('../pickle/pred_test_dict_'+model+'.pickle', 'wb') as fw:
        pickle.dump(pred_test_dict, fw)

In [None]:
pred_dict_bert, pred_test_dict_bert = sort_dict('bert', pred_dict, pred_test_dict)
save_dict('bert', pred_dict_bert, pred_test_dict_bert)

In [None]:
pred_dict_mlp, pred_test_dict_mlp = sort_dict('mlp', pred_dict, pred_test_dict)
save_dict('mlp', pred_dict_mlp, pred_test_dict_mlp)

In [None]:
pred = np.zeros((rows_train, num_classes))
for _, value in pred_dict_mlp.items():
    pred += value
pred /= len(pred_dict_mlp)

In [None]:
print(f'accuracy_score: {accuracy_score(train_y, np.argmax(pred, axis=1)):.6f}')

In [None]:
pred_test = np.zeros((rows_test, num_classes))
for _, value in pred_test_dict_mlp.items():
    pred_test += value
pred_test /= len(pred_test_dict_mlp)

In [None]:
def load_dict(model):
    with open('../pickle/pred_dict_'+model+'.pickle', 'rb') as fw:
        pred_dict_new_local = pickle.load(fw)
    with open('../pickle/pred_test_dict_'+model+'.pickle', 'rb') as fw:
        pred_test_dict_new_local = pickle.load(fw)
    return pred_dict_new_local, pred_test_dict_new_local

In [None]:
pred_dict_mlp2, pred_test_dict_mlp2 = load_dict('mlp')

In [None]:
pred2 = np.zeros((rows_train, num_classes))
for _, value in pred_dict_mlp2.items():
    pred2 += value
pred2 /= len(pred_dict_mlp2)

In [None]:
pred_test2 = np.zeros((rows_test, num_classes))
for _, value in pred_test_dict_mlp2.items():
    pred_test2 += value
pred_test2 /= len(pred_test_dict_mlp2)

In [None]:
print(f'accuracy_score: {accuracy_score(train_y, np.argmax(pred*0.6+pred2*0.4, axis=1)):.6f}')

In [None]:
pred_test = np.argmax(pred_test, axis=1)

In [23]:
submission["target"] = pred_test

In [24]:
submission['target'].value_counts()

15    567
6     538
3     532
16    507
1     505
5     501
7     500
13    499
2     491
9     490
10    481
17    480
14    465
12    447
4     444
8     443
11    442
0     401
18    359
19    141
Name: target, dtype: int64

In [25]:
submission_name = '20220410'
submission_number = '1'
submission.to_csv(f'../submission/{submission_name}-{submission_number}.csv', index = False)

In [26]:
submission

Unnamed: 0,id,target
0,0,3
1,1,16
2,2,11
3,3,8
4,4,13
...,...,...
9228,9228,16
9229,9229,1
9230,9230,4
9231,9231,0
