In [1]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc
import math
import random
import pickle
import pandas as pd
import numpy as np
import multiprocessing
from tqdm.auto import tqdm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

from transformers import get_cosine_schedule_with_warmup

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset, sampler

from imblearn.over_sampling import SMOTE

In [2]:
# Prefer the second GPU when the host exposes one; otherwise use the first GPU, then the CPU.
if torch.cuda.is_available():
    # 'cuda:1' is only addressable when at least two CUDA devices are visible to this process.
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=1)

In [3]:
# Read the training and test tables from the local data directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/df_train00.csv', './data/df_test00.csv')
)

In [4]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc
import math
import random
import pickle
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold

from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier, CatBoostRegressor
from imblearn.over_sampling import SMOTE, SMOTENC

In [5]:
# XGBoost hyper-parameters. No other cell shown in this notebook reads this dict.
# Options that were tried and are currently disabled: n_estimators=25,
# colsample_bytree=0.9, rate_drop=0.3.
xgb_params = dict(
    booster='gbtree',
    grow_policy='lossguide',
    max_depth=0,
    learning_rate=0.4,
    reg_lambda=100,
    reg_alpha=10,
    subsample=0.9,
    num_parallel_tree=1,
    colsample_bynode=0.9,
)

In [6]:
# Seed passed to the SMOTENC resampler, and the number of rows it should produce
# for each of the two labels (0 and 1) of a binary target.
random_seed = 6327
strategy = dict.fromkeys((0, 1), 300)

In [7]:
# Label distribution of the training table as loaded from disk.
train['class'].value_counts()

1    114
2     79
0     69
Name: class, dtype: int64

In [8]:
# Extra feature columns from ae_values.csv: the first len(train) rows are attached
# column-wise to the training table, the remaining rows to the test table.
ae = pd.read_csv("./data/ae_values.csv")
train2 = pd.concat([train.copy().reset_index(drop=True), ae.iloc[:len(train)]], axis=1)
test2 = pd.concat([test, ae.iloc[len(train):].reset_index(drop=True)], axis=1)

# The label distribution is unchanged by the column-wise join.
train2['class'].value_counts()

1    114
2     79
0     69
Name: class, dtype: int64

In [15]:
# One-vs-rest binary targets, one per class label (0, 1, 2).
y1 = (train2['class'].values == 0).astype(int)
y2 = (train2['class'].values == 1).astype(int)
y3 = (train2['class'].values == 2).astype(int)

# Feature matrix; the first 16 columns are the categorical ones.
X = train2.drop(['id', 'class'], axis=1)
# Cast with astype(dict) instead of `X.iloc[:, :16] = ...`: from pandas 2.0 on, an
# iloc column assignment writes into the existing columns and keeps their old
# dtype, so the category conversion would be silently lost.
X = X.astype({col: 'category' for col in X.columns[:16]})

X_test = test2.drop(['id', 'class'], axis=1)
X_test = X_test.astype({col: 'category' for col in X_test.columns[:16]})

# The same resampler configuration is refit for each one-vs-rest target; every
# resampled set ends up with 300 rows per label, as requested by `strategy`.
smote = SMOTENC(categorical_features=list(range(16)), random_state=random_seed, sampling_strategy=strategy)
X_train1, y_train1 = smote.fit_resample(X, y1)
X_train2, y_train2 = smote.fit_resample(X, y2)
X_train3, y_train3 = smote.fit_resample(X, y3)

X_train1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 40 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   trait         600 non-null    category
 1   SNP_01        600 non-null    category
 2   SNP_02        600 non-null    category
 3   SNP_03        600 non-null    category
 4   SNP_04        600 non-null    category
 5   SNP_05        600 non-null    category
 6   SNP_06        600 non-null    category
 7   SNP_07        600 non-null    category
 8   SNP_08        600 non-null    category
 9   SNP_09        600 non-null    category
 10  SNP_10        600 non-null    category
 11  SNP_11        600 non-null    category
 12  SNP_12        600 non-null    category
 13  SNP_13        600 non-null    category
 14  SNP_14        600 non-null    category
 15  SNP_15        600 non-null    category
 16  SNP_01_ratio  600 non-null    float64 
 17  SNP_02_ratio  600 non-null    float64 
 18  SNP_03_rat

In [18]:
def lgbmc(inputX, inputY):
    """Fit a CatBoost classifier on (inputX, inputY) and return the fitted model.

    The first 16 columns of ``inputX`` are declared as categorical features.
    The training data is also passed as the eval set, and the macro F1 printed
    before returning is computed on that same training data, so neither number
    is a held-out estimate. Despite the function name, the model is CatBoost.
    """
    categorical_cols = list(inputX.columns[:16])
    fit_options = dict(eval_set=(inputX, inputY), verbose=10)

    model = CatBoostClassifier(
        cat_features=categorical_cols,
        iterations=100,
        learning_rate=0.3,
        task_type='GPU',
        devices='0',
    )
    model.fit(inputX, inputY, **fit_options)

    # Training-set macro F1, printed as a quick sanity check.
    train_f1 = f1_score(inputY, model.predict(inputX), average='macro')
    print(train_f1)

    return model

In [19]:
def lgbmr(inputX, inputY) :
    """Fit a CatBoost regressor on a binary target and return the fitted model.

    The first 16 columns of ``inputX`` are declared as categorical features.
    The training data is also passed as the eval set, and the macro F1 printed
    before returning is computed on that same training data, so neither number
    is a held-out estimate. Despite the function name, the model is CatBoost.

    NOTE(review): this helper pins GPU '5' while ``lgbmc`` pins GPU '0' —
    confirm that both device ids are intended.
    """
    var_categ = inputX.columns.tolist()[:16]
    model = CatBoostRegressor(
        cat_features=var_categ,
        iterations=100,
        learning_rate=0.3,
        task_type='GPU',
        devices='5',
        # random_state=random_seed
        )

    model.fit(
        inputX, inputY,
        eval_set=(inputX, inputY),
        verbose=10
        )

    pred = model.predict(inputX)
    # Regressor output is unbounded, so rounding alone can yield labels such as
    # -1 or 2. Those would count as extra classes in the macro average and
    # deflate the score, so the rounded values are clipped to {0, 1} first.
    hard_labels = np.clip(np.round(pred), 0, 1).astype(int)
    score = f1_score(inputY, hard_labels, average='macro')
    print(score)

    return model

In [20]:
# Classifier/regressor pair for the first one-vs-rest target (class 0 vs. the rest).
model1_c = lgbmc(X_train1, y_train1)
model1_r = lgbmr(X_train1, y_train1)

0:	learn: 0.1889728	test: 0.1889728	best: 0.1889728 (0)	total: 10.3ms	remaining: 1.02s
10:	learn: 0.0024916	test: 0.0024916	best: 0.0024916 (10)	total: 100ms	remaining: 812ms
20:	learn: 0.0010751	test: 0.0010751	best: 0.0010751 (20)	total: 192ms	remaining: 722ms
30:	learn: 0.0007326	test: 0.0007326	best: 0.0007326 (30)	total: 287ms	remaining: 639ms
40:	learn: 0.0005559	test: 0.0005559	best: 0.0005559 (40)	total: 381ms	remaining: 548ms
50:	learn: 0.0004556	test: 0.0004556	best: 0.0004556 (50)	total: 474ms	remaining: 456ms
60:	learn: 0.0003832	test: 0.0003832	best: 0.0003832 (60)	total: 568ms	remaining: 363ms
70:	learn: 0.0003355	test: 0.0003355	best: 0.0003355 (70)	total: 661ms	remaining: 270ms
80:	learn: 0.0002961	test: 0.0002961	best: 0.0002961 (80)	total: 755ms	remaining: 177ms
90:	learn: 0.0002679	test: 0.0002679	best: 0.0002679 (90)	total: 848ms	remaining: 83.9ms
99:	learn: 0.0002459	test: 0.0002459	best: 0.0002459 (99)	total: 933ms	remaining: 0us
bestTest = 0.0002458828191
bestIte

In [21]:
# Classifier/regressor pair for the second one-vs-rest target (class 1 vs. the rest).
model2_c = lgbmc(X_train2, y_train2)
model2_r = lgbmr(X_train2, y_train2)

0:	learn: 0.3508702	test: 0.3508702	best: 0.3508702 (0)	total: 9.86ms	remaining: 977ms
10:	learn: 0.0375930	test: 0.0373823	best: 0.0373823 (10)	total: 101ms	remaining: 815ms
20:	learn: 0.0205067	test: 0.0209386	best: 0.0209386 (20)	total: 194ms	remaining: 731ms
30:	learn: 0.0114990	test: 0.0117605	best: 0.0117605 (30)	total: 287ms	remaining: 639ms
40:	learn: 0.0078347	test: 0.0082826	best: 0.0082826 (40)	total: 381ms	remaining: 548ms
50:	learn: 0.0054869	test: 0.0059151	best: 0.0059151 (50)	total: 475ms	remaining: 456ms
60:	learn: 0.0042227	test: 0.0045699	best: 0.0045699 (60)	total: 569ms	remaining: 364ms
70:	learn: 0.0032153	test: 0.0035079	best: 0.0035079 (70)	total: 663ms	remaining: 271ms
80:	learn: 0.0025846	test: 0.0028277	best: 0.0028277 (80)	total: 756ms	remaining: 177ms
90:	learn: 0.0022902	test: 0.0025311	best: 0.0025311 (90)	total: 850ms	remaining: 84ms
99:	learn: 0.0020304	test: 0.0022614	best: 0.0022614 (99)	total: 932ms	remaining: 0us
bestTest = 0.002261374394
bestIterat

In [22]:
# Classifier/regressor pair for the third one-vs-rest target (class 2 vs. the rest).
model3_c = lgbmc(X_train3, y_train3)
model3_r = lgbmr(X_train3, y_train3)

0:	learn: 0.3426727	test: 0.3426727	best: 0.3426727 (0)	total: 9.42ms	remaining: 933ms
10:	learn: 0.0434551	test: 0.0449024	best: 0.0449024 (10)	total: 99.1ms	remaining: 802ms
20:	learn: 0.0203931	test: 0.0213878	best: 0.0213878 (20)	total: 190ms	remaining: 716ms
30:	learn: 0.0138036	test: 0.0149495	best: 0.0149495 (30)	total: 281ms	remaining: 625ms
40:	learn: 0.0114773	test: 0.0132440	best: 0.0132384 (39)	total: 372ms	remaining: 535ms
50:	learn: 0.0069133	test: 0.0084164	best: 0.0084164 (50)	total: 463ms	remaining: 445ms
60:	learn: 0.0055428	test: 0.0071577	best: 0.0071365 (59)	total: 554ms	remaining: 354ms
70:	learn: 0.0044716	test: 0.0057233	best: 0.0057233 (70)	total: 646ms	remaining: 264ms
80:	learn: 0.0039251	test: 0.0051740	best: 0.0051740 (80)	total: 739ms	remaining: 173ms
90:	learn: 0.0033844	test: 0.0046206	best: 0.0046206 (90)	total: 830ms	remaining: 82ms
99:	learn: 0.0029166	test: 0.0041564	best: 0.0041564 (99)	total: 911ms	remaining: 0us
bestTest = 0.004156390429
bestItera

In [23]:
# Odd-numbered preds: positive-class probability from each one-vs-rest classifier.
pred1, pred3, pred5 = (
    clf.predict_proba(X_test)[:, 1] for clf in (model1_c, model2_c, model3_c)
)
# Even-numbered preds: raw output of the matching regressor.
pred2, pred4, pred6 = (
    reg.predict(X_test) for reg in (model1_r, model2_r, model3_r)
)

# Per-class score = classifier probability + regressor output. The regressor term
# is unbounded, so these sums are scores rather than probabilities.
A_prob, B_prob, C_prob = pred1 + pred2, pred3 + pred4, pred5 + pred6

In [26]:
# Reference label files (submit_0.99078*.csv) used for comparison below.
high1, high2, high3, high4 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/submit_0.99078.csv",
        "./data/submit_0.99078_2.csv",
        "./data/submit_0.99078_3.csv",
        "./data/submit_0.99078_4.csv",
    )
)

# Side-by-side table: reference labels, the six raw model outputs, and the
# summed per-class scores. Columns are added in exactly this order.
total = pd.DataFrame().assign(
    high1=high1['class'],
    high2=high2['class'],
    high3=high3['class'],
    high4=high4['class'],
    pred1=pred1,
    pred2=pred2,
    pred3=pred3,
    pred4=pred4,
    pred5=pred5,
    pred6=pred6,
    a_prob=A_prob,
    b_prob=B_prob,
    c_prob=C_prob,
)

total

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
0,A,A,A,A,0.999921,1.000859,0.000191,-0.038390,0.000839,0.009809,2.000779,-0.038199,0.010647
1,B,B,B,B,0.002353,0.003054,0.999000,1.020866,0.000981,0.075278,0.005407,2.019865,0.076259
2,C,C,C,C,0.000171,-0.000354,0.001147,-0.036302,0.999456,0.971063,-0.000183,-0.035155,1.970518
3,C,C,C,C,0.000703,0.059251,0.989858,0.827082,0.257562,0.353230,0.059954,1.816940,0.610793
4,A,A,A,A,0.999488,0.999781,0.000555,0.030069,0.000620,0.020374,1.999270,0.030624,0.020994
...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,B,B,B,B,0.000536,0.002013,0.988348,0.750444,0.018456,0.205311,0.002549,1.738793,0.223767
171,C,C,C,C,0.000188,-0.003437,0.011012,-0.042793,0.999041,1.134835,-0.003250,-0.031781,2.133875
172,C,C,C,C,0.000186,-0.002761,0.018094,0.246725,0.991324,0.973074,-0.002575,0.264819,1.964398
173,B,B,B,B,0.000153,-0.001297,0.997888,0.769182,0.001223,0.072467,-0.001144,1.767070,0.073690


In [60]:
# Row labels whose summed score for a class reaches 0.9. A row can qualify for
# more than one class, so the three lists may overlap.
a_index, b_index, c_index = (
    total.index[total[score_col] >= 0.9].tolist()
    for score_col in ('a_prob', 'b_prob', 'c_prob')
)

# Per-class hit counts followed by their sum.
print(len(a_index), len(b_index), len(c_index), sum(map(len, (a_index, b_index, c_index))))

51 86 39 176


In [61]:
# Rows whose class-A score passed the 0.9 threshold.
total.iloc[a_index]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
0,A,A,A,A,0.999921,1.000859,0.000191,-0.03839,0.000839,0.009809,2.000779,-0.038199,0.010647
4,A,A,A,A,0.999488,0.999781,0.000555,0.030069,0.00062,0.020374,1.99927,0.030624,0.020994
8,A,A,A,A,0.999876,0.988324,0.000545,0.256447,0.003883,0.205078,1.9882,0.256992,0.20896
9,A,A,A,A,0.999959,1.000483,6.8e-05,-0.010181,0.000149,0.007725,2.000442,-0.010113,0.007874
13,A,A,A,A,0.999745,0.99996,0.000348,0.00583,0.000402,-0.005606,1.999705,0.006178,-0.005204
16,A,A,A,A,0.999912,0.999759,3.1e-05,-0.002186,0.00019,0.004804,1.999671,-0.002155,0.004994
23,A,A,A,A,0.99993,0.999686,3.1e-05,0.013661,0.000111,-0.026143,1.999617,0.013692,-0.026032
27,A,A,A,A,0.999626,1.000556,0.00014,-0.022307,0.000537,0.05816,2.000181,-0.022167,0.058696
28,A,A,A,A,0.999921,0.949956,0.000261,0.0949,0.000565,0.02183,1.949877,0.095162,0.022395
30,A,A,A,A,0.999902,1.000141,0.003403,-0.061683,0.002186,0.039576,2.000043,-0.05828,0.041762


In [62]:
# Rows whose class-B score passed the 0.9 threshold.
total.iloc[b_index]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
1,B,B,B,B,0.002353,0.003054,0.999000,1.020866,0.000981,0.075278,0.005407,2.019865,0.076259
3,C,C,C,C,0.000703,0.059251,0.989858,0.827082,0.257562,0.353230,0.059954,1.816940,0.610793
5,C,C,C,C,0.000287,-0.001167,0.975314,0.626997,0.490775,0.540581,-0.000880,1.602311,1.031356
7,B,B,B,B,0.000120,-0.004663,0.993730,0.930551,0.002552,0.005946,-0.004543,1.924281,0.008497
11,B,B,B,B,0.000224,-0.001828,0.998290,0.926002,0.002585,0.034880,-0.001604,1.924292,0.037465
...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,B,B,B,B,0.000447,-0.002098,0.974286,0.753907,0.004919,0.139048,-0.001651,1.728193,0.143968
168,B,B,B,C,0.000285,-0.001529,0.762196,0.648140,0.062561,0.234181,-0.001244,1.410336,0.296742
170,B,B,B,B,0.000536,0.002013,0.988348,0.750444,0.018456,0.205311,0.002549,1.738793,0.223767
173,B,B,B,B,0.000153,-0.001297,0.997888,0.769182,0.001223,0.072467,-0.001144,1.767070,0.073690


In [63]:
# Rows whose class-C score passed the 0.9 threshold.
total.iloc[c_index]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
2,C,C,C,C,0.000171,-0.000354,0.001147,-0.036302,0.999456,0.971063,-0.000183,-0.035155,1.970518
5,C,C,C,C,0.000287,-0.001167,0.975314,0.626997,0.490775,0.540581,-0.00088,1.602311,1.031356
6,C,C,C,C,0.000109,0.006541,0.003705,-0.067221,0.994077,0.868775,0.00665,-0.063517,1.862852
10,C,C,C,C,0.000125,0.000154,0.000183,-0.014949,0.999559,0.993161,0.000279,-0.014766,1.99272
19,C,C,C,C,0.000146,-0.00099,0.042537,0.341699,0.862689,0.7132,-0.000845,0.384236,1.575889
33,C,C,C,C,0.000392,1.6e-05,0.098809,0.10821,0.974197,0.806499,0.000409,0.207019,1.780696
36,C,C,C,C,0.001072,0.041298,0.00036,-0.003534,0.999174,0.995138,0.04237,-0.003174,1.994312
39,C,C,C,C,0.001254,-0.001326,0.008967,-0.021032,0.993091,0.820476,-7.2e-05,-0.012065,1.813567
42,C,C,C,C,0.000137,0.002287,0.023491,0.266034,0.988771,0.715164,0.002424,0.289526,1.703935
47,C,C,C,C,0.000523,-0.000134,0.004917,0.152239,0.997634,0.874592,0.000389,0.157156,1.872226


In [64]:
# Rows that passed the threshold for both B and C, kept in b_index order.
inter_index = list(filter(c_index.__contains__, b_index))
inter_index

[5, 119]

In [65]:
# Inspect the rows that qualified for both B and C.
total.iloc[inter_index]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
5,C,C,C,C,0.000287,-0.001167,0.975314,0.626997,0.490775,0.540581,-0.00088,1.602311,1.031356
119,C,C,C,C,0.000229,0.000358,0.413266,0.665065,0.578031,0.414068,0.000587,1.078331,0.992099


In [69]:
# Inspect row 12 on its own (double brackets keep the result a one-row DataFrame).
total.iloc[[12]]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob
12,B,C,C,C,0.000149,0.006001,0.357163,0.510651,0.053113,0.285449,0.006151,0.867814,0.338562


In [74]:
# Shape of the last three columns of `total`, selected by position.
total.iloc[:, -3:].shape

(175, 3)

In [75]:
# Predicted class per row = position of the largest summed score (0=a, 1=b, 2=c).
# The score columns are selected by name: a positional `iloc[:, -3:]` silently
# picks different columns once another column (e.g. 'answer') is appended to
# `total` and this cell is re-run.
total_index = np.argmax(total[['a_prob', 'b_prob', 'c_prob']].values, axis=1)
total_index

array([0, 1, 2, 1, 0, 1, 2, 1, 0, 0, 2, 1, 1, 0, 1, 1, 0, 1, 1, 2, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 2, 0, 1, 2, 1, 1, 2, 0, 1, 2, 1,
       1, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 2, 1, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1,
       2, 0, 1, 1, 2, 1, 1, 2, 0, 1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 1,
       2, 1, 1, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 2, 2, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 0, 1, 2, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 2, 0, 1, 2, 2, 1, 0, 0, 2, 1, 1, 0, 1, 2, 2, 1, 1])

In [46]:
# answer = np.zeros(len(X_test))
# answer[c_index] = 2
# answer[b_index] = 1
# answer

array([0., 1., 2., 1., 0., 1., 2., 1., 0., 0., 2., 1., 0., 0., 1., 1., 0.,
       1., 1., 2., 1., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 2.,
       0., 1., 2., 1., 1., 2., 0., 1., 2., 1., 1., 1., 1., 2., 1., 2., 0.,
       1., 0., 1., 1., 1., 2., 0., 1., 2., 0., 1., 2., 2., 2., 0., 1., 0.,
       0., 1., 1., 1., 0., 0., 2., 1., 2., 1., 1., 1., 2., 1., 0., 1., 1.,
       1., 1., 1., 2., 0., 1., 1., 2., 1., 1., 2., 0., 1., 0., 2., 0., 1.,
       1., 2., 0., 0., 2., 1., 0., 1., 2., 1., 1., 1., 1., 0., 0., 2., 1.,
       1., 0., 1., 1., 2., 2., 1., 1., 1., 0., 1., 0., 0., 1., 1., 1., 2.,
       0., 0., 1., 0., 0., 0., 2., 1., 1., 1., 0., 1., 2., 0., 0., 1., 0.,
       1., 1., 0., 0., 1., 2., 0., 1., 2., 2., 1., 0., 0., 2., 1., 1., 0.,
       1., 2., 2., 1., 1.])

In [71]:
# submit.iloc[[12]]

Unnamed: 0,id,class
12,TEST_012,A


In [76]:
# Start from the reference file (keeps its 'id' column) and overwrite 'class'
# with the letter for each argmax position: 0 -> A, 1 -> B, 2 -> C.
submit = high1.copy()
submit['class'] = pd.Series(total_index, index=submit.index).map({0: 'A', 1: 'B', 2: 'C'})
submit

Unnamed: 0,id,class
0,TEST_000,A
1,TEST_001,B
2,TEST_002,C
3,TEST_003,B
4,TEST_004,A
...,...,...
170,TEST_170,B
171,TEST_171,C
172,TEST_172,C
173,TEST_173,B


In [77]:
# Load the four reference label files again under the names df1..df4.
df1, df2, df3, df4 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/submit_0.99078.csv",
        "./data/submit_0.99078_2.csv",
        "./data/submit_0.99078_3.csv",
        "./data/submit_0.99078_4.csv",
    )
)

In [78]:
# Relative to `submit`: for each reference file, show the rows of `submit` whose
# label differs from that reference.
display(*(submit[submit['class'] != ref['class']] for ref in (df1, df2, df3, df4)))

Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
119,TEST_119,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
119,TEST_119,B
126,TEST_126,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
119,TEST_119,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
119,TEST_119,B
168,TEST_168,B


In [84]:
# Store the argmax class next to the scores, then inspect selected rows, showing
# only the reference labels, the summed scores and the chosen class.
total['answer'] = total_index
total.iloc[[3, 5, 12, 119, 168]][
    ['high1', 'high2', 'high3', 'high4', 'a_prob', 'b_prob', 'c_prob', 'answer']
]

Unnamed: 0,high1,high2,high3,high4,a_prob,b_prob,c_prob,answer
3,C,C,C,C,0.059954,1.81694,0.610793,1
5,C,C,C,C,-0.00088,1.602311,1.031356,1
12,B,C,C,C,0.006151,0.867814,0.338562,1
119,C,C,C,C,0.000587,1.078331,0.992099,1
168,B,B,B,C,-0.001244,1.410336,0.296742,1


In [85]:
# Rows where the B score is above 1.0 while the C score is also above 0.5.
total[(total.b_prob > 1.0) & (total.c_prob > 0.5)]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob,answer
3,C,C,C,C,0.000703,0.059251,0.989858,0.827082,0.257562,0.35323,0.059954,1.81694,0.610793,1
5,C,C,C,C,0.000287,-0.001167,0.975314,0.626997,0.490775,0.540581,-0.00088,1.602311,1.031356,1
35,B,B,B,B,0.000482,0.001193,0.8119,0.645094,0.223033,0.364383,0.001676,1.456993,0.587417,1
119,C,C,C,C,0.000229,0.000358,0.413266,0.665065,0.578031,0.414068,0.000587,1.078331,0.992099,1
126,B,C,B,B,0.000968,0.049571,0.669163,0.379141,0.249582,0.501944,0.050539,1.048304,0.751527,1


In [None]:
# Threshold-based relabelling: class A (0) when a_prob > 1.0, otherwise class C (2)
# when c_prob > 0.6, otherwise class B (1).
# NOTE(review): the original statement stopped after the second `np.where(` and
# could not run; the class-B fallback is assumed because B is the only class
# left — confirm. The scores are read from `total` because no lowercase
# `a_prob` / `c_prob` variables are defined in this notebook.
total_index2 = np.where(total.a_prob > 1.0, 0, np.where(total.c_prob > 0.6, 2, 1))

In [None]:
# Relative to `submit`: for each reference file, show the rows of `submit` whose
# label differs from that reference.
display(*(submit[submit['class'] != ref['class']] for ref in (df1, df2, df3, df4)))

In [79]:
# Number of rows on which `submit` and the first reference file agree.
(high1['class'] == submit['class']).sum()

172

In [80]:
# Label distribution of the new submission.
submit['class'].value_counts()

B    87
A    51
C    37
Name: class, dtype: int64

In [57]:
# Label distribution of the first reference file, for comparison with `submit`.
# The original cell read `high_one`, a name never defined in this notebook
# (NameError on a fresh kernel). `high1` reproduces the recorded counts: they
# equal the counts of `submit` after moving the three rows on which the two
# disagree (3, 5, 119) from B to C.
high1['class'].value_counts()

B    84
A    51
C    40
Name: class, dtype: int64

In [81]:
# Write the submission to the working directory, without the DataFrame index column.
submit.to_csv("submit.csv", index=False)

In [61]:
# Reload the first three reference label files for the pairwise comparisons below.
high1, high2, high3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/submit_0.99078.csv",
        "./data/submit_0.99078_2.csv",
        "./data/submit_0.99078_3.csv",
    )
)

In [62]:
# Rows on which high3 and high1 disagree, shown first from high3, then from high1.
display(*(frame[high3['class'] != high1['class']] for frame in (high3, high1)))

Unnamed: 0,id,class
12,TEST_012,C


Unnamed: 0,id,class
12,TEST_012,B


In [63]:
# Rows on which high3 and high2 disagree, shown first from high3, then from high2.
display(*(frame[high3['class'] != high2['class']] for frame in (high3, high2)))

Unnamed: 0,id,class
126,TEST_126,B


Unnamed: 0,id,class
126,TEST_126,C
