# CONCEPT 
1. A분류/B분류/C분류
2. B&C분류 / A&B&C분류
- 총 열 개(Classifier&Regressor)의 모델 성능을 복합적으로 고려

In [1]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc
import math
import random
import pickle
import pandas as pd
import numpy as np
import multiprocessing
from tqdm.auto import tqdm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

from transformers import get_cosine_schedule_with_warmup

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset, sampler

from imblearn.over_sampling import SMOTE

In [2]:
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda', index=1)

In [3]:
train = pd.read_csv('./data/df_train00.csv')
test = pd.read_csv('./data/df_test00.csv')              

In [4]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc
import math
import random
import pickle
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold

from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier, CatBoostRegressor
from imblearn.over_sampling import SMOTE, SMOTENC

In [5]:
random_seed = 6327
# strategy = {0:300, 1:300}
strategy = 'auto'

In [6]:
train['class'].value_counts()

1    114
2     79
0     69
Name: class, dtype: int64

In [7]:
ae = pd.read_csv("./data/ae_values.csv")
# target_idx = train[train['class'] != 0].index.tolist()
train2 = train.copy().reset_index(drop=True)
# train2['class'] = train2['class'] -1

train2 = pd.concat([train2, ae[:len(train2)]], axis=1)
test2 = pd.concat([test, ae[len(train2):].reset_index(drop=True)], axis=1)
train2['class'].value_counts()

1    114
2     79
0     69
Name: class, dtype: int64

# STEP 01
- 각 레이블 독립적으로 구분하는 모델

In [10]:
y1 = (train2['class'].values == 0).astype(int)
y2 = (train2['class'].values == 1).astype(int)
y3 = (train2['class'].values == 2).astype(int)

X = train2.drop(['id', 'class'], axis=1)
X.iloc[:,:16] = X.iloc[:,:16].astype('category')


X_test = test2.drop(['id', 'class'], axis=1)
X_test.iloc[:,:16] = X_test.iloc[:,:16].astype('category')

smote = SMOTENC(categorical_features=[x for x in range(16)], random_state=random_seed, sampling_strategy=strategy)
X_train1, y_train1 = smote.fit_resample(X, y1)
X_train2, y_train2 = smote.fit_resample(X, y2)
X_train3, y_train3 = smote.fit_resample(X, y3)

X_train1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 386 entries, 0 to 385
Data columns (total 40 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   trait         386 non-null    category
 1   SNP_01        386 non-null    category
 2   SNP_02        386 non-null    category
 3   SNP_03        386 non-null    category
 4   SNP_04        386 non-null    category
 5   SNP_05        386 non-null    category
 6   SNP_06        386 non-null    category
 7   SNP_07        386 non-null    category
 8   SNP_08        386 non-null    category
 9   SNP_09        386 non-null    category
 10  SNP_10        386 non-null    category
 11  SNP_11        386 non-null    category
 12  SNP_12        386 non-null    category
 13  SNP_13        386 non-null    category
 14  SNP_14        386 non-null    category
 15  SNP_15        386 non-null    category
 16  SNP_01_ratio  386 non-null    float64 
 17  SNP_02_ratio  386 non-null    float64 
 18  SNP_03_rat

In [11]:
def lgbmc(inputX, inputY) :  
    var_categ = inputX.columns.tolist()[:16]
    model = CatBoostClassifier(
        cat_features=var_categ,
        iterations=100,
        learning_rate=0.3,
        task_type='GPU',
        devices='0',
        # random_state=random_seed
        )
  
    model.fit(
        inputX, inputY,
        eval_set=(inputX, inputY),
        verbose=10
        );     

    pred = model.predict(inputX)
    score = f1_score(inputY, pred, average='macro')
    print(score)
    
    return model

In [12]:
def lgbmr(inputX, inputY) :  
    var_categ = inputX.columns.tolist()[:16]
    model = CatBoostRegressor(
        cat_features=var_categ,
        iterations=100,
        learning_rate=0.3,
        task_type='GPU',
        devices='5',
        # random_state=random_seed
        )
  
    model.fit(
        inputX, inputY,
        eval_set=(inputX, inputY),
        verbose=10
        );     

    pred = model.predict(inputX)
    score = f1_score(inputY, np.round(pred), average='macro')
    print(score)
    
    return model

In [13]:
model1_c = lgbmc(X_train1, y_train1)
model1_r = lgbmr(X_train1, y_train1)

0:	learn: 0.2422734	test: 0.2422734	best: 0.2422734 (0)	total: 10.7ms	remaining: 1.06s
10:	learn: 0.0024732	test: 0.0024732	best: 0.0024732 (10)	total: 85.9ms	remaining: 695ms
20:	learn: 0.0012889	test: 0.0012889	best: 0.0012889 (20)	total: 169ms	remaining: 637ms
30:	learn: 0.0009346	test: 0.0009346	best: 0.0009346 (30)	total: 257ms	remaining: 572ms
40:	learn: 0.0007633	test: 0.0007633	best: 0.0007633 (40)	total: 345ms	remaining: 496ms
50:	learn: 0.0006567	test: 0.0006567	best: 0.0006567 (50)	total: 434ms	remaining: 417ms
60:	learn: 0.0005852	test: 0.0005852	best: 0.0005852 (60)	total: 525ms	remaining: 336ms
70:	learn: 0.0005218	test: 0.0005218	best: 0.0005218 (70)	total: 617ms	remaining: 252ms
80:	learn: 0.0004646	test: 0.0004646	best: 0.0004646 (80)	total: 708ms	remaining: 166ms
90:	learn: 0.0004204	test: 0.0004204	best: 0.0004204 (90)	total: 800ms	remaining: 79.1ms
99:	learn: 0.0003934	test: 0.0003934	best: 0.0003934 (99)	total: 882ms	remaining: 0us
bestTest = 0.0003934294526
bestIt

In [14]:
model2_c = lgbmc(X_train2, y_train2)
model2_r = lgbmr(X_train2, y_train2)

0:	learn: 0.4139620	test: 0.4139620	best: 0.4139620 (0)	total: 10.2ms	remaining: 1.01s
10:	learn: 0.0575991	test: 0.0573224	best: 0.0573224 (10)	total: 104ms	remaining: 843ms
20:	learn: 0.0243740	test: 0.0242327	best: 0.0242327 (20)	total: 198ms	remaining: 745ms
30:	learn: 0.0149788	test: 0.0151622	best: 0.0151622 (30)	total: 291ms	remaining: 649ms
40:	learn: 0.0103808	test: 0.0108976	best: 0.0108976 (40)	total: 385ms	remaining: 554ms
50:	learn: 0.0073975	test: 0.0078385	best: 0.0078385 (50)	total: 479ms	remaining: 460ms
60:	learn: 0.0059250	test: 0.0062440	best: 0.0062440 (60)	total: 572ms	remaining: 366ms
70:	learn: 0.0049316	test: 0.0052002	best: 0.0052002 (70)	total: 666ms	remaining: 272ms
80:	learn: 0.0041397	test: 0.0044099	best: 0.0044099 (80)	total: 762ms	remaining: 179ms
90:	learn: 0.0035761	test: 0.0037947	best: 0.0037947 (90)	total: 856ms	remaining: 84.6ms
99:	learn: 0.0032139	test: 0.0034402	best: 0.0034402 (99)	total: 940ms	remaining: 0us
bestTest = 0.003440195644
bestIter

In [15]:
model3_c = lgbmc(X_train3, y_train3)
model3_r = lgbmr(X_train3, y_train3)

0:	learn: 0.3921792	test: 0.3921793	best: 0.3921793 (0)	total: 9.89ms	remaining: 979ms
10:	learn: 0.0622227	test: 0.0666263	best: 0.0666263 (10)	total: 101ms	remaining: 816ms
20:	learn: 0.0288102	test: 0.0352800	best: 0.0352800 (20)	total: 192ms	remaining: 722ms
30:	learn: 0.0169108	test: 0.0222529	best: 0.0222529 (30)	total: 283ms	remaining: 631ms
40:	learn: 0.0111593	test: 0.0154949	best: 0.0154949 (40)	total: 375ms	remaining: 540ms
50:	learn: 0.0080469	test: 0.0115959	best: 0.0115959 (50)	total: 466ms	remaining: 448ms
60:	learn: 0.0061185	test: 0.0090925	best: 0.0090925 (60)	total: 558ms	remaining: 357ms
70:	learn: 0.0050392	test: 0.0075263	best: 0.0075263 (70)	total: 649ms	remaining: 265ms
80:	learn: 0.0042235	test: 0.0064108	best: 0.0064108 (80)	total: 740ms	remaining: 174ms
90:	learn: 0.0036148	test: 0.0054743	best: 0.0054743 (90)	total: 832ms	remaining: 82.3ms
99:	learn: 0.0031750	test: 0.0049117	best: 0.0049117 (99)	total: 914ms	remaining: 0us
bestTest = 0.00491170121
bestItera

# STEP 02. 복합적으로 고려하는 모델
- B와 C만 있을 때 구분
- 모두 있을 때 각각 구분

In [56]:
train3 = train2[train2['class'] != 0].copy()

y4 = (train3['class'].values).astype(int)
y5 = (train2['class'].values).astype(int)

X2 = train3.drop(['id', 'class'], axis=1)
X2.iloc[:,:16] = X2.iloc[:,:16].astype('category')

strategy = {0:1000, 1:1200, 2:1000}

smote = SMOTENC(categorical_features=[x for x in range(16)], random_state=random_seed, sampling_strategy=strategy)
# X_train4, y_train4 = smote.fit_resample(X2, y4)
X_train5, y_train5 = smote.fit_resample(X, y5)
train3['class'].value_counts()

1    114
2     79
Name: class, dtype: int64

In [18]:
model4_c = lgbmc(X_train4, y_train4)
model4_r = lgbmr(X_train4, y_train4)

0:	learn: 0.4162189	test: 0.4162189	best: 0.4162189 (0)	total: 10ms	remaining: 993ms
10:	learn: 0.0829145	test: 0.0887026	best: 0.0887026 (10)	total: 101ms	remaining: 821ms
20:	learn: 0.0379168	test: 0.0430507	best: 0.0430507 (20)	total: 193ms	remaining: 725ms
30:	learn: 0.0222392	test: 0.0255037	best: 0.0255037 (30)	total: 284ms	remaining: 633ms
40:	learn: 0.0142214	test: 0.0162685	best: 0.0162685 (40)	total: 376ms	remaining: 541ms
50:	learn: 0.0096522	test: 0.0110608	best: 0.0110608 (50)	total: 467ms	remaining: 449ms
60:	learn: 0.0074818	test: 0.0085387	best: 0.0085387 (60)	total: 558ms	remaining: 357ms
70:	learn: 0.0060766	test: 0.0069879	best: 0.0069879 (70)	total: 650ms	remaining: 265ms
80:	learn: 0.0051053	test: 0.0059330	best: 0.0059330 (80)	total: 741ms	remaining: 174ms
90:	learn: 0.0044678	test: 0.0052236	best: 0.0052236 (90)	total: 833ms	remaining: 82.4ms
99:	learn: 0.0040028	test: 0.0046775	best: 0.0046775 (99)	total: 916ms	remaining: 0us
bestTest = 0.004677464042
bestIterat

In [57]:
model5_c = lgbmc(X_train5, y_train5)
model5_r = lgbmr(X_train5, y_train5)

0:	learn: 0.6616010	test: 0.6616010	best: 0.6616010 (0)	total: 10.3ms	remaining: 1.02s
10:	learn: 0.0660964	test: 0.0660964	best: 0.0660964 (10)	total: 112ms	remaining: 906ms
20:	learn: 0.0242129	test: 0.0247588	best: 0.0247588 (20)	total: 195ms	remaining: 733ms
30:	learn: 0.0149028	test: 0.0154844	best: 0.0154844 (30)	total: 273ms	remaining: 609ms
40:	learn: 0.0094414	test: 0.0098562	best: 0.0098562 (40)	total: 354ms	remaining: 510ms
50:	learn: 0.0064220	test: 0.0066747	best: 0.0066747 (50)	total: 438ms	remaining: 421ms
60:	learn: 0.0045585	test: 0.0048789	best: 0.0048789 (60)	total: 527ms	remaining: 337ms
70:	learn: 0.0038130	test: 0.0040652	best: 0.0040652 (70)	total: 606ms	remaining: 247ms
80:	learn: 0.0032293	test: 0.0034587	best: 0.0034587 (80)	total: 685ms	remaining: 161ms
90:	learn: 0.0027373	test: 0.0029296	best: 0.0029296 (90)	total: 768ms	remaining: 76ms
99:	learn: 0.0024304	test: 0.0026088	best: 0.0026088 (99)	total: 851ms	remaining: 0us
bestTest = 0.002608806491
bestIterat

In [58]:
pred1 = model1_c.predict_proba(X_test)[:,1]
pred2 = model1_r.predict(X_test)
pred3 = model2_c.predict_proba(X_test)[:,1]
pred4 = model2_r.predict(X_test)
pred5 = model3_c.predict_proba(X_test)[:,1]
pred6 = model3_r.predict(X_test)

pred7 = model4_c.predict_proba(X_test)[:,1]
pred8 = model4_r.predict(X_test)
pred9 = np.argmax(model5_c.predict_proba(X_test),axis=1)
pred0 = model5_r.predict(X_test)

A_prob = pred1+pred2 / 2
B_prob = pred3+pred4 / 2
C_prob = pred5+pred6 / 2
BC_prob = pred7+pred8 / 2
ABC_prob = pred9+pred0 / 2

In [59]:
high1 = pd.read_csv("./data/submit_0.99078.csv")
high2 = pd.read_csv("./data/submit_0.99078_2.csv")
high3 = pd.read_csv("./data/submit_0.99078_3.csv")
high4 = pd.read_csv("./data/submit_0.99078_4.csv")

total = pd.DataFrame()
total['high1'] = high1['class']
total['high2'] = high2['class']
total['high3'] = high3['class']
total['high4'] = high4['class']

total['pred1'] = pred1
total['pred2'] = pred2
total['pred3'] = pred3
total['pred4'] = pred4
total['pred5'] = pred5
total['pred6'] = pred6
total['pred7'] = pred7
total['pred8'] = pred8
total['pred9'] = pred9
total['pred0'] = pred0

total['a_prob'] = A_prob
total['b_prob'] = B_prob
total['c_prob'] = C_prob
total['bc_prob'] = BC_prob
total['abc_prob'] = ABC_prob

total

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred0,a_prob,b_prob,c_prob,bc_prob,abc_prob
0,A,A,A,A,0.999483,1.004699,0.000081,0.004018,0.001718,0.044945,0.909998,1.669063,0,0.004006,1.501832,0.002090,0.024191,1.744529,0.002003
1,B,B,B,B,0.000557,-0.002707,0.998758,0.966298,0.004119,-0.076402,0.000896,0.991614,1,1.073069,-0.000796,1.481907,-0.034082,0.496703,1.536535
2,C,C,C,C,0.000173,-0.002007,0.001666,-0.027194,0.998199,0.986303,0.998450,1.987668,2,2.083347,-0.000831,-0.011931,1.491350,1.992284,3.041674
3,C,C,C,C,0.000399,0.000960,0.988190,0.946795,0.182660,0.638103,0.109921,1.158754,1,1.343669,0.000879,1.461587,0.501712,0.689298,1.671835
4,A,A,A,A,0.999262,0.993104,0.000984,-0.022770,0.001125,0.029051,0.599112,1.624723,0,0.009843,1.495814,-0.010401,0.015651,1.411473,0.004922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,B,B,B,B,0.002130,0.026596,0.958230,0.599111,0.046063,0.184332,0.025094,1.233863,1,1.124871,0.015428,1.257786,0.138229,0.642025,1.562436
171,C,C,C,C,0.000179,0.026779,0.003407,0.059159,0.998787,1.015549,0.994834,1.842339,2,1.940666,0.013569,0.032987,1.506562,1.916003,2.970333
172,C,C,C,C,0.000183,0.028902,0.079206,0.227685,0.951216,0.739339,0.976604,1.672137,2,2.233474,0.014634,0.193048,1.320886,1.812672,3.116737
173,B,B,B,B,0.000183,0.012213,0.994862,0.710493,0.002318,0.040413,0.008432,1.234097,1,1.064421,0.006289,1.350108,0.022524,0.625481,1.532211


In [60]:
a_index = total[total.a_prob >= 0.5].index.tolist()
b_index = total[total.b_prob >= 0.5].index.tolist()
c_index = total[total.c_prob >= 0.5].index.tolist()
ab_index = total[(total.b_prob >= 0.5) & (total.a_prob >= 0.5)].index.tolist()
ac_index = total[(total.a_prob >= 0.5) & (total.c_prob >= 0.5)].index.tolist()
bc_index = total[(total.b_prob >= 0.5) & (total.c_prob >= 0.5)].index.tolist()
abc_index = total[(total.a_prob >= 0.3) & (total.b_prob >= 0.3) & (total.c_prob >= 0.3)].index.tolist()

print(len(a_index), len(b_index), len(c_index), len(ab_index), len(ac_index), len(bc_index), len(abc_index), len(a_index)+len(b_index)+len(c_index))

51 88 40 0 0 4 0 179


### Summary
1. A랑은 헷갈려하지 않는다.
2. B와 C랑은 헷갈려하기 때문에 이에 대한 기준이 필요해보인다.

In [61]:
total.iloc[bc_index]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred0,a_prob,b_prob,c_prob,bc_prob,abc_prob
3,C,C,C,C,0.000399,0.00096,0.98819,0.946795,0.18266,0.638103,0.109921,1.158754,1,1.343669,0.000879,1.461587,0.501712,0.689298,1.671835
5,C,C,C,C,0.000776,0.005514,0.873065,0.594236,0.528572,0.645404,0.422213,1.601587,1,1.557938,0.003533,1.170183,0.851274,1.223007,1.778969
119,C,C,C,C,0.000297,0.035377,0.614062,0.68807,0.49029,0.549346,0.662537,1.479001,2,1.380361,0.017985,0.958097,0.764963,1.402038,2.69018
162,C,C,C,C,0.000293,0.014264,0.428344,0.399616,0.624675,0.577271,0.681421,1.582274,1,1.712575,0.007425,0.628151,0.91331,1.472558,1.856288


In [62]:
total['target'] = total.pred9.map(lambda x : 'A' if x==0 else ('B' if x==1 else 'C'))
total[total.high1 != total.target]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred0,a_prob,b_prob,c_prob,bc_prob,abc_prob,target
3,C,C,C,C,0.000399,0.00096,0.98819,0.946795,0.18266,0.638103,0.109921,1.158754,1,1.343669,0.000879,1.461587,0.501712,0.689298,1.671835,B
5,C,C,C,C,0.000776,0.005514,0.873065,0.594236,0.528572,0.645404,0.422213,1.601587,1,1.557938,0.003533,1.170183,0.851274,1.223007,1.778969,B
19,C,C,C,C,0.000253,0.004449,0.057813,0.344592,0.927892,0.754328,0.934534,1.756545,1,1.646898,0.002477,0.230109,1.305056,1.812806,1.823449,B
162,C,C,C,C,0.000293,0.014264,0.428344,0.399616,0.624675,0.577271,0.681421,1.582274,1,1.712575,0.007425,0.628151,0.91331,1.472558,1.856288,B


In [51]:
submit = high1.copy()
submit['class'] = total.target
submit

Unnamed: 0,id,class
0,TEST_000,A
1,TEST_001,B
2,TEST_002,C
3,TEST_003,B
4,TEST_004,A
...,...,...
170,TEST_170,B
171,TEST_171,C
172,TEST_172,C
173,TEST_173,B


In [None]:
submit.to_csv("submit.csv", index=False)

In [54]:
df1 = pd.read_csv("./data/submit_0.99078.csv")
df2 = pd.read_csv("./data/submit_0.99078_2.csv")
df3 = pd.read_csv("./data/submit_0.99078_3.csv")
df4 = pd.read_csv("./data/submit_0.99078_4.csv")

In [55]:
# submit 기준
display(submit[submit['class'] != df1['class']],submit[submit['class'] != df2['class']],submit[submit['class'] != df3['class']],submit[submit['class'] != df4['class']]) 

Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
126,TEST_126,C


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
126,TEST_126,C


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
126,TEST_126,C
168,TEST_168,B


In [84]:
total['answer'] = total_index
total.iloc[[3, 5, 12, 119, 168], [0,1,2,3,-4, -3, -2, -1]]

Unnamed: 0,high1,high2,high3,high4,a_prob,b_prob,c_prob,answer
3,C,C,C,C,0.059954,1.81694,0.610793,1
5,C,C,C,C,-0.00088,1.602311,1.031356,1
12,B,C,C,C,0.006151,0.867814,0.338562,1
119,C,C,C,C,0.000587,1.078331,0.992099,1
168,B,B,B,C,-0.001244,1.410336,0.296742,1


In [85]:
total[(total.b_prob > 1.0) & (total.c_prob > 0.5)]

Unnamed: 0,high1,high2,high3,high4,pred1,pred2,pred3,pred4,pred5,pred6,a_prob,b_prob,c_prob,answer
3,C,C,C,C,0.000703,0.059251,0.989858,0.827082,0.257562,0.35323,0.059954,1.81694,0.610793,1
5,C,C,C,C,0.000287,-0.001167,0.975314,0.626997,0.490775,0.540581,-0.00088,1.602311,1.031356,1
35,B,B,B,B,0.000482,0.001193,0.8119,0.645094,0.223033,0.364383,0.001676,1.456993,0.587417,1
119,C,C,C,C,0.000229,0.000358,0.413266,0.665065,0.578031,0.414068,0.000587,1.078331,0.992099,1
126,B,C,B,B,0.000968,0.049571,0.669163,0.379141,0.249582,0.501944,0.050539,1.048304,0.751527,1


In [None]:
total_index2 = np.where(a_prob > 1.0, 0, np.where(c_prob > 0.6, 2, 

In [None]:
# submit 기준
display(submit[submit['class'] != df1['class']],submit[submit['class'] != df2['class']],submit[submit['class'] != df3['class']],submit[submit['class'] != df4['class']]) 

In [79]:
sum(high1['class'] == submit['class'])

172

In [63]:
submit['class'].value_counts()

B    85
A    51
C    39
Name: class, dtype: int64

In [65]:
high1['class'].value_counts()

B    84
A    51
C    40
Name: class, dtype: int64

In [52]:
submit.to_csv("submit.csv", index=False)

In [68]:
high1 = pd.read_csv("./data/submit_0.99078.csv")
high2 = pd.read_csv("./data/submit_0.99078_2.csv")
high3 = pd.read_csv("./data/submit_0.99078_3.csv")
high4 = pd.read_csv("./data/submit_0.99078_4.csv")

middle1 = pd.read_csv("./data/submit_0.98142.csv")

low1 = pd.read_csv("./submit_0.9622.csv")
low2 = pd.read_csv("./submit_0.9622_2.csv")
low3 = pd.read_csv("./submit_0.9719.csv")

In [70]:
targets = [high1, high2, high3, high4, middle1, low1, low2, low3]

idx_list = []
for i in range(len(targets)-1) :
    target = targets[i]
    for j in range(len(targets[i+1:])) :
        diff_idx = target[target['class'] != targets[j]['class']].index.tolist()
        idx_list += diff_idx
        
idx_list = sorted(list(set(idx_list)))
idx_list

[3, 5, 12, 119, 126, 168]

In [72]:
idx_list = sorted(idx_list+[162])

In [73]:
display(high1.iloc[idx_list],high2.iloc[idx_list],high3.iloc[idx_list],high4.iloc[idx_list],middle1.iloc[idx_list],low1.iloc[idx_list],low2.iloc[idx_list],low3.iloc[idx_list])

Unnamed: 0,id,class
3,TEST_003,C
5,TEST_005,C
12,TEST_012,B
119,TEST_119,C
126,TEST_126,B
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,C
5,TEST_005,C
12,TEST_012,C
119,TEST_119,C
126,TEST_126,C
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,C
5,TEST_005,C
12,TEST_012,C
119,TEST_119,C
126,TEST_126,B
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,C
5,TEST_005,C
12,TEST_012,C
119,TEST_119,C
126,TEST_126,B
162,TEST_162,C
168,TEST_168,C


Unnamed: 0,id,class
3,TEST_003,C
5,TEST_005,B
12,TEST_012,B
119,TEST_119,C
126,TEST_126,C
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,A
119,TEST_119,B
126,TEST_126,B
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
119,TEST_119,B
126,TEST_126,B
162,TEST_162,C
168,TEST_168,B


Unnamed: 0,id,class
3,TEST_003,B
5,TEST_005,B
12,TEST_012,B
119,TEST_119,C
126,TEST_126,C
162,TEST_162,C
168,TEST_168,B


In [67]:
display(low3[low3['class'] != low1['class']], low1[low3['class'] != low1['class']])

Unnamed: 0,id,class
12,TEST_012,B
119,TEST_119,C
126,TEST_126,C


Unnamed: 0,id,class
12,TEST_012,A
119,TEST_119,B
126,TEST_126,B


In [62]:
display(high3[high3['class'] != high1['class']], high1[high3['class'] != high1['class']])

Unnamed: 0,id,class
12,TEST_012,C


Unnamed: 0,id,class
12,TEST_012,B


In [63]:
display(high3[high3['class'] != high2['class']], high2[high3['class'] != high2['class']])

Unnamed: 0,id,class
126,TEST_126,B


Unnamed: 0,id,class
126,TEST_126,C
