In [42]:
import torch
import catboost
from torch import nn
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from catboost import CatBoostClassifier
import gc
import numpy as np
from tqdm import tqdm
from torch.utils.data import TensorDataset,DataLoader
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from scipy.special import softmax
import warnings 
# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warning that later cells would otherwise raise.
warnings.filterwarnings('ignore')
# Run a garbage-collection pass; as the last expression of the cell its return value is displayed.
gc.collect()

46

In [57]:
def backbone(self, x):
    """Run ``x`` through the convolutional trunk of the wrapped network.

    Applies, in order, ``conv1``, ``bn1``, ``relu``, ``layer1``, ``layer2`` and
    ``layer3`` of ``self.module`` and returns the result. Nothing after
    ``layer3`` is applied here.

    Args:
        self: object exposing the network as ``self.module``.
        x: input handed to ``self.module.conv1``.

    Returns:
        The output of ``self.module.layer3``.
    """
    net = self.module
    for stage_name in ('conv1', 'bn1', 'relu', 'layer1', 'layer2', 'layer3'):
        x = getattr(net, stage_name)(x)
    return x

def preprocessing_Y(file_path, exclude_filenames=('train_01046',)):
    """Read the label CSV and build one-hot targets from its ``Label`` column.

    Args:
        file_path: path of a CSV that has at least ``Filename`` and ``Label``
            columns.
        exclude_filenames: iterable of ``Filename`` values whose rows are dropped
            before encoding. The default drops the single file that was
            previously hard-coded here.

    Returns:
        ``(Y_one_hot, Y)`` where ``Y_one_hot`` is a float tensor holding one
        one-hot row per remaining CSV row, and ``Y`` is the filtered DataFrame
        (its original index is kept, not reset).
    """
    Y = pd.read_csv(file_path)
    Y = Y[~Y.Filename.isin(list(exclude_filenames))]
    enc = OneHotEncoder().fit(Y[['Label']])
    Y_one_hot = enc.transform(Y[['Label']]).toarray()
    Y_one_hot = torch.FloatTensor(Y_one_hot)
    # The message text (including its spelling) is kept as-is so printed output stays unchanged.
    print('Y_ont_hot shape',Y_one_hot.shape)
    print('Y_df shape',Y.shape)
    return Y_one_hot,Y

def load_pretrain_senet(model_path, device_ids=(0, 1, 2, 3)):
    """Build the SE-ResNet20 variant used in this notebook and load its weights.

    Args:
        model_path: path of the checkpoint passed to ``torch.load``; its content
            is fed to ``load_state_dict`` of the ``DataParallel``-wrapped model.
        device_ids: GPU ids handed to ``torch.nn.DataParallel``. Defaults to the
            four ids that were previously hard-coded; pass ``None`` to let
            ``DataParallel`` choose.

    Returns:
        The ``DataParallel`` wrapper in eval mode, with an extra ``backbone``
        method bound to it and an extra ``fc`` attribute (see the note below).
    """
    model = torch.hub.load(
        'moskomule/senet.pytorch',
        'se_resnet20',
        num_classes=6)
    # Swap the stem for a 1-input-channel convolution.
    model.conv1 = nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    # Replace the classifier head with a small MLP ending in 6 outputs.
    model.fc = nn.Sequential(
    nn.Linear(64,64),
    nn.LeakyReLU(),
    nn.Dropout(0.5),  # original author's note: "three cobblers outdo one Zhuge Liang"
    nn.Linear(64,6))
    # Wrap for data-parallel execution BEFORE loading the weights.
    # Presumably the checkpoint was saved from a DataParallel model — confirm.
    model = torch.nn.DataParallel(
        model,
        device_ids=None if device_ids is None else list(device_ids))
    # Deserialize onto the CPU first so loading does not depend on the devices the
    # checkpoint was saved from; the caller moves the model afterwards.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()  # evaluation mode
    # NOTE(review): this attaches `fc` to the DataParallel wrapper, not to
    # `model.module`; the wrapper's forward output printed later in this notebook
    # is un-normalized, so this softmax head does not appear to be applied by
    # `model(x)`. Kept so `model.fc` stays available.
    model.fc =  nn.Sequential(model.module.fc,nn.Softmax(dim=1))
    # Expose the module-level `backbone` function as a bound method on the wrapper.
    model.backbone = backbone.__get__(model)
    return model

def get_senet_output(senet,data):
    """Call ``senet`` on ``data`` and return whatever that call produces."""
    output = senet(data)
    return output

def get_all_senet_output(data, model=None):
    """Run a model on ``data`` and return its output as a NumPy array.

    Args:
        data: input passed straight to the model call.
        model: callable to apply. When omitted, the notebook-level ``senet``
            is used, as before.

    Returns:
        ``numpy.ndarray`` holding the model output, detached and copied to CPU.
    """
    if model is None:
        model = senet
    # The result is detached anyway, so skip building the autograd graph.
    with torch.no_grad():
        y = model(data)
    return y.detach().cpu().numpy()

def get_X_numpy(X, batch_size=256, device='cuda:0'):
    """Turn inputs into softmax-normalized model outputs, batch by batch.

    Each batch is moved to ``device``, passed through
    ``get_all_senet_output`` and normalized with ``softmax`` along axis 1.

    Args:
        X: tensor of inputs; it is cast to ``float32`` and iterated along its
            first dimension.
        batch_size: number of rows per batch (default is the previous fixed value).
        device: device each batch is moved to (default is the previous fixed value).

    Returns:
        ``numpy.ndarray`` with the per-batch results stacked row-wise. For an
        empty ``X`` the previous placeholder ``np.array([[]])`` is returned.
    """
    data_iter = DataLoader(TensorDataset(X.to(torch.float32)), batch_size=batch_size)
    batch_probs = []
    # Inference only: no gradients are needed for the extracted features.
    with torch.no_grad():
        for (bx,) in tqdm(data_iter):
            y_hat = get_all_senet_output(bx.to(device))
            batch_probs.append(softmax(y_hat, axis=1))
    if not batch_probs:
        return np.array([[]])
    # Stack once at the end. The old `len(acc) == 1` test could not tell the empty
    # placeholder from a one-row result, and re-stacked the whole array every batch.
    return np.vstack(batch_probs)

def get_X_numpy_backbone(X, batch_size=64, device='cuda:0'):
    """Turn inputs into flattened ``senet.backbone`` feature vectors.

    Each batch is moved to ``device``, passed through the notebook-level
    ``senet.backbone`` and flattened to one row per sample.

    Args:
        X: tensor of inputs; it is cast to ``float32`` and iterated along its
            first dimension.
        batch_size: number of rows per batch (default is the previous fixed value).
        device: device each batch is moved to (default is the previous fixed value).

    Returns:
        ``numpy.ndarray`` with the per-batch feature rows stacked. For an empty
        ``X`` the previous placeholder ``np.array([[]])`` is returned.
    """
    data_iter = DataLoader(TensorDataset(X.to(torch.float32)), batch_size=batch_size)
    batch_features = []
    # Inference only: no gradients are needed for the extracted features.
    with torch.no_grad():
        for (bx,) in tqdm(data_iter):
            feats = senet.backbone(bx.to(device)).detach().cpu().numpy()
            # One row per sample. Same as the former reshape(-1, 64*20*40) for
            # (batch, 64, 20, 40) outputs, without fixing the feature-map size.
            batch_features.append(feats.reshape(len(feats), -1))
    if not batch_features:
        return np.array([[]])
    # Stack once at the end. The old `len(acc) == 1` test could not tell the empty
    # placeholder from a one-row result, and re-stacked the whole array every batch.
    return np.vstack(batch_features)

# 路徑

In [44]:
# CSV read by preprocessing_Y for the training labels.
Y_train_path = 'train/meta_train.csv'
# Checkpoint file of the pretrained network.
# NOTE(review): `senet` holds the checkpoint path here and is rebound to the loaded
# model in the following cell, so re-running that cell on its own passes a model
# where a path is expected — a separate name for the path would avoid this.
senet = 'senet20_2021-06-09-01-16-42_random_state529_validacc_0.9132610508757297用訓練資料1199當驗證集.pt'

# load k 個 senet

In [45]:
# Build the network from the checkpoint path currently held in `senet`,
# then move it to the first GPU and display its structure.
senet = load_pretrain_senet(senet)
senet = senet.to('cuda:0')
senet

Using cache found in /root/.cache/torch/hub/moskomule_senet.pytorch_master


DataParallel(
  (module): CifarSEResNet(
    (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (layer1): Sequential(
      (0): CifarSEBasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (se): SELayer(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (fc): Sequential(
            (0): Linear(in_features=16, out_features=1, bias=False)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=1, out_features=16, bias=False

# 訓練 和 測試資料

In [46]:
# Load the saved input tensors and the one-hot training labels.
X_train_all = torch.load('X_train_吳啟聖教的方法.pt')
X_test_all = torch.load('X_test_吳啟聖教的方法.pt')
Y_train_all, Y_train_df = preprocessing_Y(Y_train_path)
# Map every label id to the Remark text of one randomly sampled row with that label.
map_dict = {
    label: Y_train_df[Y_train_df.Label == label].sample(1)['Remark'].values[0]
    for label in Y_train_df.Label.unique()
}
map_dict

Y_ont_hot shape torch.Size([1199, 6])
Y_df shape (1199, 3)


{0: 'Barking',
 1: 'Howling',
 2: 'Crying',
 3: 'COSmoke',
 4: 'GlassBreaking',
 5: 'Cat'}

In [47]:
# Pseudo-labels for the test inputs, taken from the best-scoring submission so far.
pseudo_label_df = pd.read_csv('vote_V12_排行榜0.976667.csv')
# Every column after the first holds one class score; the arg-max is the hard label.
class_scores = pseudo_label_df.iloc[:, 1:].values
values = class_scores.argmax(axis=1)
# Use the number of score columns as the one-hot width. The former
# `np.max(values) + 1` yields too few columns whenever the last class is never
# the arg-max, which broke the write-back into the frame.
n_values = class_scores.shape[1]
# Keep the first 10000 rows only (presumably the rows matching X_test_all — confirm).
Y_test_all = torch.FloatTensor(np.eye(n_values)[values][:10000])
Y_test_all,Y_test_all.shape

(tensor([[0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 0.],
         ...,
         [0., 1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0.]]), torch.Size([10000, 6]))

In [48]:
from sklearn.model_selection import train_test_split
# Roles are swapped on purpose here: the pseudo-labelled test tensors are used for
# fitting, and the labelled training tensors are held out for validation.
X_train, y_train = X_test_all, Y_test_all
X_valid, y_valid = X_train_all, Y_train_all
# Shapes of the four tensors, in the order train X / train y / valid X / valid y.
for _split in (X_train, y_train, X_valid, y_valid):
    print(_split.shape)
# Class counts of the fitting targets, then of the validation targets.
for _targets in (y_train, y_valid):
    print(pd.DataFrame(_targets.argmax(axis=1)).value_counts())

torch.Size([10000, 1, 80, 157])
torch.Size([10000, 6])
torch.Size([1199, 1, 80, 157])
torch.Size([1199, 6])
5    3669
2    1884
1    1566
0    1424
3     924
4     533
dtype: int64
0    200
1    200
2    200
3    200
5    200
4    199
dtype: int64


In [53]:
from torch.utils.data import TensorDataset,DataLoader
# Pair inputs with their one-hot targets and wrap both splits in batched loaders.
# The spelling of `vaildset` / `vaild_iter` is kept because a later cell uses `vaild_iter`.
_loader_options = {'batch_size': 128, 'num_workers': 4}
trainset = TensorDataset(
    torch.FloatTensor(X_train.to(torch.float32)),
    torch.FloatTensor(y_train),
)
vaildset = TensorDataset(
    torch.FloatTensor(X_valid.to(torch.float32)),
    torch.FloatTensor(y_valid),
)
train_iter = DataLoader(trainset, **_loader_options)
vaild_iter = DataLoader(vaildset, **_loader_options)

In [50]:
device = 'cuda:0'
# Smoke test on the first batch only: print shapes and devices, the backbone
# feature-map size, and the raw model output for three samples.
_first_batch = next(iter(train_iter), None)
if _first_batch is not None:
    bx, by = _first_batch
    print(bx.shape, bx.device)  # batch,channel,freq,time
    print(by.shape, by.device)  # batch,n_class
    print(senet.backbone(bx.to(device)).size())
    print(senet(bx[:3]))

torch.Size([32, 1, 80, 157]) cpu
torch.Size([32, 6]) cpu
torch.Size([32, 64, 20, 40])
tensor([[ -8.6800,  -8.0282,  -9.5131,  -8.2114,  -4.1467,   4.0617],
        [ -9.1055, -10.6999,  -9.2309,  -8.1376,  -7.3477,   5.1262],
        [  2.5682,  -4.9359,  -6.0060,  -6.9322,  -5.8338,  -7.1740]],
       device='cuda:0', grad_fn=<GatherBackward>)


In [51]:
from sklearn.metrics import confusion_matrix,accuracy_score
def plot_confusion_matrix(model,data_iter,map_dict=map_dict):
    """Evaluate ``model`` on ``data_iter`` and return a confusion table and accuracy.

    Despite the name, nothing is plotted; the table is returned as a DataFrame.

    Args:
        model: callable applied to each input batch after it is moved to the
            notebook-level ``device``.
        data_iter: iterable of ``(inputs, one_hot_targets)`` batches.
        map_dict: mapping from label id to display name. Its keys fix the
            row/column order of the table and its values name the columns.

    Returns:
        ``(cm, acc)``. ``cm`` is a DataFrame whose rows are predicted classes and
        whose columns are true classes, because the predictions are passed as the
        first argument to ``confusion_matrix``. ``acc`` is the accuracy.
    """
    true_batches = []
    pred_batches = []
    # Inference only. Errors are no longer swallowed: a silently skipped batch
    # would distort both the table and the accuracy.
    with torch.no_grad():
        for bx,by in tqdm(data_iter):
            bx = bx.to(device)
            by = by.to(device)
            y_hat = model(bx)
            true_batches.append(by.argmax(axis=1).detach().cpu().numpy())
            pred_batches.append(y_hat.argmax(axis=1).detach().cpu().numpy())
    y_true = np.concatenate(true_batches) if true_batches else np.array([])
    y_pred = np.concatenate(pred_batches) if pred_batches else np.array([])
    # Passing the label ids explicitly keeps the table aligned with the column
    # names, even if a class is missing from the data or map_dict is not in
    # sorted label order.
    cm = pd.DataFrame(confusion_matrix(y_pred,y_true,labels=list(map_dict.keys())))
    cm.columns = list(map_dict.values())
    acc = accuracy_score(y_pred,y_true)
    return cm,acc

In [54]:
# Score the pretrained network on the loader built from the pseudo-labelled data.
model = senet.eval()
cm, acc = plot_confusion_matrix(model=model.to(device), data_iter=train_iter)
print(acc)
cm

100%|██████████| 79/79 [00:12<00:00,  6.46it/s]

0.8786





Unnamed: 0,Barking,Howling,Crying,COSmoke,GlassBreaking,Cat
0,1117,15,78,3,33,150
1,100,1436,89,2,3,49
2,50,49,1617,5,0,11
3,22,29,27,904,18,60
4,40,0,1,1,400,87
5,95,37,72,9,79,3312


In [55]:
# Same evaluation on the held-out loader.
cm, acc = plot_confusion_matrix(model=model.to(device), data_iter=vaild_iter)
print(acc)
cm

100%|██████████| 10/10 [00:03<00:00,  2.72it/s]

0.9132610508757297





Unnamed: 0,Barking,Howling,Crying,COSmoke,GlassBreaking,Cat
0,183,9,5,1,7,4
1,9,178,27,0,0,2
2,4,10,165,1,0,5
3,2,0,0,196,0,0
4,2,0,0,0,192,8
5,0,3,3,2,0,181


# 有辦法再提升驗證集的表現嗎?

# 接 LogisticRegression 修正senet的弱點

In [58]:
# Features: the network's softmax outputs for both splits (fitting split first).
X_train_np, X_valid_np = get_X_numpy(X_train), get_X_numpy(X_valid)
# Targets: class index of each one-hot row.
y_train_np = y_train.detach().numpy().argmax(axis=1)
y_valid_np = y_valid.detach().numpy().argmax(axis=1)
# Fit a logistic regression on top of those features and score it on the held-out split.
lg = LogisticRegression().fit(X_train_np, y_train_np)
y_true, y_pred = y_valid_np, lg.predict(X_valid_np)
print(accuracy_score(y_pred, y_true))
# Rows are predicted classes, columns are true classes (predictions are passed first).
cm = pd.DataFrame(confusion_matrix(y_pred, y_true))
cm.columns = list(map_dict.values())
cm

100%|██████████| 40/40 [00:04<00:00,  8.90it/s]
100%|██████████| 5/5 [00:00<00:00,  7.08it/s]


0.914095079232694


Unnamed: 0,Barking,Howling,Crying,COSmoke,GlassBreaking,Cat
0,184,9,5,2,7,4
1,8,177,25,0,0,2
2,4,11,167,1,0,5
3,2,0,0,195,0,0
4,2,0,0,0,191,7
5,0,3,3,2,1,182


# cma-es優化 LogisticRegression

In [64]:
import optuna
# Search per-class weights for the logistic regression with CMA-ES (ask/tell loop).
# The sampler is seeded so that re-running the notebook repeats the same search.
sampler = optuna.samplers.CmaEsSampler(seed=0)
study = optuna.create_study(sampler=sampler,direction='maximize')
search_iteration = 30
best_acc = 0
for epoch in tqdm(range(search_iteration)):
    trial = study.ask()
    # One weight in [0, 1] per class id.
    # NOTE(review): the integer class id is used as the parameter name so that
    # `study.best_params` can be passed directly as `class_weight` in the next
    # cell; confirm the installed Optuna version accepts non-string names.
    class_weight = {i: trial.suggest_uniform(i,0,1) for i in range(6)}
    lg = LogisticRegression(class_weight=class_weight)
    lg.fit(X_train_np,y_train_np)
    y_true = y_valid_np
    y_pred = lg.predict(X_valid_np)
    acc = accuracy_score(y_pred,y_true)
    best_acc = max(best_acc,acc)
    print(epoch,best_acc)
    # NOTE(review): the objective is accuracy on the same held-out split that is
    # reported afterwards, so the tuned score is optimistically biased.
    study.tell(trial,acc)

study.best_params

[32m[I 2021-06-09 01:32:03,965][0m A new study created in memory with name: no-name-0456b1f3-0b08-4e74-be6d-6556ae7aaf87[0m
  3%|▎         | 1/30 [00:01<00:46,  1.62s/it]

0 0.9115929941618015


  7%|▋         | 2/30 [00:03<00:51,  1.83s/it]

1 0.9124270225187656


 10%|█         | 3/30 [00:05<00:51,  1.91s/it]

2 0.9132610508757297


 13%|█▎        | 4/30 [00:07<00:49,  1.91s/it]

3 0.9132610508757297


 17%|█▋        | 5/30 [00:09<00:50,  2.01s/it]

4 0.914095079232694


 20%|██        | 6/30 [00:11<00:47,  1.97s/it]

5 0.914095079232694


 23%|██▎       | 7/30 [00:13<00:46,  2.02s/it]

6 0.914095079232694


 27%|██▋       | 8/30 [00:16<00:47,  2.14s/it]

7 0.9157631359466222


 30%|███       | 9/30 [00:17<00:42,  2.03s/it]

8 0.9157631359466222


 33%|███▎      | 10/30 [00:20<00:41,  2.08s/it]

9 0.9157631359466222


 37%|███▋      | 11/30 [00:22<00:39,  2.09s/it]

10 0.9157631359466222


 40%|████      | 12/30 [00:24<00:36,  2.01s/it]

11 0.9157631359466222


 43%|████▎     | 13/30 [00:26<00:34,  2.00s/it]

12 0.9157631359466222


 47%|████▋     | 14/30 [00:28<00:33,  2.12s/it]

13 0.9157631359466222


 50%|█████     | 15/30 [00:31<00:33,  2.27s/it]

14 0.9157631359466222


 53%|█████▎    | 16/30 [00:33<00:30,  2.19s/it]

15 0.9157631359466222


 57%|█████▋    | 17/30 [00:34<00:27,  2.10s/it]

16 0.9157631359466222


 60%|██████    | 18/30 [00:36<00:24,  2.04s/it]

17 0.9157631359466222


 63%|██████▎   | 19/30 [00:38<00:21,  2.00s/it]

18 0.9157631359466222


 67%|██████▋   | 20/30 [00:40<00:20,  2.03s/it]

19 0.9157631359466222


 70%|███████   | 21/30 [00:42<00:18,  2.08s/it]

20 0.9165971643035863


 73%|███████▎  | 22/30 [00:45<00:16,  2.06s/it]

21 0.9165971643035863


 77%|███████▋  | 23/30 [00:48<00:17,  2.49s/it]

22 0.9165971643035863


 80%|████████  | 24/30 [00:50<00:13,  2.28s/it]

23 0.9165971643035863


 83%|████████▎ | 25/30 [00:52<00:10,  2.20s/it]

24 0.9165971643035863


 87%|████████▋ | 26/30 [00:54<00:08,  2.09s/it]

25 0.9165971643035863


 90%|█████████ | 27/30 [00:56<00:06,  2.08s/it]

26 0.9165971643035863


 93%|█████████▎| 28/30 [00:58<00:04,  2.06s/it]

27 0.9165971643035863


 97%|█████████▋| 29/30 [01:00<00:02,  2.19s/it]

28 0.9165971643035863


100%|██████████| 30/30 [01:02<00:00,  2.07s/it]

29 0.9165971643035863





{0: 0.32075114426832485,
 1: 0.6841917833656804,
 2: 0.5465503416337142,
 3: 0.5631688623175395,
 4: 0.5543402324448308,
 5: 0.5645734633271056}

In [66]:
# Recompute the softmax features for both splits (fitting split first).
X_train_np, X_valid_np = get_X_numpy(X_train), get_X_numpy(X_valid)
# Targets: class index of each one-hot row.
y_train_np = y_train.detach().numpy().argmax(axis=1)
y_valid_np = y_valid.detach().numpy().argmax(axis=1)
# Refit the logistic regression with the best class weights found by the study.
lg = LogisticRegression(class_weight=study.best_params).fit(X_train_np, y_train_np)
y_true, y_pred = y_valid_np, lg.predict(X_valid_np)
acc = accuracy_score(y_pred, y_true)
print(acc)
# Rows are predicted classes, columns are true classes (predictions are passed first).
cm = pd.DataFrame(confusion_matrix(y_pred, y_true))
cm.columns = list(map_dict.values())
cm

100%|██████████| 40/40 [00:04<00:00,  8.84it/s]
100%|██████████| 5/5 [00:00<00:00, 10.52it/s]


0.9165971643035863


Unnamed: 0,Barking,Howling,Crying,COSmoke,GlassBreaking,Cat
0,182,8,4,1,6,2
1,8,178,26,0,0,2
2,5,11,167,1,0,5
3,2,0,0,196,0,0
4,2,0,0,0,192,7
5,1,3,3,2,1,184


# 套用private_test

In [67]:
# Extract softmax features for the data held in X_train and for X_private,
# then stack the two results row-wise.
# NOTE(review): X_private is not assigned anywhere in the visible notebook, and the
# recorded output of this cell is a NameError — it has to be loaded before this
# cell (and the cells that depend on all_test_data) can run.
X_public_np = get_X_numpy(X_train)
X_private_np = get_X_numpy(X_private)
all_test_data = np.vstack((X_public_np,X_private_np))

NameError: name 'X_private' is not defined

In [68]:
# Class probabilities from the fitted logistic regression for every stacked row,
# followed by the per-row sums as a quick check.
final_prob = lg.predict_proba(all_test_data)
np.sum(final_prob, axis=1)

NameError: name 'X_private_np' is not defined

In [69]:
# Write the predicted probabilities into every column after the first of the
# submission template, for the first thirty thousand rows.
_n_submit_rows = 30000
sample_submit = pd.read_csv('sample_submission.csv')
sample_submit.iloc[:_n_submit_rows, 1:] = final_prob
sample_submit

NameError: name 'final_prob' is not defined

In [73]:
# Sanity check: table size, and the per-row sum of the probability columns.
print(sample_submit.shape)
# Sum only the columns after the first (the ones filled with probabilities) instead
# of relying on pandas silently dropping the non-numeric first column, which newer
# pandas versions no longer do.
print(sample_submit.iloc[:, 1:].sum(axis=1))

(30000, 7)
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29995    1.0
29996    1.0
29997    1.0
29998    1.0
29999    1.0
Length: 30000, dtype: float64


In [74]:
# Display the accuracy most recently stored in `acc` (it is also embedded in the submission filename below).
acc

0.9165971643035863

In [75]:
#sample_submit.to_csv(f'submit_valid_acc_{acc}_senet_1199筆當驗證_接logistic做最後修正_預測private_test.csv',index=False)
#print('done')