In [1]:
import argparse
import random
import numpy as np
import os
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
from torchvision import models
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
import glob
import librosa
import torchaudio
import torchaudio.transforms as AT
import matplotlib.pyplot as plt
import json

from covidxdataset import COVIDxDataset, COVIDxDataset2
from audiodataset import CoswaraDataset, ConcatDataset, CoswaraDataset2, CoswaraDataset3, ConcatDataset_pair
import util as util
from util import Mel2Samp
from train import train, validation, mm_train, mm_pair_train, mm_pair_valid
from model import transfer_resNet, ResNet54, ResNet22, ResNet38, resnet50, MMNet, CLS

# Enumerate GPUs in PCI bus order and make only device index 1 visible to
# this process. Set here, before any CUDA call is made in later cells.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [2]:
def seed_everything(seed_value):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and,
    when available, all CUDA devices), and configures cuDNN for deterministic
    behaviour.

    Args:
        seed_value: Integer seed applied to all generators.

    Notes:
        * ``PYTHONHASHSEED`` is read at interpreter start-up, so setting it
          here only influences processes launched afterwards, not the hashing
          of the already-running kernel.
        * ``cudnn.benchmark`` must be ``False`` for reproducible runs: the
          benchmark autotuner may select different convolution algorithms
          from run to run, which defeats ``cudnn.deterministic = True``.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)

    # These flags are plain settings that have no effect without cuDNN, so
    # apply them unconditionally to keep CPU and GPU runs configured alike.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [2]:
# Audio branch: project-defined transfer_resNet with 2 outputs.
# (Alternative backbone kept for reference: ResNet22(2).)
a_model = transfer_resNet(2)

# Image branch: project-defined resnet50 built with its default arguments,
# then given a (1, 1) adaptive average pool and a fresh 2-way linear head
# sized from the original head's input width.
i_model = resnet50()
i_model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
num_ftrs = i_model.fc.in_features
i_model.fc = nn.Linear(in_features=num_ftrs, out_features=2)


In [3]:
# Restore the previously saved weights for the audio and image branches.
# map_location='cpu' makes the checkpoints loadable on hosts where the GPU
# they were saved from is not visible; load_state_dict then copies the values
# into the models' existing parameters on whatever device those live on.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
a_model.load_state_dict(torch.load('model/save3/a_model_0.830_0.955', map_location='cpu'))
i_model.load_state_dict(torch.load('model/save3/i_model_0.830_0.955', map_location='cpu'))

<All keys matched successfully>

In [4]:
# Combine the two branch models (weights loaded in the previous cell) into the
# project-defined MMNet wrapper. How MMNet fuses them is not visible here.
mmnet = MMNet(a_model, i_model)

In [6]:
# Persist only the combined model's state_dict (not the module object) to
# ./model/mmnet1, relative to the notebook's working directory.
torch.save(mmnet.state_dict(), './model/mmnet1')

In [3]:
# Load the pre-extracted feature archives. Each archive stores inputs under
# 'x' and labels under 'y'. The context managers close the underlying .npz
# file handle once the arrays have been read, instead of leaving it open.
# NOTE(security): allow_pickle=True lets np.load execute pickled objects from
# the archive; keep it only for trusted, locally produced files.
with np.load('mels_full.npz', allow_pickle=True) as data:
    aX, ay = data['x'], data['y']
with np.load('images_tr_full.npz', allow_pickle=True) as data:
    iX_tr, y_tr = data['x'], data['y']
with np.load('images_test.npz', allow_pickle=True) as data:
    iX_te, y_te = data['x'], data['y']

In [4]:
# Paired audio/image training run: seeds all RNGs, splits the audio data,
# builds both branch models plus the CLS module, then trains for num_epochs
# while resampling the training pairs and checkpointing after each epoch.
seed = 20
seed_everything(seed)
# Stratified 80/20 split of the audio samples; the image data already comes
# as separate train/test archives (iX_tr / iX_te).
aX_tr, aX_te, ay_tr, ay_te = train_test_split(aX, ay, test_size=0.2, shuffle=True, stratify=ay, random_state=seed)

num_classes = 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# audio
a_model = transfer_resNet(num_classes)
# a_model = ResNet22(2)
a_model.to(device)
# image
# Built with pretrained=True, then given a (1, 1) adaptive pool and a new
# num_classes-way linear head.
i_model = resnet50(pretrained=True)
num_ftrs = i_model.fc.in_features
i_model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
i_model.fc = nn.Linear(num_ftrs, num_classes)
i_model.to(device)
# Project-defined CLS module; its role is not visible in this notebook.
cls = CLS()
cls.to(device)

batch_size = 80
train_params = {'batch_size': batch_size,
                'shuffle': True,
                'num_workers': 15}

test_params = {'batch_size': batch_size,
               'shuffle': False,
               'num_workers': 15}

# NOTE(review): both optimizers are built over the same parameter list with
# the same learning rate. How mm_pair_train uses each one is not visible
# here -- confirm that two Adam states over shared parameters is intended.
optimizer = torch.optim.Adam(list(i_model.parameters()) + list(a_model.parameters()) + list(cls.parameters()), lr=0.00005)
optimizer2 = torch.optim.Adam(list(i_model.parameters()) + list(a_model.parameters()) + list(cls.parameters()), lr=0.00005)
                    
train_dataset = ConcatDataset_pair(aX_tr, ay_tr, iX_tr, y_tr, mode='train')             
val_dataset = ConcatDataset_pair(aX_te, ay_te, iX_te, y_te, mode='test')
# test_dataset = ConcatDataset(cos_test_dataset, cx_test_dataset)

train_loader = DataLoader(train_dataset, **train_params)
test_loader = DataLoader(val_dataset, **test_params)

# print(model)
num_epochs = 35
# NOTE(review): best_pred_loss is never read anywhere in this cell.
best_pred_loss = 1000.0
# One plateau scheduler per optimizer; both are stepped on the validation loss.
lr_sch = ReduceLROnPlateau(optimizer, factor=0.5, patience=2, min_lr=1e-7, verbose=True)
lr_sch2 = ReduceLROnPlateau(optimizer2, factor=0.5, patience=2, min_lr=1e-7, verbose=True)
# lr_sch = ExponentialLR(optimizer, gamma=0.975)

best_acc = 0
for epoch in range(1, num_epochs + 1):
    mm_pair_train(device, batch_size, a_model, i_model, cls, train_loader, optimizer, optimizer2, epoch, None)
    val_metrics, confusion_matrix = mm_pair_valid(device, batch_size, a_model, i_model, cls, test_loader, epoch, None)
    
    # Resample the training pairs for the next epoch: shuffle each of the four
    # pair pools and take the first len(Training_pp) // 2 items from each.
    # Presumably nn/np/pn/pp are negative/positive label combinations of the
    # two modalities -- TODO confirm against ConcatDataset_pair.
    random.shuffle(train_dataset.Training_nn)
    random.shuffle(train_dataset.Training_np)
    random.shuffle(train_dataset.Training_pn)
    random.shuffle(train_dataset.Training_pp)
    len_pp = len(train_dataset.Training_pp)
    train_dataset.datas = train_dataset.Training_nn[:len_pp//2] + train_dataset.Training_pp[:len_pp//2] + train_dataset.Training_np[:len_pp//2] + train_dataset.Training_pn[:len_pp//2]
    
    # Checkpoint whenever validation accuracy exceeds the fixed 0.93 threshold.
    # NOTE(review): best_acc is assigned but never compared against, so every
    # epoch above the threshold saves, and both numeric fields of the file
    # name carry the same value.
    # NOTE(review): the path is absolute ('/model/pair1/...'), whereas other
    # cells use paths relative to the working directory ('model/save3/...',
    # './model/mmnet1') -- confirm this is the intended location.
    if val_metrics.avg('accuracy') > 0.93:
        best_acc = val_metrics.avg('accuracy')
        torch.save(a_model.state_dict(), '/model/pair1/a_model_' + str(best_acc)[:5] + '_' +str(val_metrics.avg('accuracy'))[:5])
        torch.save(i_model.state_dict(), '/model/pair1/i_model_' + str(best_acc)[:5] + '_' +str(val_metrics.avg('accuracy'))[:5])
        print('save!!')
                   
    # Halve the learning rate of each optimizer when validation loss plateaus.
    lr_sch.step(val_metrics.avg('loss'))
    lr_sch2.step(val_metrics.avg('loss'))
#     lr_sch.step()

train audio examples =  1886 [1753  133]
train image examples =  1886 [1753  133]
Class P :  3090698  N :  466298
test audio examples =  472 [439  33]
test image examples =  472 [439  33]
ImageEpoch: 1	Sample:    1/35376	Loss:1809.6594	Accuracy:0.56
AudioEpoch: 1	Sample:    1/35376	Loss:1809.6594	Accuracy:0.86
Training Image
 SUMMARY EPOCH: 1	Sample:35376/35376	Loss:180.6439	Accuracy:0.99

Training Audio
 SUMMARY EPOCH: 1	Sample:35376/35376	Loss:180.6439	Accuracy:0.94

A_Confusion Matrix
[[3.3177e+04 1.2000e+01]
 [2.1870e+03 0.0000e+00]]

Validation
 SUMMARY EPOCH: 1	Sample:  472/  472	Loss:0.2023	Accuracy:0.96

Confusion Matrix
[[439.   0.]
 [ 18.  15.]]
save!!
ImageEpoch: 2	Sample:    1/35376	Loss:129.5409	Accuracy:1.00
AudioEpoch: 2	Sample:    1/35376	Loss:129.5409	Accuracy:0.95
Training Image
 SUMMARY EPOCH: 2	Sample:35376/35376	Loss:111.7096	Accuracy:1.00

Training Audio
 SUMMARY EPOCH: 2	Sample:35376/35376	Loss:111.7096	Accuracy:0.93

A_Confusion Matrix
[[33072.     0.]
 [ 2304. 

In [None]:
# i_ce + (1 - alpha) * a_ce + alpha * csa 0.5 / 0.00005 / mel
# shuffle // 10 -> 6708 (random) @save3 pretrain x
train examples =  259 [130 129]
test examples =  65 [32 33]
Training Image
 SUMMARY EPOCH:32	Sample: 6708/ 6708	Loss:32.9175	Accuracy:1.00

Training Audio
 SUMMARY EPOCH:32	Sample: 6708/ 6708	Loss:32.9175	Accuracy:1.00

A_Confusion Matrix
[[3.374e+03 2.000e+00]
 [2.200e+01 3.310e+03]]

Image: Validation
 SUMMARY EPOCH:32	Sample: 1579/ 1579	Loss:0.1712	Accuracy:0.95

Confusion Matrix
[[1415.   64.]
 [  19.   81.]]
Audio: Validation
 SUMMARY EPOCH:32	Sample:   65/   65	Loss:0.3008	Accuracy:0.83

Confusion Matrix
[[28.  4.]
 [ 7. 26.]]

In [None]:
# i_ce + (1 - alpha) * a_ce + alpha * csa 0.5 / 0.00005 / mel
# shuffle // 10 -> 6708 (random) @save2 pretrain x
Image: Validation
 SUMMARY EPOCH: 8	Sample: 1579/ 1579	Loss:0.3632	Accuracy:0.88

Confusion Matrix
[[1305.  174.]
 [  13.   87.]]
Audio: Validation
 SUMMARY EPOCH: 8	Sample:   65/   65	Loss:0.1928	Accuracy:0.86

Confusion Matrix
[[27.  5.]
 [ 4. 29.]]
save!!

In [None]:
# i_ce + (1 - alpha) * a_ce + alpha * csa 0.7 / 0.00005 / mel
# shuffle // 10 -> 6708 (random) @save1
Image: Validation
 SUMMARY EPOCH:29	Sample: 1579/ 1579	Loss:0.3980	Accuracy:0.87

Confusion Matrix
[[1288.  191.]
 [  10.   90.]]
Audio: Validation
 SUMMARY EPOCH:29	Sample:   65/   65	Loss:0.5410	Accuracy:0.80

Confusion Matrix
[[26.  6.]
 [ 7. 26.]]
save!!
# i_ce + (1 - alpha) * a_ce + alpha * csa 0.5 / 0.00005 / mel
# shuffle // 10 -> 6708 (random)
Training Image
 SUMMARY EPOCH:29	Sample: 6708/ 6708	Loss:45.2970	Accuracy:1.00

Training Audio
 SUMMARY EPOCH:29	Sample: 6708/ 6708	Loss:45.2970	Accuracy:0.98

A_Confusion Matrix
[[3322.   64.]
 [ 102. 3220.]]

Image: Validation
 SUMMARY EPOCH:29	Sample: 1579/ 1579	Loss:0.2261	Accuracy:0.93

Confusion Matrix
[[1388.   91.]
 [  14.   86.]]
Audio: Validation
 SUMMARY EPOCH:29	Sample:   65/   65	Loss:0.5479	Accuracy:0.80

Confusion Matrix
[[26.  6.]
 [ 7. 26.]]
# i_ce + (1 - alpha) * a_ce + alpha * csa 0.5 / 0.00005 / mel
# shuffle // 10 -> 6708
Training Image
 SUMMARY EPOCH:18	Sample: 6708/ 6708	Loss:39.7886	Accuracy:1.00

Training Audio
 SUMMARY EPOCH:18	Sample: 6708/ 6708	Loss:39.7886	Accuracy:0.99

A_Confusion Matrix
[[3313.   30.]
 [  36. 3329.]]

Image: Validation
 SUMMARY EPOCH:18	Sample: 1579/ 1579	Loss:0.2895	Accuracy:0.90

Confusion Matrix
[[1338.  141.]
 [  11.   89.]]
Audio: Validation
 SUMMARY EPOCH:18	Sample:   65/   65	Loss:0.7218	Accuracy:0.80

Confusion Matrix
[[26.  6.]
 [ 7. 26.]]

In [None]:
# Run the single-model validation helper on the test loader.
# NOTE(review): `model` is not assigned in any visible cell (only a_model,
# i_model, cls and mmnet are), so this raises NameError on a fresh kernel.
# `epoch` is the loop variable left over from the training cell. This cell
# was never executed (In [None]) -- confirm which model it should evaluate.
test_metrics, confusion_matrix = validation(device, batch_size, num_classes, model, test_loader, epoch, None)