In [1]:
import argparse
import random
import numpy as np
import os
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
from torchvision import models
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
import glob
import librosa
import torchaudio
import torchaudio.transforms as AT
import matplotlib.pyplot as plt
import json

from covidxdataset import COVIDxDataset, COVIDxDataset2
from audiodataset import CoswaraDataset, ConcatDataset, CoswaraDataset2, CoswaraDataset3
import util as util
from util import Mel2Samp
from train import train, validation, mm_train
from model import transfer_resNet, ResNet54, ResNet22, ResNet38, resnet50, MMNet

# Pin GPU selection for this process: order devices by PCI bus id and
# make only the device with index "1" visible to CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [2]:
def seed_everything(seed_value):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and,
    when available, every CUDA device), exports the seed as ``PYTHONHASHSEED``
    and switches cuDNN to deterministic, non-autotuned kernels.

    Args:
        seed_value: Integer seed applied to all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # processes launched after this call, not the running kernel itself.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)

    # cuDNN autotuning (benchmark=True) may pick different convolution
    # algorithms from run to run, which defeats `deterministic`; it has to be
    # disabled for reproducible results. Setting these flags is harmless on
    # CPU-only hosts, so they are applied unconditionally.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [2]:
# Audio branch: two-class network produced by the project's transfer_resNet factory.
a_model = transfer_resNet(2)
# Alternative audio backbone kept for reference:
# a_model = ResNet22(2)

# Image branch: project resnet50 whose pooling layer and classifier head are
# swapped so that the network ends in a two-way linear layer.
i_model = resnet50()
i_model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
num_ftrs = i_model.fc.in_features
i_model.fc = nn.Linear(in_features=num_ftrs, out_features=2)



In [3]:
# Restore previously trained weights for the audio and image models.
# map_location='cpu' deserialises the tensors onto the CPU first, so the
# checkpoints can be loaded on hosts that lack the device they were saved
# from; load_state_dict then copies the values into the models' parameters.
a_model.load_state_dict(torch.load('model/save3/a_model_0.830_0.955', map_location='cpu'))
i_model.load_state_dict(torch.load('model/save3/i_model_0.830_0.955', map_location='cpu'))

<All keys matched successfully>

In [4]:
# Build the project's MMNet (imported from the local `model` module) from the
# audio model and the image model, in that argument order.
mmnet = MMNet(a_model, i_model)

In [6]:
# Persist only the MMNet parameters (its state_dict) to ./model/mmnet1.
torch.save(mmnet.state_dict(), './model/mmnet1')

In [11]:
from PIL import Image
from util import read_filepaths
def load_image(img_path, dim):
    """Load an image file as RGB and resize it.

    Args:
        img_path: Path of the image file to open.
        dim: Target size passed straight to ``Image.resize`` (a
            ``(width, height)`` tuple).

    Returns:
        The resized RGB PIL image.

    Raises:
        FileNotFoundError: If ``img_path`` does not exist.
    """
    if not os.path.exists(img_path):
        # Fail here with an explicit message: previously this branch only
        # printed and then fell through to Image.open, which raised anyway.
        raise FileNotFoundError("IMAGE DOES NOT EXIST {}".format(img_path))

    # convert() returns an independent, fully loaded image, so the underlying
    # file handle can be released as soon as the conversion is done.
    with Image.open(img_path) as source:
        image = source.convert('RGB')
    image = image.resize(dim)

    return image

In [51]:
# Locations of the COVIDx split listings and of the dataset root.
testfile = '/data/CovidX_dataset/test_split.txt'
trainfile = '/data/CovidX_dataset/train_split.txt'
dataset_path='/data/CovidX_dataset/'

# Read the test split, then load every listed file from <dataset_path>test/
# as a 224x224 RGB image and keep it as a NumPy array.
paths, labels = read_filepaths(testfile)
test_dir = dataset_path + 'test/'
imgs = []
for path in paths:
    image = load_image(test_dir + path, (224,224))
    imgs.append(np.array(image))

In [3]:
# Load the cached audio / image arrays. Each archive stores features under 'x'
# and labels under 'y'. The archives are opened with `with` so the underlying
# file handles are closed as soon as the arrays have been read.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it with archives produced by this project.
with np.load('audios_full.npz', allow_pickle=True) as data:
    aX, ay = data['x'], data['y']
with np.load('images_tr_full.npz', allow_pickle=True) as data:
    iX_tr, y_tr = data['x'], data['y']
with np.load('images_test.npz', allow_pickle=True) as data:
    iX_te, y_te = data['x'], data['y']

In [4]:
# Fix all RNG seeds, then hold out 20% of the audio samples (aX / ay, loaded
# from audios_full.npz in an earlier cell) as a stratified test split.
seed = 20
seed_everything(seed)
aX_tr, aX_te, ay_tr, ay_te = train_test_split(aX, ay, test_size=0.2, shuffle=True, stratify=ay, random_state=seed)

num_classes = 2
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Audio model: built by the project's transfer_resNet factory with num_classes outputs.
a_model = transfer_resNet(num_classes)
# a_model = ResNet22(2)
a_model.to(device)
# Image model: project resnet50 requested with pretrained=True; its pooling
# layer and final fc layer are replaced so it outputs num_classes logits.
i_model = resnet50(pretrained=True)
num_ftrs = i_model.fc.in_features
i_model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
i_model.fc = nn.Linear(num_ftrs, num_classes)
i_model.to(device)

# Keyword arguments forwarded to DataLoader: the training loader shuffles,
# the evaluation loaders do not.
batch_size = 96
train_params = {'batch_size': batch_size,
                'shuffle': True,
                'num_workers': 15}

test_params = {'batch_size': batch_size,
               'shuffle': False,
               'num_workers': 15}

# A single Adam optimizer updates the parameters of both models (image first,
# then audio) with the same learning rate.
optimizer = torch.optim.Adam(list(i_model.parameters()) + list(a_model.parameters()), lr=0.00005)

# Earlier dataset variants, kept commented out for reference.
# cos_train_dataset = CoswaraDataset(mode='train', n_classes=num_classes, segment_length=16000*4)
# cos_val_dataset = CoswaraDataset(mode='valid', n_classes=num_classes, segment_length=16000*4)
# cos_test_dataset = CoswaraDataset(mode='test', n_classes=num_classes, segment_length=16000*4)

# cos_train_dataset = CoswaraDataset2(x,y, mode='train', segment_length=16000*4)
# cos_test_dataset = CoswaraDataset2(x_test,y_test, mode='test', segment_length=16000*4)

# cos_train_dataset = CoswaraDataset3(aX_tr, ay_tr, mode='train')
# Audio evaluation set: the 20% split held out from the audio arrays.
cos_test_dataset = CoswaraDataset2(aX_te, ay_te, mode='test')
                                    
# cx_train_dataset = COVIDxDataset(mode='train', n_classes=num_classes, dim=(224, 224))
# cx_val_dataset = COVIDxDataset(mode='valid', n_classes=num_classes, dim=(224, 224))
# cx_test_dataset = COVIDxDataset(mode='test', n_classes=num_classes, dim=(224, 224))

# cx_train_dataset = COVIDxDataset2(iX_tr, y_tr, mode='train')
# Image evaluation set: the arrays loaded from images_test.npz.
cx_test_dataset = COVIDxDataset2(iX_te, y_te, mode='test')
    
# Joint training set built from the audio and image training arrays.
# NOTE: ConcatDataset here is the project class imported from `audiodataset`,
# not torch.utils.data.ConcatDataset.
train_dataset = ConcatDataset(aX_tr, ay_tr, iX_tr, y_tr, mode='train')
# val_dataset = ConcatDataset(cos_val_dataset, cx_val_dataset)
# test_dataset = ConcatDataset(cos_test_dataset, cx_test_dataset)

train_loader = DataLoader(train_dataset, **train_params)
# val_loader = DataLoader(val_dataset, **test_params)
###
# Separate evaluation loaders per modality: i_* is image, o_* is audio.
i_test_loader = DataLoader(cx_test_dataset, **test_params)
o_test_loader = DataLoader(cos_test_dataset, **test_params)

# print(model)
num_epochs = 35
# NOTE(review): best_pred_loss is not referenced again in the visible part of this cell.
best_pred_loss = 1000.0
# Halve the learning rate (factor=0.5) once the monitored value has stopped
# improving for more than `patience` scheduler steps, never going below min_lr.
lr_sch = ReduceLROnPlateau(optimizer, factor=0.5, patience=2, min_lr=1e-7, verbose=True)
# lr_sch = ExponentialLR(optimizer, gamma=0.975)

# Epoch loop: joint training, per-modality evaluation, re-sampling of the
# training items, threshold-based checkpointing and LR scheduling.
best_acc = 0
for epoch in range(1, num_epochs + 1):
    # mm_train (project `train` module) receives both models, the joint
    # training loader and the shared optimizer.
    mm_train(device, batch_size, a_model, i_model, train_loader, optimizer, epoch, None)
    # NOTE(review): both "validation" passes run on the test loaders, so the
    # checkpoint selection and LR schedule below are driven by test data.
    print('Image:', end=' ')
    i_val_metrics, confusion_matrix = validation(device, batch_size, num_classes, i_model, i_test_loader, epoch, None)
    print('Audio:', end=' ')
    val_metrics, confusion_matrix = validation(device, batch_size, num_classes, a_model, o_test_loader, epoch, None)
    
    
    # Re-sample the training items for the next epoch: shuffle the four lists
    # held by train_dataset and rebuild `datas` from the first len_pp//8
    # entries of each (the slice size is derived from Training_pp for all four).
    # Presumably nn/np/pn/pp are the negative/positive combinations of the two
    # modalities -- TODO confirm against audiodataset.ConcatDataset.
    random.shuffle(train_dataset.Training_nn)
    random.shuffle(train_dataset.Training_np)
    random.shuffle(train_dataset.Training_pn)
    random.shuffle(train_dataset.Training_pp)
    len_pp = len(train_dataset.Training_pp)
    train_dataset.datas = train_dataset.Training_nn[:len_pp//8] + train_dataset.Training_pp[:len_pp//8] + train_dataset.Training_np[:len_pp//8] + train_dataset.Training_pn[:len_pp//8]
    
    # Save both models whenever the audio accuracy exceeds the fixed 0.87 threshold.
    # NOTE(review): best_acc is overwritten without being compared to its
    # previous value, so a later, worse epoch above 0.87 is also saved -- confirm intended.
    # NOTE(review): the audio file name contains the audio accuracy twice,
    # while the image file name contains audio then image accuracy -- confirm intended.
    # NOTE(review): '/model/save1/' is an absolute path, unlike the relative
    # 'model/save3/' the checkpoints are loaded from earlier -- confirm intended.
    if val_metrics.avg('accuracy') > 0.87:
        best_acc = val_metrics.avg('accuracy')
        torch.save(a_model.state_dict(), '/model/save1/f2_a_model_' + str(best_acc)[:5] + '_' +str(val_metrics.avg('accuracy'))[:5])
        torch.save(i_model.state_dict(), '/model/save1/f2_i_model_' + str(best_acc)[:5] + '_' +str(i_val_metrics.avg('accuracy'))[:5])
        print('save!!')
                   
    # The plateau scheduler monitors the audio evaluation loss only.
    lr_sch.step(val_metrics.avg('loss'))
#     lr_sch.step()

test examples =  472 [439  33]
test examples =  1579 [1479  100]
train audio examples =  1886 [1753  133]
train image examples =  13957 [13440   517]
Class P :  23629081  N :  2693821
ImageEpoch: 1	Sample:    1/34380	Loss:1689.0861	Accuracy:0.35
AudioEpoch: 1	Sample:    1/34380	Loss:1689.0861	Accuracy:0.50
Training Image
 SUMMARY EPOCH: 1	Sample:34380/34380	Loss:175.5489	Accuracy:0.94

Training Audio
 SUMMARY EPOCH: 1	Sample:34380/34380	Loss:175.5489	Accuracy:0.50

A_Confusion Matrix
[[   97. 17093.]
 [   95. 17095.]]

Image: Validation
 SUMMARY EPOCH: 1	Sample: 1579/ 1579	Loss:0.0788	Accuracy:0.97

Confusion Matrix
[[1448.   31.]
 [  13.   87.]]
Audio: Validation
 SUMMARY EPOCH: 1	Sample:  472/  472	Loss:1.9774	Accuracy:0.07

Confusion Matrix
[[  0. 439.]
 [  0.  33.]]
ImageEpoch: 2	Sample:    1/34380	Loss:145.9898	Accuracy:0.97
AudioEpoch: 2	Sample:    1/34380	Loss:145.9898	Accuracy:0.46
Training Image
 SUMMARY EPOCH: 2	Sample:34380/34380	Loss:130.2781	Accuracy:0.98

Training Audio
 