In [1]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
import cv2
import torch.utils.data as data
from glob import glob
from random import shuffle
from PIL import Image
import random
from tqdm import tqdm
from skimage.io import imread
import random
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

In [2]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),   # horizontal flip
        transforms.RandomVerticalFlip(),   # vertival flip
        #transforms.ColorJitter([0.8,1.3],0.3,0.3,0.3),
#         ImageNetPolicy(),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
    ]) 
}

def hsv_transform(img, hue_delta, sat_mult, val_mult):
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float)
        img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180
        img_hsv[:, :, 1] *= sat_mult
        img_hsv[:, :, 2] *= val_mult
        img_hsv[img_hsv > 255] = 255
        return cv2.cvtColor(np.round(img_hsv).astype(np.uint8), cv2.COLOR_HSV2BGR)
    
def random_hsv_transform(img, hue_vari, sat_vari, val_vari):
        hue_delta = np.random.randint(-hue_vari, hue_vari)
        sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari)
        val_mult = 1 + np.random.uniform(-val_vari, val_vari)
        return hsv_transform(img, hue_delta, sat_mult, val_mult)

In [3]:
class MyDataset(data.Dataset): #继承Dataset
    def __init__(self, folder_name, name, transform=None, train=True):
        #self.hospitals = hospitals
        self.folder_name = folder_name
        self.name = name
        path = []    
        temp_path = []
        #print (len(glob(self.folder_name[0] + '/*/*/*/')))
        img_path = glob(self.folder_name[0] + '/*/*/*/*/*.npy')
        for j in img_path:
            if 'ASC-US' in j or 'ASC-H' in j:
                label = 1
            elif 'pos' in j:
                label = 2
            elif 'neg' in j:
                label = 0
            
            if 'CH' in j:
                hos = 0
            elif 'QL' in j:
                hos = 1
            elif 'YFY' in j:
                hos = 2
            elif 'TR' in j:
                hos =3
            path.append((j,label, hos))
                
#         torch.save(path,"/hdd/sd2/data/TCT/densenet/model/0611_more_TRdata_224/path.pth")
        
        self.path = path
        self.transform = transform
        self.train = train

        print(len(path),self.name)
        
    def __getitem__(self, index):
        img_path,target, hos = self.path[index] 
        if target == 0:
            target = np.array((0.95,0.05)).astype(np.float32)
        elif target == 1:
            target = np.array((0.3,0.7)).astype(np.float32)
        elif target == 2:
            target = np.array((0.05,0.95)).astype(np.float32)
        

        else:
            print("one-hot error!")
        #target = np.array((1-target,target)).astype(np.float32)
        ori_img = np.load(img_path)
        ori_img = random_hsv_transform(ori_img, 10, 0.1, 0.1)
        ori_img = cv2.resize(ori_img,(224,224))
        sample = Image.fromarray(ori_img) # array to image)
        if self.transform is not None:
            sample = self.transform(sample)

        if self.train:
            return sample, target, hos, img_path 
        else:
            return sample, target, hos, img_path
    
    def __len__(self):
        return len(self.path)

In [4]:
train_dirs =["/ssd2/yuyue/TCT_data/20200701_data/train/"]
val_dirs = ["/ssd2/yuyue/TCT_data/20200701_data/val/"]

In [5]:
len(glob("/ssd2/yuyue/TCT_data/20200701_data/train/*/*/*/*/*.npy"))

174569

In [6]:
train_dataset = MyDataset(train_dirs, "train", transform=data_transforms["train"])
val_dataset = MyDataset(val_dirs, "val", transform=data_transforms["val"])
# test_dataset = MyDataset(test_dirs,"test",transform=data_transforms["test"])

image_datasets = {"train":train_dataset, "val":val_dataset}
dataloaders = {"train": torch.utils.data.DataLoader(image_datasets["train"], batch_size=64,
                                             shuffle=True, num_workers=4),
              "val":torch.utils.data.DataLoader(image_datasets["val"], batch_size=16,
                                             shuffle=True, num_workers=4)}
dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "val"]}

use_gpu = torch.cuda.is_available()
print(use_gpu)

174569 train
34203 val
True


In [7]:
def save_model(model, epoch):
    save_dir = "/hdd/sd5/tlc/TCT/Model_pth/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    state_dict = model.state_dict()
    pth = os.path.join(save_dir, "DAN_network_weight.pth")
    torch.save(state_dict, pth)

def train_model(model, criterion_cls, criterion_domain, optimizer, scheduler, num_epochs=25):
    since = time.time()
#     model = DataParallel(model)
   # best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10.0
    best_epoch = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ["train",'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
#             count  = 0
            for data in dataloaders[phase]:
                
                # get the inputs
                inputs, labels , domains, paths = data
#                 print(labels[0].shape)
#                 print(labels)
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                    domains = Variable(domains.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                
                # forward
                outputs_cls, outputs_domain= model(inputs, 1)
#                 outputs = nn.Softmax(dim=-1)(outputs)
                
    
                temp_outputs = torch.tensor([[0.5,0.5]]).cuda()
                temp_labels = torch.tensor([[0.5,0.5]]).cuda()
            
                temp_outputs_2 = torch.tensor([[0.5,0.5]]).cuda()
                temp_labels_2 = torch.tensor([[0.5,0.5]]).cuda()
            
                temp_outputs = temp_outputs.cuda()
                temp_labels = temp_labels.cuda()
                
                
                for idx in range(len(labels)):
                    if labels[idx][1] == 0.7 and outputs_cls[idx][1] > 0.7:
                        temp_outputs_2 = torch.cat((temp_outputs_2,outputs_cls[idx].unsqueeze(0)),0)
                        temp_labels_2 = torch.cat((temp_labels_2,labels[idx].unsqueeze(0)),0)
                    else:
                        temp_outputs = torch.cat((temp_outputs,outputs_cls[idx].unsqueeze(0)),0)
                        temp_labels = torch.cat((temp_labels,labels[idx].unsqueeze(0)),0)
                
                
                
                _, preds = torch.max(outputs_cls.data, 1) # pred值为output中最大值的位置（0是neg,1是pos）
                _, gts = torch.max(labels.data, 1)
                
#                 print(temp_outputs.shape)
#                 print(temp_labels)
                loss = criterion_cls(outputs_cls, labels) + 0.1*criterion_cls(temp_outputs_2, temp_labels_2) + 1.0 * criterion_domain(outputs_domain, domains)
#                 loss = criterion(outputs,labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()* inputs.size(0)
                running_corrects += torch.sum(preds == gts)


            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            

            if phase == 'val' and epoch_loss < best_loss and epoch > 5:
                best_loss = epoch_loss
                best_epoch = epoch
                save_model(model, epoch)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_loss))
    print('Best val epoch: {:4f}'.format(best_epoch))

    # load best model weights
#     model.load_state_dict(best_model_wts)
    return model

In [8]:
import resnet

model_ft = resnet.resnet18(pretrained =True)
model_ft.fc = nn.Linear(model_ft.fc.in_features, 2)


# model_ft.load_state_dict(torch.load("/hdd/sd2/data/TCT/densenet/model/0604_TRdata_224/resnet18_asc0.8_pos0.95_0.5negweight_resize224_hard_neg_no_aug_2.pth"))
model_ft.cuda()    
if use_gpu:
    model_ft = model_ft.cuda()
#weight=torch.Tensor([[1,1]]).cuda()
# criterion = FocalLoss() 
w = torch.Tensor([0.5,0.5]).cuda()
criterion_cls = nn.BCELoss(w)
criterion_domain = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
# do not forget to change learning rate
# optimizer_ft = optim.SGD(model_ft.parameters(), lr=1e-4, momentum=0.9,weight_decay=1e-4)
optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, model_ft.parameters()), lr=1e-4,weight_decay=1e-4)
# Decay LR by a factor of 0.1 every 5 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.5)
# exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, T_max=5, eta_min=1e-6, last_epoch=-1)

In [9]:
model_ft = train_model(model_ft, criterion_cls, criterion_domain, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------




train Loss: 1.6705 Acc: 0.7755
val Loss: 1.5838 Acc: 0.8791
Epoch 1/19
----------
train Loss: 1.5249 Acc: 0.8549
val Loss: 1.5775 Acc: 0.9026
Epoch 2/19
----------
train Loss: 1.5014 Acc: 0.8993
val Loss: 1.5812 Acc: 0.9021
Epoch 3/19
----------
train Loss: 1.4920 Acc: 0.9156
val Loss: 1.6001 Acc: 0.8745
Epoch 4/19
----------
train Loss: 1.4866 Acc: 0.9261
val Loss: 1.5930 Acc: 0.8874
Epoch 5/19
----------
train Loss: 1.4775 Acc: 0.9408
val Loss: 1.5785 Acc: 0.9064
Epoch 6/19
----------
train Loss: 1.4740 Acc: 0.9469
val Loss: 1.5788 Acc: 0.9103
Epoch 7/19
----------
train Loss: 1.4720 Acc: 0.9502
val Loss: 1.5841 Acc: 0.8960
Epoch 8/19
----------
train Loss: 1.4672 Acc: 0.9583
val Loss: 1.5808 Acc: 0.9073
Epoch 9/19
----------
train Loss: 1.4654 Acc: 0.9615
val Loss: 1.5845 Acc: 0.9014
Epoch 10/19
----------
train Loss: 1.4638 Acc: 0.9639
val Loss: 1.5864 Acc: 0.9005
Epoch 11/19
----------
train Loss: 1.4614 Acc: 0.9679
val Loss: 1.5896 Acc: 0.8947
Epoch 12/19
----------
train Loss: 1