In [2]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
import cv2
import torch.utils.data as data
from glob import glob
from random import shuffle
from PIL import Image
import random
from tqdm import tqdm
from skimage.io import imread
import random
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

In [3]:
map_train = {}
map_val = {}
with open('./data_cls/5fold/train_comb.txt', 'r') as f:
    lines = f.readlines()
    for line in lines:
        img_path = line.strip().split(' ')[0]
        label = float(line.strip().split(' ')[-1])
        map_train[img_path] = label
        
with open('./data_cls/val_0515.txt', 'r') as f:
    lines = f.readlines()
    for line in lines:
        img_path = line.strip().split(' ')[0]
        label = float(line.strip().split(' ')[-1])
        map_val[img_path] = label

In [4]:
map_train

{'/ssd2/yuyue/TCT_data/20200515_data/train/neg/500_data/random_cut/1142edfdfd74152bab877d5d392be88d50e299ff_000260.npy': 0.0452025952935,
 '/ssd2/yuyue/TCT_data/20200515_data/train/hard_224_neg/YFY_data/6_cut/3982599c0a717e4d377ac1e649b4d263473afb77_000056_0.npy': 0.0982116448879,
 '/ssd2/yuyue/TCT_data/20200515_data/train/pos/500_data/6_cut/001282923c7ba6eadcf8730936f356fb586278b0_000737_5.npy': 0.945089263916,
 '/ssd2/yuyue/TCT_data/20200515_data/train/neg/500_data/random_cut/16bf184ec1a68cbc07cec0a6e46b23cf638a045d_000787.npy': 0.0454174819589,
 '/ssd2/yuyue/TCT_data/20200515_data/train/neg/270_data/random_cut/5354c7fa4ffd3990fc3881de40172af6e293c239_000338.npy': 0.0526541057229,
 '/ssd2/yuyue/TCT_data/20200515_data/train/asc/CH_data/random_cut/b6f5671bb42c8b1d87c2222c187b9edfa789e2fb_000626_2.npy': 0.592601820529,
 '/ssd2/yuyue/TCT_data/20200515_data/train/neg/270_data/6_cut/16798e2f1843fbdf47d11966c512eb0c3513ed31_001299.npy': 0.0495049658418,
 '/ssd2/yuyue/TCT_data/20200515_data/

In [5]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),   # horizontal flip
        transforms.RandomVerticalFlip(),   # vertival flip
        #transforms.ColorJitter([0.8,1.3],0.3,0.3,0.3),
#         ImageNetPolicy(),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
    ])
}

def hsv_transform(img, hue_delta, sat_mult, val_mult):
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float)
        img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180
        img_hsv[:, :, 1] *= sat_mult
        img_hsv[:, :, 2] *= val_mult
        img_hsv[img_hsv > 255] = 255
        return cv2.cvtColor(np.round(img_hsv).astype(np.uint8), cv2.COLOR_HSV2BGR)
    
def random_hsv_transform(img, hue_vari, sat_vari, val_vari):
        hue_delta = np.random.randint(-hue_vari, hue_vari)
        sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari)
        val_mult = 1 + np.random.uniform(-val_vari, val_vari)
        return hsv_transform(img, hue_delta, sat_mult, val_mult)

In [6]:
class MyDataset(data.Dataset): #继承Dataset
    def __init__(self, folder_name, hospitals, name,transform=None,train=True):
        self.hospitals = hospitals
        self.folder_name = folder_name
        self.name = name
        path = []
        temp_path = []
        #print (len(glob(self.folder_name[0] + '/*/*/*/')))
        for folder in glob(self.folder_name[0] + '/*/*/*/'):
            #print (folder)
            for j in self.hospitals:
                #print (os.listdir(folder))
                if j not in os.listdir(folder):
                    continue
                else:
                    temp_path.extend(glob(folder + j + '/*.npy'))
        for j in temp_path:
            if 'ASC-US' in j or 'ASC-H' in j:
                label = 1
            elif 'pos' in j:
                label = 2
            elif 'neg' in j:
                label = 0
            path.append((j,label))
                
#         torch.save(path,"/hdd/sd2/data/TCT/densenet/model/0611_more_TRdata_224/path.pth")
        
        self.path = path
        self.transform = transform
        self.train = train

        print(len(path),self.name)
        
    def __getitem__(self, index):
        img_path,target = self.path[index] 
        if target == 0:
            target = np.array((0.95,0.05)).astype(np.float32)
        elif target == 1:
            target = np.array((0.3,0.7)).astype(np.float32)
        elif target == 2:
            target = np.array((0.05,0.95)).astype(np.float32)

        else:
            print("one-hot error!")
        #target = np.array((1-target,target)).astype(np.float32)
        ori_img = np.load(img_path) 
#         if  sample.shape != (544,544,3):
#             print(img_path)
#         sample = (sample*255).astype(np.uint8)
        ori_img = random_hsv_transform(ori_img, 10, 0.1, 0.1)
        ori_img = cv2.resize(ori_img,(224,224))
        image = np.zeros_like(ori_img)
        sample = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
        image[..., 0] = sample
        image[..., 1] = sample
        image[..., 2] = sample
        sample = Image.fromarray(image) # array to image)
        if self.transform is not None:
            sample = self.transform(sample)

        if self.train:
            return sample, target, img_path 
        else:
            return sample, target, img_path
    
    def __len__(self):
        return len(self.path)

In [7]:
train_dirs =["/ssd2/yuyue/TCT_data/20200701_data/train/"]
val_dirs = ["/ssd2/yuyue/TCT_data/20200701_data/val/"]
train_hospital = ['QL_0327_data', 'QL_1025_data', 'QL_270_data', 'QL_500_data']
val_hospital = ['YFY_1_data', 'YFY_2_data']

In [8]:
train_dataset = MyDataset(train_dirs, train_hospital, "train",transform=data_transforms["train"])
val_dataset = MyDataset(val_dirs, val_hospital, "val",transform=data_transforms["val"])
# test_dataset = MyDataset(test_dirs,"test",transform=data_transforms["test"])

image_datasets = {"train":train_dataset, "val":val_dataset}
dataloaders = {"train": torch.utils.data.DataLoader(image_datasets["train"], batch_size=64,
                                             shuffle=True, num_workers=4),
              "val":torch.utils.data.DataLoader(image_datasets["val"], batch_size=16,
                                             shuffle=True, num_workers=4)}
dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "val"]}

use_gpu = torch.cuda.is_available()
print(use_gpu)

81317 train
15970 val
True


In [9]:
def save_model(model, epoch):
    save_dir = "/hdd/sd5/tlc/TCT/Model_pth/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    state_dict = model.state_dict()
    pth = os.path.join(save_dir, "resnet18_1channel.pth")
    torch.save(state_dict, pth)

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
#     model = DataParallel(model)
   # best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10.0
    best_epoch = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ["train",'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
#             count  = 0
            for data in dataloaders[phase]:
                
                # get the inputs
                inputs, labels ,paths = data
#                 print(labels[0].shape)
#                 print(labels)
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                
                # forward
                outputs,_ = model(inputs)
#                 outputs = nn.Softmax(dim=-1)(outputs)
                
    
                temp_outputs = torch.tensor([[0.5,0.5]]).cuda()
                temp_labels = torch.tensor([[0.5,0.5]]).cuda()
            
                temp_outputs_2 = torch.tensor([[0.5,0.5]]).cuda()
                temp_labels_2 = torch.tensor([[0.5,0.5]]).cuda()
            
                temp_outputs = temp_outputs.cuda()
                temp_labels = temp_labels.cuda()
                
                
                for idx in range(len(labels)):
                    if labels[idx][1] == 0.7 and outputs[idx][1] > 0.7:
                        temp_outputs_2 = torch.cat((temp_outputs_2,outputs[idx].unsqueeze(0)),0)
                        temp_labels_2 = torch.cat((temp_labels_2,labels[idx].unsqueeze(0)),0)
                    else:
                        temp_outputs = torch.cat((temp_outputs,outputs[idx].unsqueeze(0)),0)
                        temp_labels = torch.cat((temp_labels,labels[idx].unsqueeze(0)),0)
                
                
                
                _, preds = torch.max(outputs.data, 1) # pred值为output中最大值的位置（0是neg,1是pos）
                _, gts = torch.max(labels.data, 1)
                
#                 print(temp_outputs.shape)
#                 print(temp_labels)
                loss = criterion(outputs, labels) + 0.1*criterion(temp_outputs_2, temp_labels_2)
#                 loss = criterion(outputs,labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()* inputs.size(0)
                running_corrects += torch.sum(preds == gts)


            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_epoch = epoch
                save_model(model, epoch)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_loss))
    print('Best val epoch: {:4f}'.format(best_epoch))

    # load best model weights
#     model.load_state_dict(best_model_wts)
    return model

In [10]:
import resnet

model_ft = resnet.resnet18(pretrained =True)
model_ft.fc = nn.Linear(model_ft.fc.in_features, 2)


# model_ft.load_state_dict(torch.load("/hdd/sd2/data/TCT/densenet/model/0604_TRdata_224/resnet18_asc0.8_pos0.95_0.5negweight_resize224_hard_neg_no_aug_2.pth"))
model_ft.cuda()    
if use_gpu:
    model_ft = model_ft.cuda()
#weight=torch.Tensor([[1,1]]).cuda()
# criterion = FocalLoss() 
w = torch.Tensor([0.5,0.5]).cuda()
criterion = nn.BCELoss(w)
# Observe that all parameters are being optimized
# do not forget to change learning rate
# optimizer_ft = optim.SGD(model_ft.parameters(), lr=1e-4, momentum=0.9,weight_decay=1e-4)
optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, model_ft.parameters()), lr=1e-4,weight_decay=1e-4)
# Decay LR by a factor of 0.1 every 5 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.5)
# exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, T_max=5, eta_min=1e-6, last_epoch=-1)

In [11]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------




train Loss: 0.2871 Acc: 0.7833
val Loss: 0.3318 Acc: 0.7245
Epoch 1/19
----------
train Loss: 0.2531 Acc: 0.8511
val Loss: 0.3740 Acc: 0.6597
Epoch 2/19
----------
train Loss: 0.2296 Acc: 0.8929
val Loss: 0.3693 Acc: 0.6683
Epoch 3/19
----------
train Loss: 0.2202 Acc: 0.9092
val Loss: 0.3198 Acc: 0.7532
Epoch 4/19
----------
train Loss: 0.2137 Acc: 0.9207
val Loss: 0.4939 Acc: 0.5220
Epoch 5/19
----------
train Loss: 0.2024 Acc: 0.9386
val Loss: 0.3896 Acc: 0.6633
Epoch 6/19
----------
train Loss: 0.1977 Acc: 0.9467
val Loss: 0.3538 Acc: 0.7035
Epoch 7/19
----------
train Loss: 0.1944 Acc: 0.9517
val Loss: 0.4324 Acc: 0.5732
Epoch 8/19
----------
train Loss: 0.1893 Acc: 0.9603
val Loss: 0.3930 Acc: 0.6604
Epoch 9/19
----------
train Loss: 0.1868 Acc: 0.9627
val Loss: 0.4360 Acc: 0.5615
Epoch 10/19
----------
train Loss: 0.1852 Acc: 0.9658
val Loss: 0.4067 Acc: 0.6312
Epoch 11/19
----------
train Loss: 0.1825 Acc: 0.9695
val Loss: 0.4141 Acc: 0.6082
Epoch 12/19
----------
train Loss: 0

KeyboardInterrupt: 