In [None]:
import os
import time
import copy
import numpy as np
import pandas as pd
from PIL import Image
import torch,torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets,models,transforms
import torch.optim as optim
# from torchsummary import summary
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

In [None]:
# !pip install torchsummary

In [None]:
# Every split file is read as a headerless, space-separated table with six
# columns; only `filename` and `label` are kept, the four box-coordinate
# columns are dropped.
_SPLIT_COLUMNS = ['filename', 'label', 'xmin', 'ymin', 'xmax', 'ymax']
_BOX_COLUMNS = ['xmin', 'ymin', 'xmax', 'ymax']


def _read_split(txt_path):
    """Read one split file and return a frame with `filename` and `label` only."""
    frame = pd.read_csv(txt_path, sep=" ", header=None)
    frame.columns = _SPLIT_COLUMNS
    return frame.drop(columns=_BOX_COLUMNS)


# Training split
train_df = _read_split('../input/covidxct/train_COVIDx_CT-2A.txt')
# Validation split
val_df = _read_split('../input/covidxct/val_COVIDx_CT-2A.txt')

# Test split
test_df = _read_split('../input/covidxct/test_COVIDx_CT-2A.txt')

In [None]:
# Class codes in the split files: 0 = Normal, 1 = Pneumonia, 2 = COVID-19.
train_df.head()
# Number of training rows per class code (the value this cell displays).
train_df['label'].value_counts()

In [None]:
image_path = '../input/covidxct/2A_images/'  # directory that holds the image files


def _prefix_image_dir(frame):
    """Prepend `image_path` to every entry of `frame['filename']`, in place.

    Entries that already start with `image_path` are left alone, so running
    this cell more than once does not produce doubled paths.
    """
    frame['filename'] = [
        name if name.startswith(image_path) else image_path + name
        for name in frame['filename']
    ]


for _split_df in (train_df, val_df, test_df):
    _prefix_image_dir(_split_df)
train_df.head()

In [None]:
# Shuffle the row order of every split. A fixed seed makes the resulting order
# identical after each kernel restart, so later cells that index into the
# frames (sample previews, LBP plots) show the same images every run.
SHUFFLE_SEED = 42
train_df = shuffle(train_df, random_state=SHUFFLE_SEED)
val_df = shuffle(val_df, random_state=SHUFFLE_SEED)
test_df = shuffle(test_df, random_state=SHUFFLE_SEED)
train_df.head()

In [None]:
# Readable class names; the position in the list is the numeric class code.
class_names = ['Normal', 'Pneumonia', 'COVID-19']
labels = dict(enumerate(class_names))

# Add a `label_n` column with the class name of every row.
for _split_df in (train_df, val_df, test_df):
    _split_df['label_n'] = [labels[code] for code in _split_df['label']]
train_df.head()

In [None]:
# Print the per-class row counts of each split.
for _split_name, _split_df in (('train', train_df), ('validation', val_df), ('test', test_df)):
    print(f"Negative and positive values of {_split_name}: \n{_split_df['label_n'].value_counts()}")

In [None]:
# Renumber the shuffled rows 0..n-1. `drop=True` discards the old index instead
# of inserting it as an extra `index` column, which also keeps this cell safe
# to re-run (without it, repeated runs keep adding columns and eventually fail).
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [None]:
class CovidDataset(Dataset):
    """Dataset that yields `(image, label)` pairs described by a DataFrame.

    Args:
        dataset_df: frame with a `filename` column (image path) and a `label`
            column (class code).
        transform: optional callable applied to the opened PIL image.

    Rows are addressed by position (`.iloc`), not by index label, so the
    dataset works for frames whose index is not 0..n-1 (shuffled or filtered
    frames). An out-of-range `idx` raises `IndexError`, which is what Python's
    sequence protocol needs for `for img, label in dataset` to terminate.
    """

    def __init__(self, dataset_df, transform=None):
        self.dataset_df = dataset_df
        self.transform = transform

    def __len__(self):
        return len(self.dataset_df)

    def __getitem__(self, idx):
        image_name = self.dataset_df['filename'].iloc[idx]
        label = self.dataset_df['label'].iloc[idx]
        img = Image.open(image_name)

        if self.transform:
            img = self.transform(img)
        return img, label

In [None]:
# --- Image geometry ---
crop_size = (320, 350)    # size passed to transforms.CenterCrop
input_size = (224, 224)   # size passed to transforms.Resize
input_channel = 3         # output channel count passed to transforms.Grayscale

# --- Task / training settings ---
num_classes = 3           # Normal, Pneumonia, COVID-19
batch_size = 64
num_epochs = 20

In [None]:
# transform = {
#     'train':transforms.Compose([
#         transforms.CenterCrop(crop_size),
#         transforms.Resize(input_size),
#         transforms.RandomHorizontalFlip(p=0.5),
#         transforms.RandomVerticalFlip(p=0.5),
#         transforms.Grayscale(input_channel),
#         transforms.ToTensor(),
#         transforms.Normalize([0.6349431,0.6349431,0.6349431],[0.32605055,0.32605055,0.32605055])
#     ]),
#     'test':transforms.Compose([
#         transforms.CenterCrop(crop_size),
#         transforms.Resize(input_size),
#         transforms.Grayscale(input_channel),
#         transforms.ToTensor(),
#         transforms.Normalize([0.63507175,0.63507175,0.63507175],[0.3278614,0.3278614,0.3278614])
#     ])
# }
def _build_pipeline():
    """Return a fresh center-crop -> resize -> grayscale -> tensor pipeline."""
    return transforms.Compose([
        transforms.CenterCrop(crop_size),
        transforms.Resize(input_size),
        transforms.Grayscale(input_channel),
        transforms.ToTensor(),
    ])


# Random flips and Normalize are currently switched off, so the 'train' and
# 'test' pipelines consist of the same steps.
transform = {split: _build_pipeline() for split in ('train', 'test')}

In [None]:
dataset_names = ['train', 'val', 'test']
# Validation and test images both go through the 'test' pipeline.
image_transforms = {'train': transform['train'], 'val': transform['test'], 'test': transform['test']}

train_dataset = CovidDataset(train_df, transform=image_transforms['train'])
val_dataset = CovidDataset(val_df, transform=image_transforms['val'])
test_dataset = CovidDataset(test_df, transform=image_transforms['test'])

image_dataset = {'train': train_dataset, 'val': val_dataset, 'test': test_dataset}

# Only the training loader is shuffled: evaluation metrics do not depend on
# sample order, and a fixed order keeps validation/test passes repeatable.
dataloaders = {
    x: DataLoader(image_dataset[x], batch_size=batch_size, shuffle=(x == 'train'), num_workers=4)
    for x in dataset_names
}

dataset_sizes = {x: len(image_dataset[x]) for x in dataset_names}

# class_names = {}
# print(dataset_sizes)
# print(class_names)

In [None]:
# Preview one training sample: turn the image tensor back into a PIL image
# and draw it.
a = transforms.ToPILImage()(image_dataset['train'][2][0])
plt.imshow(a)

In [None]:
import cv2 as cv
from skimage import feature as skif
import numpy as np

#获取图像的lbp特征
def get_lbp_data(img, lbp_radius=1, lbp_point=8):
    """Compute the local-binary-pattern (LBP) texture map of an image and its histogram.

    Args:
        img: image accepted by `transforms.Grayscale` followed by
            `transforms.ToPILImage` (in this notebook: an image tensor taken
            from the dataset).
        lbp_radius: radius of the circle on which neighbours are sampled.
        lbp_point: number of neighbour points sampled around each pixel.

    Returns:
        `(hist, lbp)`: `hist` is the density-normalised histogram of the LBP
        codes with a fixed length of `2 ** lbp_point` bins (256 for the
        default 8 points); `lbp` is the 2-D array of LBP codes.

    The histogram used to have `lbp.max() + 1` bins, so its length changed
    from image to image. A fixed bin count makes the feature vectors of
    different images directly comparable; for an image whose largest code is
    below `2 ** lbp_point - 1` the extra trailing bins are simply zero.
    """
    gray = transforms.Grayscale(1)(img)
    # Go through PIL and hand skimage a plain 2-D array.
    gray = np.asarray(transforms.ToPILImage()(gray))
    # Methods offered by skimage (the feature dimension differs between them):
    #   'default'     : original LBP; gray-scale invariant, not rotation invariant
    #   'ror'         : gray-scale and rotation invariant extension of 'default'
    #   'uniform'     : uniform patterns with finer angular quantisation;
    #                   gray-scale and rotation invariant
    #   'nri_uniform' : non-rotation-invariant uniform patterns; gray-scale invariant only
    #   'var'         : rotation-invariant variance of local contrast; not gray-scale invariant
    lbp = skif.local_binary_pattern(gray, lbp_point, lbp_radius, 'default')
    # With 'default', each of the lbp_point neighbours contributes one bit.
    n_bins = 2 ** lbp_point
    hist, _ = np.histogram(lbp, density=True, bins=n_bins, range=(0, n_bins))
    return hist, lbp

# 3x3 grid of LBP maps for training samples 10..18, each titled with its label.
plt.figure(figsize=(10, 10))
for _panel, i in enumerate(range(10, 19), start=1):
    img, lab = image_dataset['train'][i]
    feature, lbp = get_lbp_data(img)  # histogram + LBP map of this sample
    plt.subplot(3, 3, _panel)
    plt.title(lab)
    plt.imshow(lbp, plt.cm.gray)
    plt.axis('off')
# Only the histogram of the last sample of the loop is printed.
print(feature)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
def show_tensor_img(tensor_img):
    """Draw an image tensor in a new matplotlib figure.

    The tensor is copied to the CPU first, so the caller's tensor is untouched.
    """
    pil_img = transforms.ToPILImage()(tensor_img.cpu().clone())
    plt.figure()
    plt.imshow(pil_img)
    plt.show()

def show_img(idx):
    """Display the image of training sample `idx` (the label is ignored)."""
    image, _ = train_dataset[idx]
    show_tensor_img(image)


# Show the first four training images.
for i in (0, 1, 2, 3):
    show_img(i)

In [None]:
# def get_mean_std(dataset, ratio=0.01): # 计算样本的均值与方差
#     """Get mean and std by sample ratio
#     """
#     dataloader = torch.utils.data.DataLoader(dataset, batch_size=int(len(dataset)*ratio),shuffle=True, num_workers=2)
#     train = iter(dataloader).next()[0]   # 一个batch的数据
# #     print(train)
#     mean = np.mean(train.numpy(), axis=(0,2,3))
#     std = np.std(train.numpy(), axis=(0,2,3))
#     return mean, std
# train_mean, train_std = get_mean_std(train_dataset)
# val_mean, val_std = get_mean_std(val_dataset)
# test_mean, test_std = get_mean_std(test_dataset)

# print(train_mean, train_std)
# print(val_mean, val_std)
# print(test_mean,test_std)

In [None]:
import itertools
# 绘制混淆矩阵
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print and plot a confusion matrix.

    Args:
        cm: square confusion matrix; rows are drawn on the 'True label' axis
            and columns on the 'Predicted label' axis. May be a torch tensor
            (on any device) or anything `np.asarray` can convert.
        classes: tick labels, one per row/column.
        normalize: True shows each row as fractions of its row total,
            False shows raw counts.
        title: figure title.
        cmap: matplotlib colormap used for the cells.
    """
    # Torch tensors are detached and moved to the CPU before conversion.
    if hasattr(cm, 'detach'):
        cm = cm.detach().cpu().numpy()
    else:
        cm = np.asarray(cm)

    if normalize:
        cm = cm.astype('float')
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class without any sample keeps an all-zero row instead of NaN.
        row_totals[row_totals == 0] = 1.0
        cm = cm / row_totals
        print("Normalized confusion matrix")
    else:
        cm = cm.astype('int')
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '{:.2f}' if normalize else '{}'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # imshow draws cm[i, j] at x=j (column), y=i (row); plt.text takes
        # (x, y), so the column index must come first.
        plt.text(j, i, fmt.format(cm[i, j]), horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so they are included in the layout.
    plt.tight_layout()
    plt.show()

# 生成混淆矩阵
def confusion_matrix(preds, labels, conf_matrix):
    """Accumulate one batch of predictions into `conf_matrix` and return it.

    NOTE: this definition replaces the `sklearn.metrics.confusion_matrix`
    imported at the top of the notebook.

    Args:
        preds: `(batch, num_classes)` score tensor; the predicted class is the
            argmax over dimension 1.
        labels: true class indices, one per sample.
        conf_matrix: running matrix, updated in place;
            `conf_matrix[t, p]` counts samples of true class `t` predicted as `p`.

    Returns:
        The same `conf_matrix` object.
    """
    pred_idx = torch.argmax(preds, 1)

    if not torch.is_tensor(conf_matrix):
        # Non-tensor accumulators keep the simple per-sample update.
        for p, t in zip(pred_idx, labels):
            conf_matrix[t, p] += 1
        return conf_matrix

    labels = torch.as_tensor(labels)
    # Same pairing rule as zip(): stop at the shorter of the two sequences.
    n = min(pred_idx.shape[0], labels.shape[0])
    target = conf_matrix.device
    rows = labels[:n].to(device=target, dtype=torch.long)
    cols = pred_idx[:n].to(device=target, dtype=torch.long)
    ones = torch.ones(n, dtype=conf_matrix.dtype, device=target)
    # accumulate=True adds up repeated (true, predicted) pairs of the batch in
    # a single call instead of one tensor-indexing operation per sample.
    conf_matrix.index_put_((rows, cols), ones, accumulate=True)
    return conf_matrix

def calculate_all_prediction(conf_matrix):
    '''
    Overall accuracy in percent: sum of the diagonal divided by the sum of
    all entries, rounded to two decimals.

    Accepts a numpy array or a torch tensor (on any device). Returns 0 for a
    matrix without any sample instead of raising ZeroDivisionError.
    '''
    if hasattr(conf_matrix, 'detach'):
        conf_matrix = conf_matrix.detach().cpu().numpy()
    conf_matrix = np.asarray(conf_matrix)
    total_sum = float(conf_matrix.sum())
    if total_sum == 0:
        return 0
    correct_sum = float(np.trace(conf_matrix))
    return round(100 * correct_sum / total_sum, 2)
 
def calculate_label_prediction(conf_matrix, labelidx):
    '''
    Precision of one class in percent: the diagonal entry of `labelidx`
    divided by the sum of column `labelidx`, rounded to two decimals.
    Returns 0 when the column sum is zero.
    '''
    column_total = conf_matrix.sum(axis=0)[labelidx]
    if column_total == 0:
        return 0
    hits = conf_matrix[labelidx][labelidx]
    return round(100 * float(hits) / float(column_total), 2)
 
def calculate_label_recall(conf_matrix, labelidx):
    '''
    Recall of one class in percent: the diagonal entry of `labelidx` divided
    by the sum of row `labelidx`, rounded to two decimals.
    Returns 0 when the row sum is zero.
    '''
    row_total = conf_matrix.sum(axis=1)[labelidx]
    if row_total == 0:
        return 0
    hits = conf_matrix[labelidx][labelidx]
    return round(100 * float(hits) / float(row_total), 2)
 
def calculate_f1(prediction, recall):
    """Harmonic mean of precision and recall, rounded to two decimals (0 when both are 0)."""
    denominator = prediction + recall
    return 0 if denominator == 0 else round(2 * prediction * recall / denominator, 2)


In [None]:
# Use the GPU when torch can see one, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# To continue from a saved model instead of building a new one:
# model = torch.load('./vgg16/vgg16_0.pth')   # or: model = model_all[0]

# VGG16 with torchvision's pretrained weights; every existing parameter is
# frozen, then the last classifier layer is replaced by a new (trainable)
# linear layer with `num_classes` outputs.
model = models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.classifier[6].in_features  # input width of the layer being replaced
model.classifier[6] = nn.Linear(num_ftrs, num_classes)
model = model.to(device)

# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

In [None]:
def train(model, epoch, num_epochs, criterion, optimizer):
    """Run one training epoch over `dataloaders['train']` and print progress.

    Reads the notebook globals `dataloaders` and `device`.

    Args:
        model: network to train (switched to train mode here).
        epoch: index of the current epoch, used only for logging.
        num_epochs: total number of epochs, used only for logging.
        criterion: loss function called as `criterion(outputs, labels)`.
        optimizer: optimizer stepped once per batch.
    """
    model.train()
    print('-' * 100)
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    running_loss = 0.0
    running_corrects = 0
    samples_seen = 0
    for idx, (inputs, labels) in enumerate(dataloaders['train']):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)  # mean loss over the batch (criterion default)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n_batch = inputs.size(0)
        batch_loss = loss.item()
        batch_correct = (preds == labels).sum().item()
        if idx % 10 == 0:
            # Divide by the real batch size: the last batch can be smaller
            # than `batch_size`, which would under-report its accuracy.
            print('train iteration:{},loss:{},acc:{}%'.format(idx, batch_loss, batch_correct / n_batch * 100))
        running_loss += batch_loss * n_batch
        running_corrects += batch_correct
        samples_seen += n_batch

    # Average over the samples actually processed; max() guards an empty loader.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator
    print('train_total Loss: {:.4f} Acc: {:.4f}%'.format(epoch_loss, epoch_acc * 100))

In [None]:
def test(model, epoch, num_epochs, criterion, optimizer, best_acc, best_model_wts=None):
    """Evaluate `model` on `dataloaders['val']`, print metrics and track the best weights.

    Reads the notebook globals `dataloaders`, `device`, `num_classes` and
    `class_names`.

    Args:
        model: network to evaluate (switched to eval mode here).
        epoch, num_epochs, optimizer: accepted for a uniform call signature; unused.
        criterion: loss function called as `criterion(outputs, labels)`.
        best_acc: best validation accuracy (fraction in [0, 1]) seen so far.
        best_model_wts: optional state dict that achieved `best_acc`. When it
            is given and this epoch does not beat `best_acc`, it is returned
            unchanged; when omitted, a snapshot of the current weights is returned.

    Returns:
        `(best_model_wts, best_acc, epoch_acc)`; `epoch_acc` is a 0-dim
        float64 tensor holding this epoch's accuracy as a fraction.
    """
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    samples_seen = 0
    conf_matrix = torch.zeros(num_classes, num_classes)  # rows: true class, columns: predicted class
    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            conf_matrix = confusion_matrix(outputs, labels, conf_matrix)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()
            samples_seen += inputs.size(0)

    # Raw counts are plotted (normalize=False), so the title must not say "Normalized".
    plot_confusion_matrix(conf_matrix, classes=class_names, normalize=False, title='Confusion matrix')

    # Average over the samples actually evaluated. The loop runs over the
    # validation loader, so the test-set size is the wrong denominator.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = torch.tensor(running_corrects / denominator, dtype=torch.float64)
    print('test_total Loss: {:.4f} Acc: {:.4f}%'.format(epoch_loss, epoch_acc * 100))

    all_prediction = calculate_all_prediction(conf_matrix)  # overall accuracy
    print('all_prediction:{}'.format(all_prediction))
    label_prediction = []  # per-class precision
    label_recall = []  # per-class recall
    for i in range(num_classes):
        label_prediction.append(calculate_label_prediction(conf_matrix, i))
        label_recall.append(calculate_label_recall(conf_matrix, i))

    # Per-class precision, recall and F1.
    for ei, name in zip(range(num_classes), class_names):
        print(ei, '\t', name, '\t', 'prediction=', label_prediction[ei], '%,\trecall=', label_recall[ei], '%,\tf1=', calculate_f1(label_prediction[ei], label_recall[ei]))
    p = round(np.array(label_prediction).sum() / len(label_prediction), 2)  # macro precision
    r = round(np.array(label_recall).sum() / len(label_prediction), 2)  # macro recall
    print('MACRO-averaged:\nprediction=', p, '%,recall=', r, '%,f1=', calculate_f1(p, r))

    # Snapshot the weights only when this epoch is the new best (or when the
    # caller supplied no previous best). The model itself is left untouched:
    # loading older weights back into it would undo the latest training epoch.
    improved = bool(epoch_acc > best_acc)
    if improved or best_model_wts is None:
        best_model_wts = copy.deepcopy(model.state_dict())
    if improved:
        best_acc = epoch_acc

    return best_model_wts, best_acc, epoch_acc

In [None]:
if __name__ == '__main__':
    best_acc = 0.0
    best_model_wts = None  # weights of the best epoch so far
    acc = []  # validation accuracy of every epoch, as plain floats
    for epoch in range(num_epochs):
        train(model, epoch, num_epochs, criterion, optimizer)
        previous_best = best_acc
        epoch_wts, best_acc, epoch_acc = test(model, epoch, num_epochs, criterion, optimizer, best_acc)
        # test() returns a snapshot of the weights as they are after this
        # epoch; keep it only when the epoch beat the previous best, so a
        # later, worse epoch cannot overwrite the best weights.
        if best_model_wts is None or epoch_acc > previous_best:
            best_model_wts = epoch_wts
        acc.append(float(epoch_acc))
    print(best_acc)
    # torch.save(best_model_wts, 'covid_net_model_best_acc.pt')

In [None]:
from collections.abc import Iterable

def set_freeze_by_idxs(model, idxs, freeze=True):
    """Freeze (default) or unfreeze the direct children of `model` at the given positions.

    Args:
        model: module whose `children()` are addressed by position.
        idxs: one position or an iterable of positions; negative values count
            from the end. Positions that match no child are ignored.
        freeze: True sets `requires_grad` to False on the selected children's
            parameters, False sets it to True.

    Returns:
        The same `model` object.
    """
    requested = idxs if isinstance(idxs, Iterable) else [idxs]
    children = list(model.children())
    child_count = len(children)
    # Turn negative positions into their non-negative equivalents.
    selected = {child_count + pos if pos < 0 else pos for pos in requested}
    trainable = not freeze
    for position, child in enumerate(children):
        if position in selected:
            for param in child.parameters():
                param.requires_grad = trainable
    return model
            
def freeze_by_idxs(model, idxs):
    """Freeze the children of `model` at positions `idxs`; returns `model`."""
    return set_freeze_by_idxs(model, idxs, freeze=True)

def unfreeze_by_idxs(model, idxs):
    """Make the children of `model` at positions `idxs` trainable; returns `model`."""
    return set_freeze_by_idxs(model, idxs, freeze=False)
def set_parameter_requires_grad(model):
    """Freeze every parameter of `model` (no gradients are computed for them); returns `model`."""
    for weight in model.parameters():
        weight.requires_grad = False
    return model

def initialize_model(model_name, num_classes, use_pretrained, unfreeze_num):
    """Build a torchvision model with a new `num_classes`-way head and a partly trainable tail.

    All parameters are frozen first, the final linear layer is replaced, and
    then the last few children of the backbone's final stage plus the whole
    head are made trainable again.

    Args:
        model_name: one of 'vgg16', 'vgg19', 'resnet101', 'resnet152',
            'densenet161', 'densenet201'.
        num_classes: output width of the new final layer.
        use_pretrained: forwarded as `pretrained=` to the torchvision constructor.
        unfreeze_num: how many trailing positions to unfreeze.
            VGG: positions [-1], [-1, -3], [-1, -3, -5] of `features` for
            1, 2, 3; any other value selects [-1, -3, -5, -7].
            ResNet / DenseNet: the last `unfreeze_num` children of
            `layer4` / `features.denseblock4`.

    Returns:
        `(model, input_size)` with `input_size == 224`, or `(None, None)`
        (after printing a message) for an unknown `model_name`.
    """
    vgg_builders = {'vgg16': 'vgg16', 'vgg19': 'vgg19'}
    resnet_builders = {'resnet101': 'resnet101', 'resnet152': 'resnet152'}
    densenet_builders = {'densenet161': 'densenet161', 'densenet201': 'densenet201'}

    # Positions counted from the end: -1 is the last child, -2 the one before
    # it, and so on. (Using -i for i = 0, 1, ... would start at position 0,
    # i.e. unfreeze the FIRST child instead of the last one.)
    trailing = [-(k + 1) for k in range(unfreeze_num)] if model_name not in vgg_builders else None

    if model_name in vgg_builders:
        builder = getattr(models, vgg_builders[model_name])
        model_pre = set_parameter_requires_grad(builder(pretrained=use_pretrained))  # freeze everything
        num_ftrs = model_pre.classifier[6].in_features  # input width of the replaced layer
        model_pre.classifier[6] = nn.Linear(num_ftrs, num_classes)
        # NOTE(review): in torchvision's VGG, features[-1] appears to be a
        # pooling layer without parameters, so unfreeze_num=1 may leave all of
        # `features` frozen - confirm this is intended.
        vgg_positions = [-1, -3, -5, -7]
        unfreeze = vgg_positions[:unfreeze_num] if unfreeze_num in (1, 2, 3) else vgg_positions
        model_pre.features = unfreeze_by_idxs(model_pre.features, unfreeze)
        head = model_pre.classifier
    elif model_name in resnet_builders:
        builder = getattr(models, resnet_builders[model_name])
        model_pre = set_parameter_requires_grad(builder(pretrained=use_pretrained))
        num_ftrs = model_pre.fc.in_features
        model_pre.fc = nn.Linear(num_ftrs, num_classes)
        model_pre.layer4 = unfreeze_by_idxs(model_pre.layer4, trailing)
        head = model_pre.fc
    elif model_name in densenet_builders:
        builder = getattr(models, densenet_builders[model_name])
        model_pre = set_parameter_requires_grad(builder(pretrained=use_pretrained))
        num_ftrs = model_pre.classifier.in_features
        model_pre.classifier = nn.Linear(num_ftrs, num_classes)
        model_pre.features.denseblock4 = unfreeze_by_idxs(model_pre.features.denseblock4, trailing)
        head = model_pre.classifier
    else:
        print('model not implemented')
        return None, None

    # The whole head (classifier / fc) is always trainable.
    for param in head.parameters():
        param.requires_grad = True
    input_size = 224
    return model_pre, input_size

In [None]:
from pathlib import Path
model_all = []  # every model built by auto_net, across calls
dir = '.'  # root directory under which the per-model folders are created


def auto_net(model_name, num_classes, use_pretrained, unfreeze_num):
    """Build, save and collect one model per unfreeze depth 1..`unfreeze_num`.

    For k = 0 .. unfreeze_num - 1 a model is created with
    `initialize_model(model_name, num_classes, use_pretrained, k + 1)`, saved
    to `<dir>/<model_name>/<model_name>_<k>.pth` and appended to the global
    `model_all` list.

    Returns:
        The global `model_all` list (it keeps growing across calls).
    """
    save_dir = Path(dir) / model_name
    for k in range(unfreeze_num):
        net, _ = initialize_model(model_name, num_classes, use_pretrained, k + 1)
        if net is None:
            # Unsupported name: initialize_model has already printed a
            # message; there is nothing to save or collect.
            break
        # exist_ok avoids the separate "does it exist?" check; parents=True
        # also creates `dir` itself when it is missing.
        save_dir.mkdir(parents=True, exist_ok=True)
        torch.save(net, str(save_dir / '{}_{}.pth'.format(model_name, k)))  # suffixes 0 1 2 3
        model_all.append(net)
    return model_all
model_name = ['vgg16', 'vgg19', 'resnet101', 'resnet152', 'densenet161', 'densenet201']  # 'inception_v3','resnext101'
# for name in model_name:
#     model_all = auto_net(name, num_classes=num_classes, use_pretrained=True, unfreeze_num=4)

# The head must have one output per class of this dataset (labels 0, 1, 2);
# a 2-output head cannot be trained against label 2 with CrossEntropyLoss.
model_all = auto_net(model_name[0], num_classes=num_classes, use_pretrained=True, unfreeze_num=4)