In [14]:
%matplotlib inline

In [15]:
from __future__ import print_function 
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from tqdm import tqdm

# Seed the PyTorch and NumPy global random number generators with a fixed value.
torch.manual_seed(13)
np.random.seed(13)

# Record the library versions next to the results.
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Explicit CPU handle, used when tensors have to be copied back to host memory.
cpu = torch.device("cpu")

PyTorch Version:  2.0.0+cu117
Torchvision Version:  0.15.1+cu117


In [16]:

# Architecture to fine-tune; must be one of the names handled by initialize_model
# (resnet, alexnet, vgg, squeezenet, densenet, inception).
model_name = "resnet"

# Number of output classes of the replaced classification head.
num_classes = 8

# Mini-batch size used by the DataLoaders.
batch_size = 16

# Number of epochs per call to train_model.
num_epochs = 5

# True: freeze the pretrained backbone and train only the new head.
# False: fine-tune every parameter.
feature_extract = True

# Timestamped checkpoint prefix. It is derived from model_name so the file name
# always matches the architecture that was actually trained.
PATH = './../models/{}_{}'.format(model_name, time.strftime("%Y%m%d-%H%M%S"))

# Directory prefix for evaluation artifacts.
EVAL_PATH = './../eval_result/'

In [17]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Args:
        model: Module whose ``parameters()`` are visited.
        feature_extracting: When truthy, ``requires_grad`` is set to False on
            every parameter. When falsy, the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [18]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision classifier whose final layer outputs ``num_classes`` logits.

    The backbone is created first, optionally frozen through
    ``set_parameter_requires_grad``, and only then is the classification head
    replaced, so the new head is always trainable.

    Args:
        model_name: One of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: Number of outputs of the replacement head.
        feature_extract: Forwarded to ``set_parameter_requires_grad``; when
            truthy the backbone parameters are frozen.
        use_pretrained: Forwarded to the torchvision constructor as
            ``pretrained``.

    Returns:
        Tuple ``(model_ft, input_size)`` where ``input_size`` is the image side
        length this function associates with the architecture (299 for
        inception, 224 otherwise).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour of
    # `weights=`. The old keyword is kept so older installs keep working;
    # confirm the minimum supported torchvision version before switching.

    if model_name == "resnet":
        # ResNet-18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG11 with batch normalisation
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # SqueezeNet 1.0: the head is a 1x1 convolution rather than a Linear layer.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Read the channel count from the layer being replaced instead of
        # hard-coding it.
        in_channels = model_ft.classifier[1].in_channels
        model_ft.classifier[1] = nn.Conv2d(in_channels, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # DenseNet-121
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3: expects (299, 299) images and has an auxiliary output,
        # so both heads are replaced.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Auxiliary head
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Primary head
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        # exit() raises SystemExit, which is the wrong signal inside a notebook
        # kernel and cannot be handled as an ordinary error; raise instead.
        raise ValueError(
            "Invalid model name {!r}; expected one of: resnet, alexnet, vgg, "
            "squeezenet, densenet, inception".format(model_name)
        )

    return model_ft, input_size

# Build the network selected by model_name and keep the input size returned for it.
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)



In [19]:
# Per-split preprocessing pipelines.
#   'train': random resized crop to input_size, random horizontal flip, tensor
#            conversion, then per-channel normalisation.
#   'val':   resize, centre crop to input_size, tensor conversion, then the
#            same normalisation (no random steps).
# The mean/std triples are the channel statistics commonly used with
# ImageNet-pretrained torchvision models.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


# Stand-alone pipeline: random resized crop and random flip, a fixed resize to
# 224x224, then tensor conversion.
# NOTE(review): unlike data_transforms this pipeline has no Normalize step and
# always contains random augmentation.
transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),    
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    ])

print("Initializing Datasets and Dataloaders...")


import os
from PIL import Image
# Class tags in index order: the position of a tag is its integer class id.
all_labels = ["zxb","nm","sy","tsg","mhb","sjz","tyht","other"]
print(len(all_labels))

# Reverse lookup: tag -> integer class id.
label_dict = dict(zip(all_labels, range(len(all_labels))))


class BJTUDataset(torch.utils.data.Dataset):
    """Flat-directory image dataset whose labels are encoded in the file names.

    The text before the first '-' of each file name is the label tag. Tags
    listed in the module-level ``all_labels`` are mapped to their class id
    through ``label_dict``; every other tag is treated as "other".

    Args:
        root: Directory that directly contains the image files.
        train: Stored on the instance; not otherwise used by this class.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.transform = transform
        self.train = train
        # os.listdir returns entries in arbitrary order. Sort them so that a
        # sample index always refers to the same file, which keeps seeded
        # shuffles and splits reproducible. Sub-directories are skipped because
        # they cannot be opened as images.
        self.image_files = sorted(
            entry for entry in os.listdir(root)
            if os.path.isfile(os.path.join(root, entry))
        )
        self.image_paths = [os.path.join(root, imgf) for imgf in self.image_files]
        # Unknown tags collapse to "other".
        self.prelabels = [
            tag if tag in all_labels else "other"
            for tag in (imgf.split('-')[0] for imgf in self.image_files)
        ]
        self.labels = [label_dict[label] for label in self.prelabels]

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is decoded as 3-channel RGB and passed through ``transform``
        when one was supplied.
        """
        label = self.labels[idx]
        # Image.open is lazy and keeps the file handle open until the pixel
        # data is read; the context manager guarantees the handle is released.
        # convert("RGB") forces three channels so grayscale, palette or RGBA
        # files still collate into one batch and fit the network input.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Dataset location. The path is assembled with os.path.join so it is valid on
# any OS instead of only where '\\' is the separator; the trailing "" component
# keeps the trailing separator the original strings had.
ROOT = os.path.join(".", "..", "data", "bjtu-dataset-mixed", "")
data_root = os.path.join(ROOT, "Cleaned_Dataset_1024p", "")

# Training images get the random-augmentation pipeline. Test images get the
# deterministic 'val' pipeline, so evaluation is repeatable. Both pipelines
# include the normalisation step.
trainset = BJTUDataset(root=os.path.join(data_root, 'train'), train=True,
                       transform=data_transforms['train'])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testset = BJTUDataset(root=os.path.join(data_root, 'test'), train=False,
                      transform=data_transforms['val'])
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                          shuffle=False, num_workers=0)

dataloaders_dict = {'train':trainloader, 'val':testloader}

Initializing Datasets and Dataloaders...
8


In [20]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and keep the weights of its best validation epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    corresponding entries of ``dataloaders``. The state dict with the highest
    validation accuracy is loaded back into the model before returning.

    Args:
        model: Network to optimise; modified in place.
        dataloaders: Mapping with 'train' and 'val' loaders that yield
            ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of epochs to run.
        is_inception: When true, the training phase expects the model to return
            ``(outputs, aux_outputs)`` and adds 0.4 times the auxiliary loss.

    Returns:
        Tuple ``(model, val_acc_history)`` with one accuracy entry per epoch.
    """
    started_at = time.time()

    val_acc_history = []
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = (phase == 'train')
            if is_training:
                model.train()
            else:
                model.eval()

            loader = dataloaders[phase]
            loss_total = 0.0
            correct_total = 0

            for inputs, labels in tqdm(loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    if is_inception and is_training:
                        # Inception returns an auxiliary output in train mode.
                        outputs, aux_outputs = model(inputs)
                        main_loss = criterion(outputs, labels)
                        aux_loss = criterion(aux_outputs, labels)
                        loss = main_loss + 0.4*aux_loss
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    preds = torch.max(outputs, 1)[1]

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by the batch size so the epoch
                # figure is a per-sample average.
                loss_total += loss.item() * inputs.size(0)
                correct_total += (preds == labels.data).sum()

            sample_count = len(loader.dataset)
            epoch_loss = loss_total / sample_count
            epoch_acc = correct_total.double() / sample_count

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if not is_training:
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    elapsed = time.time() - started_at
    print('Training complete in {:.0f}m {:.0f}s'.format(elapsed // 60, elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best-performing weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def eval_model(model, eval_dataloader, criterion, top_k=60, save_path=None):
    """Evaluate ``model`` and report a per-class top-K precision figure.

    Samples are bucketed by their true label. For each class ``c`` the bucket
    is ranked by the model's score for ``c``, the ``top_k`` highest-scoring
    samples are kept, and the reported value is the fraction of those whose
    arg-max prediction is ``c``. While walking the ranked samples a
    ``[remaining, hits]`` curve is recorded per class, starting at
    ``[top_k, 0]``.

    Args:
        model: Network to evaluate; switched to eval mode.
        eval_dataloader: Loader yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        top_k: Number of top-ranked samples kept per class.
        save_path: Destination of the ``.npy`` file holding the per-class
            curves. Defaults to ``EVAL_PATH + 'topK_densenet.npy'``.

    Returns:
        Tuple ``(model, epoch_acc, labels_precision)``; ``labels_precision``
        has one float per entry of ``all_labels`` (NaN for a class that has no
        samples in the loader).

    Side effects:
        Prints per-class precision, loss, accuracy and timing, and writes the
        curves to ``save_path``.
    """
    since = time.time()
    if save_path is None:
        save_path = EVAL_PATH + 'topK_densenet.npy'

    model.eval()

    running_loss = 0.0
    running_corrects = 0

    # One independent bucket per class. `[[]] * n` would repeat a single list
    # object n times, so every class would share the same samples.
    per_class_outputs = [[] for _ in all_labels]

    for inputs, labels in tqdm(eval_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

        # Copy each batch to host memory once instead of once per sample.
        outputs_np = outputs.detach().to(cpu).numpy()
        labels_np = labels.to(cpu).numpy()
        for scores, target in zip(outputs_np, labels_np):
            per_class_outputs[target].append(scores)

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    labels_precision = []
    labels_line = []

    for class_idx, bucket in enumerate(per_class_outputs):
        # Highest score for this class first.
        bucket.sort(key=lambda scores: scores[class_idx], reverse=True)
        top = bucket[:top_k]

        # Curve of [remaining, hits]: a hit increments the second entry, a
        # miss decrements the first.
        line = [[top_k, 0]]
        hits = 0
        for scores in top:
            remaining, found = line[-1]
            if int(np.argmax(scores)) == class_idx:
                hits += 1
                line.append([remaining, found + 1])
            else:
                line.append([remaining - 1, found])
        labels_line.append(line)

        # A class without samples has no defined precision; avoid dividing by zero.
        precision_label = hits / len(top) if top else float('nan')
        labels_precision.append(precision_label)
        print('Precision on label[{}:{}]:{:.5f} '.format(all_labels[class_idx], class_idx, precision_label))

    # Classes with fewer than top_k samples give curves of different lengths.
    # Recent NumPy refuses to build such a ragged array implicitly, so fall
    # back to an object array in that case only.
    if len({len(line) for line in labels_line}) <= 1:
        curves = np.array(labels_line)
    else:
        curves = np.array(labels_line, dtype=object)

    # Write once, after all classes are processed, and make sure the target
    # directory exists.
    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
    np.save(save_path, curves)

    num_samples = len(eval_dataloader.dataset)
    epoch_loss = running_loss / num_samples
    epoch_acc = running_corrects.double() / num_samples

    print('{} Loss: {:.4f} Acc: {:.4f}'.format('val', epoch_loss, epoch_acc))

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(epoch_acc))

    return model, epoch_acc, labels_precision

In [21]:
# Move the network to the selected device before its parameters are handed to
# the optimizer.
model_ft = model_ft.to(device)

# List every parameter that still requires gradients.
print("Params to learn:")
trainable = [(name, param) for name, param in model_ft.named_parameters() if param.requires_grad]
for name, _ in trainable:
    print("\t",name)

# In feature-extraction mode only the unfrozen parameters are optimised;
# otherwise the optimizer receives all of them.
params_to_update = [param for _, param in trainable] if feature_extract else model_ft.parameters()

optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Params to learn:
	 fc.weight
	 fc.bias


In [22]:
# Set up the loss function
criterion = nn.CrossEntropyLoss()

# # Train and evaluate
# model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))


In [23]:
# eval_model(model_ft, testloader, criterion)

In [24]:

# torch.save(model_ft.state_dict(), PATH+("_{}.pth".format(time.time())))

In [25]:
class Subset(torch.utils.data.Dataset):
    """View of ``dataset`` restricted to the positions listed in ``indices``.

    Args:
        dataset: Any object that supports integer ``__getitem__``.
        indices: Integer positions into ``dataset``. A NumPy array, a plain
            sequence, or a single integer / 0-d array are all accepted.
    """

    def __init__(self, dataset, indices):
        self.dataset = dataset
        # Normalise once to a 1-D array. Lists (which have no ``.shape``) and
        # 0-d arrays (which cannot be indexed) then behave like any other
        # index array, so no special case is needed later.
        self.indices = np.atleast_1d(np.asarray(indices))

    def __len__(self):
        """Return the number of selected positions."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return the item of the wrapped dataset at the ``idx``-th selected position."""
        # Hand the wrapped dataset a plain Python int rather than a NumPy scalar.
        return self.dataset[int(self.indices[idx])]

# Number of cross-validation folds.
k=7

dataset=trainset
num_val_samples = len(dataset) // k

print('len(dataset): ', len(dataset))
print('num_val_samples: ', num_val_samples)

# Shuffle the sample positions once; every fold takes one contiguous slice of
# the shuffled positions as its validation set.
arr=np.arange(len(dataset))
np.random.shuffle(arr)
all_pk = []

# The loss has no per-fold state, so one instance serves every fold.
criterion = nn.CrossEntropyLoss()

for fold in range(k):
    print('Creating fold: %d/%d' % (fold + 1, k))

    val_start = fold * num_val_samples
    val_stop = val_start + num_val_samples
    valid_idx = arr[val_start:val_stop]
    train_idx = np.concatenate([arr[:val_start], arr[val_stop:]], axis=0)
    train_dataset = Subset(dataset, train_idx)
    valid_dataset = Subset(dataset, valid_idx)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    dataloaders_dictk = {'train': train_loader, 'val': valid_loader}

    # NOTE(review): model_ft and optimizer_ft are carried over from fold to
    # fold, so from the second fold on the validation slice has already been
    # trained on. Re-initialise both per fold if independent cross-validation
    # estimates are wanted.
    model_ft, hist = train_model(model_ft, dataloaders_dictk, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

    model_ft, epoch_acc, pk = eval_model(model_ft, testloader, criterion)
    all_pk.append(pk)

# Aggregate only after every fold has run. Doing this inside the loop turned
# all_pk into an ndarray, so the next fold's append() failed. Rows are folds
# and columns are labels, so the per-label average is taken over axis 0.
all_pk = np.array(all_pk)
print(all_pk.shape)
mean_pk = np.mean(all_pk, axis=0)
for label_idx, label_pk in enumerate(mean_pk):
    print('Average Precision on label[{}:{}]:{:.5f} '.format(all_labels[label_idx],label_idx,label_pk))

# Make sure the checkpoint directory exists so the trained weights are not lost
# to a missing folder.
os.makedirs(os.path.dirname(PATH) or '.', exist_ok=True)
torch.save(model_ft.state_dict(), PATH+("_{}.pth".format(time.time())))

len(dataset):  7296
num_val_samples:  1042
Creating fold: 1/7
Epoch 1/5
----------


  5%|▍         | 19/391 [00:11<03:41,  1.68it/s]


KeyboardInterrupt: 

In [None]:
# PATH="resnet_net_3.pth"

# model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=False)

# model_ft=model_ft.to(device)

# model_ft.load_state_dict(torch.load(PATH))

# eval_model(model_ft, testloader, criterion)