In [1]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision import transforms
from torchvision.transforms import v2
import numpy as np
# import PIL

In [2]:
# import data
folder="./dataset"
# transform = transforms.Compose([transforms.ToTensor()])
# transform = transforms.Compose([transforms.Resize((224, 224))])
data_train_val=torchvision.datasets.FashionMNIST(root=folder,train=True,download=True,transform=None)
data_test=torchvision.datasets.FashionMNIST(root=folder,train=False,download=True,transform=None)

In [3]:
classes=data_test.classes

In [4]:
# import matplotlib.pyplot as plt
# figure = plt.figure(figsize = (8,8))
# cols, rows = 4, 4
# ids=torch.randint(len(data_test), size = (cols*rows,))
# for i in range (1, cols*rows + 1):
#     image, label = data_test[ids[i-1].item()]
#     figure.add_subplot(rows, cols, i)
#     plt.title(classes[label])
#     plt.axis('off')
#     plt.imshow(image.squeeze(), cmap='gray')
#     # plt.imshow(image.squeeze())
# plt.show()

In [5]:
# data_train_val.data[:2000].shape
data_test.data.shape
# type(data_train_val.data)
# data_test.targets.shape

torch.Size([10000, 28, 28])

### Data Preparation (10 points)
- (a) Data cleansing and transformation (5 points)

In [6]:
class dataset(torch.utils.data.Dataset):
    def __init__(self, inputs, targets, trans=None):
        self.x = inputs
        self.y = targets
        self.trans=trans

    def __len__(self):
        return self.x.size()[0]

    def __getitem__(self, idx):

        if self.trans == None:
            # print("no trans")
            return ( self.x[idx], self.y[idx])
        else:
            # print("trans")
            return ( self.trans(self.x[idx]), self.y[idx]) 

def ds_trans(ds_input,trans_flag):
    data_input=ds_input.data.clone().detach()
    labels_input=ds_input.targets
    if data_input.ndim <= 3:
        transform_train_val = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((64, 64)), 
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomAffine(degrees=(0,3), translate=(0,0.05), scale=None, shear=(0,0.05)),
            transforms.RandomResizedCrop(size=(64,64),scale=(0.9,1.0),ratio=(0.9,1.1)),
            transforms.ToTensor(),
            transforms.Normalize(0.5, 0.5)
        ])
        transform_test = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((64, 64)), 
            transforms.ToTensor(),
             transforms.Normalize(0.5, 0.5)
        ])
    elif data_input.ndim == 4:
        # Should not come here
        print("WRONG!")

    if torch.isnan(data_input).any():
        print("have NaN or Inf")
    data_input = data_input[:,None,:,:]
    print(data_input.shape)
    
    if trans_flag==1:
        flag_ds_new = dataset(data_input, labels_input, trans=transform_train_val)
    elif trans_flag==2:
        flag_ds_new = dataset(data_input, labels_input, trans=transform_test)
    else:
        flag_ds_new = dataset(data_input, labels_input, trans=None)
    return (flag_ds_new)
    

In [7]:
torchvision.__version__

'0.16.0'

In [8]:
ds_processed_train_val=ds_trans(ds_input=data_train_val,trans_flag=1)
ds_processed_test=ds_trans(ds_input=data_test,trans_flag=2)
# ds_processed.x.shape

torch.Size([60000, 1, 28, 28])
torch.Size([10000, 1, 28, 28])


- (b) Data splitting (i.e., training, validation, and test splits) (5 points)

In [9]:
from torch.utils.data.sampler import SubsetRandomSampler

random_seed=22
indices = list(range(data_train_val.targets.shape[0]))
np.random.seed(random_seed)
np.random.shuffle(indices)

indices_train=indices[:50000]
indices_val=indices[50000:]

ds_processed_train_sampler=SubsetRandomSampler(indices_train)
ds_processed_val_sampler=SubsetRandomSampler(indices_val)

# batch_size=64
batch_size=256
num_workers=10
loader_train = torch.utils.data.DataLoader(ds_processed_train_val, batch_size=batch_size,sampler=ds_processed_train_sampler,
                                            num_workers=num_workers)
loader_val = torch.utils.data.DataLoader(ds_processed_train_val, batch_size=batch_size,sampler=ds_processed_val_sampler,
                                            num_workers=num_workers)
loader_test = torch.utils.data.DataLoader(ds_processed_test, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers)

In [10]:
for batch_idx, (inputs, targets) in enumerate(loader_train):
    break
print(inputs.shape)
print(targets.shape)

torch.Size([256, 1, 64, 64])
torch.Size([256])


## Model

In [11]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def trainloop (dataloader, model, loss_fn, optimizer,device):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X,y) in enumerate(dataloader):
        X,y=X.to(device),y.to(device)
        
        # Compute the error rate
        prediction = model(X)
        loss = loss_fn(prediction, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch*len(X)
            print(f'loss:{loss:>7f} [{current:>5d}/{size:>5d}]')
            
def evaluate (dataloader, model, loss_fn,device):
    # size = len(dataloader.dataset)
    size = len(dataloader.sampler)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    Y_pred=[]
    Y_true=[]
    with torch.no_grad():
        for X,y in dataloader:
            X,y = X.to(device),y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            Y_pred.append(pred.argmax(1).cpu().numpy())
            Y_true.append(y.cpu().numpy())
    test_loss /= num_batches
    correct /= size
    print(f'Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')
    accuracy=100*correct
    Y_pred=np.concatenate(Y_pred)
    Y_true=np.concatenate(Y_true)
    accuracy2=accuracy_score(Y_true,Y_pred)
    # print(f'Accuracy2: {(accuracy2*100):>0.1f}')
    precision = precision_score(Y_true, Y_pred, average='micro')
    recall = recall_score(Y_true, Y_pred, average='micro')
    f1 = f1_score(Y_true, Y_pred, average='micro')
    cm = confusion_matrix(Y_true, Y_pred)
    return [accuracy, precision, recall, f1, test_loss], cm

In [12]:
# model2 = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=False)
# model2.features[0]


In [13]:
import torchvision
def build_train_model(loader_train,loader_val,loader_test,learning_rate,id_optimizer):
    device ='cuda' if torch.cuda.is_available else 'cpu'
    print(f'Using {device} device')
    # model = torch.hub.load('pytorch/vision:v0.10.0', 'wide_resnet50_2', pretrained=True)
    # model=torchvision.models.vgg11()
    # model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=False)
    # model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', weights='DenseNet121_Weights.IMAGENET1K_V1')
    model=torchvision.models.squeezenet1_0(weights='SqueezeNet1_0_Weights.IMAGENET1K_V1')
    
    # model
    # model.conv1=torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    # model.features[0]=torch.nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) # vgg
    # model.features[0]=torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)) # densenet121
    model.features[0]=torch.nn.Conv2d(1, 96, kernel_size=(7, 7), stride=(2, 2)) # SqueezeNet

    model=model.to(device)
    # model

    # Define hyperparameters
    # learning_rate = 1e-4
    epochs = 100

    # Loss Function
    loss_fn = nn.CrossEntropyLoss()
    # loss_fn = nn.MSELoss(reduction='sum')

    # Optimizer
    if id_optimizer==0:
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    elif id_optimizer==1:
        optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
    elif id_optimizer==2:
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    

    for i in range(epochs):
        print(f"Epoch {i+1}")
        # print(f"Epoch {i+1}\n-------------------------------")
        trainloop(loader_train, model, loss_fn, optimizer,device)
        # print("Training Dataset Error:")
        # evaluate(loader_train, model, loss_fn,device)
        # print("Validation Dataset Error:");
        # evaluate(loader_val, model, loss_fn,device)
        # print("Test Dataset Error:");
        # evaluate(loader_test, model, loss_fn,device)
    
    # Acc=np.zeros(3)
    # Loss=np.zeros(3)
    # print("Training Dataset Error:")
    # Acc[0],Loss[0]=evaluate(loader_train, model, loss_fn,device)
    # print("Validation Dataset Error:");
    # Acc[1],Loss[1]=evaluate(loader_val, model, loss_fn,device)
    # print("Test Dataset Error:");
    # Acc[2],Loss[2]=evaluate(loader_test, model, loss_fn,device)

    print("Training Dataset Error:")
    result_train,cm_train=evaluate(loader_train, model, loss_fn,device)
    print("Validation Dataset Error:");
    result_val,cm_val=evaluate(loader_val, model, loss_fn,device)
    print("Test Dataset Error:");
    result_test,cm_test=evaluate(loader_test, model, loss_fn,device)

    results=[result_train,result_val,result_test]

    return results,cm_val

In [14]:
def split_6_fold(index,ds_processed_train_val,ds_processed_test,indices):
    N=ds_processed_train_val.y.shape[0]
    nsub=int(N/6)
    # indices_train=indices[:50000]
    indices_train=[indices[i] for i in range(N) if i <index*nsub or i >=(index+1)*nsub]
    indices_val=indices[index*nsub:(index+1)*nsub]

    ds_processed_train_sampler=SubsetRandomSampler(indices_train)
    ds_processed_val_sampler=SubsetRandomSampler(indices_val)

    # batch_size=64
    batch_size=512
    num_workers=10
    loader_train = torch.utils.data.DataLoader(ds_processed_train_val, batch_size=batch_size,sampler=ds_processed_train_sampler,
                                                num_workers=num_workers)
    loader_val = torch.utils.data.DataLoader(ds_processed_train_val, batch_size=batch_size,sampler=ds_processed_val_sampler,
                                                num_workers=num_workers)
    loader_test = torch.utils.data.DataLoader(ds_processed_test, batch_size=batch_size,
                                            shuffle=True, num_workers=num_workers)
    return  loader_train,loader_val,loader_test

In [15]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
def one_iteration(learning_rate, id_optimizer,indices):
    # learning_rate=1e-4
    # id_optimizer=0
    ACC=[]
    LOSS=[]
    Results=[]
    CM_val=[]
    for i in range(6):
        print(f"Fold {i}\n-------------------------------")
        loader_train,loader_val,loader_test=split_6_fold(i,ds_processed_train_val,ds_processed_test,indices)
        results,cm_val=build_train_model(loader_train,loader_val,loader_test,learning_rate,id_optimizer)
        results=np.array(results)
        Results.append(results)
        CM_val.append(cm_val)
    Results=np.array(Results) # dim0: i from 0-5; dim1: train, val, test; dim2: acc, precision, recall, f1 score, loss
    Results_ave=np.mean(Results,0) #dim0: train, val, test; dim1: acc, precision, recall, f1 score, loss
    try:
        CM_val_ave=np.mean(CM_val,0)
        CM_val_ave_percent=CM_val_ave/np.sum(CM_val_ave)*100
        print("np.sum(CM_val_ave)=",np.sum(CM_val_ave)) # close to 10000. Correct
        # ACC=Results_ave[:,0]
        # LOSS=Results_ave[:,4]
        # print("ACC:\nAve: ",np.mean(ACC,0),"std: ",np.std(ACC,0))
        # print("LOSS:\nAve: ",np.mean(LOSS,0),"std: ",np.std(LOSS,0))
        print("Results_ave: \nacc, precision, recall, f1 score, loss")
        print(Results_ave)
        print("Confusion Matrix: \n",CM_val_ave_percent)
        classes_id=['0','1','2','3','4','5','6','7','8','9']
        # print(classes)
        disp = ConfusionMatrixDisplay(confusion_matrix=np.round(CM_val_ave_percent,1),
                                display_labels=classes_id)
        disp.plot()
        plt.show()
    except:
        print("Failed to plot.")

    return Results_ave

In [16]:
a=np.array([2,1,3])
b=np.array([1,2,3])
ab=[a,b]
ab=np.array(ab)

c=np.array([3.15154,2,4])
d=np.array([1,2,3])
cd=[c,d]
cd=np.array(cd)

abcd=[ab,cd]
abcd=np.array(abcd)
abcd
np.mean(abcd,0)

np.round(abcd,2)

array([[[2.  , 1.  , 3.  ],
        [1.  , 2.  , 3.  ]],

       [[3.15, 2.  , 4.  ],
        [1.  , 2.  , 3.  ]]])

In [17]:
random_seed=22
indices = list(range(ds_processed_train_val.y.shape[0]))
np.random.seed(random_seed)
np.random.shuffle(indices)
Results_all=[]
for id_optimizer in [0,1,2]:
    for learning_rate in [1e-3,1e-4,1e-5]:
        print(f"id_optimizer: {id_optimizer}\tlearning_rate: {learning_rate} \n-----------------------------------------------------------")
        Results_ave= one_iteration(learning_rate,id_optimizer,indices)
        Results_all.append([id_optimizer, learning_rate, Results_ave])
        print('####################################################')

id_optimizer: 0	learning_rate: 0.001 
-----------------------------------------------------------
Fold 0
-------------------------------
Using cuda device
Epoch 1
loss:16.356211 [    0/60000]
Epoch 2
loss:2.525099 [    0/60000]
Epoch 3
loss:1.730583 [    0/60000]
Epoch 4
loss:1.359918 [    0/60000]
Epoch 5
loss:1.119316 [    0/60000]
Epoch 6
loss:0.905924 [    0/60000]
Epoch 7
loss:0.794874 [    0/60000]
Epoch 8
loss:0.726801 [    0/60000]
Epoch 9
loss:0.667917 [    0/60000]
Epoch 10
loss:0.644572 [    0/60000]
Epoch 11
loss:0.623728 [    0/60000]
Epoch 12
loss:0.557908 [    0/60000]
Epoch 13
loss:0.511955 [    0/60000]
Epoch 14
loss:0.489643 [    0/60000]
Epoch 15
loss:0.492573 [    0/60000]
Epoch 16
loss:0.473247 [    0/60000]
Epoch 17
loss:0.472517 [    0/60000]
Epoch 18
loss:0.453357 [    0/60000]
Epoch 19
loss:0.467518 [    0/60000]
Epoch 20
loss:0.483820 [    0/60000]
Epoch 21
loss:0.407070 [    0/60000]
Epoch 22
loss:0.410953 [    0/60000]
Epoch 23
loss:0.414772 [    0/60000]
Ep

In [None]:
Results_all

[[0,
  0.001,
  array([[18.73733333,  0.18737333,  0.18737333,  0.18737333,  2.05188782],
         [18.55333333,  0.18553333,  0.18553333,  0.18553333,  2.05249282],
         [19.21      ,  0.1921    ,  0.1921    ,  0.1921    ,  2.04551729]])],
 [0,
  0.0001,
  array([[71.86733333,  0.71867333,  0.71867333,  0.71867333,  0.76765054],
         [71.99333333,  0.71993333,  0.71993333,  0.71993333,  0.76360032],
         [72.3       ,  0.723     ,  0.723     ,  0.723     ,  0.76092842]])],
 [1,
  0.001,
  array([[33.06133333,  0.33061333,  0.33061333,  0.33061333,  2.12206863],
         [32.79      ,  0.3279    ,  0.3279    ,  0.3279    ,  2.1218404 ],
         [32.74666667,  0.32746667,  0.32746667,  0.32746667,  2.11988274]])],
 [1,
  0.0001,
  array([[13.73266667,  0.13732667,  0.13732667,  0.13732667,  6.30737782],
         [13.42666667,  0.13426667,  0.13426667,  0.13426667,  6.30826241],
         [12.89333333,  0.12893333,  0.12893333,  0.12893333,  6.29778263]])]]