In [4]:
import torch
import numpy as np

from torch import optim, nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as td
import torchvision.transforms as transforms
import torchvision.datasets as datasets


def custom_loader(batch_size, shuffle_test=False, data_path='./dataset/preprocessed'):
    # Add the necessary transforms
    # normalize = transforms.Normalize(mean=[0.024], std=[0.994])
    transform = transforms.Compose([
        transforms.Resize((48, 48)),  # Adjust this if your images are a different size
        # transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.ToTensor(),
        # normalize
    ])

    # Load your custom dataset
    train_dataset = datasets.ImageFolder(root=data_path + '/train', transform=transform)
    test_dataset = datasets.ImageFolder(root=data_path + '/validation', transform=transform)
    
    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle_test, pin_memory=True)

    return train_loader, test_loader



class MultiLayerFCNet(nn.Module):
    def __init__(self,input_size, hidden_size, output_size):
        super().__init__()

        self.layer1=nn.Conv2d(3,32,4,padding=1,stride=1)
        self.B1 = nn.BatchNorm2d(32)
        self.layer2 = nn.Conv2d(32, 32, 4, padding=1, stride=1)
        self.B2 = nn.BatchNorm2d(32)
        self.Maxpool=nn.MaxPool2d(2)
        self.layer3 = nn.Conv2d(32, 64, 4, padding=1, stride=1)
        self.B3 = nn.BatchNorm2d(64)
        self.layer4 = nn.Conv2d(64, 64, 4, padding=1, stride=1)
        self.B4 = nn.BatchNorm2d(64)
        self.dropout = nn.Dropout(0.5)
        
        # New layers
        self.layer5 = nn.Conv2d(64, 128, 4, padding=1, stride=1)
        self.B5 = nn.BatchNorm2d(128)
        self.layer6 = nn.Conv2d(128, 128, 4, padding=1, stride=1)
        self.B6 = nn.BatchNorm2d(128)
        self.layer7 = nn.Conv2d(128, 256, 4, padding=1, stride=1)
        self.B7 = nn.BatchNorm2d(256)
        self.layer8 = nn.Conv2d(256, 256, 4, padding=1, stride=1)
        self.B8 = nn.BatchNorm2d(256)
        
        # Calculate the size for the fully connected layer after additional max-pooling layers
        # Assuming two max-pooling operations in the existing layers
        self.fc_size = 256   # Now this is 256 * 3 * 3
        self.fc = nn.Linear(self.fc_size, output_size)

    def forward(self, x):
        # Pass through existing layers
        x = F.leaky_relu(self.B1(self.layer1(x)))
        x = self.Maxpool(F.leaky_relu(self.B2(self.layer2(x))))
        x = F.leaky_relu(self.B3(self.layer3(x)))
        x = self.Maxpool(F.leaky_relu(self.B4(self.layer4(x))))
        
        # Pass through new layers
        x = F.leaky_relu(self.B5(self.layer5(x)))
        x = F.leaky_relu(self.B6(self.layer6(x)))
        x = self.Maxpool(F.leaky_relu(self.B7(self.layer7(x))))
        x = self.Maxpool(F.leaky_relu(self.B8(self.layer8(x))))
        
        x = x.view(x.size(0), -1)  # Flatten the tensor for the fully connected layer
        return self.fc(x)

if __name__ == '__main__':

    batch_size = 64
    test_batch_size = 64
    input_size = 3 * 48 * 48  # 1 channels, 48x48 image size
    hidden_size = 50  # Number of hidden units
    output_size = 4  # Number of output classes 4
    num_epochs = 10

    # train_loader, _ = cifar_loader(batch_size)
    # _, test_loader = cifar_loader(test_batch_size)
    train_loader, test_loader = custom_loader(batch_size, data_path='./dataset/preprocessed')
    # dataloader = DataLoader(dataset=IrisDataset('iris.data'),
    #                         batch_size=10,
    #                         shuffle=True)

    epochs = 50
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = MultiLayerFCNet(input_size, hidden_size, output_size)
    model = nn.DataParallel(model)
    model.to(device)
    #model.load_state_dict(torch.load('path'))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    BestACC=0.3
    for epoch in range(epochs):
        running_loss = 0
        for instances, labels in train_loader:
            optimizer.zero_grad()

            output = model(instances)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(running_loss / len(train_loader))

        model.eval()
        with torch.no_grad():
            allsamps=0
            rightPred=0

            for instances, labels in test_loader:

                output = model(instances)
                predictedClass=torch.max(output,1)
                allsamps+=output.size(0)
                rightPred+=(torch.max(output,1)[1]==labels).sum()


            ACC=rightPred/allsamps
            print("epoch=",epoch)
            print('Accuracy is=',ACC*100)
            #if the acc is greater than the best acc, save the model
            
            if ACC>BestACC:
                torch.save(model.state_dict(), './model/best_model.pth')
                BestACC=ACC

            #save the model architecture

            torch.save(model, './model/model.pth')
            torch.save(model.state_dict(), './model/model_state_dict.pth')
            torch.save({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'loss': loss
            }, './model/all.tar')
        model.train()



1.5778354065758842
epoch= 0
Accuracy is= tensor(31.2874)
1.4209551567933998
epoch= 1
Accuracy is= tensor(34.7305)


KeyboardInterrupt: 

RuntimeError: Error(s) in loading state_dict for MultiLayerFCNet:
	Missing key(s) in state_dict: "layer1.weight", "layer1.bias", "B1.weight", "B1.bias", "B1.running_mean", "B1.running_var", "layer2.weight", "layer2.bias", "B2.weight", "B2.bias", "B2.running_mean", "B2.running_var", "layer3.weight", "layer3.bias", "B3.weight", "B3.bias", "B3.running_mean", "B3.running_var", "layer4.weight", "layer4.bias", "B4.weight", "B4.bias", "B4.running_mean", "B4.running_var", "layer5.weight", "layer5.bias", "B5.weight", "B5.bias", "B5.running_mean", "B5.running_var", "layer6.weight", "layer6.bias", "B6.weight", "B6.bias", "B6.running_mean", "B6.running_var", "layer7.weight", "layer7.bias", "B7.weight", "B7.bias", "B7.running_mean", "B7.running_var", "layer8.weight", "layer8.bias", "B8.weight", "B8.bias", "B8.running_mean", "B8.running_var", "fc.weight", "fc.bias". 
	Unexpected key(s) in state_dict: "module.layer1.weight", "module.layer1.bias", "module.B1.weight", "module.B1.bias", "module.B1.running_mean", "module.B1.running_var", "module.B1.num_batches_tracked", "module.layer2.weight", "module.layer2.bias", "module.B2.weight", "module.B2.bias", "module.B2.running_mean", "module.B2.running_var", "module.B2.num_batches_tracked", "module.layer3.weight", "module.layer3.bias", "module.B3.weight", "module.B3.bias", "module.B3.running_mean", "module.B3.running_var", "module.B3.num_batches_tracked", "module.layer4.weight", "module.layer4.bias", "module.B4.weight", "module.B4.bias", "module.B4.running_mean", "module.B4.running_var", "module.B4.num_batches_tracked", "module.layer5.weight", "module.layer5.bias", "module.B5.weight", "module.B5.bias", "module.B5.running_mean", "module.B5.running_var", "module.B5.num_batches_tracked", "module.layer6.weight", "module.layer6.bias", "module.B6.weight", "module.B6.bias", "module.B6.running_mean", "module.B6.running_var", "module.B6.num_batches_tracked", "module.layer7.weight", "module.layer7.bias", "module.B7.weight", "module.B7.bias", "module.B7.running_mean", "module.B7.running_var", "module.B7.num_batches_tracked", "module.layer8.weight", "module.layer8.bias", "module.B8.weight", "module.B8.bias", "module.B8.running_mean", "module.B8.running_var", "module.B8.num_batches_tracked", "module.fc.weight", "module.fc.bias". 