In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, Dataset
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed the default generator and every CUDA generator for repeatable runs.
SEED = 777
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
#====================================================================
# calling data
# NOTE(review): absolute, machine-specific paths -- adjust for your environment.
TRAIN_DIR = '/home/gus/catvsdog'
TEST_DIR = '/home/gus/cattest'
IMAGE_SIZE = (227, 227)   # spatial input size the AlexNet model below is written for
BATCH_SIZE = 64
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)

# Training pipeline: resize, random augmentation, then tensor conversion and
# per-channel normalisation ((x - 0.5) / 0.5).
trans = transforms.Compose([transforms.Resize(IMAGE_SIZE),
                            transforms.RandomHorizontalFlip(p=0.5), #data augmentation
                            transforms.RandomVerticalFlip(p=0.5),   #data augmentation
                            transforms.RandomAffine(15),            #data augmentation
                            transforms.ToTensor(),
                            transforms.Normalize(NORM_MEAN, NORM_STD)])
# Evaluation pipeline: identical resize/normalisation but WITHOUT the random
# augmentation, so test accuracy is deterministic and measured on unmodified images.
test_trans = transforms.Compose([transforms.Resize(IMAGE_SIZE),
                                 transforms.ToTensor(),
                                 transforms.Normalize(NORM_MEAN, NORM_STD)])

trainset = torchvision.datasets.ImageFolder(root=TRAIN_DIR,
                                            transform=trans)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True,
                                          drop_last=True,
                                          num_workers=2)
testset = torchvision.datasets.ImageFolder(root=TEST_DIR,
                                           transform=test_trans)
# No shuffling and no dropped partial batch: every test image is visited
# exactly once, in a stable order.
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=BATCH_SIZE,
                                         shuffle=False,
                                         drop_last=False,
                                         num_workers=2)
#====================================================================
# declaring hyperparameters
lr = 1e-2
#====================================================================
# constructing AlexNet model
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for 3x227x227 inputs.

    Five convolutional stages (``layer1``..``layer5``) are followed by two
    fully connected stages (``layer6``, ``layer7``). Local response
    normalisation follows the first two pooling layers and Dropout precedes
    each linear layer.

    Args:
        num_classes: Number of output logits produced by the final linear
            layer. Defaults to 2.
    """

    # Width of the cross-channel window used by each LocalResponseNorm layer.
    # This is the number of *neighbouring channels*, not the channel count of
    # the incoming feature map.
    LRN_SIZE = 5

    def __init__(self, num_classes=2):
        super(AlexNet,self).__init__()

        self.layer1 = nn.Sequential(
            # input shape = (?, 3, 227, 227)
            # Conv -> (?, 96, 55, 55)
            # Pool -> (?, 96, 27, 27)
            torch.nn.Conv2d(3,96,11,stride=4,padding=0),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2),
            torch.nn.LocalResponseNorm(self.LRN_SIZE,alpha=1e-4,beta=0.75,k=2)   #LRN
        )
        self._init_layer(self.layer1[0], bias=0)
        self.layer2 = nn.Sequential(
            # input shape = (?, 96, 27, 27)
            # Conv -> (?, 256, 27, 27)
            # Pool -> (?, 256, 13, 13)
            torch.nn.Conv2d(96,256,5,stride=1,padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2),
            torch.nn.LocalResponseNorm(self.LRN_SIZE,alpha=1e-4,beta=0.75,k=2)   #LRN
        )
        self._init_layer(self.layer2[0], bias=1)
        self.layer3 = nn.Sequential(
            # input shape = (?, 256, 13, 13)
            # Conv -> (?, 384, 13, 13)
            torch.nn.Conv2d(256,384,3,stride=1,padding=1),
            torch.nn.ReLU())
        self._init_layer(self.layer3[0], bias=0)
        self.layer4 = nn.Sequential(
            # input shape = (?, 384, 13, 13)
            # Conv -> (?, 384, 13, 13)
            torch.nn.Conv2d(384,384,3,stride=1,padding=1),
            torch.nn.ReLU()
        )
        self._init_layer(self.layer4[0], bias=1)
        self.layer5 = nn.Sequential(
            # input shape = (?, 384, 13, 13)
            # Conv -> (?, 256, 13, 13)
            # Pool -> (?, 256, 6, 6)
            torch.nn.Conv2d(384,256,3,stride=1,padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2)
        )
        self._init_layer(self.layer5[0], bias=1)
        self.layer6 = nn.Sequential(
            # input shape = (?, 6*6*256) after flattening in forward()
            torch.nn.Dropout(p=0.5),                          #Implementing Dropout
            torch.nn.Linear(6*6*256,4096,bias=True),
            torch.nn.ReLU()
        )
        self._init_layer(self.layer6[1], bias=1)
        self.layer7 = nn.Sequential(
            # input shape = (?, 4096) -> (?, num_classes) raw logits
            torch.nn.Dropout(p=0.5),                          #Implementing Dropout
            torch.nn.Linear(4096,num_classes,bias=True)
        )
        self._init_layer(self.layer7[1], bias=1)

    @staticmethod
    def _init_layer(layer, bias):
        """Draw ``layer.weight`` from N(0, 0.01^2) and fill ``layer.bias`` with ``bias``."""
        torch.nn.init.normal_(layer.weight,mean=0,std=0.01)
        torch.nn.init.constant_(layer.bias,bias)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images.

        Args:
            x: Tensor of shape (N, 3, 227, 227). The first linear layer is
                hard-wired to 6*6*256 inputs, which is what this spatial size
                produces after ``layer5``.

        Returns:
            Tensor of shape (N, num_classes) holding raw logits (no softmax).
        """
        out=self.layer1(x)
        out=self.layer2(out)
        out=self.layer3(out)
        out=self.layer4(out)
        out=self.layer5(out)
        # Collapse (C, H, W) into one feature vector per sample.
        out=torch.flatten(out,1)
        out=self.layer6(out)
        out=self.layer7(out)
        return out
    
# Instantiate the network on the selected device, together with its loss
# function and optimiser.
model = AlexNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
# Using SGD with L2 weight decay 0.0005, momentum 0.9
optimizer = optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=5e-4,
)

# --------------------------------------------------------------------
# Training loop -- currently DISABLED (kept commented out, so the model
# above is never trained in this cell).  Reference version below, with the
# np.append call given its array argument and the running cost accumulated
# as a Python float so the autograd graph is not kept alive across batches.
#
# total_batch = len(trainloader)
# print(total_batch)
# for epoch in range(10):
#     model.train()
#     avg_cost = 0.0
#     save_accuracy = np.array([])
#     for X, Y in trainloader:
#         X = X.to(device)
#         Y = Y.to(device)
#
#         optimizer.zero_grad()
#         hypothesis = model(X)
#         correct_prediction = torch.argmax(hypothesis, 1) == Y
#         accuracy = correct_prediction.float().mean()
#         save_accuracy = np.append(save_accuracy, accuracy.item())
#         cost = criterion(hypothesis, Y)
#         cost.backward()
#         optimizer.step()
#         avg_cost += cost.item() / total_batch
#
#     acc = save_accuracy.mean()
#     print('Accuracy:', acc)
#     print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# Evaluate the model on at most MAX_EVAL_BATCHES batches of the test loader,
# printing the accuracy of each batch and then the overall accuracy.
MAX_EVAL_BATCHES = 10

was_training = model.training
model.eval()   # switch Dropout to inference behaviour; otherwise units are randomly zeroed
n_correct = 0
n_seen = 0
try:
    with torch.no_grad():   # no gradients needed for evaluation
        for batch_idx, (X, Y) in enumerate(testloader):
            X = X.to(device)
            Y = Y.to(device)
            prediction = model(X)
            correct_prediction = torch.argmax(prediction, 1) == Y
            accuracy = correct_prediction.float().mean()
            print('Accuracy:', accuracy.item())
            n_correct += int(correct_prediction.sum().item())
            n_seen += Y.size(0)
            # Stop right after the last wanted batch instead of fetching another one.
            if batch_idx + 1 == MAX_EVAL_BATCHES:
                break
finally:
    # Leave the model in whatever mode it was in before this cell ran.
    model.train(was_training)

# Sample-weighted accuracy over everything evaluated (guards the empty-loader case).
if n_seen:
    print('Overall accuracy on {} samples: {:.4f}'.format(n_seen, n_correct / n_seen))
else:
    print('No test samples were evaluated.')