In [20]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
import torchvision
import glob
from torch.utils.data import random_split, Subset

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
classes = ['f','j','k','l','m','n','o','x','y','z']

transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize([0.5,0.5,0.5],
                        [0.5,0.5,0.5])
])

train_path = '/content/drive/MyDrive/data/data_letters/train'

train_data = torchvision.datasets.ImageFolder(train_path, transform=transformer)

In [26]:
train_ratio = 0.8
train_size= int(train_ratio * len(train_data))
val_size = len(train_data) - train_size
train_set, val_set = random_split(train_data, [train_size, val_size])
print(len(train_set))
print(len(val_set))

8432
2109


In [28]:
trainloader = DataLoader(
    train_set,
    batch_size = 256,
    shuffle = True
)

valloader = DataLoader(
    val_set,
    batch_size = 256,
    shuffle = True
)

In [4]:
class Task1Net(nn.Module):
    def __init__(self, num_classes=10):
        super(Task1Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.dropout = nn.Dropout(0.5)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()
        self.fc = nn.Linear(in_features=32*32*32, out_features=num_classes)
        
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        output = self.pool(output)
        output = self.dropout(output)
        output = self.conv2(output)
        output = self.relu2(output)
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        output = output.view(-1, 32*32*32)
        output = self.fc(output)
        return output

In [29]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model = torchvision.models.resnet34(pretrained=True)
model.to(device)
torch.save(model.state_dict(), '/content/drive/MyDrive/model/task1.model')

cuda


In [58]:
inchannel = model.fc.in_features
model.fc = nn.Linear(inchannel, 10)

optimizer = Adam(model.parameters(), lr=0.0001)
loss_function = nn.CrossEntropyLoss()

num_epoches = 15

train_count = len(train_set)
val_count = len(val_set)

best_accuracy = 0.0

hist = []

for epoch in range(num_epoches):
    model.to(device)
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    for i, (images, labels) in enumerate(trainloader):
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.cpu().data * images.size(0)
        _, prediction = torch.max(outputs.data,1)
        train_accuracy += int(torch.sum(prediction == labels.data))
      
    train_accuracy = train_accuracy / train_count
    train_loss = train_loss/train_count
    print(f"train_accuracy {train_accuracy}")
    model.eval()
    val_accuracy = 0.0
    with torch.no_grad():
        for j, (images, labels) in enumerate(valloader):
            if torch.cuda.is_available():
                images = images.to(device)
                labels = labels.to(device)
            outputs = model(images)
            _,prediction = torch.max(outputs.data,1)
            val_accuracy += int(torch.sum(prediction==labels.data))

        val_accuracy = val_accuracy / val_count
        print(f"epoch {epoch}")
        print(f"train_loss {train_loss}")
        print(f"Train Accuracy {train_accuracy}")
        print(f"Val Accuracy {val_accuracy}")

        hist.append(val_accuracy)
        if val_accuracy > best_accuracy:
          print(f"Best val accuracy {val_accuracy}")
          torch.save(model.state_dict(), '/content/drive/MyDrive/model/task1.model')
          best_accuracy = val_accuracy

train_accuracy 0.8237666034155597
epoch 0
train_loss 0.5485342741012573
Train Accuracy 0.8237666034155597
Val Accuracy 0.853959222380275
Best val accuracy 0.853959222380275
train_accuracy 0.8740512333965844
epoch 1
train_loss 0.3770177364349365
Train Accuracy 0.8740512333965844
Val Accuracy 0.8515884305357989
train_accuracy 0.8948055028462998
epoch 2
train_loss 0.31392624974250793
Train Accuracy 0.8948055028462998
Val Accuracy 0.8582266477003319
Best val accuracy 0.8582266477003319
train_accuracy 0.907258064516129
epoch 3
train_loss 0.2807277739048004
Train Accuracy 0.907258064516129
Val Accuracy 0.8847795163584637
Best val accuracy 0.8847795163584637
train_accuracy 0.9239800759013282
epoch 4
train_loss 0.23213723301887512
Train Accuracy 0.9239800759013282
Val Accuracy 0.8591749644381224
train_accuracy 0.9284867172675522
epoch 5
train_loss 0.2073179930448532
Train Accuracy 0.9284867172675522
Val Accuracy 0.8857278330962541
Best val accuracy 0.8857278330962541
train_accuracy 0.934060721

In [87]:
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow

In [89]:
class Visualize(object):
    def __call__(self, img):
        cv2_imshow(img)
        return img
transformer_test = transforms.Compose([
    transforms.CenterCrop(20),
    Visualize(),
    transforms.RandomRotation(20),
    Visualize(),
    transforms.RandomHorizontalFlip(),
    Visualize(),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],
                        [0.5,0.5,0.5])
])
# bala_data = torchvision.datasets.ImageFolder(train_path, transform=transformer_test)

In [83]:
to_pil_image = transforms.ToPILImage()
x = torch.randn(3, 256, 256)
img = to_pil_image(x)
out = transformer_test(img)

In [91]:
class Visualize(object):
    def __call__(self, img):
        cv2_imshow(img)
        return img
        
spatial_transform = transforms.Compose([transforms.RandomRotation(20),
                                        Visualize(),
                                        transforms.RandomResizedCrop(224),
                                        Visualize(),
                                        transforms.RandomHorizontalFlip(),
                                        Visualize(),
                                        transforms.ColorJitter(hue=.05, saturation=.05),
                                        Visualize(),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])


to_pil_image = transforms.ToPILImage()
x = torch.randn(3, 256, 256)
img = to_pil_image(x)
out = spatial_transform(img)

AttributeError: ignored