In [1]:
import argparse
import os
import random
import time
import sys
import numpy as np
import pandas as pd
import shutil

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from PIL import Image
import cv2

In [2]:
# Fixed seed for the train/validation split. Without it, every kernel restart
# samples different rows and the reported accuracies are not comparable.
SPLIT_SEED = 42

data = pd.read_csv("../delhi_data.csv")
# A random 75% of the rows is used for training ...
data_train = data.sample(frac=0.75, random_state=SPLIT_SEED)
# ... and every row whose index label was not sampled becomes validation.
data_val = data.loc[~data.index.isin(data_train.index)]
print(len(data_train))
print(len(data_val))

1616
539


In [3]:
# Pull the filename and class columns out of each split as plain lists.
# The lists are indexed by position, so the DataFrame index labels of the
# sampled rows no longer matter from here on.
files_train, labels_train = list(data_train['filename']), list(data_train['class'])
files_val, labels_val = list(data_val['filename']), list(data_val['class'])

# Sample IDs are simply the positions 0..n-1 into the lists above.
ids_train = list(range(len(files_train)))
ids_val = list(range(len(files_val)))

In [8]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five conv layers, adaptive pooling, three linear layers.

    Args:
        num_classes: size of the final linear layer's output (default 7).
    """

    def __init__(self, num_classes=7):
        super().__init__()

        # Convolutional feature extractor; the layer order fixes the
        # ``features.<idx>`` keys of the state dict.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Pool to a fixed 6x6 map so the first linear layer always sees
        # 256 * 6 * 6 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected head ending in ``num_classes`` outputs.
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the classifier output for a batch of 3-channel images."""
        pooled = self.avgpool(self.features(x))
        # Flatten everything but the batch dimension: (N, 256, 6, 6) -> (N, 9216).
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that reads ``../img/<filename>.jpg`` and its label.

    Args:
        list_IDs: sequence of keys; ``list_IDs[index]`` is used to index both
            ``files`` and ``labels``.
        files: image file stems (without directory or ``.jpg`` extension).
        labels: class label for each file, aligned with ``files``.
        tranform: callable applied to the loaded PIL image. The misspelling is
            kept so that existing callers passing it by keyword keep working.
    """

    def __init__(self, list_IDs, files, labels, tranform):
        self.filenames = files
        self.labels = labels
        self.list_IDs = list_IDs
        self.transform = tranform

    def __len__(self):
        """Number of samples, i.e. the number of IDs."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load one image, apply the transform and return ``(X, y)``.

        Raises:
            FileNotFoundError: if the image is missing or cannot be decoded.
        """
        ID = self.list_IDs[index]
        path = '../img/' + self.filenames[ID] + '.jpg'
        cv_im = cv2.imread(path, cv2.IMREAD_COLOR)
        if cv_im is None:
            # cv2.imread signals failure by returning None instead of raising.
            # Fail here with the offending path rather than letting
            # Image.fromarray(None) crash with an unrelated error.
            raise FileNotFoundError(
                'image is missing or unreadable: %s' % path)
        # OpenCV decodes to BGR, while PIL and the RGB-ordered normalisation
        # statistics expect RGB; without this swap red and blue are exchanged.
        pil_im = Image.fromarray(cv2.cvtColor(cv_im, cv2.COLOR_BGR2RGB))
        X = self.transform(pil_im)
        y = self.labels[ID]
        return X, y

In [13]:
# Network wrapped in DataParallel; the .cuda() move is left commented out.
model = torch.nn.DataParallel(AlexNet())  # .cuda()
criterion = nn.CrossEntropyLoss()
# SGD with learning rate 0.01 (second positional argument).
optimizer = torch.optim.SGD(
    model.parameters(),
    0.01,
    momentum=0.9,
    weight_decay=0.0005,
)

# Training pipeline: random crop + horizontal flip augmentation, then tensor
# conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = Dataset(ids_train, files_train, labels_train, train_transform)
val_dataset = Dataset(ids_val, files_val, labels_val, val_transform)

# Only the training loader shuffles; both use batches of 64 and 4 workers.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=64, shuffle=False, num_workers=4)

def train(train_loader,model,criterion,optimizer,epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(x, y)`` batches.
        model: module to optimise; switched to training mode.
        criterion: loss callable invoked as ``criterion(model(x), y)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index; accepted for the caller's convenience
            and not used here.

    Returns:
        The batch-size-weighted mean of the per-batch loss values as a float
        (``nan`` if the loader yielded no samples). Previously nothing was
        returned, which left no way to see whether training was progressing.
    """
    model.train()
    loss_sum = 0.0
    n_seen = 0
    for x, y in train_loader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight by batch size so a short final batch does not skew the mean
        # (exact per-sample mean when the criterion averages over the batch).
        n_batch = x.size(0)
        loss_sum += loss.item() * n_batch
        n_seen += n_batch
    if n_seen == 0:
        return float('nan')
    return loss_sum / n_seen
        
def val(val_loader,model,criterion):
    """Compute top-1 accuracy of ``model`` over ``val_loader``.

    Args:
        val_loader: iterable yielding ``(x, y)`` batches.
        model: module to evaluate; switched to eval mode.
        criterion: accepted for signature symmetry with ``train``; not used.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, as
        a float. Returns ``nan`` when the loader yields no samples instead of
        raising ``ZeroDivisionError``.
    """
    model.eval()
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for x, y in val_loader:
            # Predicted class is the index of the largest score along dim 1.
            pred = model(x).argmax(dim=1)
            n_correct += (pred == y).sum().item()
            n_total += x.size(0)
    if n_total == 0:
        return float('nan')
    return n_correct / n_total

# Number of full passes over the training set.
NUM_EPOCHS = 50

# One training pass followed by a validation pass per epoch; the printed
# epoch number is 1-based while `epoch` itself stays 0-based.
for epoch_number, epoch in enumerate(range(NUM_EPOCHS), start=1):
    train(train_loader, model, criterion, optimizer, epoch)
    acc = val(val_loader, model, criterion)
    print('epoch: %d, val set accuracy: %.3f' % (epoch_number, acc))


epoch: %d, val set accuracy: %.3f 1 0.38404452690166974
epoch: %d, val set accuracy: %.3f 2 0.38404452690166974
epoch: %d, val set accuracy: %.3f 3 0.38404452690166974


Process Process-54:
Process Process-56:
Process Process-55:
Process Process-53:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.7

KeyboardInterrupt: 