### Importing Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shutil
from __future__ import print_function, division
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
from PIL import ImageFile,Image
import  glob
# Pillow flag: tolerate truncated image files when loading (see PIL.ImageFile docs).
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Switch matplotlib to interactive mode.
plt.ion()

### Getting the Data ready for training

##### Downloading Data

In [None]:
from __future__ import print_function
import cifar_utils as ut

# Fetch CIFAR-10 through the project-local cifar_utils helpers, write both
# splits to text files, then convert those text files to PNG images.
if __name__ == "__main__":
    trn, tst = ut.loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')

    # Same three steps for each split: announce, write the text file, confirm.
    for banner, text_path, split_data in (
        ('Writing train text file...', r'./Train_cntk_text.txt', trn),
        ('Writing test text file...', r'./Test_cntk_text.txt', tst),
    ):
        print(banner)
        ut.saveTxt(text_path, split_data)
        print('Done.')

    print('Converting train data to png images...')
    ut.saveTrainImages(r'./Train_cntk_text.txt', 'train')
    # No 'Done.' is printed between the two conversions.
    print('Converting test data to png images...')
    ut.saveTestImages(r'./Test_cntk_text.txt', 'test')
    print('Done.')

Downloading http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Done.
Extracting files...
Done.
Preparing train set...
Done.
Preparing test set...
Done.
Writing train text file...
Done.
Writing test text file...
Done.
Converting train data to png images...
Converting test data to png images...
Done.


In [None]:
# Count the entries found in each split directory under the working directory.
files = ['train', 'test']

for f in files:
    # Named `entries`, not `list`: rebinding the builtin at notebook scope
    # would break every later `list(...)` call in the same kernel.
    entries = os.listdir(os.path.join(os.getcwd(), f))
    number_files = len(entries)
    print("No of {} Images: {}".format(f, number_files))


No of train Images: 50000
No of test Images: 10000


##### Preprocessing

In [None]:
# Retrieve class names
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. The file is read with ``encoding='bytes'``,
        so 8-bit strings written by Python 2 come back as ``bytes``.
    """
    import pickle

    # NOTE(review): unpickling can execute arbitrary code; only call this on
    # files from a trusted source.
    with open(file, 'rb') as fo:
        # Named `payload` rather than `dict` so the builtin is not shadowed.
        payload = pickle.load(fo, encoding='bytes')
    return payload

d = unpickle(os.getcwd() + '/cifar-10-batches-py/batches.meta')
# The label names sit under the b'label_names' key as bytes; decode each to str.
class_names = [raw_label.decode('utf8') for raw_label in d[b'label_names']]

In [None]:
# Show the number of classes, then the class names themselves
print('No of classes: {}'.format(len(class_names)), class_names, sep='\n')

No of classes: 10
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [None]:
# Make class directories in train and test directories
files = ['train', 'test']
for f in files:
    for class_name in class_names:
        path = os.path.join(os.getcwd(), f, class_name)
        # exist_ok=True replaces the separate exists() check, so re-running
        # the cell is a no-op for directories that are already there.
        os.makedirs(path, exist_ok=True)

In [None]:
# Read the tab-separated, header-less mapping files (image path -> class index)
train_map, test_map = (
    pd.read_csv(map_path, sep='\t', header=None)
    for map_path in ('train_map.txt', 'test_map.txt')
)

In [None]:
# Inspect the first rows of the train mapping (column 0: image path, column 1: class index)
train_map.head()

Unnamed: 0,0,1
0,/content/train/00000.png,6
1,/content/train/00001.png,9
2,/content/train/00002.png,9
3,/content/train/00003.png,4
4,/content/train/00004.png,1


In [None]:
# Move files to respective classes in train and test directories
files = ['train', 'test']
mapping_files = [train_map, test_map]

for split_name, mapping in zip(files, mapping_files):
    split_dir = os.path.join(os.getcwd(), split_name)
    # Walk the path column (0) and label column (1) together instead of
    # building a whole row Series per image with chained .iloc[j][1].
    for src, label in zip(mapping.iloc[:, 0], mapping.iloc[:, 1]):
        dst = os.path.join(split_dir, class_names[label])

        # Files already moved by an earlier run no longer exist at src; skip them.
        if os.path.exists(src):
            shutil.move(src, dst)

In [None]:
# Class data overview: number of entries in every class folder of each split
files = ['train', 'test']
for f in files:
    for class_name in class_names:
        class_dir = os.path.join(os.getcwd(), f, class_name)
        # The listing is counted directly instead of being bound to `list`,
        # which would shadow the builtin for the rest of the notebook.
        number_files = len(os.listdir(class_dir))
        print('Images in {} set {} class are {}'.format(f, class_name, number_files))
    print('\n')

Images in train set airplane class are 5000
Images in train set automobile class are 5000
Images in train set bird class are 5000
Images in train set cat class are 5000
Images in train set deer class are 5000
Images in train set dog class are 5000
Images in train set frog class are 5000
Images in train set horse class are 5000
Images in train set ship class are 5000
Images in train set truck class are 5000


Images in test set airplane class are 1000
Images in test set automobile class are 1000
Images in test set bird class are 1000
Images in test set cat class are 1000
Images in test set deer class are 1000
Images in test set dog class are 1000
Images in test set frog class are 1000
Images in test set horse class are 1000
Images in test set ship class are 1000
Images in test set truck class are 1000




In [None]:
# Create Directory
path = os.path.join(os.getcwd(), 'arranged_data_final')
# exist_ok=True makes a separate exists() check unnecessary.
os.makedirs(path, exist_ok=True)

# Move organized train and test files to a separate folder
files = ['train', 'test']
dst = path  # the destination is the same for both splits, so compute it once
for f in files:
    src = os.path.join(os.getcwd(), f)

    # A split that was already moved no longer exists in the working directory.
    if os.path.exists(src):
        shutil.move(src, dst)

In [None]:
# Data Processing and Augmentation
# Both splits resize to 64x64, convert to tensor and normalize; only 'train'
# adds a random horizontal flip between the resize and the tensor conversion.
# NOTE(review): the mean/std values look like the ImageNet channel statistics
# used with torchvision's pretrained models — confirm.
data_transforms = {}
for split in ('train', 'test'):
    steps = [transforms.Resize((64, 64))]
    if split == 'train':
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    data_transforms[split] = transforms.Compose(steps)

## Training

##### Loading in Dataset

In [None]:
# Build one ImageFolder dataset per split from the arranged directory tree
data_dir = os.getcwd() + '/arranged_data_final'
image_datasets = {}
for split_name in ('train', 'test'):
    split_root = os.path.join(data_dir, split_name)
    image_datasets[split_name] = datasets.ImageFolder(split_root, data_transforms[split_name])

In [None]:
# Create dataloaders
dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x],
        batch_size=150,
        # Only the training split needs a fresh order every epoch; the
        # evaluation metrics are sums over the split and ignore batch order.
        shuffle=(x == 'train'),
        # Never ask for more worker processes than the machine has CPUs
        # (the previous fixed value of 16 is kept as the upper bound).
        num_workers=min(16, os.cpu_count() or 1),
    )
    for x in ['train', 'test']
}

In [None]:
# Number of samples per split, used to average loss and accuracy per epoch
dataset_sizes = dict(
    (split_name, len(image_datasets[split_name])) for split_name in ('train', 'test')
)
# From here on the class names come from the training ImageFolder
class_names = image_datasets['train'].classes
print(len(class_names))

10


In [None]:
# Persist the class names, one per line
with open('labels.txt', 'w') as labels_file:
    labels_file.writelines('%s\n' % label for label in class_names)

##### Loading Pretrained Model

In [None]:
# Create Model
# Use the first CUDA device when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-50 whose final fully connected layer is replaced by one
# with an output per class
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))
model_ft = model_ft.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




##### Setting Optimizer and Loss Function

In [None]:
# Loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the network (nothing is frozen)
optimizer_ft = optim.SGD(params=model_ft.parameters(), lr=0.003, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)

##### Training (sit back, this will take some time)

In [None]:
# Training
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best-scoring weights.

    Every epoch runs a 'train' phase followed by a 'test' phase. The function
    reads three notebook-level names: ``dataloaders`` (batches per phase),
    ``dataset_sizes`` (sample count per phase) and ``device``.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training phase of that epoch.
        num_epochs: Number of epochs to run.

    Returns:
        ``(model, [train_loss, train_acc, val_loss, val_acc])`` where each
        history list holds one Python float per epoch and ``model`` carries
        the weights from the epoch with the highest 'test' accuracy.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    train_loss, val_loss, train_acc, val_acc = [], [], [], []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward propagation; gradients are tracked only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics, accumulated as plain Python numbers so the
                # history never holds (possibly CUDA) tensors
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            # Step the scheduler only after this epoch's optimizer steps;
            # stepping it first would skip the initial learning-rate value.
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            if is_training:
                train_loss.append(epoch_loss)
                train_acc.append(epoch_acc)
            else:
                val_loss.append(epoch_loss)
                val_acc.append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            # NOTE(review): the 'test' split doubles as the validation set, so
            # the best accuracy is selected on the same data it is reported on.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, [train_loss, train_acc, val_loss, val_acc]


# Fine-tune for 50 epochs; `info` is [train_loss, train_acc, val_loss, val_acc].
model_ft, info = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=50)

Epoch 0/49
----------




train Loss: 0.6161 Acc: 0.7898
test Loss: 0.4928 Acc: 0.8254
Epoch 1/49
----------
train Loss: 0.2573 Acc: 0.9114
test Loss: 0.4660 Acc: 0.8391
Epoch 2/49
----------
train Loss: 0.1686 Acc: 0.9433
test Loss: 0.4281 Acc: 0.8588
Epoch 3/49
----------
train Loss: 0.1075 Acc: 0.9646
test Loss: 0.4446 Acc: 0.8619
Epoch 4/49
----------
train Loss: 0.0776 Acc: 0.9744
test Loss: 0.4688 Acc: 0.8590
Epoch 5/49
----------


##### Saving Model

In [None]:
# Persist only the learned parameters (the state dict), not the whole module
trained_weights = model_ft.state_dict()
torch.save(trained_weights, 'resnet50_f.pth')

## Testing

In [None]:
def predictions(model, image):
    """Return the class name ``model`` predicts for a single-image batch.

    Reads the notebook-level ``device`` and ``class_names``.

    Args:
        model: Network to run; it is switched to evaluation mode.
        image: Input tensor holding exactly one sample along dimension 0.

    Returns:
        The entry of ``class_names`` at the index of the highest model output.
    """
    model.eval()
    img = image.to(device)

    with torch.no_grad():
        outputs = model(img)
        # Softmax preserves ordering, so the arg-max of the raw outputs is
        # already the predicted class; no probability conversion is needed.
        _, preds = torch.max(outputs, 1)
    # .item() gives a real Python int for list indexing instead of relying on
    # the implicit conversion of a one-element tensor.
    return class_names[preds.item()]


def image_loader(loader, image):
    """Apply ``loader`` to ``image`` and return the result as a one-sample batch.

    Args:
        loader: Callable that turns ``image`` into a tensor.
        image: Object accepted by ``loader``.

    Returns:
        A float32 tensor with a new leading dimension of size 1 and
        ``requires_grad=True``.
    """
    image = loader(image).float()
    # torch.tensor(existing_tensor, ...) copy-constructs and emits a
    # UserWarning; clone().detach().requires_grad_(True) is the documented
    # equivalent and yields the same values and flags.
    image = image.clone().detach().requires_grad_(True)
    return image.unsqueeze(0)

In [None]:
# Classify every image of the test split one at a time, timing the whole run.
classes = glob.glob(os.getcwd() + '/arranged_data_final/test/*')
all_images = [glob.glob(class_dir + '/*') for class_dir in classes]
# Flatten with a comprehension: reshaping the nested list through numpy only
# works when every class folder holds the same number of files.
merged_images = [image_path for class_images in all_images for image_path in class_images]

start_time = time.time()
count = 0
total_correct = 0
for j in merged_images:
    # Close the file handle promptly and force three channels, since the
    # 'test' transform normalizes with three-channel statistics.
    with Image.open(j) as raw_image:
        image = image_loader(data_transforms['test'], raw_image.convert('RGB'))
    pred_name = predictions(model_ft, image)
    # The true label is the image's parent folder. Comparing against that
    # folder only (not any path component) avoids false matches when some
    # directory higher up happens to be named like a class.
    true_name = os.path.basename(os.path.dirname(j))
    if "_".join(pred_name.split()) == true_name:
        total_correct += 1
    count += 1

end_time = time.time()
elapsed = end_time - start_time

print('Total time=', elapsed, " seconds")
print('Total images processed=', count)
if count:
    print('Frames Per Seconds with Pytorch Model =', count / elapsed)
    print('Test Data accuracy with Pytorch Model =', (total_correct / count) * 100)
else:
    # Without this guard an empty test folder ends in a ZeroDivisionError.
    print('No test images found; throughput and accuracy are undefined.')