# Classifier Evaluation Notebook

Steps:

1. Loads data from data/test/
2. Preprocesses test data
3. Loads trained model from models/
4. Generates labels- 0(infographics), 1(not-infographics)
5. **AIM- Over 90% accuracy**


# Requirements

In [0]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.autograd import Variable
from torch.utils.data import Dataset
import os
from natsort import natsorted
from PIL import Image
from os import walk
import csv
from shutil import copyfile

# Mount Project Directory

In [71]:
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Paths

In [0]:
PARENT_DIR = "/content/gdrive/My Drive/projects/infographics_dataset_collection"
DATA_DIR = PARENT_DIR + "/data/test"
MODEL_PATH = PARENT_DIR + "/models/infographics_classifier.pth"
BATCH_SIZE = 16
PREDICTIONS_PATH = PARENT_DIR + "/output/predictions.csv"
INFO_DIR = PARENT_DIR + "/output/info"

# Load Model

In [73]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=torch.load(MODEL_PATH)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Get test images filenames

In [74]:
image_filenames = []
for (dirpath, dirnames, filenames) in walk(DATA_DIR):
    image_filenames.extend(filenames)
    break

print(image_names)
print(len(image_names))

['Pali_Matli.jpg', 'RyanMcCready1.jpg', 'padmakumarav077.jpg', 'PJOnline_News.jpg', 'Rowan4Bernie.jpg', 'TusanESC.jpg', 'ThatNigerian_.jpg', 'PaideiaNews.jpg', 'proshare.png', 'wbutian.jpg', 'tony_rao.jpg', 'vixvix.jpg', 'YoloCountyCA.jpg', 'Otega62605728.jpg', 'thecableindex.jpg', 'proshare.jpg', 'turatlasru.jpg', 'TuratlasJP.jpg', 'nembotavy_0101.jpg', 'sfos_scsh.jpg', 'sanicadurima.jpg', 'sanatana_dharmi.jpg', 'vineetpandey785.jpg', 'UmbrineFatima.jpg', 'saleh202036.jpg', 'videodesignq21.jpg', 'stedas.jpg', 'warriorclassthe.jpg', 'VlackSheepGD.jpg', 'SeadogDriftwood.jpg', 'ShelbyKuttyMD.jpg', 'OzSeekerView.jpg', 'QaunainM.jpg', 'ravi_idc.jpg', 'punthep1987.jpg', '__why_so_blue.jpg', 'SalfordCouncil.jpg', 'RALee85.jpg', 'trevbobbean.jpg', 'NdivhoSinyegwe.jpg', 'opjha.jpg', 'shamologyy.jpg', 'SuklaPaul12.jpg', 'seer116dAult.jpg', 'timothuney.jpg', 'myvaluepicks.jpg', 'qatarmarketers.png', 'silvestromedia.png', 'SANDROANDRINE.jpg', '_SametKaya.jpg', 'SinanOku5.jpg', 'taggy_.jpg', 'VUMC

# Preprocess

In [0]:
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])

def get_images():
    # data = CustomDataSet(DATA_DIR, transform=test_transforms)
    # indices = list(range(len(data)))
    # np.random.shuffle(indices)
    # idx = indices[:num]
    # loader = torch.utils.data.DataLoader(data, batch_size=num)
    # dataiter = iter(loader)
    # images = dataiter.next()
    # return images
    images = []
    for image_filename in image_filenames:
      image_path = os.path.join(DATA_DIR, image_filename)
      image = Image.open(image_path).convert("RGB")
      tensor_image = test_transforms(image)
      images.append(tensor_image)
    
    return images

# Predict

In [0]:
def predict_image(image):
    image_tensor = test_transforms(image).float()
    image_tensor = image_tensor.unsqueeze_(0)
    input = Variable(image_tensor)
    input = input.to(device)
    output = model(input)
    index = output.data.cpu().numpy().argmax()
    return index

In [77]:
to_pil = transforms.ToPILImage()
images = get_images()
classes = ['info', 'notinfo']
print("Total images to predict - ", len(images))
fig=plt.figure(figsize=(10,10))

assert len(image_filenames) == len(images)

predictions = []

for ii, image_filename in enumerate(image_filenames):
    image = to_pil(images[ii])
    index = predict_image(image)
    predictions.append([image_filename, classes[index]])

print(predictions)
    

  "Palette images with Transparency expressed in bytes should be "


Total images to predict -  505
[['Pali_Matli.jpg', 'info'], ['RyanMcCready1.jpg', 'notinfo'], ['padmakumarav077.jpg', 'info'], ['PJOnline_News.jpg', 'notinfo'], ['Rowan4Bernie.jpg', 'notinfo'], ['TusanESC.jpg', 'notinfo'], ['ThatNigerian_.jpg', 'info'], ['PaideiaNews.jpg', 'notinfo'], ['proshare.png', 'info'], ['wbutian.jpg', 'notinfo'], ['tony_rao.jpg', 'info'], ['vixvix.jpg', 'notinfo'], ['YoloCountyCA.jpg', 'info'], ['Otega62605728.jpg', 'info'], ['thecableindex.jpg', 'info'], ['proshare.jpg', 'info'], ['turatlasru.jpg', 'notinfo'], ['TuratlasJP.jpg', 'notinfo'], ['nembotavy_0101.jpg', 'notinfo'], ['sfos_scsh.jpg', 'notinfo'], ['sanicadurima.jpg', 'info'], ['sanatana_dharmi.jpg', 'info'], ['vineetpandey785.jpg', 'info'], ['UmbrineFatima.jpg', 'notinfo'], ['saleh202036.jpg', 'info'], ['videodesignq21.jpg', 'notinfo'], ['stedas.jpg', 'notinfo'], ['warriorclassthe.jpg', 'notinfo'], ['VlackSheepGD.jpg', 'info'], ['SeadogDriftwood.jpg', 'info'], ['ShelbyKuttyMD.jpg', 'notinfo'], ['OzSeek

<Figure size 720x720 with 0 Axes>

In [0]:
# use image_names, get each image by filename and call predict_image on each, save label in list, save list in CSV

# Result

1. Create predictions.csv in PREDICTIONS_PATH
2. Move images labelled "info" to INFO_DIR folder

In [0]:
with open(PREDICTIONS_PATH, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(predictions)

In [84]:
for prediction in predictions:
  filename = prediction[0]
  label = prediction[1]
  if label is 'info':
    src = os.path.join(DATA_DIR, filename)
    dest = os.path.join(INFO_DIR, filename)
    copyfile(src, dest)
    print("File moved from {} to {}".format(src, dest))

File moved from /content/gdrive/My Drive/projects/infographics_dataset_collection/data/test/Pali_Matli.jpg to /content/gdrive/My Drive/projects/infographics_dataset_collection/output/info/Pali_Matli.jpg
File moved from /content/gdrive/My Drive/projects/infographics_dataset_collection/data/test/padmakumarav077.jpg to /content/gdrive/My Drive/projects/infographics_dataset_collection/output/info/padmakumarav077.jpg
File moved from /content/gdrive/My Drive/projects/infographics_dataset_collection/data/test/ThatNigerian_.jpg to /content/gdrive/My Drive/projects/infographics_dataset_collection/output/info/ThatNigerian_.jpg
File moved from /content/gdrive/My Drive/projects/infographics_dataset_collection/data/test/proshare.png to /content/gdrive/My Drive/projects/infographics_dataset_collection/output/info/proshare.png
File moved from /content/gdrive/My Drive/projects/infographics_dataset_collection/data/test/tony_rao.jpg to /content/gdrive/My Drive/projects/infographics_dataset_collection/ou