### Introduction

This notebook runs the ResNet-50 baseline on the AirNet reconstructions of the training set from VizWiz.

### Mount Google Drive

In [None]:
# Mount Google Drive in the Colab runtime, then work from the shared project folder.
import os

from google.colab import drive

drive.mount('/content/drive')

# Relative paths used by later cells are resolved from this directory.
os.chdir("/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/")

Mounted at /content/drive


### Unzipping the dataset

The address below for unzipping the file will change based on your local Google Drive file path. Change the file addresses as appropriate.

In [None]:
# Extract the training reconstructions (-q: quiet, -o: overwrite existing files without prompting).
# NOTE(review): the dataset class reads train images directly from train/, but val/test images
# from an extra airnet_val/ / airnet_test/ sub-folder -- confirm the train archive has no top-level folder.
!unzip -q -o dataset/airnet-reconstruction/airnet_train.zip -d dataset/airnet-reconstruction/train

In [None]:
# Extract the validation reconstructions (-q: quiet, -o: overwrite existing files without prompting).
!unzip -q -o dataset/airnet-reconstruction/airnet_val.zip -d dataset/airnet-reconstruction/val/

In [None]:
# Extract the test reconstructions (-q: quiet, -o: overwrite existing files without prompting).
!unzip -q -o dataset/airnet-reconstruction/airnet_test.zip -d dataset/airnet-reconstruction/test

#### Install timm (local)

In [None]:
# Installs whatever timm release is current; pin a version (timm==<version>) for reproducible runs.
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.11.1 timm-0.6.12


#### Install timm (colab)

In [None]:
# Create a virtual environment on Google Drive and install timm into it.
!pip3 install virtualenv
!virtualenv "/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/virtual_env"
!chmod 755 "/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/virtual_env/bin/activate"
# The environment has to be activated before pip runs, otherwise timm is installed into the
# system Python instead of virtual_env. Activation and install share one line because every
# `!` line is executed in its own subshell.
!source "/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/virtual_env/bin/activate" && pip install timm

Collecting virtualenv
  Downloading virtualenv-20.26.1-py3-none-any.whl (3.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting distlib<1,>=0.3.7 (from virtualenv)
  Downloading distlib-0.3.8-py2.py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.9/468.9 kB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: distlib, virtualenv
Successfully installed distlib-0.3.8 virtualenv-20.26.1
created virtual environment CPython3.10.12.final.0-64 in 19576ms
  creator CPython3Posix(dest=/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/virtual_env, clear=False, no_vcs_ignore=False, global=False)
  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/.local/share/virtualenv)
    added seed packages: pip==24.0, setuptools==69.5.1, wheel==0.43.0
  activators BashActivator,CShellActivator,F

### Get predictions

#### Import libraries (local)

In [None]:
import os
import argparse
import json
from datetime import datetime

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
from torchvision import transforms

import timm

#### Import Libraries (CoLab)

In [None]:
import os
import argparse
import json
from datetime import datetime

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
from torchvision import transforms

# Change the working directory to the shared CMSC 472 project folder.
os.chdir("/content/drive/MyDrive/CMSC_472/CMSC 472 Project/Code/")

import sys
# Add the virtual environment's site-packages directory (relative to the working
# directory set above) to the Colab system path before importing timm.
sys.path.append("virtual_env/lib/python3.10/site-packages")
import timm

#### Set variables


In [None]:
# Input/output locations; relative paths are resolved from the current working directory.
# Annotation JSON file (its 'images' and 'categories' entries are read below).
ann_path = 'dataset/annotations.json'
# Root folder of the extracted reconstructions (contains train/, val/ and test/).
images_path = 'dataset/airnet-reconstruction'
# Folder the prediction JSON file is written to.
prediction_path = 'predictions/airnet-reconstruction'

# Name of the timm model to load; change it based on the model you want to run.
model_name = 'resnet50'
#model_name = 'vit_base_patch32_224'

# Number of images per batch passed to the DataLoader.
batch_size = 64

#### Load annotation file

In [None]:
# Parse the annotation file; the `with` block closes the file handle once it has been read.
with open(ann_path) as ann_file:
    annotations = json.load(ann_file)

# Category ids listed in the annotation file; later used to select columns of the model output.
indices_in_1k = [category['id'] for category in annotations['categories']]

#### Set device

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

#### Create dataset class and dataloader

In [None]:
# Preprocessing applied to every image before it is passed to the model:
# resize, center-crop to 224, convert to a tensor, then normalize per channel.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),

    # Normalization has to match the selected model. The active call below is
    # the one for ResNet; for ViT, replace it with the commented-out call:
    #
    #   transforms.Normalize(mean=[0.5, 0.5, 0.5],
    #                        std=[0.5, 0.5, 0.5])

    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    ])

class VizWizClassification(Dataset):
    """Dataset over the reconstructed images listed in the annotation file.

    Every entry of ``annotations['images']`` is mapped to the PNG file of the
    same name inside the folder of the split ('train', 'val' or 'test') that
    appears in the entry. Entries that name none of the splits are skipped.

    Args:
        annotations: Loaded annotation dict; only its ``'images'`` entry is read.
        transform: Optional callable applied to each loaded PIL image.
        images_root: Folder that holds the extracted reconstructions. When
            omitted, the notebook-level ``images_path`` variable is used.
    """

    # (split keyword, sub-folder below the images root), checked in this order.
    _SPLIT_DIRS = (
        ('train', '/train/'),
        ('val', '/val/airnet_val/'),
        ('test', '/test/airnet_test/'),
    )

    def __init__(self, annotations, transform=None, images_root=None):
        if images_root is None:
            # Fall back to the notebook-level setting; it is read once, here.
            images_root = images_path

        self.images = []       # full path of every reconstructed image
        self.image_names = []  # original names, as listed in the annotations
        for img in annotations['images']:
            original_name = str(img)
            for split, subdir in self._SPLIT_DIRS:
                if split in original_name:
                    self.images.append(images_root + subdir + self._png_name(original_name))
                    self.image_names.append(original_name)
                    # First matching split wins, so an image is never added twice.
                    break

        self.transform = transform

    @staticmethod
    def _png_name(name):
        """Return ``name`` with a '.jpg' extension swapped for '.png'."""
        stem, ext = os.path.splitext(name)
        # Only the extension is rewritten; a 'jpg' elsewhere in the name is kept.
        return stem + '.png' if ext == '.jpg' else name

    def __len__(self):
        """Number of images found in the annotation file."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, image_name)`` for position ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; ``image_name`` is the original name from the annotations.
        """
        # The context manager closes the underlying file after the RGB copy is made.
        with Image.open(self.images[idx]) as source:
            image = source.convert('RGB')
        image_name = self.image_names[idx]
        if self.transform:
            image = self.transform(image)
        return image, image_name

# Wrap the annotated images in the dataset and serve them in unshuffled batches.
dataset = VizWizClassification(annotations, transform=test_transform)
vizwiz_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=False,
)

#### Load the model

In [None]:
# Build the selected timm model with pretrained weights and place it on the chosen device.
model = timm.create_model(model_name, pretrained=True)
model = model.to(device)
# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

#### Get predictions

In [None]:
# Run the model over every batch and record the predicted category id per image name.
results = {}
with torch.no_grad():
    # The second loop variable is called `image_names` so that the notebook-level
    # `images_path` setting is not overwritten while iterating.
    for images, image_names in vizwiz_loader:
        images = images.to(device)
        # Keep only the output columns of the categories listed in the annotation file.
        outputs = model(images)[:, indices_in_1k]
        # Position of the highest score within `indices_in_1k`, as plain Python ints.
        pred = outputs.max(dim=1).indices.tolist()
        for image_name, best in zip(image_names, pred):
            results[image_name] = indices_in_1k[best]

### Save the prediction file for EvalAI server

In [None]:
# Write the predictions as JSON into `prediction_path`, creating the folder if needed.
os.makedirs(prediction_path, exist_ok=True)
# NOTE: the file name is fixed, so predictions made with a different `model_name`
# overwrite this file unless it is renamed first.
file_path = os.path.join(prediction_path, "prediction-resnet-airnet.json")
with open(file_path, 'w') as outfile:
    json.dump(results, outfile)

Now you can upload this file on EvalAI server.