<a href="https://colab.research.google.com/github/piyengar/vehicle-predictor/blob/master/Vehicle_type_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Colab specific
- Install additional libraries
- Setup integration with google drive
    - Needs these paths: mount point, Dataset storage path, checkpoint storage path
- Set up the content folder as a git repo and pull the codebase from GitHub
    - This can be done by installing the GitHub CLI and triggering authentication in the browser (TBD)
    - For now, this is set up manually


In [2]:
%%capture
! pip install pytorch-lightning
! pip install pytorch-lightning-bolts
! pip install ipywidgets
! pip install torchmetrics
! pip install efficientnet_pytorch


In [1]:
import os
# Path constants
STORAGE_ROOT='/content/drive'
CARZAM_ROOT= os.path.join(STORAGE_ROOT, 'MyDrive/Gatech/CARZAM')
CHECKPOINT_ROOT=os.path.join(CARZAM_ROOT, 'checkpoints')
PREDICTION_ROOT=os.path.join(CARZAM_ROOT, 'predictions')
DATASET_ROOT=os.path.join(CARZAM_ROOT, 'Datasets')
COMPCAR_ROOT=os.path.join(CARZAM_ROOT, 'CompCars')
from google.colab import drive
drive.mount(STORAGE_ROOT)
!mkdir -p "{CHECKPOINT_ROOT}"
!mkdir -p "{PREDICTION_ROOT}"
!ln -s "{CHECKPOINT_ROOT}" "checkpoints"
!ln -s "{PREDICTION_ROOT}" "predictions"
!ln -s "{CARZAM_ROOT}" "carzam"


Mounted at /content/drive


In [3]:

# File on the mounted drive whose first line holds the GitHub personal access token
GITHUB_PAT_PATH = os.path.join(STORAGE_ROOT, 'MyDrive/Gatech/github_pat_colab.txt')

# readline() keeps the trailing newline; strip it (and any stray spaces) so the
# token can be embedded verbatim in the git remote URL.
with open(GITHUB_PAT_PATH) as reader:
    GITHUB_PAT = reader.readline().strip()


### Set up codebase from GitHub (TBD)

In [4]:
# Turn the current working directory into a checkout of the project repository.
!git init
# NOTE(review): the token is part of the remote URL, so it is presumably stored
# in plain text in .git/config — confirm this is acceptable for this runtime.
!git remote add origin "https://{GITHUB_PAT}@github.com/piyengar/vehicle-predictor.git" 
!git pull origin master
# Make the local master branch track origin/master
!git branch --set-upstream-to=origin/master master

Initialized empty Git repository in /content/.git/
remote: Enumerating objects: 116, done.[K
remote: Counting objects: 100% (116/116), done.[K
remote: Compressing objects: 100% (79/79), done.[K
remote: Total 116 (delta 64), reused 67 (delta 34), pack-reused 0[K
Receiving objects: 100% (116/116), 5.49 MiB | 6.33 MiB/s, done.
Resolving deltas: 100% (64/64), done.
From https://github.com/piyengar/vehicle-predictor
 * branch            master     -> FETCH_HEAD
 * [new branch]      master     -> origin/master
Branch 'master' set up to track remote branch 'master' from 'origin'.


### Mount onedrive
Need to configure a remote called 'od' with 'rclone config' before the drive can be mounted.



In [None]:
# Download the install script from rclone.org and pipe it straight into a root shell.
# NOTE(review): this runs an unpinned remote script with sudo — confirm this is acceptable.
!curl https://rclone.org/install.sh | sudo bash
# Configure the remote named 'od' here; the mount step refers to it as `od:`.
!rclone config

In [None]:
# Local directory used as the mount point
!mkdir -p od
# Mount the rclone remote 'od' onto ./od.
# NOTE(review): --daemon presumably keeps the mount running in the background so
# the cell returns — confirm against the rclone documentation.
!rclone --vfs-cache-mode writes mount --daemon od: od/

## Download dataset

In [9]:
import os
import subprocess
from enum import Enum
from zipfile import ZipFile
class DatasetType(Enum):
    """Vehicle datasets this notebook knows about.

    Each member's value is the dataset's name as a plain string.
    """

    VRIC = "VRIC"
    CARS196 = "Cars196"
    VEHICLE_ID = "VehicleID"
    BOXCARS116K = "BoxCars116k"
    COMP_CARS = "CompCars"
    VERI = "Veri"
    COMBINED = "Combined"

def _unzip(archive_path, target_dir, password=None):
    """Extract ``archive_path`` into ``target_dir`` using the ``unzip`` command line tool.

    Raises:
        subprocess.CalledProcessError: if ``unzip`` exits with a status above 1.
            Status 1 only signals warnings (extraction still completed), so it
            is tolerated.
    """
    command = ['unzip']
    if password is not None:
        command += ['-P', password]
    command += ['-d', target_dir, archive_path]
    result = subprocess.run(command)
    if result.returncode > 1:
        raise subprocess.CalledProcessError(result.returncode, command)


def setup_dataset(dataset: DatasetType, carzam_root: str = 'carzam', dest_dir: str = 'dataset') -> None:
    """Extract the archive of one dataset from the shared storage into ``dest_dir``.

    Args:
        dataset: Which dataset to unpack.
        carzam_root: Folder containing the ``Datasets`` and ``CompCars`` archive folders.
        dest_dir: Folder the dataset is extracted into (created if missing).

    Raises:
        subprocess.CalledProcessError: if an ``unzip`` invocation fails.
        RuntimeError: if the CompCars ``zip -F`` step does not produce its output archive.

    A dataset type without an extraction step (e.g. ``DatasetType.COMBINED``)
    extracts nothing; a warning is emitted instead of failing silently.
    """
    # Make sure the destination root exists up front so no branch depends on
    # `unzip` having to create intermediate directories.
    os.makedirs(dest_dir, exist_ok=True)
    datasets_dir = os.path.join(carzam_root, 'Datasets')

    if dataset == DatasetType.COMP_CARS:
        target_dir = os.path.join(dest_dir, 'CompCars')
        os.makedirs(target_dir, exist_ok=True)
        archive_path = os.path.join(carzam_root, 'CompCars', 'sv_data.zip')
        temp_file = os.path.join(dest_dir, 'cc_combined.zip')
        try:
            # `zip -F` writes a fixed copy of the archive to temp_file, which is then extracted
            repair = subprocess.run(['zip', '-F', archive_path, '-b', dest_dir, '--out', temp_file])
            if not os.path.isfile(temp_file):
                raise RuntimeError(
                    f"zip -F did not produce {temp_file} (exit status {repair.returncode})"
                )
            _unzip(temp_file, target_dir, password='d89551fd190e38')
        finally:
            # Always clean up the temporary archive, even when extraction failed
            if os.path.exists(temp_file):
                os.remove(temp_file)
    elif dataset == DatasetType.VEHICLE_ID:
        _unzip(os.path.join(datasets_dir, 'VehicleID_V1.0.zip'), dest_dir, password='CVPR16_IDM@PKU')
        # Drop the version suffix from the extracted folder name
        os.rename(os.path.join(dest_dir, "VehicleID_V1.0"), os.path.join(dest_dir, "VehicleID"))
    elif dataset == DatasetType.VERI:
        _unzip(os.path.join(datasets_dir, 'VeRi_with_plate.zip'), dest_dir)
    elif dataset == DatasetType.CARS196:
        _unzip(os.path.join(datasets_dir, 'Cars196.zip'), os.path.join(dest_dir, 'Cars196'))
    elif dataset == DatasetType.BOXCARS116K:
        _unzip(os.path.join(datasets_dir, 'BoxCars116k.zip'), dest_dir)
    elif dataset == DatasetType.VRIC:
        _unzip(os.path.join(datasets_dir, 'VRIC.zip'), os.path.join(dest_dir, 'VRIC'))
    else:
        import warnings
        warnings.warn(f"setup_dataset: no extraction step is defined for {dataset!r}; nothing was extracted")

## Setup datasets

### VehicleID Dataset setup

In [None]:
# Unzips carzam/Datasets/VehicleID_V1.0.zip into ./dataset and renames the folder to dataset/VehicleID
setup_dataset(DatasetType.VEHICLE_ID)

### CompCars Dataset setup

In [14]:
# Runs `zip -F` on carzam/CompCars/sv_data.zip into a temporary archive, then unzips it into dataset/CompCars
setup_dataset(DatasetType.COMP_CARS)

### VeRI dataset Setup

In [12]:
# Unzips carzam/Datasets/VeRi_with_plate.zip into ./dataset
setup_dataset(DatasetType.VERI)

### Cars196 dataset Setup

In [10]:
# Unzips carzam/Datasets/Cars196.zip into dataset/Cars196
setup_dataset(DatasetType.CARS196)

### BoxCars116k dataset Setup

In [7]:
# Unzips carzam/Datasets/BoxCars116k.zip into ./dataset
setup_dataset(DatasetType.BOXCARS116K)

### VRIC dataset Setup

In [11]:
# Unzips carzam/Datasets/VRIC.zip into dataset/VRIC
setup_dataset(DatasetType.VRIC)

## Predict vehicle colors using SVM

In [None]:
# Pick up edits to imported modules (e.g. the `color` package) without restarting the kernel
%load_ext autoreload
%autoreload 2

# Imports
import numpy as np
from datetime import datetime

from tqdm.notebook import tqdm
from time import sleep
from sklearn.linear_model import SGDClassifier
from color.dataset import VeriDataset
from color import ColorDataModule, valid_archs, ColorPredictionWriter
from color.utils import evaluate_predictions, get_conf_data
from joblib import dump, load

## Train SVM

### Training params

In [None]:
# Dataset the classifier is fitted on.
# Valid names: VRIC, Cars196, VehicleID, BoxCars116k, CompCars, Veri, Combined
train_dataset_name = 'Combined'

# Optimisation settings
lr = 0.02           # constant learning rate (passed to SGDClassifier as eta0)
batch_size = 128    # samples per mini-batch requested from the data module
max_epochs = 10     # number of passes over the training dataloader
model_arch = "SVM"  # used to name the checkpoint folder / files

# Development switch: when True the loops stop after a single mini-batch
is_dev_run = False

In [None]:
# Colors the classifier distinguishes. The class ids handed to the model are
# 0..len-1; presumably ColorDataModule encodes labels as indices into this
# list — confirm against the data module.
allowed_color_list = [
    'black',
    'white',
    'red',
    'yellow',
    'blue',
    'gray',
]


def _batch_to_numpy(images, labels):
    """Flatten an image batch to 2-D (one row per sample) and convert both tensors to NumPy."""
    return images.view(len(images), -1).numpy(), labels.numpy()


model = SGDClassifier(learning_rate='constant', eta0=lr)

# init datamodule
dm = ColorDataModule(
    dataset_name=train_dataset_name,
    data_dir="dataset",
    batch_size=batch_size,
    allowed_color_list=allowed_color_list
)
dm.setup('fit')
train_dataloader = dm.train_dataloader()
val_dataloader = dm.val_dataloader()

# partial_fit must be told the full label set, since a single mini-batch may
# not contain every class.
classes = list(range(len(allowed_color_list)))

# One checkpoint file per run, named after the dataset and the start time
now = datetime.now()
model_checkpoint_path = os.path.join('checkpoints/color', model_arch)
os.makedirs(model_checkpoint_path, exist_ok=True)
model_checkpoint_file = os.path.join(model_checkpoint_path, f'{train_dataset_name}_{now.strftime("%Y%m%d%H%M%S")}.pkl')

for epoch in range(max_epochs):
    stats = {'val_acc': 0.0, 'train_acc': 0.0}

    # Train
    batches = tqdm(enumerate(train_dataloader), total=len(train_dataloader), postfix=stats)
    batches.set_description(f'Epoch:{epoch} - Training')
    n_correct, n_seen = 0.0, 0
    for mini_batch_idx, (x, _, y) in batches:
        x, y = _batch_to_numpy(x, y)
        model.partial_fit(x, y, classes=classes)
        # Weight each batch score by its size so a smaller final batch does
        # not skew the running accuracy.
        n_correct += model.score(x, y) * len(x)
        n_seen += len(x)
        stats['train_acc'] = n_correct / n_seen
        batches.set_postfix(stats)
        if is_dev_run: break

    # Validate
    batches = tqdm(enumerate(val_dataloader), total=len(val_dataloader), postfix=stats)
    batches.set_description(f'Epoch:{epoch} - Validating')
    n_correct, n_seen = 0.0, 0
    for mini_batch_idx, (x, _, y) in batches:
        x, y = _batch_to_numpy(x, y)
        n_correct += model.score(x, y) * len(x)
        n_seen += len(x)
        stats['val_acc'] = n_correct / n_seen
        batches.set_postfix(stats)
        if is_dev_run: break

    # Overwrite this run's checkpoint after every epoch, so an interrupted run
    # still keeps the model from the last completed epoch.
    dump(model, model_checkpoint_file)
    if is_dev_run: break


HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=293.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=682.0), HTML(value='')))

KeyboardInterrupt: ignored

### Predict using model

In [None]:
# Colors the classifier distinguishes (must match the list used for training)
allowed_color_list = [
    'black',
    'white',
    'red',
    'yellow',
    'blue',
    'gray',
]
# Datasets to run the prediction on
dataset_names = ['VehicleID', 'CompCars', 'Veri', ]

# Datasets whose trained model is evaluated.
# Other possible entries: 'VehicleID', 'CompCars', 'Veri'
train_dataset_names = ['Combined']

for train_dataset_name in train_dataset_names:
    # NOTE(review): the training cell writes checkpoints/color/<arch>/<dataset>_<timestamp>.pkl,
    # while this path expects checkpoints/color/best_<dataset>_<arch>.pkl — presumably the
    # chosen checkpoint is copied/renamed by hand; confirm.
    model_checkpoint_path = os.path.join('checkpoints/color', f'best_{train_dataset_name}_{model_arch}.pkl')
    # The checkpoint does not depend on the test dataset, so load it once per model.
    # joblib.load unpickles the file: only load checkpoints you trust.
    model = load(model_checkpoint_path)

    for dataset_name in dataset_names:
        # TODO need to export to method
        # init datamodule
        dm = ColorDataModule(
            dataset_name=dataset_name,
            data_dir="dataset",
            batch_size=batch_size,
            allowed_color_list=allowed_color_list
        )
        dm.setup('test')
        test_dataloader = dm.test_dataloader()

        prediction_root, predict_model_name, best_model_path, prediction_out_file = get_conf_data(train_dataset_name, dataset_name, model_arch)
        prediction_path = os.path.join(prediction_root, prediction_out_file)

        # Predict. No postfix is passed: the progress bar must not depend on the
        # `stats` dict that only exists after the training cell has run.
        batches = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
        batches.set_description('Predicting')
        # Opening with "w" truncates a previous result; every batch is then
        # written through the same handle, one predicted class id per line.
        with open(prediction_path, mode="w") as writer:
            for mini_batch_idx, (x, *_) in batches:
                n = len(x)
                x = x.view(n, -1).numpy()
                prediction = model.predict(x)
                np.savetxt(writer, prediction, fmt="%d")

HBox(children=(FloatProgress(value=0.0, max=927.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=927.0), HTML(value='')))