<a href="https://colab.research.google.com/github/stepanbabayan/DFBS-Object-Classification/blob/colab/train_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Clone Repository

In [None]:
# Download the project repository into ./DFBS-Object-Classification.
!git clone https://github.com/stepanbabayan/DFBS-Object-Classification.git

## Switch to the colab branch

In [None]:
mv ./DFBS-Object-Classification/ ./Model/

In [None]:
cd Model

In [None]:
# Check out the `colab` branch of the repository in the current directory.
!git checkout colab

In [None]:
# List the working directory contents as a quick sanity check of the checkout.
!ls

## Additional Environment Setups

In [None]:
# Load the TensorBoard notebook extension so that the `%tensorboard` magic
# used further down in this notebook is available.
%load_ext tensorboard

In [None]:
import zipfile

# Unpack the dataset archive located in the working directory.
# An empty target path extracts the members relative to the working directory.
with zipfile.ZipFile('./data.zip', mode='r') as archive:
    archive.extractall(path='')

In [None]:
import sys

# Register the `Model/` folder on the module search path.
# NOTE(review): this is a relative entry, so Python resolves it against the
# working directory at import time — confirm it still points at the intended
# folder after the earlier `cd Model`.
sys.path += ['Model/']

## Imports

In [None]:
import os
import argparse
import yaml

import load_data, models, train
from Model.test import evaluate

import torch
from torchsummary import summary

## Environment variables

In [None]:
# Optional YAML-based configuration, currently disabled: the settings are
# defined directly in the cells of this notebook instead.
# yaml_data = './configs/config.yaml'

# with open(yaml_data) as file:
#     config = yaml.safe_load(file)

# Set to False to force training on the CPU even when a GPU is present.
use_gpu = True

In [None]:
# Training Device
# Use the first CUDA device only when a GPU was requested AND is actually
# available, so the printed device always matches the one really selected.
if use_gpu and torch.cuda.is_available():
    device = torch.device("cuda:0")
    print('Device: GPU')
else:
    if use_gpu:
        # A GPU was requested but the runtime has none: make the fallback visible.
        print('CUDA is not available; falling back to CPU.')
    device = torch.device('cpu')
    print('Device: CPU')

## Data path

In [None]:
# Dataset locations: each split lives in its own sub-directory of the data root.
data_root = './data'

train_dir, test_dir = (os.path.join(data_root, split) for split in ('train', 'test'))
# No separate validation directory is configured.
val_dir = None

In [None]:
# Report how many entries the training directory holds
# (assumes every entry is a class folder — TODO confirm).
print(f'Num classes: {len(os.listdir(train_dir))}')

## Project Parameters

In [None]:
# Absolute path of the working directory; the output folders are created beneath it.
root_dir = os.path.abspath(os.curdir)

In [None]:
# Unique, descriptive name of this training run.
# It is used to name the per-run model and checkpoint folders.
train_id = "Default_10_Notebook_Train_1"

In [None]:
# The model's final checkpoint will be saved in {model_dir}
model_dir = f'{root_dir}/model/{train_id}'
# All other checkpoints are saved in {checkpoints_dir}
checkpoints_dir = f'{root_dir}/Checkpoint/{train_id}'

# Full path of the file that receives the final weights.
save_model_path = f"{model_dir}/final.pth"

# Create both output folders. `exist_ok=True` makes re-running the cell safe
# without a separate existence check (no check-then-create race), and still
# raises if the path exists but is not a directory.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(checkpoints_dir, exist_ok=True)

## Training Parameters

In [None]:
# Network parameters
# Number of epochs handed to the training routine.
num_epochs = 301
# One class per entry of the training directory.
# NOTE(review): os.listdir also counts stray files (e.g. hidden files) —
# confirm train_dir only contains class folders.
num_classes = len(os.listdir(train_dir))
# Input size given to the model constructor; presumably (height, width) — TODO confirm.
input_shape = (160, 50)

In [None]:
# Batch sizes
# Passed to the data-loading calls for the train, validation and test splits respectively.
train_batch_size = 64
val_batch_size = 1
test_batch_size = 32

#### Start the training from scratch

In [None]:
# Epoch index handed to the training routine as its starting point.
start_epoch = 0
# No checkpoint to restore: the weight-loading cell is skipped while this is None.
load_model_path = None

#### To resume training from a checkpoint instead

In [None]:
# start_epoch = 6
# load_model_path = f'{checkpoints_dir}/5.pth'

## Data Loaders

In [None]:
# The training split is always loaded; it also yields the class proportions.
train_data, train_classes, train_proportions = load_data.load_images(train_dir, train_batch_size, 'train')

# The validation split is optional: fall back to None placeholders when no directory is set.
if val_dir:
    val_data, val_classes, _ = load_data.load_images(val_dir, val_batch_size, 'val')
else:
    val_data, val_classes, _ = None, None, None

# Same handling for the test split.
if test_dir:
    test_data, test_classes, _ = load_data.load_images(test_dir, test_batch_size, 'test')
else:
    test_data, test_classes, _ = None, None, None

## Training Setup

In [None]:
# Build the network from the configured class count and input shape,
# then move it to the selected device.
net = models.Model(num_classes=num_classes, input_shape=input_shape)
net = net.to(device)

### Layers

In [None]:
# Print the network's textual representation.
print(net)

### Output Summary

In [None]:
# Summarize the network for a single-channel input of the configured size.
# The size is derived from `input_shape` so it cannot drift from the model
# configuration, and the summary runs on the device type the model was moved
# to (torchsummary otherwise defaults to CUDA, which mismatches a CPU model).
summary(net, (1, *input_shape), device=device.type)

In [None]:
# Restore weights only when a checkpoint path was configured.
# `map_location=device` remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only runtime.
if load_model_path:
    net.load_state_dict(torch.load(load_model_path, map_location=device))

In [None]:
# Open TensorBoard inline, pointed at the `runs` log directory.
# If it throws 403 or other error, try switching your browser or turning off incognito mode.
%tensorboard --logdir runs

In [None]:

# Announce the start of training.
print('\nTraining started:')

# Collect the keyword arguments first so the call itself stays short.
_train_options = dict(
    # Data splits
    train=train_data,
    val=val_data,
    test=test_data,
    # Schedule and hardware
    epochs=num_epochs,
    start_epoch=start_epoch,
    device=device,
    # Bookkeeping
    model_folder=checkpoints_dir,
    train_id=train_id,
    # Evaluation helpers
    classes=test_classes,
    train_proportions=train_proportions,
    evaluate=evaluate,
)

# Run the training routine; the network it returns replaces `net`.
net = train.train_model(net, **_train_options)

In [None]:
# Write the network's state dict (not the whole module object) to the final checkpoint path.
torch.save(net.state_dict(), save_model_path)

In [None]:
# Open TensorBoard again on the `runs` log directory, this time after training has finished.
%tensorboard --logdir runs