In [None]:
import torch

# Display the name of the current CUDA device as this cell's output
# (with no argument, get_device_name() queries the current device).
torch.cuda.get_device_name()

---
---
# BOSCH AI HACKATHON 2021
---
## Team - KanthabAI

Members:
  - Sanjana
  - Lakshya
  - Abeesh
  - Sachin
  - Shubham
  
~ Code, data and scripts maintained by Lakshya Dev at https://github.com/lakshyads/bosch-ai-hackathon-2021-kanthabai

  
---
**Content:**
1. Prep the environment and data
2. Training the YOLOv5 model
3. Running inferences on training data
4. Test with test data
5. ***Detecting new Images***
6. Retraining from last checkpoint
---
---

## Initial Config Setup - Provide the configuration inputs below. Collecting them up front lets you use "Run all" to execute the rest of the notebook unattended

In [None]:
# Run this cell to configure setup
#
# Collects every user choice up front (what to run, model, weights, sizes,
# git branch/token) and stores them in notebook-level variables that the
# later cells read.

from getpass import getpass

available_models = {
    1: "yolov5s",
    2: "yolov5m",
    3: "yolov5l",
    4: "yolov5x",
    5: "yolov5s6",
    6: "yolov5m6",
    7: "yolov5l6",
    8: "yolov5x6"
}


def _is_yes(answer):
  """Return True when a Y/N prompt was answered with 'Y' or 'y' (surrounding whitespace ignored)."""
  return answer.strip().lower() == 'y'


def _ask_int(prompt):
  """Prompt repeatedly until the user types a valid integer, then return it."""
  while True:
    raw = input(prompt)
    try:
      return int(raw)
    except ValueError:
      print(f'"{raw}" is not a whole number, please try again.')


def _ask_model_number():
  """Print the model menu and prompt until a key of available_models is entered."""
  print(f"\navailable_models = {available_models}")
  while True:
    choice = _ask_int('Enter model number from available_models : ')
    if choice in available_models:
      return choice
    print(f'{choice} is not one of {sorted(available_models)}, please try again.')


# Defaults, so every variable exists even when its prompt is skipped
selected_model_input = epoc_input = batch_input = image_size_input = use_pretrained = test_weights_input = detect_images_path_input = ''
selected_model = selected_weights = selected_epoc = selected_batch_size = selected_test_weights = detect_images_path = ''
runTrain = runTest = runDetect = False

# common configs for train, test or detect
image_size_input = _ask_int('EnterImage size : ')
use_pretrained = input('Use pretrained initial weights for selected model? Enter "Y" for yes, "N" for no.')
test_weights_input = input('Enter testing/detection weights. Type last to use most recent available training weights or enter path to weights : ')
if _is_yes(use_pretrained):
  selected_model_input = _ask_model_number()
  selected_weights = f'{available_models[selected_model_input]}.pt'
else:
  # An empty answer means "train from scratch"
  selected_weights = input('Enter initial weights path to start training from or leave empty to train from scratch.').strip()
selected_image_size = image_size_input
# Stripped so that a stray space does not defeat the later == 'last' comparison
selected_test_weights = test_weights_input.strip()

# select what functions to perform
trainInput = input('\nShould training be run? Enter "Y" for yes, "N" for no.')
testInput = input('\nShould testing be run? Enter "Y" for yes, "N" for no.')
detectInput = input('\nShould detection be run? Enter "Y" for yes, "N" for no.')

runTrain = _is_yes(trainInput)
runTest = _is_yes(testInput)
runDetect = _is_yes(detectInput)

# specific configs for train
if runTrain:
  # The model was already chosen above when pretrained weights are used
  if not _is_yes(use_pretrained):
    selected_model_input = _ask_model_number()
  selected_model = f'{available_models[selected_model_input]}.yaml'
  epoc_input = _ask_int('\nEnter number of epocs : ')
  selected_epoc = epoc_input
  batch_input = _ask_int('Enter batch size : ')
  selected_batch_size = batch_input

# specific configs for detect
if runDetect:
  detect_images_path_input = input('Enter path for detection Images')
  detect_images_path = detect_images_path_input

# getpass keeps the token out of the cell output that is saved with the notebook
git_token = getpass('Enter github access token : ')
# select git branch to use. Repo link : https://github.com/lakshyads/bosch-ai-hackathon-2021-kanthabai
selected_git_branch = input('Enter git branch to use : ').strip()

print('\nYour configuration:\n---- -------------\n')

print(f'runTrain = {runTrain}')
print(f'runTest = {runTest}')
print(f'runDetect = {runDetect}\n')

print(f'selected_model = {selected_model}')
print(f'selected_weights = {selected_weights}')
print(f'selected_epoc = {selected_epoc}')
print(f'selected_batch_size = {selected_batch_size}')
print(f'selected_image_size = {selected_image_size}')
print(f'selected_test_weights = {selected_test_weights}')
print(f'detect_images_path = {detect_images_path}')

print(f'\nselected_git_branch = {selected_git_branch}\n')

# 1. PREP THE ENVIRONMENT AND DATA
---

### Setup git LFS for downloading dataset with repo

In [None]:
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
!sudo apt-get install git-lfs

### Setup git identity

In [None]:
# Set the global git author identity; the later cells that run `git commit`
# on the runs/ folder will record their commits under this name and email.
!git config --global user.email "lakshyadev@live.com"
!git config --global user.name "Lakshya from google colab"

### Cloning the model and script files from Github repo

In [None]:
!git clone https://github.com/lakshyads/bosch-ai-hackathon-2021-kanthabai.git yolov5
!git pull
%cd /content/yolov5/
!git checkout $selected_git_branch


### Ready the dataset included with above repo (extract & split)



In [None]:
%cd /content/yolov5/

# Unzip dataset
!unzip -q ./data/final-data-full.zip -d /content/yolov5/data/dataset/

# =================================================

# Set to True to create test set
make_test_set = False

# =================================================

if(make_test_set is False):
  #split dataset without test set
  !python /content/yolov5/data-utils/split_train_val_test.py --images /content/yolov5/data/dataset --labels /content/yolov5/data/dataset --out /content/yolov5/data/dataset --move y --test n
else:
  #split dataset with test set
  !python /content/yolov5/data-utils/split_train_val_test.py --images /content/yolov5/data/dataset --labels /content/yolov5/data/dataset --out /content/yolov5/data/dataset --move y --test y

### Installing the dependencies

In [None]:
!pip install -r requirements.txt

%cd /content/yolov5/

### Option to connect and use Google drive


In [None]:
# Optionally connects the notebook to Google Drive through PyDrive and
# downloads a zipped dataset from it. The resulting `drive` client is reused
# by the backup cell further down, which uploads the runs archive.

# =================================================

# Set to true to connect to GDrive
useGDrive = False

# =================================================
# =================================================

if (useGDrive is True):
  # Step up to the parent directory, so the downloaded zip lands there
  # rather than in the current folder
  %cd ..
  !pip install PyDrive

  import os
  from pydrive.auth import GoogleAuth
  from pydrive.drive import GoogleDrive
  from google.colab import auth
  from oauth2client.client import GoogleCredentials

  # Connect GDrive
  # Authenticate the Colab user, then hand the application-default
  # credentials to PyDrive to build the Drive client
  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  drive = GoogleDrive(gauth)

  # Download zipped data to Colab from Google Drive
  # (the file is addressed by its hard-coded Drive file id)
  download = drive.CreateFile({'id': '1S23A8_uuiE2zp50qJDSp_xcC294kaFW7'})
  download.GetContentFile('train-validate-test-split-data.zip')

  # Return to the repository folder for the following cells
  %cd /content/yolov5
else:
  print("Google Drive will not be used. set useGDrive = True to use G Drive")

### Make everything inside /content/yolov5 readable and executable (chmod 755)

In [None]:
# Recursively set mode 755 (owner: read/write/execute, group and others:
# read/execute) on the whole repository folder. Presumably needed so that
# scripts such as weights/download_weights.sh can be executed directly.
!chmod -R 755 /content/yolov5


### Download all versions of YOLO-V5 model

In [None]:
# Run the repository's weight download script.
# NOTE(review): where the downloaded files end up is decided inside the script -
# confirm it matches the bare '<model>.pt' names that the config cell passes to --weights.
!/content/yolov5/weights/download_weights.sh

---
# 2. TRAINING THE YOLO-V5 MODEL
---

### Run training

In [None]:
if (runTrain is True):
  !python train.py --img $selected_image_size --batch $selected_batch_size --epochs $selected_epoc --data final-data.yaml --cfg $selected_model --weights $selected_weights --nosave --cache 
else:
  print('Running training script is disabled in initial config')

### Commit runs/train folder updates to git

In [None]:
if (runTrain is True):
  from utils.plots import plot_results
  import os

  # find the latest run
  dirPath, dir_names, _ = next(os.walk('runs/train'))
  dir_names.sort()
  latest = os.path.join(dirPath, dir_names[-1]) # eg: 'runs/train/exp3'

  !git add runs
  !git commit -m "Updated $latest data after a new execution"
  !git push https://$git_token@github.com/lakshyads/bosch-ai-hackathon-2021-kanthabai.git



### OPTIONAL - Zip and upload complete runs folder to GDrive as precaution

In [None]:
# Optionally zips the complete runs/ folder and uploads it to Google Drive.

# =================================================

# Backup run folder to Google drive
backup_to_drive = False # setting to True will work only if useGDrive is also True and Gdrive is connected

# =================================================
# =================================================

from datetime import datetime

# datetime object containing current date and time
now = datetime.now()
dt_string = now.strftime("%d%m%Y-T-%H%M%S")
# .get(): selected_model_input is still '' when neither pretrained weights nor
# training were selected, and a plain lookup would raise KeyError here.
model_name = available_models.get(selected_model_input, 'unknown-model')
epocs = selected_epoc
batch_size = selected_batch_size
initial_weights = selected_weights
# initial_weights = 'custom_Init_Weights'

if(runTrain is True and useGDrive is True and backup_to_drive is True):
  print('Backing up to google drive')
  import shutil
  import os

  # Only the file name of the weights goes into the archive name: a full path
  # would inject '/' separators, and empty weights mean training from scratch.
  weights_tag = os.path.basename(initial_weights) or 'scratch'
  bak_archive_name = f'run-{model_name}-{weights_tag}-epocs_{epocs}-batch_{batch_size}-{dt_string}'
  bak_archive_path = r'/content/yolov5/backups-for-gdrive/'
  bak_archive = os.path.join(bak_archive_path, bak_archive_name)
  dir_to_archive = '/content/yolov5/runs'
  try:
    print('\n Trying to upload run folder to gdrive ...')
    os.makedirs(bak_archive_path, exist_ok=True)

    # make_archive expects base_dir relative to root_dir; entries in the zip
    # therefore start at 'runs/...'.
    shutil.make_archive(
        base_name=bak_archive,
        format='zip',
        root_dir=os.path.dirname(dir_to_archive),
        base_dir=os.path.basename(dir_to_archive),
    )

    # `drive` is the PyDrive client created by the Google Drive cell;
    # the parent id is the hard-coded target folder.
    file = drive.CreateFile({'title': f'{bak_archive_name}.zip', 'parents': [{'id': '1Fo9h4-e-AvGSeYX0Q_086MnP7uquJMyN'}]})
    file.SetContentFile(f'{bak_archive}.zip')
    file.Upload()
    print(f'\n Run data uploaded to google drive. Uploaded file name = {bak_archive}')
  except Exception as e:
    # Best effort: a failed backup is reported but must not stop the notebook
    print(f'\n Failed to upload Run data to GDrive: {bak_archive} to Google Drive. \nException details: {e}')

---
# 3. RUNNING INFERENCES ON TRAINING DATA
---

### Visualizing training metrics using Tensorboard

In [None]:
# Load the TensorBoard notebook extension and open it on the training run logs
%load_ext tensorboard
%tensorboard --logdir runs/train

### Plotting the training results from latest run

In [None]:
# Plot the results of the most recent training run.
from utils.plots import plot_results
import os

# find the latest run
# The folder is named here explicitly instead of relying on a `dirPath`
# variable that only exists if the train-commit cell was executed.
train_runs_dir = 'runs/train'
# next(..., default) keeps a missing runs/train folder from raising StopIteration
_, dir_names, _ = next(os.walk(train_runs_dir), (train_runs_dir, [], []))
# Sort by (length, name) so numbered runs are ordered numerically:
# a plain string sort would place 'exp10' before 'exp2'.
dir_names.sort(key=lambda run_name: (len(run_name), run_name))

if dir_names:
  latest = os.path.join(train_runs_dir, dir_names[-1]) # eg: 'runs/train/exp3'
  plot_results(save_dir=f'{latest}')
else:
  print(f'No training runs found under {train_runs_dir} - nothing to plot')

---
# 4. TESTING
---

### Update --weights with required weights path

In [None]:
use_weights = ''
if (selected_test_weights == 'last'):
  from utils.plots import plot_results
  import os
  # find the latest run
  _, dir_names, _ = next(os.walk('runs/train'))
  dir_names.sort()
  latest = os.path.join(dirPath, dir_names[-1]) # eg: 'runs/train/exp3'
  use_weights = f'{latest}/weights/best.pt'
else:
  use_weights = selected_test_weights

if (runTest is True):
  # Update --weights path as required
  !python test.py --weights $use_weights --data final-data.yaml --img $selected_image_size
else:
  print('Testing script is disabled. Enable in initial config')

### Commit runs/test folder updates to git

In [None]:
if (runTest is True):
  from utils.plots import plot_results
  import os
  # find the latest run
  _, dir_names, _ = next(os.walk('runs/test'))
  dir_names.sort()
  latest = os.path.join(dirPath, dir_names[-1]) # eg: 'runs/test/exp3'

  !git add runs
  !git commit -m "Updated $latest data after a new testing execution"
  !git push https://$git_token@github.com/lakshyads/bosch-ai-hackathon-2021-kanthabai.git


---
# 5. DETECTING ON NEW IMAGES
---

### Extract new images from zipped upload


In [None]:
if (runDetect is True):
  %cd ..
  !unzip -q /content/extracted_images.zip 
  %cd yolov5

### Runs the detect script.
### Update --weights with required weights path

In [None]:
use_weights = ''
if (selected_test_weights == 'last'):
  from utils.plots import plot_results
  import os
  # find the latest run
  _, dir_names, _ = next(os.walk('runs/train'))
  dir_names.sort()
  latest = os.path.join(dirPath, dir_names[-1]) # eg: 'runs/train/exp3'
  use_weights = f'{latest}/weights/best.pt'
else:
  use_weights = selected_test_weights

if (runDetect is True):
  !python detect.py --source $detect_images_path --weights $use_weights --img $selected_image_size --save-txt --save-conf
else:
  print('Detection script is disabled. Enable in initial config')

---
# 6. RETRAINING FROM THE LAST CHECKPOINT
---

In [None]:
# Template for continuing training from a saved checkpoint: uncomment the line
# below and point --weights at the last.pt of the run to resume
# (presumably the exp2 path is only an example - adjust it, and --epochs/--img, as needed).
# !python train.py --weights /content/yolov5/runs/train/exp2/weights/last.pt --epochs 100 --img 416