# Create Basic Setup
### 1. Installation
* connect to own drive
* create paths used throughout the notebook
* get and install own repo
* get and install waymo repo

### 2. Data
* transfer waymo dataset from gcs to gdrive
* unpack 
* convert 

### 3. Training
* training loop
* visual assessment

Remarks:
* __linux_version paths should NOT be concatenated using e.g. os.path.join
* data transfer: runtime cpu
* waymo: tf is version 1.x
* training: runtime gpu
* tensorboard: enable 3rd party cookies in your browser

# 1. Installation

In [0]:
'''
MOUNT
'''

# Mount the user's Google Drive into the Colab VM at /content/drive.
# The path constants defined in this notebook all point below this mount point,
# so this has to run before any cell that touches them.
from google.colab import drive
drive.mount('/content/drive')

'''
Create PATHS
'''

# The "__linux_version" paths are meant to be pasted into shell commands
# (`!cmd {PATH}` / `%cd {PATH}`), which is why the space in "My Drive" is
# backslash-escaped. They are written as raw strings so that the backslash is
# kept literally without Python treating '\ ' as an (invalid) escape sequence.
# Sub-paths are built by plain string concatenation; every directory path ends
# with a trailing '/'.
ROOT_DIR__linux_version = r'/content/drive/My\ Drive/Colab\ Notebooks/DeepCV_Packages/'
DATA_DIR__linux_version = ROOT_DIR__linux_version + 'data/'
REPO_DIR__linux_version = ROOT_DIR__linux_version + 'DeepCVLab/'
DEEPCVLAB_DIR__linux_version = REPO_DIR__linux_version + 'deepcvlab/'

# destination for the downloaded archives; this should be a directory
# containing very few files
ARCHIVE_DEST_DIR__linux_version = r'/content/drive/My\ Drive/Colab\ Notebooks/'

In [None]:
'''
GET OWN REPO
'''

# Change the notebook's working directory to the package root on Drive,
# delete any previous checkout of the repo and clone a fresh copy.
# NOTE: `rm -rf` discards everything inside the old checkout, including
# anything that was generated there by earlier runs.
%cd {ROOT_DIR__linux_version}
!rm -rf {REPO_DIR__linux_version}
!git clone https://github.com/pmcgrath249/DeepCVLab.git

In [0]:
'''
INSTALL EVERYTHING
'''

# permanently change dir
# (%cd changes the working directory of the notebook itself, whereas each
# `!cd ... && ...` line below only changes directory inside its own subshell)
%cd {DEEPCVLAB_DIR__linux_version}

# install waymo dataset utils in utils; https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb
# 1) remove a previous clone (stdout is discarded)
# 2) clone the waymo repo into utils/waymo-od
# 3) list all branches (informational only) and check out the r1.0 remote branch
# 4) upgrade pip and install the waymo-open-dataset package via pip
!cd utils && rm -rf waymo-od > /dev/null
!cd utils && git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od
!cd utils/waymo-od && git branch -a
!cd utils/waymo-od && git checkout remotes/origin/r1.0
!pip3 install --upgrade pip
!pip3 install waymo-open-dataset

# install requirements
!cd {REPO_DIR__linux_version} && pip3 install -r requirements.txt

# install own package
!cd {REPO_DIR__linux_version} && python3 -m pip install .

# 2. DATA

Data source: https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_0_0

### Transfer

Help 1: https://medium.com/@philipplies/transferring-data-from-google-drive-to-google-cloud-storage-using-google-colab-96e088a8c041

I had to change Help 1 because I was not able to find the project_id necessary for this approach

Help 2: https://cloud.google.com/storage/docs/access-public-data?hl=de 

REMARK: No costs arise as the bucket is managed by waymo

### Note

* Use a CPU runtime for this section. It gives access to more disk storage w.r.t. the compute instance. Unpacking clutters the disk.
* __linux_version paths cannot be concatenated using os.path.join because of the spaces and escape characters within the paths
* Due to COLABxDRIVE issues, it is important to copy datasets to directories with little content.
Otherwise it is not possible to extract files from the archives reliably.
Moreover, I have had issues with moving archives, hence the iterative procedure (copy, unpack and delete one archive at a time).
https://research.google.com/colaboratory/faq.html#drive-timeout 

In [None]:
'''
AUTHENTICATE GCS
'''

# Authenticate the current Colab user so that the gsutil commands in the
# following cells can access Google Cloud Storage.
from google.colab import auth
auth.authenticate_user()

In [None]:
'''
LIST DIR TO BE COPIED
'''

# Recursively list the contents of the waymo bucket; the listing is used to
# look up the archive names and their count for the copy cell.
bucket_name = 'waymo_open_dataset_v_1_0_0'
!gsutil ls -r gs://{bucket_name}/

In [None]:
'''
COPY GCS TO DRIVE AND 
UNPACK
'''

import os

# naming
bucket_name = 'waymo_open_dataset_v_1_0_0'
training_bucket = os.path.join(bucket_name, 'training')

for i in range(32):                                                                             # from ls above
    dataset_name = 'training_000{}.tar'.format(i) if i < 10 else 'training_00{}.tar'.format(i)  # right amount of leading zeros
    data_bucket = os.path.join(training_bucket, dataset_name)                 

    print('start copying: ' + dataset_name)
    !gsutil -m cp -r gs://{data_bucket}/ {ARCHIVE_DEST_DIR__linux_version}                          # copy multi-threaded and recursively

    print('start unpacking: ' + dataset_name)
    archive_full_path = ARCHIVE_DEST_DIR__linux_version + dataset_name
    unpack_dest = DATA_DIR__linux_version + dataset_name[:-4] + '/'
    !mkdir -p {unpack_dest}
    !tar -xvf {archive_full_path} -C {unpack_dest}

    print('deleting archive: ' + dataset_name)
    !rm {archive_full_path}

In [None]:
'''
OFFLINE DATA CONVERSION
'''

# Run from the repo root, then convert the unpacked waymo data with the
# project's helper module and split the result.
# NOTE(review): [0.6,0.2,0.2] is presumably the train/val/test fraction, in
# that order -- confirm against distribute_data_into_train_val_test.
%cd {REPO_DIR__linux_version}
from deepcvlab.utils import Dense_U_Net_lidar_helper as utils
utils.waymo_to_pytorch_offline()
utils.distribute_data_into_train_val_test([0.6,0.2,0.2])

# 3. Training

In [None]:
'''
TRAINING WITH TENSORBOARD VISUALIZATION
'''

# import
%cd {REPO_DIR__linux_version}
from deepcvlab.utils.Dense_U_Net_lidar_helper import get_config
from deepcvlab.agents.Dense_U_Net_lidar_Agent import Dense_U_Net_lidar_Agent as Dense_U_Agent
import os
from pathlib import Path

config = get_config()
config.agent.max_epoch = 100

# if first time training create summary dir before running for tensorboard to work
Path(config.dir.summary).mkdir(exist_ok=True)

# use tensorboard to visualize
%load_ext tensorboard
%tensorboard --logdir {os.path.join(*config.dir.summary.split('/')[-2:])}

# start training
agent = Dense_U_Agent(config=config, torchvision_init=True)
agent.run()
agent.finalize()

In [None]:
'''
VISUALLY ASSESS DATA AFTER FORWARD PASS
'''

import numpy as np
import matplotlib.pyplot as plt
from deepcvlab.utils.Dense_U_Net_lidar_helper import get_config
from deepcvlab.agents.Dense_U_Net_lidar_Agent import Dense_U_Net_lidar_Agent as Dense_U_Agent

def visual_assessment(img, pred, gt):
    """Plot input image, prediction and ground truth side by side.

    One row of three panels is drawn per sample: the image, the first channel
    of the prediction and the first channel of the ground truth (the latter
    two in gray scale). The figure is shown once all rows are drawn.

    Args:
        img: batch of images; indexed per sample and permuted with (1, 2, 0)
            (assumes channel-first torch tensors on the CPU -- TODO confirm).
        pred: batch of predictions, handled like ``img``; only channel 0 is
            plotted.
        gt: batch of ground-truth maps; ``gt.shape[0]`` defines the number of
            rows, only channel 0 is plotted.
    """
    n_rows = gt.shape[0]
    n_cols = 3
    figure = plt.figure(figsize=(n_cols * 7, n_rows * 7))

    for row in range(n_rows):
        # subplot indices are 1-based and run row by row
        first_cell = row * n_cols + 1

        # left panel: input image, cast to uint8 for display
        rgb = img[row].permute(1, 2, 0).detach().numpy().astype(np.uint8)
        figure.add_subplot(n_rows, n_cols, first_cell)
        plt.imshow(rgb)

        # middle panel: first prediction channel
        # NOTE(review): the uint8 cast truncates fractional values; if the
        # model outputs values in [0, 1] this panel will be all zeros -- confirm
        predicted = pred[row].permute(1, 2, 0)[:, :, 0].detach().numpy().astype(np.uint8)
        figure.add_subplot(n_rows, n_cols, first_cell + 1)
        plt.imshow(predicted, cmap=plt.cm.gray)

        # right panel: first ground-truth channel, plotted without any cast
        target = gt[row].permute(1, 2, 0)[:, :, 0].detach().numpy()
        figure.add_subplot(n_rows, n_cols, first_cell + 2)
        plt.imshow(target, cmap=plt.cm.gray)

    plt.show()

# Build the agent from the adjusted configuration. The config has to be passed
# explicitly (as in the training cell); otherwise the change made to it here
# never reaches the agent.
config = get_config()
config.optimizer.mode = 'train'
agent = Dense_U_Agent(config=config, torchvision_init=True)

# Run a forward pass per batch of the training loader and plot the result.
# The third element of each batch is not needed here.
for image, lidar, _, ht_map in agent.data_loader.train_loader:

    # move the network inputs to the GPU when the agent runs on CUDA
    if agent.cuda:
        image = image.cuda()
        lidar = lidar.cuda()

    prediction = agent.model(image, lidar)

    # plotting converts to numpy, so image and prediction are moved back to
    # the CPU; ht_map comes straight from the loader and was never moved
    visual_assessment(image.cpu(), prediction.cpu(), ht_map)
    # NOTE(review): every batch of the loader is visualised; add a `break`
    # here if a single batch is enough.