# 1) Mount drive, unzip data, clone repo, install packages

## 1.1) Mount Drive and define paths
Run the provided Colab code to mount Google Drive, then define the dataset paths relative to the mount point.

In [1]:
import os

# Root of the (mounted) Google Drive folder used by the rest of the notebook.
# Set the BIOGANS_DRIVE_ROOT environment variable to point at a different location
# (e.g. on another machine or in Colab); when it is unset, the original local path is used.
drive_root = os.environ.get(
    'BIOGANS_DRIVE_ROOT',
    r'C:\Users\thana\PycharmProjects\kth-course-projects\biogans\.gdrive_personal',
)

## 1.2) Unzip Img directory in Colab
By unzipping `lin-48x80.zip` in Colab before running our model, we gain a significant disk-read speedup.
So, the first step is to unzip the image directory, and then save the image directory, before proceeding.

In [2]:
import os

# Check if LIN Dataset is present / Download Dataset
df_root_drive = f'{drive_root}/Datasets/LIN_48x80'
if not os.path.exists(f'{df_root_drive}/lin-48x80.zip'):
    !pip install kaggle --upgrade
    os.environ['KAGGLE_CONFIG_DIR'] = drive_root
    !kaggle datasets download "achariso/lin-48x80" -p "$df_root_drive"

# Unzip
if not os.path.exists(f"{df_root_drive}/LIN_Normalized_WT_size-48-80_train"):
  !pip install unzip
  !unzip -qq "$df_root_drive/lin-48x80.zip" -d "$df_root_drive"
  if not os.path.exists(f'$df_root_drive/LIN_Normalized_WT_size-48-80_train'):
      print(f'Error: Dataset not found at "/content/data/LIN_48x80"')

Downloading lin-48x80.zip to C:\Users\thana\PycharmProjects\kth-course-projects\biogans\.gdrive_personal/Datasets/LIN_48x80




  0%|          | 0.00/980M [00:00<?, ?B/s]
  0%|          | 2.00M/980M [00:00<01:25, 12.0MB/s]
  0%|          | 4.00M/980M [00:00<01:27, 11.7MB/s]
  1%|          | 6.00M/980M [00:00<01:27, 11.6MB/s]
  1%|          | 8.00M/980M [00:00<01:27, 11.6MB/s]
  1%|1         | 10.0M/980M [00:00<01:27, 11.7MB/s]
  1%|1         | 12.0M/980M [00:01<01:26, 11.7MB/s]
  1%|1         | 14.0M/980M [00:01<01:26, 11.8MB/s]
  2%|1         | 16.0M/980M [00:01<01:26, 11.7MB/s]
  2%|1         | 18.0M/980M [00:01<01:28, 11.4MB/s]
  2%|2         | 20.0M/980M [00:01<01:24, 11.9MB/s]
  2%|2         | 22.0M/980M [00:01<01:25, 11.8MB/s]
  2%|2         | 24.0M/980M [00:02<01:25, 11.7MB/s]
  3%|2         | 26.0M/980M [00:02<01:25, 11.8MB/s]
  3%|2         | 28.0M/980M [00:02<01:24, 11.8MB/s]
  3%|3         | 30.0M/980M [00:02<01:24, 11.9MB/s]
  3%|3         | 32.0M/980M [00:02<01:24, 11.8MB/s]
  3%|3         | 34.0M/980M [00:03<01:23, 11.9MB/s]
  4%|3         | 36.0M/980M [00:03<01:23, 11.8MB/s]
  4%|3         | 38.

Error: Dataset not found at "/content/data/LIN_48x80"


'unzip' is not recognized as an internal or external command,
operable program or batch file.


## 1.3) Clone GitHub repo
Clone the achariso/gans-thesis repo into /content/code using `git clone`.
For more info see: https://medium.com/@purba0101/how-to-clone-private-github-repo-in-google-colab-using-ssh-77384cfef18f

In [5]:
# Repository root and the source directory underneath it
repo_root = r'C:\Users\thana\PycharmProjects\kth-course-projects\biogans'
src_root = repo_root + '/src'

## 1.5) Add code/, */src/ to path
This is necessary in order to be able to run the modules.

In [7]:
content_root_abs = f'{repo_root}'
src_root_abs = f'{repo_root}/src'
%env PYTHONPATH="/env/python:$content_root_abs:$src_root_abs

%cd "$repo_root"

env: PYTHONPATH="/env/python:C:\Users\thana\PycharmProjects\kth-course-projects\biogans:C:\Users\thana\PycharmProjects\kth-course-projects\biogans/src
C:\Users\thana\PycharmProjects\kth-course-projects\biogans


# 2) Train BioGAN model on LIN Dataset
In this section we run the actual training loop for the BioGAN network. BioGAN consists of single- and multi-channel
DCGAN-like Generators and Discriminators.

### Actual Run
Eventually, run the code!

In [8]:
# Run configuration. Each value below is interpolated (via IPython's `$name` expansion) into
# the command line of src/train_setup.py further down.
# NOTE: a Python None is interpolated as the literal text "None"; the model-initialization
# cell maps any chkpt_step string that is neither 'latest' nor all digits to None.
chkpt_step = 'latest'   # supported: 'latest', <int>, None
log_level = 'debug'     # supported: 'debug', 'info', 'warning', 'error', 'critical', 'fatal'
device = 'cuda'             # supported: 'cpu', 'cuda', 'cuda:<GPU_INDEX>'
gdrive_which = 'personal'   # supported: 'personal', 'auth'

# Value passed to --which_classes
classes = 'Alp14'

# Running with -i enables us to get variables defined inside the script (the script runs inline)
# NOTE(review): later cells use names that are not defined in this notebook (args, run_locally,
# exec_device, datasets_groot, models_groot) - presumably they are created by this script.
%run -i src/train_setup.py --log_level $log_level --chkpt_step $chkpt_step --seed 42 --device $device --gdrive_which $gdrive_which -use_refresh_token --which_classes $classes
# Change the working directory to src/ (relative to the current directory) - presumably so
# that the `datasets`, `modules` and `utils` imports of the following cells resolve.
%cd src/

ModuleNotFoundError: No module named 'colorlog'

C:\Users\thana\PycharmProjects\kth-course-projects\biogans\src


### BioGAN Training
The code that follows defines the dataloaders/evaluators/models and the main training loop.


In [None]:
import torch
from IPython.core.display import display
from torch import Tensor
from torch.nn import DataParallel
# noinspection PyProtectedMember
from torch.utils.data import DataLoader

from datasets.lin import LINDataloader
from modules.biogan import OneClassBioGan
from utils.metrics import GanEvaluator

###################################
###  Hyper-parameters settings  ###
###################################
# Training
n_epochs = 100
batch_size = 48 if run_locally else 48  # currently identical for local and non-local runs
train_test_splits = [90, 10]  # for a 90% training - 10% evaluation set split

# Evaluation: much smaller settings are used when running locally
if run_locally:
    metrics_n_samples, metrics_batch_size, f1_k = 2, 1, 1
else:
    metrics_n_samples, metrics_batch_size, f1_k = 1000, 32, 3

# Visualization / checkpoint / metrics intervals (in training steps)
display_step = 300
checkpoint_step = 600
metrics_step = 1800  # evaluate model every 3 checkpoints

###################################
###   Dataset Initialization    ###
###################################
# Training dataloader for the selected LIN classes
#   > len(dataloader)         = <number of batches>
#   > len(dataloader.dataset) = <number of total dataset items>
dataloader = LINDataloader(
    dataset_fs_folder_or_root=datasets_groot,
    train_not_test=True,
    batch_size=batch_size,
    pin_memory=not run_locally,
    shuffle=True,
    which_classes=args.which_classes,
)
dataset = dataloader.dataset

# Rudimentary sanity checks
#   - the custom dataloader must be a torch DataLoader
assert isinstance(dataloader, DataLoader)
#   - number of batches == ceil(number of items / batch size)
_n_full_batches, _n_leftover = divmod(len(dataset), batch_size)
assert len(dataloader) == _n_full_batches + (1 if _n_leftover else 0)
#   - a batch holds `batch_size` items of shape (2, 48, 80)
_x = next(iter(dataloader))
assert tuple(_x.shape) == (batch_size, 2, 48, 80)

###################################
###    Models Initialization    ###
###################################
#   - initialize evaluator instance (used to run GAN evaluation metrics: FID, IS, PRECISION, RECALL, F1 and SSIM)
evaluator = GanEvaluator(model_fs_folder_or_root=models_groot, gen_dataset=dataset, device=exec_device,
                         z_dim=-1, n_samples=metrics_n_samples, batch_size=metrics_batch_size, f1_k=f1_k,
                         ssim_c_img=2)
#   - resolve which checkpoint to resume from: 'latest' is passed through, an all-digit string
#     becomes an int, and anything else (e.g. the text "None") means "no checkpoint"
chkpt_step = args.chkpt_step
if chkpt_step == 'latest':
    _chkpt_step = chkpt_step
elif isinstance(chkpt_step, str) and chkpt_step.isdigit():
    _chkpt_step = int(chkpt_step)
else:
    _chkpt_step = None
#   - initialize model
biogan = OneClassBioGan(model_fs_folder_or_root=models_groot, config_id='default', dataset_len=len(dataset),
                        chkpt_epoch=_chkpt_step, evaluator=evaluator, device=exec_device, log_level=log_level)
biogan.logger.debug(f'Using device: {str(exec_device)}')
biogan.logger.debug(f'Model initialized. Number of params = {biogan.nparams_hr}')
#   - setup multi-GPU training (only the generator is wrapped)
_n_gpus = torch.cuda.device_count()
if _n_gpus > 1:
    biogan.gen = DataParallel(biogan.gen, list(range(_n_gpus)))
    # Log through biogan.logger, like every other log call in this notebook
    biogan.logger.info(f'Using {_n_gpus} GPUs for BioGAN Generator (via torch.nn.DataParallel)')
# #   - load dataloader state (from model checkpoint)
# if 'dataloader' in biogan.other_state_dicts.keys():
#     dataloader.set_state(biogan.other_state_dicts['dataloader'])
#     biogan.logger.debug(f'Loaded dataloader state! Current pem_index={dataloader.get_state()["perm_index"]}')


### BioGAN Main training loop


In [None]:
from tqdm.autonotebook import tqdm
from utils.dep_free import in_notebook
torch.cuda.empty_cache()


def _training_state(model) -> dict:
    """Return the model's step/epoch counters, as logged at the start and end of each epoch."""
    # noinspection PyProtectedMember
    return {
        'step': model.step,
        'initial_step': model.initial_step,
        'epoch': model.epoch,
        '_counter': model._counter,
        'epoch_inc': model.epoch_inc,
    }


###################################
###       Training Loop         ###
###################################
#   - start training loop from last checkpoint's epoch and step
gcapture_ready = True   # False while a previously started gcapture() is still running
async_results = None    # handles returned by the most recent gcapture(), if still pending
biogan.logger.info(f'[training loop] STARTING (epoch={biogan.epoch}, step={biogan.initial_step})')
biogan.initial_step += 1
for epoch in range(biogan.epoch, n_epochs):
    # initial_step = biogan.initial_step % len(dataloader)
    biogan.logger.debug('[START OF EPOCH] ' + str(_training_state(biogan)))
    for x in tqdm(dataloader):
        # Transfer image batches to GPU
        x = x.to(exec_device)

        # Perform a forward + backward pass + weight update on the Generator & Discriminator models
        disc_loss, gen_loss = biogan(x)

        # Metrics & Checkpoint Code
        if biogan.step % checkpoint_step == 0:
            # Check if another upload is pending
            if not gcapture_ready and async_results:
                # Wait for previous upload to finish
                biogan.logger.warning('Waiting for previous gcapture() to finish...')
                for pending_result in async_results:
                    pending_result.wait()
                biogan.logger.warning('DONE! Starting new capture now.')
            # Capture current model state, including metrics and visualizations
            async_results = biogan.gcapture(checkpoint=True, metrics=biogan.step % metrics_step == 0,
                                            visualizations=True, dataloader=dataloader, in_parallel=True,
                                            show_progress=True, delete_after=False)
        # Visualization code
        elif biogan.step % display_step == 0:
            visualization_img = biogan.visualize()
            if in_notebook():
                display(visualization_img)
            else:
                visualization_img.show()

        # Check if a pending checkpoint upload has finished
        if async_results:
            gcapture_ready = all(pending_result.ready() for pending_result in async_results)
            if gcapture_ready:
                biogan.logger.info('gcapture() finished')
                if biogan.latest_checkpoint_had_metrics:
                    biogan.logger.info(str(biogan.latest_metrics))
                async_results = None

        # If run locally one pass is enough
        if run_locally and gcapture_ready:
            break

    # If run locally one pass is enough
    if run_locally:
        break

    biogan.logger.debug('[END OF EPOCH] ' + str(_training_state(biogan)))

# Check if a pending checkpoint exists
if async_results:
    for pending_result in async_results:
        pending_result.wait()
    biogan.logger.info('last gcapture() finished')
    if biogan.latest_checkpoint_had_metrics:
        biogan.logger.info(str(biogan.latest_metrics))
    async_results = None

# Training finished!
biogan.logger.info('[training loop] DONE')

# 3) Evaluate BioGAN
In this section we evaluate the generation performance of our trained network using the SOTA GAN evaluation metrics.

## 3.1) Get the metrics evolution plots
We plot how the metrics evolved during training. The GAN is **not** trained to minimize those metrics (they are
calculated using `torch.no_grad()`) and thus this evolution merely depends on the network and showcases the correlation
between the GAN evaluation metrics, and the losses (e.g. adversarial & reconstruction) used to optimize the network.

In [None]:
# Plot the evolution of the evaluation metrics by calling visualize_metrics() directly on
# the BioGAN model instance (the original note attributes this method to
# utils.ifaces.Visualizable, which is not visible in this notebook).
# The return value is bound to `_` so that the cell does not echo it as its output.
_ = biogan.visualize_metrics(upload=True, preview=True)

## 3.2) Evaluate Generated Samples
We now evaluate the generated samples so that the model can be compared with other GAN architectures trained on the same dataset.
For this purpose we re-calculate the evaluation metrics stated above, but with a much larger number of samples.
This way, the metrics are more trustworthy and comparable with the corresponding metrics in the original paper.


In [None]:
import json

# Build a new evaluator on `dataloader.test_set` with a larger sample budget
# (used to run GAN evaluation metrics: FID, IS, PRECISION, RECALL, F1 and SSIM)
evaluator = GanEvaluator(
    model_fs_folder_or_root=models_groot,
    gen_dataset=dataloader.test_set,
    target_index=0,
    device=exec_device,
    n_samples=10000,
    batch_size=64,
    f1_k=f1_k,
    ssim_c_img=2,
)

# Compute all metrics for the trained generator
metrics_dict = evaluator.evaluate(gen=biogan.gen, metric_name='all', show_progress=True)

# Pretty-print the resulting metrics as JSON
metrics_json = json.dumps(metrics_dict, indent=4)
print(metrics_json)

#
#-----------
# Epoch 93
#----------
# 
# Training Set:
# {
#   "fid": 16.195581436157227,
#   "is": 3.82967472076416,
#   "f1": 0.8827780485153198,
#   "precision": 0.8856828808784485,
#   "recall": 0.8798921704292297,
#   "ssim": 0.8029271364212036
# }
#
# Test Set:
# {
#     "fid": 26.503515243530273,
#     "is": 2.957645606994629,
#     "f1": 0.8494825959205627,
#     "precision": 0.8351463675498962,
#     "recall": 0.8643196225166321,
#     "ssim": 0.7690791009871171
# }
#