# Classification of Jigsaw Puzzles -  Cats Vs. Dogs

## Colab Environment setup

### Installations and mounts

In [None]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.1.3-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.3.0.post0-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.2/840.2 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.1.3-py3-none-any.whl (777 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.7/777.7 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning, lightning
Successfully installed lightning-2.1.3 lightning-utilities-0.10.1 pytorch-lightning-2.1.3 torchmetrics-1.3.0.

In [None]:
from pathlib import Path

# --- Run setting - colab or local
# NOTE: the cells further down raise unless this is exactly 'Colab'.
ENVIRONMENT = 'Colab'

# --- Paths
# Project root as seen through the Google Drive mount point.
DRIVE_PROJECT_PATH = Path('/content/drive/MyDrive/mlds_final_project/project')
# Directory one level above the current working directory
# (presumably meant for local runs; not referenced by the visible Colab-only cells).
LOCAL_PROJECT_PATH = Path.cwd().parents[0]

In [None]:
# --- Mount google drive
# Only Colab runs are supported: fail fast and report the offending value otherwise.
if ENVIRONMENT != 'Colab':
    raise ValueError(f'ENVIRONMENT={ENVIRONMENT!r} - Only Colab runs are supported here!')

# Imported only after the environment check (presumably because the google.colab
# package is available on Colab runtimes only).
from google.colab import drive

drive.mount('/content/drive')

# From here on, PROJECT_PATH is the project root used by the rest of the notebook.
PROJECT_PATH = DRIVE_PROJECT_PATH

Mounted at /content/drive


### Download dataset from Drive

In [None]:
# Locations of the dataset archives below the project root
train_zip_path = PROJECT_PATH.joinpath('cats_vs_dogs_dataset/train.zip')
test_zip_path = PROJECT_PATH.joinpath('cats_vs_dogs_dataset/test_labeled.zip')

# Stop right away if either archive is missing
assert train_zip_path.exists(), f'Could not find data on drive: {train_zip_path}'
assert test_zip_path.exists(), f'Could not find data on drive: {test_zip_path}'

# Data folder, relative to the current working directory, and its train/test sub-folders
data_path = Path('./data')
TRAIN_DATA_PATH = data_path.joinpath('train')
TEST_DATA_PATH = data_path.joinpath('test')

# Show the resolved locations so they can be checked by eye
print(f'Train data path on colab: {TRAIN_DATA_PATH.absolute()}')
print(f'Test data path on colab: {TEST_DATA_PATH.absolute()}')


Train data path on colab: /content/data/train
Test data path on colab: /content/data/test


In [None]:
import shutil
import zipfile

# Set to True to wipe the data folder and fetch the archives again even if the folder exists.
FORCE_DATA_DOWNLOAD = False

if ENVIRONMENT != 'Colab':
    raise Exception('Auto data download only works in Colab mode!')

# --- check folder existence
if FORCE_DATA_DOWNLOAD or not data_path.exists():
    # Announce the deletion only when there really is a folder to delete
    if data_path.exists():
        print('Deleting existing data folder')
        shutil.rmtree(data_path)

    data_path.mkdir(parents=True, exist_ok=True)

    # Copy both archives next to the extraction target before unpacking them
    print('Copying datasets from drive')
    new_test_zip_path = data_path / test_zip_path.name
    shutil.copy(test_zip_path, new_test_zip_path)

    new_train_zip_path = data_path / train_zip_path.name
    shutil.copy(train_zip_path, new_train_zip_path)

    print('Extracting ZIP files')
    for local_zip_path in (new_test_zip_path, new_train_zip_path):
        with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
            zip_ref.extractall(data_path)

    # The local archive copies are no longer needed once extracted
    print('Deleting zip files')
    new_test_zip_path.unlink()
    new_train_zip_path.unlink()



Deleting existing data folder
Copying datasets from drive
Extracting ZIP files
Deleting zip files


The available constants for use after the above code runs:


*   PROJECT_PATH - Project root path
*   TRAIN_DATA_PATH - train data path
*   TEST_DATA_PATH - test data path



### Download code from drive

In [None]:
# Archive holding the project source code, stored below the project root on Drive
colab_src_path = PROJECT_PATH / 'notebooks/src.zip'
assert colab_src_path.exists(), f'Could not find src archive on drive: {colab_src_path}'

local_src_path = Path('./src')

import shutil
import zipfile

# True: always replace the local src folder with a fresh copy from Drive.
FORCE_SRC_DOWNLOAD = True

if ENVIRONMENT != 'Colab':
    raise Exception('Auto data download only works in Colab mode!')

# --- check folder existence
if FORCE_SRC_DOWNLOAD or not local_src_path.exists():
    # Announce the deletion only when there really is a folder to delete
    if local_src_path.exists():
        print('Deleting existing src folder')
        shutil.rmtree(local_src_path)

    local_src_path.mkdir(parents=True, exist_ok=True)

    print('Copying src from drive')
    new_src_path = local_src_path / colab_src_path.name
    shutil.copy(colab_src_path, new_src_path)

    # The archive is unpacked into the current working directory
    # (presumably it contains a top-level `src/` folder - TODO confirm).
    print('Extracting ZIP file')
    with zipfile.ZipFile(new_src_path, 'r') as src_ref:
        src_ref.extractall('./')

    print('Deleting zip file')
    new_src_path.unlink()


Deleting existing src folder
Copying src from drive
Extracting ZIP file
Deleting zip file


## Run

In [None]:
# Deliberate stop: halts "Run all" here so the run configurations can be reviewed
# before the training cells below are executed manually.
raise Exception('Stop here: review the run configurations, then run the cells below manually.')

Exception: 

### ================================= Stop Here! =====================
* Check what configurations you want to run
* Put them in the folder that the cell below scans (`run_config_path`)
* Use the cell below to scan that folder and make sure the right configurations will be run
* Then go ahead and continue

### Map configurations

In [None]:
from glob import glob

# Folder that is scanned for run configurations (*.json files)
run_config_path = '/content/src/runners/train_test_runners/run_configs'

# glob() returns its matches in arbitrary order; sort them so that the order in which
# the configurations are listed (and later executed) is reproducible.
configs_list = sorted(glob(f'{Path(run_config_path).absolute()}/*.json'))
print('Configurations found:')
for c in configs_list:
    print(c)

Configurations found:
/content/src/runners/train_test_runners/run_configs/plain_vitb16pretrained_patch16_resize224.json


### Run!

In [None]:
from google.colab import runtime

from src.trainer.trainer_modules.train_flow import execute_train_flow
from src.util_functions.util_functions import load_dict_from_json
from src.util_functions.printc import printc

# Horizontal rule printed above and below every status message
banner_rule = '============================================================================================'

# Execute every configuration found by the "Map configurations" cell, one after the other
for run_config_path in configs_list:
    for banner_line in (banner_rule, f'Starting to run: {run_config_path}', banner_rule):
        printc.cyan(banner_line)

    # Load the parameters of this run from its JSON file and hand them to the train flow
    run_params = load_dict_from_json(run_config_path)
    execute_train_flow(
        run_params,
        PROJECT_PATH,
        TRAIN_DATA_PATH,
        TEST_DATA_PATH,
        stop_before_fit=False,
    )

# Closing message once the loop over all configurations has finished
for banner_line in (
    banner_rule,
    '-------------------------------------All Done!----------------------------------------------',
    banner_rule,
):
    printc.cyan(banner_line)



[96mStarting to run: /content/src/runners/train_test_runners/run_configs/plain_vitb16pretrained_patch16_resize224.json[0m
Run output path: /content/drive/MyDrive/mlds_final_project/project/outputs/plain_vitb16pretrained_patch16_resize224_20240126_192234
Creating Datasets


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 158MB/s]
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'inference_normalizer' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['inference_normalizer'])`.


TrainLogsCallback:
Running on: Train: True, Validation: True, Test: False
Saving logs to: /content/drive/MyDrive/mlds_final_project/project/outputs/plain_vitb16pretrained_patch16_resize224_20240126_192234/train_log

TrainLogsCallback:
Running on: Train: False, Validation: False, Test: True
Saving logs to: /content/drive/MyDrive/mlds_final_project/project/outputs/plain_vitb16pretrained_patch16_resize224_20240126_192234/test_log

PerSampleCsvLogCallback:
Running on: Train: False, Validation: False, Test: True
Saving logs to: /content/drive/MyDrive/mlds_final_project/project/outputs/plain_vitb16pretrained_patch16_resize224_20240126_192234/test_predictions_log



INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


[94mStarting train [0m


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name              | Type              | Params
--------------------------------------------------------
0 | model             | VisionTransformer | 85.8 M
1 | criterion         | BCELoss           | 0     
2 | output_normalizer | Sigmoid           | 0     
--------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
343.198   Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name              | Type              | Params
--------------------------------------------------------
0 | model             | VisionTransformer | 85.8 M
1 | criterion         | BCELoss           | 0     
2 | output_normalizer | Sigmoid           | 0     
--------------------------------------------------------
85.8 M    Trainable params
0         Non-trainab

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

### Disconnect runtime

In [None]:
# Final cell: per the section heading, this disconnects the Colab runtime.
# `runtime` is the google.colab module imported in the "Run!" cell, so that cell must have run first.
runtime.unassign()