<h2><center>Submission for Clouds Removal</center></h2>

![](https://media.discordapp.net/attachments/703945836614123561/866939997021143040/banner.jpg?width=1439&height=277)

*Acknowledgement*: Thanks to my friend for advice. (See this [post](https://discourse.aicrowd.com/t/suggestions-concerning-how-to-solve-this-puzzle/6218))

# Setting up Environment

### Downloading Dataset

First, we install the AIcrowd command-line library (`aicrowd-cli`), which lets us download the dataset using just an API key.

In [None]:
# Install the AIcrowd command-line client, then load its notebook extension
# (the following cell uses the %aicrowd magic).
!pip install aicrowd-cli

%load_ext aicrowd.magic

In [None]:
# Log in to AIcrowd. NOTE: API_KEY looks like a placeholder; substitute your own key before running.
%aicrowd login --api-key API_KEY

In [None]:
# Downloading the Dataset
# Recreate an empty data/ folder so a rerun starts clean, then download the
# clouds-removal challenge files matching "*Complete*" into it.
!rm -rf data
!mkdir data
!aicrowd dataset download -c clouds-removal "*Complete*" -o data

In [None]:
# Unzipping the dataset
# Each archive is extracted into its own folder (data/train, data/test);
# unzip's standard output is redirected to /dev/null to keep the cell output short.
!unzip data/train.zip -d data/train >> /dev/null
!unzip data/test.zip -d data/test >> /dev/null

# Importing Libraries

In [None]:
# Importing Libraries
import os
import numpy as np
import random
import torch
from natsort import natsorted
from glob import glob
import cv2
from tqdm.notebook import tqdm

In [None]:
def seed_everything(seed):
  """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices).

  The seed is also exported as a string in the `PL_GLOBAL_SEED`
  environment variable.
  """
  os.environ["PL_GLOBAL_SEED"] = str(seed)
  # Apply the same seed to every random number generator, in a fixed order.
  seeders = (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
  )
  for apply_seed in seeders:
    apply_seed(seed)

# Fix the seed once for the whole notebook.
seed_everything(1)

# Preprocessing

In [None]:
# List every entry in the training folder; num_total is half that count
# (presumably one cloud video and one clear video per sample - TODO confirm).
all_files = os.listdir('data/train')
num_total = len(all_files) // 2

## Get validation set

In [None]:
# All video indexes, 0 .. num_total - 1.
indexes = list(range(num_total))
# Draw six of them at random as a candidate validation split and peek at the first one.
val_idxes = random.sample(indexes, 6)
val_idxes[0]

# The sampled split is then discarded: with an empty list, the extraction
# loops below send every video to the training folders.
val_idxes = []

## Extract frames from videos

In [None]:
!rm -rf my_data
!rm -rf testA
!mkdir my_data
!mkdir my_data/A
!mkdir my_data/B
!mkdir my_data/A/train
!mkdir my_data/A/val
!mkdir my_data/B/train
!mkdir my_data/B/val
!mkdir testA
!mkdir testA/test

for cloud_video in natsorted(glob('data/train/cloud*')):
  current = int(cloud_video.split('.')[0].split('_')[1])
  img_video = cv2.VideoCapture(cloud_video)
  # Going through each frame
  index = 0
  while True:
    # Reading the frame
    ret, frame = img_video.read()
    if ret:
      if current in val_idxes:
        cv2.imwrite(f'my_data/A/val/{current}_{index}.jpg', frame)
      else:
        cv2.imwrite(f'my_data/A/train/{current}_{index}.jpg', frame)
      index += 1
    else:
      break

for clear_video in natsorted(glob('data/train/clear*')):
  current = int(clear_video.split('.')[0].split('_')[1])
  img_video = cv2.VideoCapture(clear_video)
  # Going through each frame
  index = 0
  while True:
    # Reading the frame
    ret, frame = img_video.read()
    if ret:
      if current in val_idxes:
        cv2.imwrite(f'my_data/B/val/{current}_{index}.jpg', frame)
      else:
        cv2.imwrite(f'my_data/B/train/{current}_{index}.jpg', frame)
      index += 1
    else:
      break

for cloud_video in natsorted(glob('data/test/*')):
  current = int(cloud_video.split('.')[0].split('_')[1])
  img_video = cv2.VideoCapture(cloud_video)
  # Going through each frame
  index = 0
  while True:
    # Reading the frame
    ret, frame = img_video.read()
    if ret:
      cv2.imwrite(f'testA/test/{current}_{index}.jpg', frame)
      index += 1
    else:
      break

I used the pix2pix algorithm, as implemented by the original author in [this repo](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix).

In [None]:
# Remove any previous checkout, then clone a fresh copy of the pix2pix/CycleGAN repository.
!rm -rf pytorch-CycleGAN-and-pix2pix
!git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix.git

In [None]:
# Rebuild final_data from scratch: run the repository's combine_A_and_B.py script
# with my_data/A and my_data/B as the two input folders and final_data as the output.
!rm -rf final_data
!cd pytorch-CycleGAN-and-pix2pix && python3 datasets/combine_A_and_B.py --fold_A ../my_data/A --fold_B ../my_data/B --fold_AB ../final_data

# Training

In [None]:
# Extra packages installed before training (presumably required by the pix2pix repository's scripts - verify against its requirements).
!pip install dominate
!pip install visdom

Note: in the submission notebook, I set both `n_epochs` and `n_epochs_decay` to 10, but stopped the training at the start of epoch 16. The command below uses `--n_epochs_decay 5` instead.

In [None]:
# Discard old checkpoints, then train a pix2pix model named "clouds" on final_data
# in the A -> B direction (A holds the cloud frames, B the clear frames).
# --checkpoints_dir ../checkpoints is relative to the repository folder, i.e. the
# checkpoints/ folder removed on the first line.
!rm -rf checkpoints
!cd pytorch-CycleGAN-and-pix2pix && python3 train.py --dataroot ../final_data --name clouds --model pix2pix --direction AtoB --checkpoints_dir ../checkpoints --batch_size 4 --n_epochs 10 --n_epochs_decay 5 --gan_mode lsgan --save_epoch_freq 1 --display_id -1

# Generate Submission

Read all of the frames from the testing directory, pad them to the size used below, and then save the generated videos in the `clear` directory for submission.

In [None]:
from PIL import Image
!rm -rf new_testA
!mkdir new_testA
!mkdir new_testA/test
for img in os.listdir('testA/test'):
  new_img = Image.new('RGB', (1024, 512))
  orig_img = Image.open(f'testA/test/{img}')
  new_img.paste(orig_img)
  new_img.save(f'new_testA/test/{img}')
print(len(os.listdir('new_testA/test')))

In [None]:
from tqdm import tqdm

# Generating Predictions Function
def gen_predictions():
  !rm -rf clear
  !mkdir clear
  for index in tqdm(range(500)):
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(os.path.join("clear", f"clear_{index}.mp4"), fourcc, 24.0, (512,512))
    video_images = [f'results/clouds/test_latest/images/{index}_{i}_fake.png' for i in range(24)]
    for image in video_images:
      assert os.path.isfile(image)
      frame = cv2.imread(image)
      frame = cv2.resize(frame, (512, 512))
      out.write(frame)

In [None]:
# Build the submission videos in the clear/ folder.
gen_predictions()