In [None]:
from IPython.display import HTML, display
import torch
import torch.nn as nn
from torch.functional import F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torchvision
from torchvision import transforms
import torchvision.transforms as T
from torchvision.utils import make_grid
from torchvision.utils import save_image
from torchvision.datasets import ImageFolder

import albumentations as albu  

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from skimage.draw import random_shapes
from matplotlib import pyplot as plt
import matplotlib.animation as animation
from matplotlib import font_manager, rc
from IPython import display
from PIL import Image
import numpy as np
import random
import glob
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
from tqdm import tqdm
from tqdm import tqdm_notebook
import cv2

In [None]:
# Root folder of the dataset; every input and output path below is built from it.
PATH = 'understanding_cloud_organization/'

In [None]:
import cv2  # NOTE(review): redundant with the import in the first cell

# Image geometry; both tuples are ordered (height, width).
ORI_SIZE = (1400, 2100)  # size at which the RLE masks from train.csv are decoded
NEW_SIZE = (384, 576)    # size every mask and image is resized to

# Interpolation flag passed to each cv2.resize call in this notebook.
INTERPOLATION = cv2.INTER_CUBIC

In [None]:

def mask2rle(img):
    """
    Encode a binary mask as a run-length string.

    img: numpy array, 1 - mask, 0 - background
    Returns a space-separated string of "start length" pairs; starts are
    1-based positions in the column-major flattening of ``img``.
    """
    # Column-major flattening, identical to flattening the transpose.
    flat = img.flatten(order='F')
    # A zero on each side makes runs touching either end produce both a
    # rising and a falling edge.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes: run starts alternate with run ends.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every run end into a run length.
    edges[1::2] = edges[1::2] - edges[::2]
    return ' '.join(map(str, edges))


def rle2mask(mask_rle, shape):
    """
    Decode a run-length string into a binary mask.

    mask_rle: run-length as string formatted (start length), starts 1-based
    shape: (width, height) of the mask to build
    Returns a uint8 numpy array of shape (height, width), 1 - mask, 0 - background
    """
    tokens = mask_rle.split()
    # Even tokens are 1-based starts, odd tokens are run lengths.
    begins = np.asarray(tokens[::2], dtype=int) - 1
    counts = np.asarray(tokens[1::2], dtype=int)
    stops = begins + counts

    canvas = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for first, last in zip(begins, stops):
        canvas[first:last] = 1

    # Runs index the column-major flattening, so reshape as (width, height)
    # and transpose to get (height, width).
    return canvas.reshape(shape).T

In [None]:
# Load the training annotation table from the dataset root.
train_csv_path = os.path.join(PATH, 'train.csv')
df = pd.read_csv(train_csv_path)

In [None]:
# Re-encode every mask at the reduced resolution: decode the RLE at ORI_SIZE,
# resize the mask to NEW_SIZE, encode it again and store it back in place.
#
# The column is read by name instead of by integer position (`row[1]`), which
# is deprecated on label-indexed Series, and missing values are detected with
# pd.isna rather than an identity comparison against np.nan.
# NOTE(review): assumes 'EncodedPixels' is the second column of train.csv,
# i.e. the one `row[1]` used to read - confirm.
# NOTE: this cell rewrites `df` in place, so it must run exactly once after
# train.csv is loaded; a second run would decode already-resized RLEs at
# ORI_SIZE.
for idx, encodedpixels in df['EncodedPixels'].items():
    if pd.isna(encodedpixels):
        # No mask annotated for this row.
        continue

    # rle2mask and cv2.resize both take (width, height), hence the reversal.
    mask = rle2mask(encodedpixels, shape=ORI_SIZE[::-1])
    mask = cv2.resize(mask, NEW_SIZE[::-1], interpolation=INTERPOLATION)

    df.at[idx, 'EncodedPixels'] = mask2rle(mask)

In [None]:
# Save the table with the re-encoded masks. DataFrame.to_csv does not create
# missing parent directories, so make sure the output folder exists first.
resized_dir = os.path.join(PATH, 'resized')
os.makedirs(resized_dir, exist_ok=True)
df.to_csv(os.path.join(resized_dir, 'train.csv'), index=False)

In [None]:
# Resize every training image to NEW_SIZE and store it under resized/train_images.
train_images_dir = os.path.join(PATH, 'train_images/')
resized_train_images_dir = os.path.join(PATH, 'resized', 'train_images')
# cv2.imwrite does not create missing folders, so create the target up front.
os.makedirs(resized_train_images_dir, exist_ok=True)
image_files = os.listdir(train_images_dir)

for image_file in tqdm_notebook(image_files):
    src = os.path.join(train_images_dir, image_file)
    # Skip Jupyter's checkpoint folder and anything else that is not a file.
    # The write below now lives inside the same guard; previously it ran for
    # skipped entries too, with a stale or undefined `img`.
    if image_file == ".ipynb_checkpoints" or not os.path.isfile(src):
        continue

    img = cv2.imread(src)
    if img is None:
        # cv2.imread returns None when it cannot decode the file.
        print('Skipping unreadable image: {}'.format(src))
        continue

    # cv2.resize expects (width, height), hence the reversed tuple.
    img = cv2.resize(img, NEW_SIZE[::-1], interpolation=INTERPOLATION)

    dst = os.path.join(resized_train_images_dir, image_file)
    cv2.imwrite(dst, img)

In [None]:
# Resize every test image to NEW_SIZE and store it under resized/test_images.
test_images_dir = os.path.join(PATH, 'test_images')
resized_test_images_dir = os.path.join(PATH, 'resized', 'test_images')
# cv2.imwrite does not create missing folders, so create the target up front.
os.makedirs(resized_test_images_dir, exist_ok=True)
image_files = os.listdir(test_images_dir)

for image_file in tqdm_notebook(image_files):
    src = os.path.join(test_images_dir, image_file)
    # Skip Jupyter's checkpoint folder and anything else that is not a file,
    # as the training-image loop does.
    if image_file == ".ipynb_checkpoints" or not os.path.isfile(src):
        continue

    img = cv2.imread(src)
    if img is None:
        # cv2.imread returns None when it cannot decode the file; passing
        # that to cv2.resize would raise.
        print('Skipping unreadable image: {}'.format(src))
        continue

    # cv2.resize expects (width, height), hence the reversed tuple.
    img = cv2.resize(img, NEW_SIZE[::-1], interpolation=INTERPOLATION)

    dst = os.path.join(resized_test_images_dir, image_file)
    cv2.imwrite(dst, img)
