In [67]:
!pip install rasterio -q
!pip install pandas_path --quiet

import shutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas_path import path
from pathlib import Path
from PIL import Image
import rasterio

import os
import random
import numpy.ma as ma
plt.style.use("ggplot")
%matplotlib inline

from IPython.display import HTML, display

!pip install line_profiler

import cv2
import albumentations
from tqdm import tqdm_notebook, tnrange
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.keras.metrics import MeanIoU
import keras.backend as K
from keras.models import Model, load_model
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D
from keras.layers.merge import concatenate, add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Raise pandas' maximum displayed column width to 400 characters so long cell
# values (e.g. the full file paths shown further down) are not truncated.
pd.set_option('max_colwidth', 400)



In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load data paths and metadata

In [7]:
# NOTE(review): presumably the edge length (in pixels) used for the image
# chips — confirm against the cells that consume it.
img_size = 512

# Anchor the dataset directory at the Drive mount point ('/content/drive')
# instead of deriving it from Path.cwd().parent: the latter only works while
# the working directory is exactly one level below '/', and silently produces
# wrong paths after any change of working directory.
DATA_DIR = Path("/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train")
TRAIN_FEATURES = DATA_DIR / "train_features"  # one sub-directory per chip_id, one TIF per band
TRAIN_LABELS = DATA_DIR / "train_labels"      # one "<chip_id>.tif" label file per chip

# Band names; each is expected as "<band>.tif" inside a chip's feature directory.
BANDS = ["B02", "B03", "B04", "B08"]

# Per-chip metadata table (includes the "chip_id" column used to build file paths).
train_meta = pd.read_csv(DATA_DIR / "train_metadata.csv")

In [57]:
def add_paths(df, feature_dir, label_dir=None, bands=BANDS):
    """
    Add per-band (and optionally label) TIF path columns to a chip dataframe.

    For every band in ``bands`` a column named "{band}_path" (eg "B02_path") is
    set to ``feature_dir / chip_id / "{band}.tif"``. If ``label_dir`` is given,
    a "label_path" column is set to ``label_dir / "{chip_id}.tif"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "chip_id" column of strings. Modified in place.
    feature_dir : str or pathlib.Path
        Directory holding one sub-directory per chip.
    label_dir : str, pathlib.Path or None
        Directory holding the label TIFs; when None no "label_path" column
        is added.
    bands : iterable of str
        Band names to create path columns for.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the path columns added (existing columns
        of the same name are overwritten). File existence is not checked.
    """
    feature_dir = Path(feature_dir)
    # Materialise the ids once; iterating values (not labels) keeps this
    # correct for dataframes whose index is not 0..n-1.
    chip_ids = df["chip_id"].tolist()

    # Whole-column assignment replaces the former per-row chained indexing
    # (df[col][i] = ...), which raised SettingWithCopyWarning and does not
    # write through under pandas copy-on-write.
    for band in bands:
        df[f"{band}_path"] = [
            feature_dir / chip_id / f"{band}.tif" for chip_id in chip_ids
        ]

    if label_dir is not None:
        label_dir = Path(label_dir)
        # The column is created here; previously it was written to without
        # ever being created, which raised KeyError on a fresh dataframe.
        df["label_path"] = [label_dir / f"{chip_id}.tif" for chip_id in chip_ids]

    return df

In [68]:
train_meta = add_paths(train_meta, TRAIN_FEATURES, TRAIN_LABELS, bands=BANDS)

In [70]:
train_meta.head(5)

Unnamed: 0,chip_id,location,datetime,cloudpath,B02_path,B03_path,B04_path,B08_path,label_path
0,adwp,Chifunfu,2020-04-29T08:20:47Z,az://./train_features/adwp,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwp/B02.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwp/B03.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwp/B04.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwp/B08.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_labels/adwp.tif
1,adwu,Chifunfu,2020-04-29T08:20:47Z,az://./train_features/adwu,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwu/B02.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwu/B03.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwu/B04.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwu/B08.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_labels/adwu.tif
2,adwz,Chifunfu,2020-04-29T08:20:47Z,az://./train_features/adwz,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwz/B02.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwz/B03.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwz/B04.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adwz/B08.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_labels/adwz.tif
3,adxp,Chifunfu,2020-04-29T08:20:47Z,az://./train_features/adxp,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adxp/B02.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adxp/B03.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adxp/B04.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/adxp/B08.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_labels/adxp.tif
4,aeaj,Chifunfu,2020-04-29T08:20:47Z,az://./train_features/aeaj,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/aeaj/B02.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/aeaj/B03.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/aeaj/B04.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_features/aeaj/B08.tif,/content/drive/MyDrive/DATA_SCIENCE/data_google_colab/cloud_cover/raw/train/train_labels/aeaj.tif
