In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pylab as plt


In [None]:
from fastai.conv_learner import *
from fastai.dataset import *


### Look at the data set

Let's look at the size of the datasets.

NOTE: The data set was taken from the thesis project of V. Mnih at the University of Toronto: https://www.cs.toronto.edu/~vmnih/data/

In [None]:
# Count the satellite images (sat) and the mask images (map) of every split.
!echo 'Training set'
!ls ~/fastai/data/roads/mass_roads/train/sat | wc -l
!ls ~/fastai/data/roads/mass_roads/train/map | wc -l
!echo 'Validation Set'
!ls ~/fastai/data/roads/mass_roads/valid/sat | wc -l
!ls ~/fastai/data/roads/mass_roads/valid/map | wc -l
!echo 'Test Set'
!ls ~/fastai/data/roads/mass_roads/test/sat | wc -l
!ls ~/fastai/data/roads/mass_roads/test/map | wc -l

In [None]:
import os 
from glob import glob

# Root directory of the original (TIFF) dataset.
INPUT_PATH = '/home/ubuntu/fastai/data/roads/mass_roads'
DATA_PATH = INPUT_PATH

# One root directory per split.
TRAIN_DATA_PATH, VALID_DATA_PATH, TEST_DATA_PATH = (
    os.path.join(DATA_PATH, split) for split in ("train", "valid", "test")
)

# Satellite images ("sat") and their masks ("map") for the train/valid splits.
TRAIN_DATA, TRAIN_MASKS_DATA = (
    os.path.join(DATA_PATH, sub) for sub in ("train/sat", "train/map")
)
VALID_DATA, VALID_MASKS_DATA = (
    os.path.join(DATA_PATH, sub) for sub in ("valid/sat", "valid/map")
)

# Satellite images of the test split.
TEST_DATA = os.path.join(DATA_PATH, "test/sat")

In [None]:
TRAIN_DATA

In [None]:
!ls {TRAIN_DATA} | tail -10

In [None]:
# glob() returns matches in arbitrary order; sort so that the file list (and
# everything derived from it, e.g. train_ids[0]) is the same on every run.
train_files = sorted(glob(os.path.join(TRAIN_DATA, "*.tiff")))
train_files[:10]

In [None]:
# Image id = file name without its directory and extension.
train_ids = [os.path.splitext(os.path.basename(path))[0] for path in train_files]

In [None]:
train_ids[:5]

In [None]:
# Sorted for a reproducible order (glob() makes no ordering guarantee).
test_files = sorted(glob(os.path.join(TEST_DATA, "*.tiff")))
# Image id = file name without its directory and extension.
test_ids = [os.path.splitext(os.path.basename(path))[0] for path in test_files]

In [None]:
test_ids[:5]

In [None]:
def get_filename(image_id, image_type):
    """Return the path of the source image file for ``image_id``.

    Parameters
    ----------
    image_id : object
        Identifier formatted into the file name (``"<image_id>.<ext>"``).
    image_type : str
        Selects directory and extension:

        * ``"Train"`` (exact match)        -> ``TRAIN_DATA``, ``.tiff``
        * contains ``"Train_mask"``        -> ``TRAIN_MASKS_DATA``, ``.tif``
        * contains ``"Test"``              -> ``TEST_DATA``, ``.tiff``
        * ``"Valid"`` (exact match)        -> ``VALID_DATA``, ``.tiff``
        * contains ``"Valid_mask"``        -> ``VALID_MASKS_DATA``, ``.tif``

    Returns
    -------
    str
        ``os.path.join(<directory>, "<image_id>.<ext>")``.

    Raises
    ------
    ValueError
        If ``image_type`` matches none of the rules above.
    """
    # The branch order of the original three types is preserved; the
    # validation types are checked last so existing inputs resolve as before.
    if "Train" == image_type:
        ext, data_path = 'tiff', TRAIN_DATA
    elif "Train_mask" in image_type:
        ext, data_path = 'tif', TRAIN_MASKS_DATA
    elif "Test" in image_type:
        # NOTE(review): a type such as "Test_mask" also lands here and gets
        # the satellite path — confirm whether test masks need their own type.
        ext, data_path = 'tiff', TEST_DATA
    elif "Valid" == image_type:
        ext, data_path = 'tiff', VALID_DATA
    elif "Valid_mask" in image_type:
        ext, data_path = 'tif', VALID_MASKS_DATA
    else:
        raise ValueError("Image type '%s' is not recognized" % image_type)

    return os.path.join(data_path, "{}.{}".format(image_id, ext))

In [None]:
# The id must be a string: the bare literal 10828750_15 is the integer 1082875015.
get_filename('10828750_15', 'Train')

In [None]:
# The id must be a string: the bare literal 10828750_15 is the integer 1082875015.
get_filename('10828750_15', 'Train_mask')

In [None]:
import cv2
from PIL import Image


def get_image_data(image_id, image_type, **kwargs):
    """Load the image identified by ``image_id`` / ``image_type``.

    Types containing ``'mask'`` are read with the PIL loader, everything else
    with the OpenCV loader. Extra keyword arguments are forwarded to the
    selected loader, whose return value is passed back unchanged.
    """
    reader = _get_image_data_pil if 'mask' in image_type else _get_image_data_opencv
    return reader(image_id, image_type, **kwargs)

def _get_image_data_opencv(image_id, image_type, **kwargs):
    """Read an image with OpenCV and return it converted from BGR to RGB.

    The file path comes from ``get_filename``. ``**kwargs`` is accepted but
    not used. An ``AssertionError`` is raised when ``cv2.imread`` yields
    ``None``.
    """
    bgr = cv2.imread(get_filename(image_id, image_type))
    assert bgr is not None, "Failed to read image : %s, %s" % (image_id, image_type)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _get_image_data_pil(image_id, image_type, return_exif_md=False, return_shape_only=False):
    """Read an image with PIL.

    Parameters
    ----------
    image_id, image_type
        Passed to ``get_filename`` to locate the file.
    return_exif_md : bool
        When true, return ``(array, exif)`` where ``exif`` is the result of
        the image's ``_getexif()``.
    return_shape_only : bool
        When true, return only the reversed ``size`` tuple with the number of
        bands appended (presumably ``(height, width, bands)``), without
        converting the pixel data to an array.

    Returns
    -------
    numpy.ndarray, tuple
        The pixel array, the ``(array, exif)`` pair, or the shape tuple,
        depending on the flags above.

    Raises
    ------
    AssertionError
        If the file cannot be opened.
    """
    fname = get_filename(image_id, image_type)

    try:
        img_pil = Image.open(fname)
    except Exception as e:
        # Raised explicitly (instead of `assert False`) so the failure is
        # still reported when assertions are disabled with `python -O`.
        raise AssertionError(
            "Failed to read image : %s, %s. Error message: %s" % (image_id, image_type, e)
        ) from e

    # The context manager releases the file handle on every return path,
    # including the shape-only one that never loads the pixel data.
    with img_pil:
        if return_shape_only:
            return img_pil.size[::-1] + (len(img_pil.getbands()),)

        img = np.asarray(img_pil)
        # NOTE(review): _getexif() is a private PIL method and may not exist
        # for every image format — confirm before relying on return_exif_md.
        exif_md = img_pil._getexif() if return_exif_md else None

    if return_exif_md:
        return img, exif_md
    return img

### Display a single image

In [None]:
image_id = train_ids[0]

img = get_image_data(image_id, "Train")
mask = get_image_data(image_id, "Train_mask")
# Keep only the image pixels where the mask is non-zero.
img_masked = cv2.bitwise_and(img, img, mask=mask)

print("Image shape: {} | image type: {} | mask shape: {} | mask type: {}".format(img.shape, img.dtype, mask.shape, mask.dtype) )

# One row with three titled panels: image, mask, image restricted to the mask.
fig, axes = plt.subplots(1, 3, figsize=(20, 20))
panels = [(img, "Satellite image"), (mask, "Mask"), (img_masked, "Masked image")]
for ax, (panel, title) in zip(axes, panels):
    ax.imshow(panel)
    ax.set_title(title)

In [None]:
# Shape of every training image (loop variable renamed so it no longer shadows the builtin `id`).
img_shape = [get_image_data(train_id, "Train").shape for train_id in train_ids]
img_shape[:5]

In [None]:
# Shape of every training mask (loop variable renamed so it no longer shadows the builtin `id`).
mask_shape = [get_image_data(train_id, "Train_mask").shape for train_id in train_ids]
mask_shape[:5]

### Convert to PNG

First: convert the original .tiff satellite images to .png and store them in the directory 'mass_roads_png/train/sat'.
Second: convert the mask .tif files to .png and store them in the directory 'mass_roads_png/train/map'.

#### Convert the training set to .png files

In [None]:
# Root directory that receives the PNG copy of the dataset.
INPUT_PATH_PNG = '/home/ubuntu/fastai/data/roads/mass_roads_png'
DATA_PATH_PNG = INPUT_PATH_PNG

TRAIN_PATH_PNG = os.path.join(DATA_PATH_PNG, "train")

# PNG directories for train images, train masks and test images.
TRAIN_DATA_PNG, TRAIN_MASKS_DATA_PNG, TEST_DATA_PNG = (
    os.path.join(DATA_PATH_PNG, sub) for sub in ("train/sat", "train/map", "test/sat")
)

In [None]:
TRAIN_PATH_PNG

In [None]:
TRAIN_DATA

In [None]:
TRAIN_DATA_PNG

In [None]:
import pathlib
# Create the PNG output directories for the training images and masks.
# (TRAIN_PATH was never defined; the PNG root for the train split is TRAIN_PATH_PNG.)
pathlib.Path(f'{TRAIN_PATH_PNG}/sat').mkdir(parents=True, exist_ok=True)
pathlib.Path(f'{TRAIN_PATH_PNG}/map').mkdir(parents=True, exist_ok=True)

In [None]:
train_ids[:5]

In [None]:
def convert_train_img(fn):
    """Save the training image ``<TRAIN_DATA>/<fn>.tiff`` as ``<TRAIN_DATA_PNG>/<fn>.png``."""
    src = '{}/{}.tiff'.format(TRAIN_DATA, fn)
    dst = '{}/{}.png'.format(TRAIN_DATA_PNG, fn)
    tiff_image = Image.open(src)
    tiff_image.save(dst)

In [None]:
# Convert every training image; the converter defined above is convert_train_img.
for f in train_ids: convert_train_img(f)

In [None]:
def convert_train_mask_img(fn):
    """Save the training mask ``<TRAIN_MASKS_DATA>/<fn>.tif`` as ``<TRAIN_MASKS_DATA_PNG>/<fn>.png``."""
    src = '{}/{}.tif'.format(TRAIN_MASKS_DATA, fn)
    dst = '{}/{}.png'.format(TRAIN_MASKS_DATA_PNG, fn)
    tif_mask = Image.open(src)
    tif_mask.save(dst)

In [None]:
# Convert every training mask; the converter defined above is convert_train_mask_img.
for f in train_ids: convert_train_mask_img(f)

### Convert the validation files

In [None]:
# Sorted for a reproducible order (glob() makes no ordering guarantee).
valid_files = sorted(glob(os.path.join('{}/sat'.format(VALID_DATA_PATH), "*.tiff")))
valid_files[:10]

In [None]:
# Derive ids from the file names themselves. The previous slice used the length
# of the *training* directory, which only worked because "train" and "valid"
# happen to have the same number of characters.
valid_ids = [os.path.splitext(os.path.basename(path))[0] for path in valid_files]
valid_ids[:5]

In [None]:
# Root of the PNG copy of the validation split.
VALID_DATA_PATH_PNG = os.path.join(DATA_PATH_PNG, "valid")

In [None]:
def convert_valid_img(fn):
    """Convert the validation satellite image ``fn`` from .tiff to .png.

    Reads ``<VALID_DATA_PATH>/sat/<fn>.tiff`` and writes
    ``<VALID_DATA_PATH_PNG>/sat/<fn>.png``, creating the output directory
    when it does not exist yet.
    """
    input_file = '{}/sat/{}.tiff'.format(VALID_DATA_PATH, fn)
    output_file = '{}/sat/{}.png'.format(VALID_DATA_PATH_PNG, fn)
    # Nothing else creates the validation PNG directories, so do it here.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    Image.open(input_file).save(output_file)


def convert_valid_mask_img(fn):
    """Convert the validation mask ``fn`` from .tif to .png.

    Reads ``<VALID_DATA_PATH>/map/<fn>.tif`` and writes
    ``<VALID_DATA_PATH_PNG>/map/<fn>.png``, creating the output directory
    when it does not exist yet.
    """
    input_file = '{}/map/{}.tif'.format(VALID_DATA_PATH, fn)
    output_file = '{}/map/{}.png'.format(VALID_DATA_PATH_PNG, fn)
    # Nothing else creates the validation PNG directories, so do it here.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    Image.open(input_file).save(output_file)

In [None]:
# Run both validation converters for every validation id.
for valid_id in valid_ids:
    for convert in (convert_valid_img, convert_valid_mask_img):
        convert(valid_id)

### Now convert the TEST files

In [None]:
# Sorted for a reproducible order (glob() makes no ordering guarantee).
test_files = sorted(glob(os.path.join('{}/sat'.format(TEST_DATA_PATH), "*.tiff")))
test_files[:10]

In [None]:
TEST_DATA_PATH_ROOT = '{}/sat/'.format(TEST_DATA_PATH)
# Image id = file name without its directory and extension.
test_ids = [os.path.splitext(os.path.basename(path))[0] for path in test_files]
test_ids[:5]

In [None]:
# Root of the PNG copy of the test split.
TEST_DATA_PATH_PNG = os.path.join(DATA_PATH_PNG, "test")

In [None]:
def convert_test_img(fn):
    """Convert the test satellite image ``fn`` from .tiff to .png.

    Reads ``<TEST_DATA_PATH>/sat/<fn>.tiff`` and writes
    ``<TEST_DATA_PATH_PNG>/sat/<fn>.png``, creating the output directory
    when it does not exist yet.
    """
    input_file = '{}/sat/{}.tiff'.format(TEST_DATA_PATH, fn)
    output_file = '{}/sat/{}.png'.format(TEST_DATA_PATH_PNG, fn)
    # Nothing else creates the test PNG directories, so do it here.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    Image.open(input_file).save(output_file)


def convert_test_mask_img(fn):
    """Convert the test mask ``fn`` from .tif to .png.

    Reads ``<TEST_DATA_PATH>/map/<fn>.tif`` and writes
    ``<TEST_DATA_PATH_PNG>/map/<fn>.png``, creating the output directory
    when it does not exist yet.
    """
    input_file = '{}/map/{}.tif'.format(TEST_DATA_PATH, fn)
    output_file = '{}/map/{}.png'.format(TEST_DATA_PATH_PNG, fn)
    # Nothing else creates the test PNG directories, so do it here.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    Image.open(input_file).save(output_file)

In [None]:
# Run both test converters for every test id.
for test_id in test_ids:
    for convert in (convert_test_img, convert_test_mask_img):
        convert(test_id)