In [1]:
from collections import defaultdict
import csv
import sys
import time

import cv2
from shapely.geometry import MultiPolygon, Polygon
## conda install shapely
import shapely.wkt
import shapely.affinity
import numpy as np
import tifffile as tiff
## pip install tifffile
import pandas as pd

from shapely.wkt import loads as wkt_loads
from matplotlib.patches import Patch
# from matplotlib.patches import Polygon, Patch

# The descartes package makes plotting polygons with holes much easier
from descartes.patch import PolygonPatch
## pip install descartes

import matplotlib.pyplot as plt
import tifffile as tiff

from sklearn.linear_model import SGDClassifier, RidgeClassifier, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import average_precision_score, roc_auc_score
from sklearn.externals import joblib

import xgboost as xgb
## http://xgboost.readthedocs.io/en/latest/build.html

from IPython.core.debugger import Tracer

In [2]:
## Classes and their plotting attributes.
## One row per class: (class id, label, colour string, z-order value).
_CLASS_TABLE = (
    (1,  'Buildings',      '0.7',     5),
    (2,  'Misc',           '0.4',     5),
    (3,  'Road',           '#b35806', 4),
    (4,  'Track',          '#dfc27d', 1),
    (5,  'Trees',          '#1b7837', 3),
    (6,  'Crops',          '#a6dba0', 2),
    (7,  'Waterway',       '#74add1', 7),
    (8,  'Standing water', '#4575b4', 8),
    (9,  'Vehicle Large',  '#f46d43', 9),
    (10, 'Vehicle Small',  '#d73027', 10),
)

# class id -> human-readable label
CLASSES = {class_id: label for class_id, label, _colour, _order in _CLASS_TABLE}

# class id -> colour string used when plotting that class
COLORS = {class_id: colour for class_id, _label, colour, _order in _CLASS_TABLE}

# class id -> z-order value (presumably the matplotlib zorder used when drawing; confirm at the plotting call site)
ZORDER = {class_id: order for class_id, _label, _colour, order in _CLASS_TABLE}

## Load the three metadata tables, in this order:
##   training polygons (WKT), sample submission, grid sizes.
## NOTE(review): these are read from 'satellite_image/' while image_to_train
## reads its TIFFs from '../input/' - confirm both locations are intended.
train_wkt, sample_submission, grid_sizes = (
    pd.read_csv(csv_path)
    for csv_path in (
        'satellite_image/train_wkt_v4.csv',
        'satellite_image/sample_submission.csv',
        'satellite_image/grid_sizes.csv',
    )
)

## Give the grid-size table explicit column names
grid_sizes.columns = ['ImageId','Xmax','Ymin']

In [3]:
def get_scalers(image_size, x_max, y_min):
    """Return the ``(x, y)`` factors used to scale polygons for an image.

    Parameters
    ----------
    image_size : sequence of two numbers
        ``(height, width)`` - height first, so that the same value can be
        handed to ``mask_for_polygons``.
    x_max, y_min : number
        Grid extents of the image (the ``Xmax`` / ``Ymin`` grid-size values).

    Returns
    -------
    tuple
        ``(x_scaler, y_scaler)``: ``width * (width / (width + 1)) / x_max``
        and ``height * (height / (height + 1)) / y_min``.
    """
    height, width = image_size

    def shrunk(extent):
        # extent * extent / (extent + 1), evaluated in this exact order
        return extent * (extent / (extent + 1))

    return shrunk(width) / x_max, shrunk(height) / y_min


## Convert polygons to mask
def mask_for_polygons(polygons, image_size):
    """Rasterise polygons into a binary ``uint8`` mask.

    Parameters
    ----------
    polygons : multi-part geometry, single polygon, or iterable of polygons
        Each polygon must expose ``exterior.coords`` and ``interiors``.
        A multi-part geometry is read through its ``.geoms`` attribute, so
        this works on Shapely versions where multi-part geometries are not
        iterable; a plain iterable of polygons is used as-is.
    image_size : sequence of two ints
        Shape of the mask, passed straight to ``np.zeros``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``image_size``: 1 inside polygon exteriors,
        0 elsewhere and inside interior rings (holes). An all-zero mask is
        returned when ``polygons`` is falsy (empty).
    """
    image_mask = np.zeros(image_size, np.uint8)
    if not polygons:
        return image_mask

    if hasattr(polygons, 'exterior'):
        # A single polygon was passed instead of a collection.
        parts = [polygons]
    else:
        # Prefer .geoms; fall back to iterating the object itself (lists, tuples).
        parts = list(getattr(polygons, 'geoms', polygons))

    def int_coords(coords):
        # Round to the nearest pixel and use int32 coordinates for fillPoly.
        return np.array(coords).round().astype(np.int32)

    exteriors = [int_coords(poly.exterior.coords) for poly in parts]
    interiors = [int_coords(ring.coords)
                 for poly in parts
                 for ring in poly.interiors]

    # Fill exteriors first, then carve the holes back out.
    cv2.fillPoly(image_mask, exteriors, 1)
    cv2.fillPoly(image_mask, interiors, 0)
    return image_mask


In [4]:
# scale "M" and "A" to "P" images
def func_feature_scale(imageName, numRows, numCols):
    """Upsample a multi-band TIFF to ``numRows`` x ``numCols`` by pixel replication.

    The image is read with ``tiff.imread`` and transposed from
    (band, row, col) to (row, col, band). Along each axis every source pixel
    is repeated ``round(target / source)`` times; the last source row/column
    absorbs whatever remains so the output always has exactly the requested
    size. Output that would extend past the target size is truncated.

    Parameters
    ----------
    imageName : str
        Path of the TIFF file to read (expected to load as a 3-D array).
    numRows, numCols : int
        Target number of rows and columns.

    Returns
    -------
    numpy.ndarray
        ``uint16`` array of shape ``(numRows, numCols, n_bands)``.
    """
    data = tiff.imread(imageName)
    data = data.transpose([1, 2, 0])
    nr, nc = data.shape[0], data.shape[1]

    # The replication pattern is the same for every band, so compute it once
    # and gather all bands in a single vectorised lookup instead of looping
    # over every source pixel in Python.
    row_src = _replication_index(numRows, nr)
    col_src = _replication_index(numCols, nc)
    scaled = data[np.ix_(row_src, col_src)]

    # Same dtype and memory layout as a freshly allocated uint16 buffer.
    return np.ascontiguousarray(scaled, dtype='uint16')


def _replication_index(out_len, src_len):
    """Return, for each of ``out_len`` output positions, the source index to copy."""
    ratio = int(round(out_len / src_len))
    if ratio < 1:
        # Target shorter than about half the source: the block size rounds to
        # zero, and only the last source element is ever copied.
        return np.full(out_len, src_len - 1, dtype=np.intp)
    # Blocks of `ratio` outputs per source element; the tail maps to the last one.
    return np.minimum(np.arange(out_len) // ratio, src_len - 1)

In [5]:
## Convert image to training data: X and y
def image_to_train(image_id, class_type, image_type ='3'):
    """Build per-pixel features and labels for one image and one class.

    Parameters
    ----------
    image_id : str
        Image identifier, used in the TIFF file names and to look up rows in
        ``grid_sizes`` and ``train_wkt``.
    class_type : int
        Value matched against the ``ClassType`` column of ``train_wkt``.
    image_type : str, default '3'
        ``'3'`` reads the three-band image; any other value reads the
        sixteen-band files (A and M upsampled to the P image size).

    Returns
    -------
    X : numpy.ndarray
        ``float32`` features, one row per pixel: 3 columns for the three-band
        image, otherwise 17 columns ordered A (8), M (8), P (1).
    y : numpy.ndarray
        Flattened ``uint8`` mask from ``mask_for_polygons`` (1 = inside a
        training polygon of this class).
    train_polygons
        The training geometry as loaded from WKT, i.e. before scaling.

    NOTE(review): images are read from '../input/', whereas the CSV tables
    elsewhere in this notebook come from 'satellite_image/' - confirm.
    """
    if image_type == '3':
        # (band, row, col) -> (row, col, band), then one row per pixel
        rgb = tiff.imread('../input/three_band/{}.tif'.format(image_id)).transpose([1, 2, 0])
        image_size = rgb.shape[:2]
        X = rgb.reshape(-1, 3).astype(np.float32)
    else:
        # The P image defines the target resolution for the other two files
        pan = tiff.imread('../input/sixteen_band/{}_P.tif'.format(image_id))
        n_rows, n_cols = pan.shape[0], pan.shape[1]
        image_size = [n_rows, n_cols]

        m_scaled = func_feature_scale('../input/sixteen_band/{}_M.tif'.format(image_id), n_rows, n_cols)
        a_scaled = func_feature_scale('../input/sixteen_band/{}_A.tif'.format(image_id), n_rows, n_cols)

        features_a = a_scaled.reshape(-1, 8).astype(np.float32)
        features_m = m_scaled.reshape(-1, 8).astype(np.float32)
        features_p = pan.reshape(-1, 1).astype(np.float32)
        X = np.concatenate((features_a, features_m, features_p), axis=1)

    # Grid extents (x_max, y_min) for this image
    grid_row = grid_sizes[grid_sizes['ImageId'] == image_id]
    x_max = grid_row.Xmax.values[0]
    y_min = grid_row.Ymin.values[0]

    # Training geometry for this image/class, parsed from WKT
    wanted = (train_wkt['ImageId'] == image_id) & (train_wkt['ClassType'] == class_type)
    train_polygons = shapely.wkt.loads(train_wkt[wanted].MultipolygonWKT.values[0])

    # Scale the geometry about the origin, then rasterise it
    x_scaler, y_scaler = get_scalers(image_size, x_max, y_min)
    scaled_polygons = shapely.affinity.scale(
        train_polygons,
        xfact=x_scaler,
        yfact=y_scaler,
        origin=(0, 0, 0),
    )
    y = mask_for_polygons(scaled_polygons, image_size).reshape(-1)

    return X, y, train_polygons