In [None]:
import warnings
from sklearn.exceptions import DataConversionWarning
# Hide warnings whose message begins with one of these numpy "size changed" notices.
warnings.filterwarnings(action="ignore", message="numpy.dtype size changed")
warnings.filterwarnings(action="ignore", message="numpy.ufunc size changed")

# scikit-learn emits DataConversionWarning when it implicitly converts input data;
# silence that whole category as well.
warnings.simplefilter("ignore", category=DataConversionWarning)

In [None]:
import os
import glob

import rasterio
from rasterio import features
import numpy as np
import scipy

import pandas as pd
import geopandas as gpd

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn import preprocessing
from sklearn.pipeline import Pipeline


In [None]:
# Open the Sentinel-2 RGB GeoTIFF and keep its georeferencing metadata together
# with the pixel values; the file is closed again when the block exits.
with rasterio.open(
    os.path.join(DATA_PATH, 'input', 'Sentinel-2_RGB.tiff')
) as src:
    profile = src.profile      # dataset creation profile
    transform = src.transform  # georeferencing transform of the dataset
    shape = src.shape          # raster dimensions
    band_rgb = src.read()      # pixel values read from the file

In [None]:
# Collect every entry in the training-data folder.
glob_path = glob.glob(os.path.join(DATA_PATH,'training_data','*'))

# Keep only the shapefiles. glob returns paths in arbitrary, filesystem-dependent
# order, and class labels are later derived from each file's position in this list,
# so sort it to make the label <-> class mapping stable between runs and machines.
shapefiles = sorted(f for f in glob_path if f.endswith('.shp'))

In [None]:
# Label image with the same rows/cols as the raster; 0 means "no training data".
labeled_pixels = np.zeros((shape[0],shape[1]))

# Burn the polygons of each shapefile into the label image. Each shapefile is one
# class and receives the label 1, 2, 3, ... according to its position in the list.
for label, shp in enumerate(shapefiles, start=1):
    df = gpd.read_file(shp)
    geom = df['geometry']
    vectors_rasterized = features.rasterize(geom,
                                           out_shape = shape,
                                           transform = transform,
                                           all_touched=True,
                                           fill=0, default_value=label)
    # Assign rather than accumulate: summing the per-class rasters would turn a pixel
    # covered by polygons of two classes into the *sum* of their labels (e.g. 1 + 2 = 3),
    # i.e. a wrong or non-existent class. With assignment the later shapefile wins.
    labeled_pixels[vectors_rasterized > 0] = label

# convert shape of raster from bands:rows:cols to rows:cols:bands
raster_img = np.rollaxis(band_rgb,0,3)

In [None]:
# Report how many labelled pixels each class has. Class ids start at 1 and follow
# the order of `shapefiles`; the class name is the file name minus its 4-char extension.
for class_id, shp in enumerate(shapefiles, start=1):
    land_classes = os.path.basename(shp)[:-4]
    n_pixels = (labeled_pixels == class_id).sum()
    print('Class {land_classes} contains {n} pixels'.format(land_classes=land_classes, n=n_pixels))

# Integer copy of the label image, used to locate the labelled pixels.
roi_int = labeled_pixels.astype(int)
is_labelled = roi_int > 0

# X: feature matrix, one row of band values per labelled pixel.
X = raster_img[is_labelled]
# y: the class label of each of those pixels.
y = labeled_pixels[labeled_pixels > 0]


In [None]:
# Split the labelled pixels into training and testing sets; the test set is held
# out to evaluate predictions.
split_test_data = 0.30
# Fixed seed so the split, and every score derived from it, is reproducible
# across kernel restarts.
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=split_test_data,
                                                    stratify=y,
                                                    random_state=split_seed)

# Flatten the image to (rows * cols, bands) so that we can make predictions for the whole raster
new_shape = (raster_img.shape[0] * raster_img.shape[1], raster_img.shape[2])
img_as_array = raster_img.reshape(new_shape)
print('Reshaped from {o} to {n}'.format(o=raster_img.shape,
                                        n=img_as_array.shape))

In [None]:
def tune(X, y, search_type, random_state=None):
    """Tune a scaled RBF-kernel SVM with a cross-validated hyper-parameter search.

    The estimator is a pipeline of ``StandardScaler`` followed by
    ``SVC(kernel='rbf', class_weight='balanced')``. ``C`` is searched over
    ``np.logspace(0, 2, 8)`` and ``gamma`` over ``np.logspace(-6, -1, 8)``,
    scored by accuracy with 3-fold cross-validation.

    Parameters
    ----------
    X : array-like
        Feature matrix the search is fitted on.
    y : array-like
        Target labels matching the rows of ``X``.
    search_type : {'grid', 'random'}
        ``'grid'`` runs an exhaustive ``GridSearchCV``; ``'random'`` runs a
        ``RandomizedSearchCV`` that samples 15 parameter combinations.
    random_state : int or None, optional
        Seed for the randomized search so its sampled candidates are
        reproducible. Ignored for ``'grid'``. Defaults to ``None``.

    Returns
    -------
    GridSearchCV or RandomizedSearchCV
        The fitted search object.

    Raises
    ------
    ValueError
        If ``search_type`` is neither ``'grid'`` nor ``'random'``.
    """
    # Fail early with a clear message instead of an UnboundLocalError at `return`.
    if search_type not in ('grid', 'random'):
        raise ValueError(
            "search_type must be 'grid' or 'random', got {!r}".format(search_type))

    param_range_c = np.logspace(0, 2, 8)
    param_range_gamma = np.logspace(-6, -1, 8)

    # The 'svm__' prefix routes each parameter to the pipeline step named 'svm'.
    param_grid = {'svm__C': param_range_c,
                  'svm__gamma': param_range_gamma}

    pip = Pipeline([('scale', preprocessing.StandardScaler()),
                    ('svm', SVC(kernel='rbf', class_weight='balanced'))])

    if search_type == 'grid':
        clf = GridSearchCV(estimator=pip,
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=3,
                           n_jobs=-1)
    else:
        clf = RandomizedSearchCV(estimator=pip,
                                 param_distributions=param_grid,
                                 scoring='accuracy',
                                 cv=3,
                                 n_iter=15,
                                 # error_score must be 'raise' or a number; NaN marks
                                 # a failed fit without aborting the whole search.
                                 error_score=np.nan,
                                 random_state=random_state,
                                 n_jobs=-1)

    # Fit on the data passed in by the caller, not on notebook-level globals.
    clf = clf.fit(X, y)

    # print accuracy of the model
    print('Best parameters:', clf.best_params_)
    print('Score', clf.best_score_)

    return clf
