In [2]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

import os
import pickle
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn import svm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV

from building_dection import SN7_Location, create_dataset

In [3]:
# sn7_locations are created in the notebook coperinicus_dataset.ipynb
sn7_pickle_file = Path('../outputs/sn7_locations.pkl')
# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by this project. The context manager guarantees the file handle is
# closed even if unpickling fails.
with open(sn7_pickle_file, 'rb') as pickle_handle:
    sn7_locations = pickle.load(pickle_handle)

In [4]:
# Settings for the 'reference' dataset; all three values are handed straight
# to create_dataset together with the loaded locations.
number_of_samples, ratio, mod = 5000, 0.5, 'reference'
ref_dataset = create_dataset(sn7_locations, number_of_samples, ratio, mod)

In [5]:
# Column 0 is used as the target; every remaining column is used as a feature.
y, x = ref_dataset[:, 0], ref_dataset[:, 1:]

In [6]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Report the shape of the full feature/label arrays and of each partition.
shape_report = (
    ("image_dataset shape:", x),
    ("labels_dataset shape:", y),
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
)
for caption, array in shape_report:
    print(caption, array.shape)


image_dataset shape: (4992, 13)
labels_dataset shape: (4992,)
X_train shape: (3993, 13)
X_test shape: (999, 13)
y_train shape: (3993,)
y_test shape: (999,)


In [None]:
# Baseline: linear-kernel SVM classifier fitted on the current
# X_train/y_train split. `svm` and `metrics` are imported in the notebook's
# first cell, so nothing is imported here.
model = svm.SVC(kernel='linear')

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Print the coefficients (one weight per feature column; available because
# the kernel is linear)
print('Coefficients: \n', model.coef_)

# Print the mean squared error between true and predicted labels.
# NOTE(review): if the labels are binary 0/1 this equals the
# misclassification rate (1 - accuracy) - confirm the label encoding.
print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))

Coefficients: 
 [[-0.02286556 -0.05713922  0.28441025 -0.06736528  0.55203204  0.05402422
  -0.16115105  0.12922268  0.08976164 -0.13388301 -0.18431858  0.11499543
  -5.63564312]]
Mean squared error: 0.28


In [10]:
# Same linear-SVM baseline, this time on the dataset built with the
# 'FLAG_WV_OFF' modification.
number_of_samples, ratio, mod = 5000, 0.5, 'FLAG_WV_OFF'
wv_dataset = create_dataset(sn7_locations, number_of_samples, ratio, mod)

# Column 0 is used as the target; the remaining columns are the features.
y, x = wv_dataset[:, 0], wv_dataset[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Report the shape of the full arrays and of each train/test partition.
for caption, array in (
    ("image_dataset shape:", x),
    ("labels_dataset shape:", y),
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(caption, array.shape)

# Fit the linear-kernel classifier and predict the held-out samples.
model = svm.SVC(kernel='linear')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Per-feature weights of the linear decision function.
print('Coefficients: \n', model.coef_)

# Mean squared error between true and predicted labels.
print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))

image_dataset shape: (4992, 13)
labels_dataset shape: (4992,)
X_train shape: (3993, 13)
X_test shape: (999, 13)
y_train shape: (3993,)
y_test shape: (999,)
Coefficients: 
 [[ 1.76589726e-02 -4.83789367e-02  3.19747349e-01 -9.31282446e-02
   2.37542164e+00  0.00000000e+00 -2.88889566e-01  2.42751502e-01
   8.20203675e-02 -1.83166683e-01 -6.34996513e-02  3.12347456e-02
  -1.78275311e+01]]
Mean squared error: 0.30


In [11]:
# Grid search over kernel and C for an SVM trained on the FLAG_WV_OFF dataset.
# The split is rebuilt from wv_dataset here so the cell does not depend on
# whichever cell last assigned X_train/y_train; the fixed random_state makes
# it identical to the split used when `clf` is evaluated.
y = wv_dataset[:, 0]
x = wv_dataset[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# `parametres` is also the grid used for the reference-dataset search (clf_ref).
parametres = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
model = svm.SVC()
clf = GridSearchCV(model, parametres)
clf.fit(X_train, y_train)

In [12]:
# Rebuild the reference split, then run the same kernel/C grid search on it.
y, x = ref_dataset[:, 0], ref_dataset[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

model = svm.SVC()
clf_ref = GridSearchCV(estimator=model, param_grid=parametres)
clf_ref.fit(X_train, y_train)

In [16]:
# Predict the held-out samples with the best model found by the reference
# grid search and report its error.
y_pred = clf_ref.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean squared error: %.2f' % test_mse)
# Hyper-parameter combination that won the search.
print(clf_ref.best_params_)

Mean squared error: 0.23
{'C': 10, 'kernel': 'rbf'}


In [17]:
# Recreate the FLAG_WV_OFF split (same seed as before) and score the grid
# search `clf` on its held-out part.
y, x = wv_dataset[:, 0], wv_dataset[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
y_pred = clf.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean squared error: %.2f' % test_mse)
# Hyper-parameter combination that won the search.
print(clf.best_params_)

Mean squared error: 0.29
{'C': 10, 'kernel': 'rbf'}


In [10]:
# Switch the working split back to the reference dataset
# (column 0 = target, remaining columns = features).
y, x = ref_dataset[:, 0], ref_dataset[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [11]:
# Exhaustive SVC search over linear, RBF and polynomial kernels.
# The grid is a list of per-kernel dictionaries so each kernel is only crossed
# with the hyper-parameters it actually uses: `gamma` is ignored by the linear
# kernel and `degree` by everything except 'poly'. A single flat dictionary
# would refit identical linear/RBF models once per irrelevant gamma/degree
# value (144 candidates instead of 68, each fitted cv=5 times).
c_values = [1, 10, 100, 1000]
gamma_values = ["scale", "auto", 0.1, 1]
param_grid = [
    {"kernel": ["linear"], "C": c_values},
    {"kernel": ["rbf"], "C": c_values, "gamma": gamma_values},
    {"kernel": ["poly"], "C": c_values, "gamma": gamma_values, "degree": [2, 3, 4]},
]


svc_model = svm.SVC()
# 5-fold cross-validation, candidates ranked by accuracy.
grid_search = GridSearchCV(estimator=svc_model, param_grid=param_grid, scoring='accuracy', cv=5)

grid_search.fit(X_train, y_train)
# Search diagnostics: winning parameters, their mean CV accuracy, the refit
# estimator, the full per-candidate results, and bookkeeping attributes.
print(grid_search.best_params_)
print(grid_search.best_score_)
print(grid_search.best_estimator_)
print(grid_search.cv_results_)
print(grid_search.scorer_)
print(grid_search.n_splits_)
print(grid_search.refit_time_)



In [8]:
# Narrower search restricted to the RBF kernel: only C and gamma vary.
param_grid = {
    "C": [1, 10, 100],
    "kernel": ["rbf"],
    "gamma": ["scale", "auto", 0.1, 1],
}


svc_model = svm.SVC()
# 5-fold cross-validation, candidates ranked by accuracy.
grid_search_rbf = GridSearchCV(
    estimator=svc_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

grid_search_rbf.fit(X_train, y_train)

# Print the search diagnostics one after another, in this fixed order.
for report_attr in (
    "best_params_",
    "best_score_",
    "best_estimator_",
    "cv_results_",
    "scorer_",
    "n_splits_",
    "refit_time_",
):
    print(getattr(grid_search_rbf, report_attr))


{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}
0.7625879467128396
SVC(C=100)
{'mean_fit_time': array([0.34550428, 0.86336966, 0.85533543, 0.8634851 , 0.34266381,
       0.90026627, 0.90309138, 0.87988462, 0.4069695 , 0.88133535,
       0.88281956, 0.87520638]), 'std_fit_time': array([0.00456633, 0.01409817, 0.00338209, 0.01198132, 0.01288812,
       0.02311222, 0.00895192, 0.00347571, 0.00579808, 0.00252783,
       0.00694185, 0.00962413]), 'mean_score_time': array([0.17366953, 0.35249848, 0.35366268, 0.3536139 , 0.15938373,
       0.36323729, 0.35092821, 0.3529758 , 0.14679952, 0.34889121,
       0.35095835, 0.34925508]), 'std_score_time': array([0.00420795, 0.00174326, 0.00402229, 0.00182267, 0.00515207,
       0.02340369, 0.00388378, 0.00286318, 0.0035537 , 0.00328341,
       0.00217384, 0.00286629]), 'param_C': masked_array(data=[1, 1, 1, 1, 10, 10, 10, 10, 100, 100, 100, 100],
             mask=[False, False, False, False, False, False, False, False,
                   False, False

In [10]:
# Persist the whole fitted RBF grid-search object so it can be reloaded with
# joblib later without re-running the search.
joblib.dump(value=grid_search_rbf, filename="../outputs/grid_search_rbf.sav")

['../outputs/grid_search_rbf.sav']

In [19]:
# Names of every dataset variant to build, starting with the 'reference' one.
modifications = [
    'reference', 'class_VEGETATION', 'class_NO_DATA', 'class_CLOUD_HIGH_PROBABILITY',
    'FLAG_WV_OFF', 'FLAG_CIRRUS_ON',
    'LUT_AEROSOL_MARITIME', 'LUT_OZONE_250', 'LUT_OZONE_450', 'LUT_SEASON_WINTER',
]

# The same sampling settings are used for every variant.
number_of_samples = 10000
ratio = 0.3

# Build one dataset per modification, keyed by the modification name.
datasets = {}
for mod in modifications:
    dataset = create_dataset(sn7_locations, number_of_samples, ratio, mod)
    datasets[mod] = dataset
    print(mod, dataset.shape)

reference (9996, 14)
class_VEGETATION (9996, 14)
class_NO_DATA (9996, 14)
class_CLOUD_HIGH_PROBABILITY (9996, 14)
FLAG_WV_OFF (9996, 14)
FLAG_CIRRUS_ON (9996, 14)
LUT_AEROSOL_MARITIME (9996, 14)
LUT_OZONE_250 (9996, 14)
LUT_OZONE_450 (9996, 14)
LUT_SEASON_WINTER (9996, 14)


In [15]:
# Show the hyper-parameter combination selected by the RBF grid search
# (grid_search_rbf), for reference when configuring the models below.
print(grid_search_rbf.best_params_)


{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}


In [26]:
# Train one RBF-kernel SVC per dataset variant and keep the fitted models,
# keyed by modification name, for later evaluation.
models = {}

for mod in modifications:
    # Column 0 is used as the target; the remaining columns are the features.
    y, x = datasets[mod][:, 0], datasets[mod][:, 1:]
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )
    # NOTE(review): grid_search_rbf reported C=100 as its best value (the
    # largest C in that grid), while C=1000 is used here - confirm this is
    # intentional.
    model = svm.SVC(kernel='rbf', gamma='scale', C=1000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(mod, 'Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
    models[mod] = model

reference Mean squared error: 0.20
class_VEGETATION Mean squared error: 0.20
class_NO_DATA Mean squared error: 0.20
class_CLOUD_HIGH_PROBABILITY Mean squared error: 0.22
FLAG_WV_OFF Mean squared error: 0.21
FLAG_CIRRUS_ON Mean squared error: 0.20
LUT_AEROSOL_MARITIME Mean squared error: 0.20
LUT_OZONE_250 Mean squared error: 0.21
LUT_OZONE_450 Mean squared error: 0.19
LUT_SEASON_WINTER Mean squared error: 0.20


In [27]:
# Emit a CSV table: a header row, then one row of test-set metrics per
# modification.
print('mod', 'accuracy', 'precision', 'recall', 'f1-score', 'MSE', sep=',')
for mod in modifications:
    model = models[mod]
    y, x = datasets[mod][:, 0], datasets[mod][:, 1:]
    # Same test_size and random_state as in the training loop, so this
    # reproduces the held-out split each model was not fitted on.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )
    y_pred = model.predict(X_test)

    accuracy = metrics.accuracy_score(y_test, y_pred)
    # Precision, recall and F1 are averaged over classes, weighted by support.
    precision, recall, f1_score = (
        scorer(y_test, y_pred, average='weighted')
        for scorer in (metrics.precision_score, metrics.recall_score, metrics.f1_score)
    )
    mean_squared_error = metrics.mean_squared_error(y_test, y_pred)
    print(mod, accuracy, precision, recall, f1_score, mean_squared_error, sep=',')

mod,accuracy,precision,recall,f1-score,MSE
reference,0.7965,0.7880184638466373,0.7965,0.7836758289223564,0.2035
class_VEGETATION,0.804,0.7967130449887095,0.804,0.7919094197791872,0.196
class_NO_DATA,0.7985,0.8001258681945704,0.7985,0.7748664718485665,0.2015
class_CLOUD_HIGH_PROBABILITY,0.782,0.7754569192594006,0.782,0.7581161174521355,0.218
FLAG_WV_OFF,0.7895,0.7855554533876284,0.7895,0.7665603869524621,0.2105
FLAG_CIRRUS_ON,0.7955,0.789320276987348,0.7955,0.777700498008558,0.2045
LUT_AEROSOL_MARITIME,0.7985,0.7926185497936242,0.7985,0.7815808559254684,0.2015
LUT_OZONE_250,0.79,0.7807095299548638,0.79,0.7753303325342076,0.21
LUT_OZONE_450,0.806,0.8004781731474687,0.806,0.7915304329497876,0.194
LUT_SEASON_WINTER,0.7985,0.7900366690366961,0.7985,0.787376237344164,0.2015
