# Selecting classification parameters by cross-validation

This notebook shows how to select the parameters for the RBF kernel transformation and stochastic gradient descent classifier used in MCC-RGB. It goes through the feature calculation and training steps manually to show the details behind the algorithm's update step.

In [None]:
import warnings
# Silence every warning for the rest of the notebook so that library
# deprecation and runtime warnings do not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

In [None]:
import numpy as np

In [None]:
import matplotlib.pyplot as plt

In [None]:
from scipy.misc import bytescale
from skimage.color import rgb2lab

In [None]:
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [None]:
from pymccrgb import mcc, mcc_rgb, classify_ground_mcc, plot_points_3d

In [None]:
from pymccrgb.datasets import load_mammoth_trees

In [None]:
# Load the example point cloud.  Later cells read columns 0-2 as x, y, z
# and columns 3-5 as color.
# NOTE(review): presumably `n_points` is the size of a random subsample and
# `seed` makes it reproducible -- confirm against pymccrgb.datasets.
data = load_mammoth_trees(
    n_points=5e5,
    seed=0,
)

In [None]:
# Rescale the color columns (3-5) to byte range, then widen to int16 so the
# channel sums and differences below are computed without uint8 wrap-around.
# NOTE(review): scipy.misc.bytescale was removed in SciPy 1.2; this cell only
# runs against an older SciPy -- confirm the pinned version.
rgb = bytescale(data[:, 3:6]).astype(np.int16)
red = rgb[:, 0]
green = rgb[:, 1]
blue = rgb[:, 2]

# Normalized green-red difference index: (G - R) / (G + R).
# Points with G == R == 0 would evaluate to 0/0 = NaN, and a single NaN in
# the feature matrix breaks the classifier's fit/predict, so those points are
# assigned an index of 0 instead.
channel_sum = green + red
ngrdvi = np.divide(
    green - red,
    channel_sum,
    out=np.zeros(channel_sum.shape, dtype=float),
    where=channel_sum != 0,
)

# rgb2lab expects an image-shaped array, so the point list is wrapped as a
# one-row image and unwrapped again afterwards.
# NOTE(review): `rgb` is int16 here; skimage rescales integer images by their
# dtype range when converting to float -- confirm this scaling is intended.
lab = rgb2lab(np.array([rgb]))[0]

In [None]:
# Split the first three columns of the point cloud into separate coordinate
# arrays for plotting.
xx, yy, zz = (data[:, column] for column in range(3))

In [None]:
# MCC parameters, both given in meters.
scale = 1
tol = 0.3

# Labels from the MCC classifier; the same array is used as the training
# target `y` in the cells below.
y = labels = classify_ground_mcc(data, scale, tol)

In [None]:
# Seed NumPy's global RNG so the training sample drawn below is reproducible.
np.random.seed(10)
n_train = 10000

# Feature matrix, one row per point: columns 1-2 of the Lab array (the a and
# b color channels) followed by the green-red index.
X = np.hstack([lab[:, 1:3].reshape(-1, 2), ngrdvi.reshape(-1, 1)])

# Row indices of the training sample.  Later cells index X, y, the
# coordinates and the colors with `subset`, but it was never defined; draw
# `n_train` distinct rows (capped at the number of available points).
subset = np.random.choice(
    X.shape[0],
    size=min(n_train, X.shape[0]),
    replace=False,
)

In [None]:
# Training features and targets: the rows of X and y selected by `subset`.
X_train, y_train = X[subset], y[subset]

In [None]:
# Class balance of the training sample: sum of the labels, then the number
# of points labeled 0.
print(y_train.sum(), (y_train == 0).sum())

In [None]:
# 3D view of the training sample, colored with each point's own RGB value
# divided by 255.
plot_points_3d(
    *(coord[subset] for coord in (xx, yy, zz)),
    rgb[subset] / 255,
)

In [None]:
# Two-step model: an approximate RBF feature map followed by a linear
# classifier trained with stochastic gradient descent.  The step names
# ('rbf', 'clf') are the prefixes used in the parameter grid.
pipe = Pipeline(
    steps=[
        ('rbf', RBFSampler()),
        ('clf', SGDClassifier(max_iter=10, n_jobs=-1)),
    ]
)

In [None]:
# Candidate values for the grid search, each on a logarithmic scale.
# `Cs` holds the candidates for the SGD regularization strength `alpha`.
gammas = [0.01, 0.1, 1, 10, 100, 1000]
n_components = [10, 100, 1000]
Cs = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

# Keys follow the `<step name>__<parameter>` pattern so each list is routed
# to the matching pipeline step.
param_grid = {
    'rbf__n_components': n_components,
    'rbf__gamma': gammas,
    'clf__alpha': Cs,
}

In [None]:
# Exhaustive search over `param_grid` with 5-fold cross-validation, using
# all available cores.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)

In [None]:
# Run the search on the training sample.
grid.fit(X=X_train, y=y_train)

In [None]:
# Show the parameter combination that the grid search selected.
print(grid.best_params_)

In [None]:
# Rebuild the pipeline with fixed parameter values.
# NOTE(review): these are presumably the values reported by the grid search
# above -- confirm they still match `grid.best_params_` after a re-run.
pipe = Pipeline(
    steps=[
        ('rbf', RBFSampler(n_components=1000, gamma=10)),
        ('clf', SGDClassifier(alpha=0.0001, max_iter=10, n_jobs=-1)),
    ]
)

In [None]:
# Train the final pipeline on the training sample.
pipe.fit(X=X_train, y=y_train)

In [None]:
# Predict a label for every point in the cloud, not only the training
# sample.
y_pred = pipe.predict(X=X)

In [None]:
# Points the classifier assigns label 1, drawn in their own colors.
mask = np.equal(y_pred, 1)
plot_points_3d(
    *(coord[mask] for coord in (xx, yy, zz)),
    rgb[mask] / 255,
)
plt.title('Predicted ground points')

In [None]:
# Points the classifier assigns label 0, drawn in their own colors.
mask = np.equal(y_pred, 0)
plot_points_3d(
    *(coord[mask] for coord in (xx, yy, zz)),
    rgb[mask] / 255,
)
plt.title('Predicted nonground points')

In [None]:
# Points that MCC labeled 1 but the color classifier labels 0, i.e. the
# points on which the two classifications disagree in that direction.
# NOTE(review): the title reads as "ground points after the update", while
# this mask selects the MCC ground points that the classifier rejects --
# confirm which of the two is meant.
mask = np.logical_and(y_pred == 0, y == 1)
plot_points_3d(
    *(coord[mask] for coord in (xx, yy, zz)),
    rgb[mask] / 255,
)
plt.title('Updated MCC ground points')