In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from scipy.stats import uniform, randint

In [11]:
data_name = 'metadata19_hmineq0.0_tmin20050000_mean_grid_20.csv'
glathida_rgis = pd.read_csv(data_name, low_memory = False)
glathida_rgis = glathida_rgis.dropna()
df = glathida_rgis.drop(columns = 'RGIId')

In [12]:
glacierForms = ('Glacier', 'Ice cap', 'Perennial snowfield', 'Seasonal snow-field', 'Not assigned')
glacierID = (0, 1, 2, 3, 9)

for i in range(len(glacierForms)):
    print(i, glacierForms[i],': ', np.size(df[df['Form'] == glacierID[i]]))

0 Glacier :  2888361
1 Ice cap :  1011750
2 Perennial snowfield :  0
3 Seasonal snow-field :  0
4 Not assigned :  0


Only two of the Forms exists. Why are the other included?

In [13]:
def split(data, test_size, target):
    # Initial data split into x and y values.
    xData = pd.DataFrame(data)
    xData = xData.drop(columns = [target])
    # Target
    yData = data[target]

    # Split into test and training data
    xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, test_size = test_size, random_state = 42)

    return xTrain, xTest, yTrain, yTest

In [14]:
xTrain, xTest, yTrain, yTest = split(df, 0.1, 'Form')

In [15]:
# Multi-layer Perceptron (MLP)
def ClassMLP_crude(xTrain, xTest, yTrain, yTest):
    # Define and train MLP classifier
    ClassifierMLP = MLPClassifier(random_state = 42, early_stopping=True)
    
    # Train the model on training data
    ClassifierMLP.fit(xTrain, yTrain)

    # Compute predictions from trained model:
    pre = ClassifierMLP.predict(xTest)
    acc = accuracy_score(yTest, pre)
    
    return acc, pre    

In [16]:
acc, pre = ClassMLP_crude(xTrain, xTest, yTrain, yTest)

print('Accuracy:')
print(acc)
print()
print('Clasification prediction:')
for i in range(len(glacierForms)):
    print(i, glacierForms[i], len(pre[np.where(pre == i)]))

Accuracy:
0.8513809732573433

Clasification prediction:
0 Glacier 5498
1 Ice cap 1345
2 Perennial snowfield 0
3 Seasonal snow-field 0
4 Not assigned 0


Hyperparameter optimization with grid search.

In [20]:
# Number of output neurons, Number of input neurons, Number of samples in training set, scaling factor in range 2-10.
def hiddenLayerSize(No, Ni, Ns, alpha): # Tries to estimate optimal hidden layer neurons.
    Nh = Ns / (alpha * (Ni + No))
    return Nh
No = 2

Nh1 = int(hiddenLayerSize(No, np.shape(xTrain)[1], np.shape(xTrain)[0], 6))
Nh2 = int(hiddenLayerSize(No, np.shape(xTrain)[1], np.shape(xTrain)[0], 8))
Nh3 = int(hiddenLayerSize(No, np.shape(xTrain)[1], np.shape(xTrain)[0], 10))
print(f'Estimated hidden layer neurons: {Nh1}, {Nh2} and {Nh3}')

hParamGrid = {'activation': ['relu', 'tanh'],
               'alpha': [0.0001, 0.001, 0.01],
               'solver': ['adam', 'sgd'],
               'early_stopping': [True],
               'learning_rate': ['constant', 'adaptive'],
               'hidden_layer_sizes': [(Nh1, Nh1, int(Nh1 / 3)),
                                      (Nh2, Nh2, int(Nh2 / 3)), 
                                      (Nh3, Nh3, int(Nh3 / 3))]}

Estimated hidden layer neurons: 176, 132 and 106


In [19]:
def ClassMLPgrid(xTrain, xTest, yTrain, yTest, paramSpace, maxIts, crossValids):
    # Define the MLPClassifier.
    mlp = MLPClassifier(max_iter = maxIts, random_state = 42)

    print("Data normalizing.")
    scaler = StandardScaler()
    xTrain = scaler.fit_transform(xTrain)
    xTest = scaler.transform(xTest)
    
    # Perform GridSearchCV
    print("Grid search hyperparameter optimization.")
    grid_search = GridSearchCV(estimator = mlp, param_grid = paramSpace, cv = crossValids, n_jobs = -1)
    grid_search.fit(xTrain, yTrain)

    # Best parameters found during grid search.
    print("Best hyperparameters found.")
    best_params = grid_search.best_params_

    # Train MLPRegressor with best parameters.
    print("Training optimized model.")
    best_mlp = MLPClassifier(max_iter = maxIts, **best_params)
    best_mlp.fit(xTrain, yTrain)

    print("Finished model.")
    pre = best_mlp.predict(xTest)
    acc = accuracy_score(yTest, pre)

    return best_mlp, best_params, acc, pre

In [17]:
bestMLP, bestParams, acc, pre = ClassMLPgrid(xTrain, xTest, yTrain, yTest,hParamGrid, 20, 5)

print('Accuracy:')
print(acc)
print()
print(f'Best hyperparameters: {bestParams}')
print()
print('Clasification prediction:')
for i in range(len(glacierForms)):
    print(i, glacierForms[i], len(pre[np.where(pre == i)]))

NameError: name 'ClassMLPgrid' is not defined

In [22]:
NhMax = int(hiddenLayerSize(No, np.shape(xTrain)[1], np.shape(xTrain)[0], 6))
NhMin = int(hiddenLayerSize(No, np.shape(xTrain)[1], np.shape(xTrain)[0], 10))
print(f'Hidden layer neuron range estimation: {NhMin} and {NhMax}')

hParamSpace = {'activation': ['relu', 'tanh'],
               'alpha': uniform(0.001, 0.1),
               'solver': ['adam', 'sgd'],
               'early_stopping': [True],
               'learning_rate': ['constant', 'adaptive'],
               'hidden_layer_sizes': [(randint(NhMin, NhMax).rvs(),
                                       randint(NhMin, NhMax).rvs(), 
                                       int(0.3 * randint(NhMin, NhMax).rvs()))]}

Hidden layer neuron range estimation: 106 and 176


In [23]:
def ClassMLPran(xTrain, xTest, yTrain, yTest, paramSpace, maxIts, crossValids):
    # Define the MLPClassifier.
    mlp = MLPClassifier(max_iter = maxIts, random_state = 42)

    print("Data normalizing.")
    scaler = StandardScaler()
    xTrain = scaler.fit_transform(xTrain)
    xTest = scaler.transform(xTest)
    
    # Perform GridSearchCV
    print("Distribution search hyperparameter optimization.")
    grid_search = RandomizedSearchCV(estimator = mlp, param_distributions = paramSpace, cv = crossValids, n_jobs = -1)
    grid_search.fit(xTrain, yTrain)

    # Best parameters found during grid search.
    print("Best hyperparameters found.")
    best_params = grid_search.best_params_

    # Train MLPRegressor with best parameters.
    print("Training optimized model.")
    best_mlp = MLPClassifier(max_iter = maxIts, **best_params, random_state = 42)
    best_mlp.fit(xTrain, yTrain)

    print("Finished model.")
    pre = best_mlp.predict(xTest)
    acc = accuracy_score(yTest, pre)

    return best_mlp, best_params, acc, pre

In [24]:
bestMLP, bestParams, acc, pre = ClassMLPran(xTrain, xTest, yTrain, yTest, hParamSpace, 20, 5)

print('Accuracy:')
print(acc)
print()
print(f'Best hyperparameters: {bestParams}')
print()
print('Clasification prediction:')
for i in range(len(glacierForms)):
    print(i, glacierForms[i], len(pre[np.where(pre == i)]))

Data normalizing.
Distribution search hyperparameter optimization.




Best hyperparameters found.
Training optimized model.
Finished model.
Accuracy:
0.9884553558380827

Best hyperparameters: {'activation': 'tanh', 'alpha': 0.007990248655920507, 'early_stopping': True, 'hidden_layer_sizes': (115, 143, 26), 'learning_rate': 'constant', 'solver': 'adam'}

Clasification prediction:
0 Glacier 5002
1 Ice cap 1841
2 Perennial snowfield 0
3 Seasonal snow-field 0
4 Not assigned 0


