In [136]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

In [127]:
data_name = 'metadata19_hmineq0.0_tmin20050000_mean_grid_20.csv'
glathida_rgis = pd.read_csv(data_name, low_memory=False)
glathida_rgis = glathida_rgis.dropna()
df = glathida_rgis.drop(columns = 'RGIId')

In [128]:
glacier_forms = ('Land-terminating', 'Marine-terminating', 'Lake-terminating',
                 'Dry calving', 'Regenerated', 'Shelf-terminating', 'Not assigned')
glacier_id = (0, 1, 2, 3, 4, 5, 9)

for i in range(len(glacier_forms)):
    print(glacier_forms[i],': ', np.size(df[df['Form'] == glacier_id[i]]))

Land-terminating :  2888361
Marine-terminating :  1011750
Lake-terminating :  0
Dry calving :  0
Regenerated :  0
Shelf-terminating :  0
Not assigned :  0


Only two cclasses in the dataset to indentify. Why are the other forms relevant?

In [129]:
def split(data, test_size, target):
    # Initial data split into x and y values.
    xData = pd.DataFrame(data)
    xData = xData.drop(columns = [target])
    # Target
    yData = data[target]

    # Split into test and training data
    xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, test_size = test_size, random_state = 42)

    return xTrain, xTest, yTrain, yTest

In [130]:
xTrain, xTest, yTrain, yTest = split(df, 0.1, 'Form')

In [131]:
# Multi-layer Perceptron (MLP)
def ClassMLP_crude(xTrain, xTest, yTrain, yTest):
    # Define and train MLP classifier
    ClassifierMLP = MLPClassifier(random_state = 42)
    
    # Train the model on training data
    ClassifierMLP.fit(xTrain, yTrain)

    # Compute predictions from trained model:
    pre = ClassifierMLP.predict(xTest)
    acc = accuracy_score(yTest, pre)
    
    return pre, acc

In [132]:
pre, acc = ClassMLP_crude(xTrain, xTest, yTrain, yTest)
print(acc)

0.8639485605728482


Hyperparameter optimization with grid search.

In [149]:
# Number of output neurons, Number of input neurons, Number of samples in training set, scaling factor in range 2-10.
def hiddenLayerSize(No, Ni, Ns, alpha): # Tries to estimate optimal hidden layer neurons.
    Nh = Ns / (alpha * (Ni + No))
    return Nh

Nh1 = int(hiddenLayerSize(1, np.shape(xTrain)[1], np.shape(xTrain)[0], 8))
Nh2 = int(hiddenLayerSize(1, np.shape(xTrain)[1], np.shape(xTrain)[0], 9))
Nh3 = int(hiddenLayerSize(1, np.shape(xTrain)[1], np.shape(xTrain)[0], 10))
print(f'Estimated hidden layer neurons: {Nh1}, {Nh2} and {Nh3}')

hParamSpace = {'activation': ['relu', 'tanh'],
               'alpha': [0.0001, 0.001, 0.01],
               'solver': ['adam', 'sgd'],
               'learning_rate': ['constant', 'adaptive'],
               'hidden_layer_sizes': [(Nh1, Nh1, int(Nh1 / 2)),
                                      (Nh2, Nh2, int(Nh2 / 2)), 
                                      (Nh3, Nh3, int(Nh3 / 2))]}

Estimated hidden layer neurons: 135, 120 and 108


In [143]:
def ClassMLP(xTrain, xTest, yTrain, yTest, paramSpace, maxIts, crossValids):
    # Define the MLPClassifier.
    mlp = MLPClassifier(max_iter = maxIts, random_state = 42)

    print("Data normalizing.")
    scaler = StandardScaler()
    xTrain = scaler.fit_transform(xTrain)
    xTest = scaler.transform(xTest)
    
    # Perform GridSearchCV
    print("Grid search hyperparameter optimization.")
    grid_search = GridSearchCV(estimator = mlp, param_grid = paramSpace, cv = crossValids, n_jobs = -1)
    grid_search.fit(xTrain, yTrain)

    # Best parameters found during grid search.
    print("Best hyperparameters found:")
    best_params = grid_search.best_params_
    print(best_params)

    # Train MLPRegressor with best parameters.
    print("Training optimized model")
    best_mlp = MLPClassifier(max_iter = maxIts, **best_params)
    best_mlp.fit(xTrain, yTrain)

    print("Finished model")
    probs = best_mlp.predict_proba(xTest)
    pre_probs = [[i, prob[1]] for i, prob in enumerate(probs)]
    binary_pre = [1 if prob >= 0.5 else 0 for _, prob in pre_probs]
    acc = accuracy_score(yTest, binary_pre)
    print(f'Accuracy: {acc}')
    print(f'Predictions: {pre_probs}')

    return best_mlp, probs, pre_probs, acc

In [144]:
best_mlp, probs, pre_probs, acc = ClassMLP(xTrain, xTest, yTrain, yTest,hParamSpace, 10, 5)

Data normalizing.
Grid search hyperparameter optimization.




Best hyperparameters found:
{'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (135, 135, 67), 'learning_rate': 'constant', 'solver': 'adam'}
Training optimized model
Finished model
Accuracy: 0.977202981148619
Predictions: [[0, 2.0063047516089934e-05], [1, 0.029844048023053283], [2, 0.00016604430740596026], [3, 2.0627639765176287e-08], [4, 8.955990497848423e-07], [5, 0.6181596948989054], [6, 7.022584565766747e-08], [7, 0.9999463085434374], [8, 0.9999999999333766], [9, 6.175003830283512e-07], [10, 1.3485652180717621e-27], [11, 0.9611633536961002], [12, 0.9521180556754257], [13, 0.008886984925868407], [14, 0.9999999998546982], [15, 5.749564582492412e-15], [16, 0.005876284723108685], [17, 0.01995616261229029], [18, 1.1173057344481752e-06], [19, 3.55019851989186e-08], [20, 2.1489994859324395e-06], [21, 0.0020746425580700774], [22, 0.004638360876970215], [23, 0.02102620339271986], [24, 9.455012570145889e-15], [25, 2.561478131218778e-09], [26, 0.9967389437133262], [27, 0.99923192514

