In [1]:
from matplotlib import pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading extenrnal modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

from pic_parser.utils import cell_to_array

DB = "E:/Dropbox/SPEED/Self Driving EHD/Data/10-Mar-2022 large nozzle mosaic/logs/measurements.xlsx"
df = pd.read_excel(DB, index_col=0)

X = np.array([cell_to_array(x) for x in df.vector])
Y = np.array(df.area)

In [5]:
# Initialize some ML kit
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor


def evaluate(Y, Y_true):
    mae = np.abs(Y - Y_true).mean()
    return mae


def norm(x):
    mean = x.mean(0)
    std = x.std(0)
    return (x - mean)/std, mean, std


class MLE_Regressor():
    def __init__(self):
        self.answers = None
    def fit(self, X, Y):
        self.answers = Y.mean(0)
    def predict(self, X):
        '''Unpredictable for multiple X!'''
        return self.answers


def LOO(X, Y, model):
    N = Y.shape[0]
    errors = []
    for i in range(N):
        trainset = np.arange(N) != i
        X_train = X[trainset, :]
        Y_train = Y[trainset]
        # TODO stop overfitting!:
        X_train = X
        Y_train = Y
        model.fit(X_train, Y_train)
        errors.append(evaluate(model.predict(X[i][None, :]), Y[i]))
    return np.mean(errors)

Y, mean, std = norm(Y)

In [6]:
print('Training MLE...')
model = MLE_Regressor()
MLE_mae = LOO(X, Y, model)
print('MLE MAE: %.5f' % MLE_mae, end='\n\n')

print('Training KNN...')
model = KNeighborsRegressor()
MLE_mae = LOO(X, Y, model)
print('KNN MAE: %.5f' % MLE_mae, end='\n\n')

print('Training Ridge...')
model = Ridge()
MLE_mae = LOO(X, Y, model)
print('Ridge MAE: %.5f' % MLE_mae, end='\n\n')

print('Training Kernel Ridge...')
model = KernelRidge()
MLE_mae = LOO(X, Y, model)
print('Kernel MAE: %.5f' % MLE_mae, end='\n\n')

print('Training Random Forest...')
model = RandomForestRegressor()
MLE_mae = LOO(X, Y, model)
print('Random Forest MAE: %.5f' % MLE_mae, end='\n\n')

print('Training MLP...')
model = MLPRegressor(max_iter=2000)
MLE_mae = LOO(X, Y, model)
print('MLP MAE: %.5f' % MLE_mae, end='\n\n')

Training MLE...
MLE MAE: 0.79183

Training KNN...
KNN MAE: 0.75766

Training Ridge...
Ridge MAE: 0.73436

Training Kernel Ridge...
Kernel MAE: 0.73358

Training Random Forest...
Random Forest MAE: 0.29440

Training MLP...
MLP MAE: 0.05167

