In [1]:
import csv
import numpy as np

In [2]:
with open('input.csv', 'r') as csvfile:
    inputs = list(csv.reader(csvfile))
    inputs = [
        [float(num) for num in row]
        for row in inputs
        if '-999' not in row
    ]
inputs = np.array(inputs)

with open('output.csv', 'r') as csvfile:
    Y = list(csv.reader(csvfile))[1:] # Skip the first row
    Y = [
        [float(num) for num in row]
        for row in Y
    ]
Y = np.array(Y)

Get the depths with output values

In [3]:
# Output depths
out_depths = Y[:,0]
out_depths = [round(depth) for depth in out_depths]

# Input depths
in_depths = inputs[:,0]
in_depths = [round(depth) for depth in in_depths]

Y = Y[:,-1]
print(Y)

[1.53757734e+01 6.40763963e-01 2.90368342e+00 2.42913986e+00
 9.66461146e+01 3.64993559e+01 1.01580394e+03 1.42019440e+03
 3.04064301e+00 2.40163804e+00 1.77057279e+00 2.04758087e+00
 1.79422727e+00 1.05591821e+00 2.48570121e+00 1.60532316e+01
 3.53483894e+01]


In [4]:
X = []
inputs = inputs[:,1:]
# Map output values to inputs
for depth in out_depths:
    if depth in in_depths:
        # Map the inputs to outputs by depth; remove the first redundant column
        X.append(inputs[in_depths.index(depth)])
X = np.array(X)

In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
scaler.fit(inputs)
X = scaler.transform(X)
print(X)

[[-1.37691677  0.38663725 -0.59729995  0.19792547 -1.44899854]
 [-1.23954899  0.0137692  -0.92372995 -0.30620164 -1.01570437]
 [-1.10218122 -0.51426327 -0.92333393 -0.12101209  0.74106755]
 [-1.05341497 -0.12860334  1.45097117  0.02688235  1.54866783]
 [-0.7924162  -0.21233996 -1.02608271 -0.16988155 -1.3299499 ]
 [-0.7924162  -0.21233996 -1.02608271 -0.16988155 -1.3299499 ]
 [-0.7924162  -0.21233996 -1.02608271 -0.16988155 -1.3299499 ]
 [-0.7924162  -0.21233996 -1.02608271 -0.16988155 -1.3299499 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.78623533 -0.13521184 -0.05746864 -0.02841731 -1.0189219 ]
 [-0.77867942 -0.2122047  -0.95811155 -0.49139119 -0.99

In [6]:
from sklearn.neural_network import MLPRegressor

models = []
for hidden_size in range(1, 30):
    hidden_size = hidden_size * 10
    
    for lr in [0.1, 0.01, 0.001]:
        for alpha in [0.05, 0.1, 0.25, 0.5]:
            model = MLPRegressor(hidden_layer_sizes=(200,), 
                                 activation='relu', 
                                 solver='lbfgs',
                                 alpha=alpha,
                                 max_iter=500,
                                 learning_rate_init=0.01,
                                 tol=0,
                                 validation_fraction=0,
                                 verbose=False)
            model.fit(X, Y)
            score = model.score(X,Y)
            print('Testing with hidden_size=%s, lr=%s, alpha:%s, score:%s' % (hidden_size, lr, alpha, score))
            models.append({'model': model, 'score': score})

Testing with hidden_size=10, lr=0.1, alpha:0.05, score:0.4673237430881146
Testing with hidden_size=10, lr=0.1, alpha:0.1, score:0.467323737655628
Testing with hidden_size=10, lr=0.1, alpha:0.25, score:0.4673236933388202
Testing with hidden_size=10, lr=0.1, alpha:0.5, score:0.4673230842448915
Testing with hidden_size=10, lr=0.01, alpha:0.05, score:0.4673235850875412
Testing with hidden_size=10, lr=0.01, alpha:0.1, score:0.4673235511211222
Testing with hidden_size=10, lr=0.01, alpha:0.25, score:0.46732357130005847
Testing with hidden_size=10, lr=0.01, alpha:0.5, score:0.46732324002123315
Testing with hidden_size=10, lr=0.001, alpha:0.05, score:0.467323391959373
Testing with hidden_size=10, lr=0.001, alpha:0.1, score:0.4673204582778502
Testing with hidden_size=10, lr=0.001, alpha:0.25, score:0.467323415947455
Testing with hidden_size=10, lr=0.001, alpha:0.5, score:0.467322796052672
Testing with hidden_size=20, lr=0.1, alpha:0.05, score:0.46732346775624545
Testing with hidden_size=20, lr=0

Testing with hidden_size=100, lr=0.1, alpha:0.5, score:0.4673233075711987
Testing with hidden_size=100, lr=0.01, alpha:0.05, score:0.46732358044764416
Testing with hidden_size=100, lr=0.01, alpha:0.1, score:0.46732333635153156
Testing with hidden_size=100, lr=0.01, alpha:0.25, score:0.4673236790503158
Testing with hidden_size=100, lr=0.01, alpha:0.5, score:0.4673232228401401
Testing with hidden_size=100, lr=0.001, alpha:0.05, score:0.467323781296164
Testing with hidden_size=100, lr=0.001, alpha:0.1, score:0.46732360443781434
Testing with hidden_size=100, lr=0.001, alpha:0.25, score:0.4673235090457171
Testing with hidden_size=100, lr=0.001, alpha:0.5, score:0.46732316351714137
Testing with hidden_size=110, lr=0.1, alpha:0.05, score:0.46732180175064386
Testing with hidden_size=110, lr=0.1, alpha:0.1, score:0.4673233959428045
Testing with hidden_size=110, lr=0.1, alpha:0.25, score:0.46732322369639656
Testing with hidden_size=110, lr=0.1, alpha:0.5, score:0.46732339193277694
Testing with h

Testing with hidden_size=190, lr=0.1, alpha:0.5, score:0.46732325366166183
Testing with hidden_size=190, lr=0.01, alpha:0.05, score:0.46732375294904654
Testing with hidden_size=190, lr=0.01, alpha:0.1, score:0.46732329401660205
Testing with hidden_size=190, lr=0.01, alpha:0.25, score:0.4673233596820934
Testing with hidden_size=190, lr=0.01, alpha:0.5, score:0.4673233126687299
Testing with hidden_size=190, lr=0.001, alpha:0.05, score:0.4673228265166812
Testing with hidden_size=190, lr=0.001, alpha:0.1, score:0.46732368935462504
Testing with hidden_size=190, lr=0.001, alpha:0.25, score:0.4673230716983986
Testing with hidden_size=190, lr=0.001, alpha:0.5, score:0.4673230020553936
Testing with hidden_size=200, lr=0.1, alpha:0.05, score:0.46732349940766216
Testing with hidden_size=200, lr=0.1, alpha:0.1, score:0.4673170689016765
Testing with hidden_size=200, lr=0.1, alpha:0.25, score:0.4673231582208464
Testing with hidden_size=200, lr=0.1, alpha:0.5, score:0.46732166741543846
Testing with h

Testing with hidden_size=280, lr=0.01, alpha:0.05, score:0.4673236947384926
Testing with hidden_size=280, lr=0.01, alpha:0.1, score:0.46732367438731204
Testing with hidden_size=280, lr=0.01, alpha:0.25, score:0.46732359545893565
Testing with hidden_size=280, lr=0.01, alpha:0.5, score:0.4673231361663826
Testing with hidden_size=280, lr=0.001, alpha:0.05, score:0.46732015969426344
Testing with hidden_size=280, lr=0.001, alpha:0.1, score:0.467323755715352
Testing with hidden_size=280, lr=0.001, alpha:0.25, score:0.4673236425000703
Testing with hidden_size=280, lr=0.001, alpha:0.5, score:0.4673233787562963
Testing with hidden_size=290, lr=0.1, alpha:0.05, score:0.46732357860549323
Testing with hidden_size=290, lr=0.1, alpha:0.1, score:0.46732358223016984
Testing with hidden_size=290, lr=0.1, alpha:0.25, score:0.46732337806777285
Testing with hidden_size=290, lr=0.1, alpha:0.5, score:0.4673231983069377
Testing with hidden_size=290, lr=0.01, alpha:0.05, score:0.4673234817852579
Testing with 

In [7]:
# Best score
models = sorted(models, key=lambda x:-x['score'])
top_model = models[0]['model']
print(top_model)

MLPRegressor(activation='relu', alpha=0.05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(200,), learning_rate='constant',
       learning_rate_init=0.01, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0, validation_fraction=0,
       verbose=False, warm_start=False)


In [8]:
print(Y)

[1.53757734e+01 6.40763963e-01 2.90368342e+00 2.42913986e+00
 9.66461146e+01 3.64993559e+01 1.01580394e+03 1.42019440e+03
 3.04064301e+00 2.40163804e+00 1.77057279e+00 2.04758087e+00
 1.79422727e+00 1.05591821e+00 2.48570121e+00 1.60532316e+01
 3.53483894e+01]


In [9]:
print(top_model.predict(X))

[ 15.35504205   0.69941913   2.92042433   2.40759206 642.25166845
 642.25166845 642.25166845 642.25166845   2.04348229   2.04348229
   2.04348229   2.04348229   2.04348229   2.04348229   2.04348229
  25.73914646  25.73914646]


In [10]:
top_model.score(X, Y)

0.4673238006073291