In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from keras.optimizers import Adam

from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_absolute_error

In [2]:
params = {
    'epochs': 200,
    'test_size': 0.2,
    'random_state': 1337,
    'validation_split': 0.2
}

In [3]:
df = pd.read_csv("../data/qsc_out.random_scan_nfp2.csv")

In [4]:
df.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,y0,y1,y2,y3,y4,y5,y6
0,-0.104856,0.19779,0.00126,-0.011792,0.000794,0.00012,-0.68718,-0.215177,0.948834,0.264545,0.404154,0.631039,0.898928,1.581192,0.334374
1,-0.110385,0.188144,0.001381,-0.011358,0.000855,0.000138,-0.658897,-0.233486,0.930837,0.298531,0.469811,0.639501,0.876879,1.568947,0.336379
2,-0.095042,0.132051,0.000675,-0.008909,0.000477,0.000348,-0.671871,-0.944641,1.526347,0.385624,0.334515,0.46828,0.804239,1.288313,0.331086
3,-0.115912,-0.207162,0.001411,0.01206,0.000871,-0.000108,-0.736734,0.012462,0.783335,0.278748,0.497138,0.645087,0.926717,1.717088,0.338459
4,-0.09639,-0.217079,0.000933,0.010517,0.000621,-0.000126,-0.764525,-0.048433,0.925351,0.272636,0.548743,0.631384,0.941509,1.549327,0.331432


In [5]:
df.shape

(29674, 15)

In [6]:
x_columns = [col for col in df.columns if col.startswith('x')]
y_columns = [col for col in df.columns if col.startswith('y')]

## ACTUALLY SOLVING THE INVERSE PROBLEM
Y = df[x_columns].values
X = df[y_columns].values

In [7]:
def preprocess_data(X_train, X_test, Y_train, Y_test, params):
    scaler_x = StandardScaler().fit(X_train)
    scaler_y = StandardScaler().fit(Y_train)
    X_train = scaler_x.transform(X_train)
    X_test = scaler_x.transform(X_test)
    Y_train = scaler_y.transform(Y_train)
    Y_test = scaler_y.transform(Y_test)

    input_shape = X_train.shape[1]
    
    output_shape = Y_train.shape[1]
    return X_train, X_test, Y_train, Y_test, input_shape, output_shape, scaler_x, scaler_y

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=params['test_size'], 
                                                    random_state=params['random_state'])

X_train, X_test, Y_train, Y_test, input_shape, output_shape, scaler_x, scaler_y = preprocess_data(X_train, X_test, Y_train, Y_test, params)

In [8]:
X_train.mean(axis=0), X_train.std(axis=0) 

(array([-1.23324421e-14,  2.75938148e-14, -7.59591967e-15, -1.74703992e-14,
        -3.12392412e-14, -5.68256506e-14, -1.97065971e-13]),
 array([1., 1., 1., 1., 1., 1., 1.]))

## Appears to be drift, perhaps the sample is not big enough

In [9]:
X_test.mean(axis=0), X_test.std(axis=0) 

(array([ 0.002863  ,  0.0093879 , -0.00576779, -0.01150026, -0.01160122,
        -0.01466698, -0.0055547 ]),
 array([1.00800321, 1.00616058, 1.0009353 , 0.99290648, 0.99481008,
        1.00920012, 0.71155792]))

In [10]:
Y_train.mean(axis=0), Y_train.std(axis=0) 

(array([-1.86603895e-17, -2.39778990e-17, -4.12750044e-17, -8.97978632e-17,
         1.45237693e-17,  4.88584184e-17, -7.02959790e-15, -1.74744727e-15]),
 array([1., 1., 1., 1., 1., 1., 1., 1.]))

In [11]:
Y_test.mean(axis=0), Y_test.std(axis=0)

(array([ 0.01621399, -0.00112999,  0.01755134,  0.01955565,  0.01673425,
         0.00871709, -0.01095845, -0.02030808]),
 array([0.99488322, 0.99650294, 0.78686316, 0.96749519, 0.93053514,
        0.90833692, 1.01170468, 1.00410648]))

## Dummy regressor

In [12]:
from sklearn.dummy import DummyRegressor

In [13]:
regr = MultiOutputRegressor(DummyRegressor(strategy="mean")).fit(X_train, Y_train)
regr.predict(X_train)

array([[-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15],
       [-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15],
       [-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15],
       ...,
       [-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15],
       [-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15],
       [-2.22989316e-17, -3.83122584e-17,  1.19725807e-18, ...,
         4.54958068e-17, -7.02072135e-15, -1.75727554e-15]])

In [14]:
mean_absolute_error(Y_train, regr.predict(X_train))

0.7520506841719812

In [15]:
mean_absolute_error(Y_test, regr.predict(X_test))

0.7473980441803673

## Train a linear regression for debugging

In [16]:
regr = MultiOutputRegressor(Ridge(random_state=123)).fit(X_train, Y_train)
regr.predict(X_train)

array([[-3.55665513e-02, -2.79101710e-03, -3.97968237e-01, ...,
        -9.39772110e-03,  6.05940341e-01,  4.67702463e-01],
       [ 1.62366516e-02, -5.68281660e-03,  1.66705922e-01, ...,
        -1.90792653e-02, -2.11958735e-01, -8.31790141e-01],
       [ 1.21508354e-01, -8.92874878e-04,  2.18078322e-01, ...,
         8.26661821e-03,  2.45308151e-01,  2.67802098e-01],
       ...,
       [-1.51470886e-01, -7.14298286e-04,  1.08530067e-01, ...,
         1.08681843e-02, -2.90026348e-01,  1.83160605e-01],
       [ 1.23148773e-01, -3.68630926e-03,  4.82192951e-02, ...,
         4.07378792e-03,  1.40822159e-01, -9.40254972e-02],
       [-3.72500527e-02,  3.03308290e-03,  3.33386565e-01, ...,
         8.10580312e-03, -3.00204385e-01,  3.60992209e-01]])

In [17]:
mean_absolute_error(Y_train, regr.predict(X_train))

0.7285670975322824

In [18]:
mean_absolute_error(Y_test, regr.predict(X_test))

0.7245307881968837

## Simplest neural network

In [19]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [20]:
# Define Sequential model with 3 layers
model = keras.Sequential(
    [
        layers.Dense(input_shape, activation="relu", name="layer_in"),
        layers.Dense(128, activation="relu", name="layer2"),
        layers.Dense(64, activation="relu", name="layer3"),
        layers.Dense(output_shape, name="layer_out"),
    ]
)

model(X_train[0:1])

<tf.Tensor: shape=(1, 8), dtype=float32, numpy=
array([[ 0.1619258 ,  0.18810499, -0.2724566 ,  0.4558525 ,  0.15727292,
         0.12754606, -0.01106999,  0.1740163 ]], dtype=float32)>

In [21]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_in (Dense)            (1, 7)                    56        
                                                                 
 layer2 (Dense)              (1, 128)                  1024      
                                                                 
 layer3 (Dense)              (1, 64)                   8256      
                                                                 
 layer_out (Dense)           (1, 8)                    520       
                                                                 
Total params: 9856 (38.50 KB)
Trainable params: 9856 (38.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
model.compile(
    optimizer=keras.optimizers.RMSprop(),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanAbsoluteError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanAbsoluteError()],
)



In [23]:
print("Fit model on training data")
history = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=200,
    # We pass some validation for
    # monitoring validation loss and metrics
    # at the end of each epoch
    validation_data=(X_test, Y_test),
)

Fit model on training data
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200


Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200


Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200


Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200


Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [24]:
mean_absolute_error(Y_train, model.predict(X_train))



0.5412868607844716

In [25]:
mean_absolute_error(Y_test, model.predict(X_test))



0.5698212470333661