In [1]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [2]:
import sys
# Make the project's helper modules importable. The paths are relative, so they
# resolve against the working directory the notebook is started from.
sys.path.insert(1, "../python/functions")
sys.path.insert(2, "../python/architecture")

In [3]:
from data_prep_functions import mnist_prep
# NOTE(review): the wildcard imports hide where names come from. `np` and the
# nn_model_* helpers used in later cells are never imported explicitly in the
# visible cells, so they presumably arrive through these two imports — confirm
# and import them explicitly.
from model_functions import *
from plotting_functions import *
# Presumably imported only for their import-time side effects (the names suggest
# disabling the GPU and fixing random seeds) — TODO confirm in the modules themselves.
import no_gpu
import reproducible




In [4]:
# Load the prepared MNIST data; mnist_prep() returns four objects that are unpacked
# here as training/validation features and labels (return order taken from this call
# site — confirm against data_prep_functions).
X_train, X_val, y_train, y_val = mnist_prep()

In [5]:
# use a smaller subset of the data for increased speed:
# the first 1000 training rows and the first 500 validation rows
X_train_small, y_train_small = X_train[:1000, :], y_train[:1000]
X_val_small, y_val_small = X_val[:500, :], y_val[:500]

# size of the second axis of the feature matrix
n_cols = X_train_small.shape[1]

# EnKF

In [6]:
# from here on the generic names refer to the reduced subsets;
# the validation subset doubles as the test set
X_train, y_train = X_train_small, y_train_small
X_test, y_test = X_val_small, y_val_small

In [7]:
# Training configuration. Only batch_size and particles are read by the cells
# visible in this notebook section; the remaining values are placeholders.
batch_size = 100
epochs = 5
particles = 10
early_stopping = 0.001
batch_normal = True # TODO: still to be implemented; careful with the dimensions further down
shuffle = True # TODO: still to be implemented

In [8]:
# Network architecture handed to nn_model_structure: number of layers and
# one neuron count per layer (the list has `layers` entries).
layers = 5
neurons = [128, 128, 64, 32, 10]
# input width, taken from the second axis of the training features
n_cols = X_train.shape[1]

In [9]:
# Constants of the EnKF update loop below.
# delta: added to the predictions before taking 1 / (y_pred + delta) in the Jacobian
delta = 0.005
# h_0: numerator of the step size h_t = h_0 / (sqrt(sum(d**2)) + epsilon)
h_0 = 2
# epsilon: added to the denominator of h_t so the step size stays finite
epsilon = 0.5

In [10]:
n = len(X_train)
num_batches = int(np.ceil(n / batch_size))
# batch boundaries 0, batch_size, 2 * batch_size, ... stored as floats;
# the last boundary is clipped to n so the final batch may be shorter
batch_indices = np.arange(num_batches + 1, dtype = float) * batch_size
batch_indices[-1] = n

In [11]:
# cut features and labels at consecutive boundary pairs (lower, upper);
# the boundaries are floats, hence the int() casts
X_batches = [X_train[int(lower):int(upper)] for lower, upper in zip(batch_indices[:-1], batch_indices[1:])]
y_batches = [y_train[int(lower):int(upper)] for lower, upper in zip(batch_indices[:-1], batch_indices[1:])]

In [12]:
# per-particle containers, filled by the cells below
model_dict, weights_dict, y_pred_dict = {}, {}, {}
jacobian_dict, weights_vector_dict = {}, {}

# Reference network. According to the original note its weights and biases follow
# the Glorot distribution, so it can predict and evaluate right away, just very
# badly (<10% accuracy). Here it only serves to read off the parameter shapes.
init_model = nn_model_compile(nn_model_structure(layers = layers,
                                                 neurons = neurons,
                                                 n_cols = n_cols),
                              optimizer = "sgd")
weights = init_model.get_weights()
# shapes: one array per weight matrix / bias vector holding that parameter's shape
shapes = [np.array(np.shape(layer_params)) for layer_params in weights]
# shape_elements: cumulative element counts, i.e. the cut positions of each
# parameter inside one flattened weight vector
shape_elements = np.cumsum([0] + list(map(np.prod, shapes)))

In [13]:
# parameter shapes of init_model, in get_weights() order
shapes

[array([784, 128]),
 array([128]),
 array([128, 128]),
 array([128]),
 array([128,  64]),
 array([64]),
 array([64, 32]),
 array([32]),
 array([32, 10]),
 array([10])]

In [14]:
# inspect one parameter array of init_model (index 6 is the 64 x 32 matrix in the shapes listing)
weights[6]

array([[-0.03291825, -0.20898242,  0.07943602, ...,  0.04385055,
        -0.09227242,  0.03294281],
       [-0.18527476, -0.3060454 , -0.17189379, ...,  0.24283098,
        -0.08014601, -0.05802896],
       [ 0.14792517, -0.08376493,  0.07699804, ..., -0.03988482,
         0.02643473, -0.01785932],
       ...,
       [-0.02243756,  0.07744864,  0.04440521, ..., -0.23079783,
         0.19677767,  0.10716619],
       [ 0.14128311,  0.03734275,  0.09165913, ...,  0.2800443 ,
        -0.3229967 ,  0.18782766],
       [ 0.23849224,  0.09487124,  0.10161119, ..., -0.10860008,
        -0.10969386,  0.32146764]], dtype=float32)

In [15]:
# create one separately built network per particle
for particle_idx in range(particles):
    key = "model_{}".format(particle_idx + 1)

    # fresh model with the shared structure (neurons, layers, input width),
    # compiled with optimizer = "sgd"
    model = nn_model_compile(nn_model_structure(layers = layers,
                                                neurons = neurons,
                                                n_cols = n_cols),
                             optimizer = "sgd")

    # store the particle's model and its current weights and biases under the same key
    model_dict[key] = model
    weights_dict[key] = model.get_weights()

In [16]:
# dump every parameter array of the first particle's model (long output)
print(model_dict["model_1"].get_weights())

[array([[ 0.05316269,  0.01764145,  0.01522263, ...,  0.07493449,
        -0.0201476 ,  0.09051652],
       [-0.00516469,  0.07823637,  0.02811813, ..., -0.0174622 ,
        -0.02144132, -0.02854406],
       [-0.04033467,  0.00961152,  0.03930797, ..., -0.0174436 ,
         0.03845177, -0.03893438],
       ...,
       [ 0.04866969,  0.05048861, -0.01158144, ..., -0.03740012,
         0.01336047, -0.01347332],
       [ 0.01840364, -0.02679037,  0.03217081, ..., -0.0442641 ,
        -0.0488762 , -0.01088821],
       [-0.07488225, -0.07203993, -0.02490162, ...,  0.01187966,
         0.01317079,  0.01347652]], dtype=float32), array([-0.02369975,  0.033563  ,  0.06631573,  0.11554006, -0.05914802,
        0.08650133, -0.04362187,  0.11998139,  0.02238612,  0.05546513,
        0.07225065, -0.05853405, -0.02861994, -0.044171  ,  0.06349412,
       -0.18844545, -0.03606726, -0.01002328,  0.09155541,  0.18229206,
        0.20059158,  0.04867344, -0.15830076,  0.11646345, -0.02610004,
        0.

In [17]:
# the copy stored in weights_dict was taken from the same model's get_weights(),
# so this output should match the previous cell
print(weights_dict["model_1"])

[array([[ 0.05316269,  0.01764145,  0.01522263, ...,  0.07493449,
        -0.0201476 ,  0.09051652],
       [-0.00516469,  0.07823637,  0.02811813, ..., -0.0174622 ,
        -0.02144132, -0.02854406],
       [-0.04033467,  0.00961152,  0.03930797, ..., -0.0174436 ,
         0.03845177, -0.03893438],
       ...,
       [ 0.04866969,  0.05048861, -0.01158144, ..., -0.03740012,
         0.01336047, -0.01347332],
       [ 0.01840364, -0.02679037,  0.03217081, ..., -0.0442641 ,
        -0.0488762 , -0.01088821],
       [-0.07488225, -0.07203993, -0.02490162, ...,  0.01187966,
         0.01317079,  0.01347652]], dtype=float32), array([-0.02369975,  0.033563  ,  0.06631573,  0.11554006, -0.05914802,
        0.08650133, -0.04362187,  0.11998139,  0.02238612,  0.05546513,
        0.07225065, -0.05853405, -0.02861994, -0.044171  ,  0.06349412,
       -0.18844545, -0.03606726, -0.01002328,  0.09155541,  0.18229206,
        0.20059158,  0.04867344, -0.15830076,  0.11646345, -0.02610004,
        0.

In [18]:
# baseline before any update: evaluate only the first particle (range(1)) on the
# validation subset and print the second entry of evaluate()'s result
for particle_idx in range(1):
    key = "model_{}".format(particle_idx + 1)
    print(model_dict[key].evaluate(X_val_small, y_val_small)[1])

0.09000000357627869


In [19]:
# number of mini-batches the update loop has to visit
len(X_batches)

10

Bei b = 6 geht etwas schief: Vorher steigen die Accuracies (zumindest für manche Partikel), danach fallen sie wieder auf 8,4 % zurück.

Dabei wird der Loss zu groß, und dadurch entstehen NaN-Werte.

Dabei werden die Gewichte und Biases zu groß.

Ein kleineres $h_t$ hilft (Gewichte und Biases wachsen langsamer), aber die Accuracy steigt weniger stark (vielleicht auch nur langsamer).

#### Lösungsidee: 1) Batch Normalization, 2) (muss sowieso auch noch gemacht werden) Mittelwerte der Partikel

### Nach dem ersten Schleifendurchlauf erhalten wir nicht mehr die Wahrscheinlichkeiten, sondern nur noch einen 0-1-Vektor.

In [20]:
# One EnKF-style update of all particles per training batch.
# The loop runs over len(X_batches) instead of a hard-coded 10, so it stays valid
# when batch_size or the size of the training set changes.
# NOTE(review): the notebook's notes report weights growing until the loss becomes
# NaN around b = 6; nothing in this loop bounds the size of the update.
for b in range(len(X_batches)):
    for i in range(particles):
        key = "model_{}".format(i + 1)
        # for every particle write the predictions on the current training batch in a dictionary
        y_pred_dict[key] = model_dict[key].predict(X_batches[b])

        # for every particle write the Jacobian in a dictionary:
        # -y / (y_pred + delta); delta keeps the denominator away from zero
        jacobian_dict[key] = (-1) * np.multiply(np.array(y_batches[b]),
                                                np.array(1 / (y_pred_dict[key] + delta)))

    # compute the mean of the predictions over all particles
    y_pred_mean = np.mean(list(y_pred_dict.values()), axis = 0)

    # compute the matrix D elementwise:
    # d[k][j] = sum of (centered predictions of particle k) * (Jacobian of particle j)
    # NOTE(review): confirm that this orientation of d (rather than d.T) is the one
    # intended for the update np.matmul(d, weights_all_ptcls) below.
    d = np.zeros(shape = (particles, particles))
    for k in range(particles):
        y_pred_centered = y_pred_dict["model_{}".format(k + 1)] - y_pred_mean
        for j in range(particles):
            d[k][j] = np.sum(np.multiply(y_pred_centered, jacobian_dict["model_{}".format(j + 1)]))
    print(d)

    # compute the scalar step size h_t; it shrinks as the Frobenius norm of d grows
    h_t = h_0 / (np.sqrt(np.sum(d**2)) + epsilon)
    print(h_t)

    # Flatten the weights and biases of every particle into one single vector.
    # A single concatenate replaces the repeated np.append (which copied the growing
    # vector once per layer); float64 matches the dtype the append-based version produced.
    for i in range(particles):
        key = "model_{}".format(i + 1)
        weights_array = np.concatenate([np.asarray(params, dtype = np.float64).ravel()
                                        for params in weights_dict[key]])
        weights_vector_dict[key] = weights_array

    # matrix with particle parameters as row vectors
    weights_all_ptcls = np.array(list(weights_vector_dict.values()))

    # compute the matrix with the updates for each particle
    weights_all_ptcls = weights_all_ptcls - h_t * np.matmul(d, weights_all_ptcls)

    for i in range(particles):
        key = "model_{}".format(i + 1)
        # write the updates back into the dictionary
        weights_vector_dict[key] = weights_all_ptcls[i]
        # cut the flat vector at shape_elements and restore the original matrix and vector shapes
        for param_idx in range(len(shape_elements)-1):
            start = shape_elements[param_idx]
            end = shape_elements[param_idx+1]
            weights_dict[key][param_idx] = np.reshape(weights_vector_dict[key][start:end], tuple(shapes[param_idx]))
        # set new weights for model
        model_dict[key].set_weights(weights_dict[key])

[[  7.05965137  -1.44345415   1.21363175   2.56430531  -2.22411537
   -2.39497185  -1.070292    -6.72883701  -2.48137069   3.85155702]
 [  3.97607327  11.7235527    4.20490885   1.53150916   7.77554512
    4.04066896   5.21844006  11.82972813   6.46336555   0.93292797]
 [  2.73013878   0.99683487   6.16775227   3.56657839  -2.27708817
    2.24420023   1.11264026   2.09004378   2.5626204    2.39438558]
 [  3.00444555  -3.98108625   0.8612324    8.13779259  -2.39688563
   -4.16340351   0.52769005 -14.5675106    0.99047017   3.11611414]
 [  2.35826468   4.66225052   0.22539949   1.48329949  15.06003189
    3.85338354   1.53232479   1.3410902    1.56644535   1.6479938 ]
 [ -3.21744108  -1.08049726  -1.79644895  -4.92745972   1.20095086
    8.27916241  -1.21106625   5.36889219  -1.71267414  -4.26534748]
 [ -2.01967812   0.30137306  -1.73136902   0.70015967  -1.06893718
   -0.39008796   3.8077879   -0.73828328   1.00964153  -2.40166616]
 [ -9.34454727   2.14297533  -4.56899405 -10.87569904  

[[ 2.24739027e+00 -1.46898615e+00 -2.56501412e+00 -1.74057674e+00
  -1.63877356e+00 -2.90112829e+00 -2.30663633e+00 -2.58515573e+00
   6.33011103e-01 -1.73822540e+02]
 [ 1.55652738e+00  9.19865966e-01 -4.73730206e-01  1.52875423e-01
   1.07559681e-01 -4.30520654e-01 -2.92351246e-02 -5.03555417e-01
   1.52038908e+00 -9.78535461e+01]
 [ 2.48993307e-01 -7.35921383e-01 -9.87249136e-01 -6.83503211e-01
  -1.13855422e+00 -1.09933066e+00 -9.45007920e-01 -1.18840241e+00
   3.16778064e-01 -7.28926010e+01]
 [ 1.09493041e+00 -7.56896734e-02 -6.44290328e-01  5.81355095e-02
  -3.65165591e-01 -3.42888176e-01 -2.59173930e-01 -5.95651627e-01
   1.22303486e+00 -7.39274216e+01]
 [ 1.79028487e+00  6.22461438e-01 -4.09267426e-01  3.32337618e-01
   5.44129729e-01 -3.65155697e-01  4.99888659e-02 -1.05294347e-01
   1.85866833e+00 -8.40365143e+01]
 [ 2.44435120e+00  1.56291175e+00  1.17943645e+00  1.88088107e+00
   1.21546495e+00  2.53026867e+00  1.69511330e+00  1.00336289e+00
   2.31050539e+00 -3.06390610e+01

In [21]:
# symbolic output tensor of the layer at index 4 of the first particle's model
model_dict["model_1"].layers[4].output

<tf.Tensor 'dense_4/Identity:0' shape=(None, 10) dtype=float32>

In [22]:
# after the updates: evaluate every particle on the validation subset and print
# the second entry of evaluate()'s result
for particle_idx in range(particles):
    key = "model_{}".format(particle_idx + 1)
    print(model_dict[key].evaluate(X_val_small, y_val_small)[1])

0.09000000357627869
0.09000000357627869
0.09000000357627869
0.09000000357627869
0.09000000357627869
0.1340000033378601
0.09000000357627869
0.09399999678134918
0.14399999380111694
0.05999999865889549


In [23]:
# list the stored particle models
model_dict

{'model_1': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae7a89248>,
 'model_2': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae7abc0c8>,
 'model_3': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae7aec348>,
 'model_4': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae7b1a588>,
 'model_5': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae7b4c708>,
 'model_6': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae8b4b9c8>,
 'model_7': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae8b7ec88>,
 'model_8': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae8badec8>,
 'model_9': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae8be41c8>,
 'model_10': <tensorflow.python.keras.engine.sequential.Sequential at 0x24ae8c125c8>}

In [24]:
# first column of parameter array 8 of the first particle (the 32 x 10 matrix in the shapes listing)
model_dict["model_1"].get_weights()[8][:,0]

array([ 0.12591217,  0.01057448,  0.21442693,  0.03240171,  0.07029884,
       -0.251279  ,  0.1538534 , -0.00100886, -0.1178964 , -0.00651607,
        0.12146336,  0.12653674,  0.00696334,  0.04561593,  0.00995865,
       -0.12705459, -0.04315743, -0.10660747, -0.04043254, -0.02224591,
        0.01094058,  0.02111131, -0.00282068,  0.07026877,  0.14894909,
       -0.13326827,  0.00809106, -0.15563634, -0.01804565,  0.02865029,
       -0.10181283,  0.06930839], dtype=float32)

In [25]:
# first entry of parameter array 9 of the first particle (the length-10 vector in the shapes listing)
model_dict["model_1"].get_weights()[9][0]

-0.20680207

In [26]:
# Average the particles parameter by parameter and load the result into init_model.
# zip(*...) groups the i-th parameter array of every particle, so each np.mean call
# stacks arrays of identical shape. Averaging the nested list of differently shaped
# arrays in a single np.mean call needs a ragged array, which recent NumPy versions
# refuse to build.
mean_weights = [np.mean(layer_params, axis = 0) for layer_params in zip(*weights_dict.values())]
init_model.set_weights(mean_weights)

In [27]:
# evaluate init_model on the test subset and show the second entry of evaluate()'s result
init_model.evaluate(X_test, y_test)[1]



0.09000000357627869

In [28]:
# predictions of the first particle as last written by the update loop
y_pred_dict["model_1"]

array([[0.10271356, 0.09694748, 0.10435107, 0.0987933 , 0.08917125,
        0.1104252 , 0.1160948 , 0.08989395, 0.09472495, 0.0968845 ],
       [0.10266121, 0.09695563, 0.10442731, 0.09872796, 0.08914363,
        0.11043162, 0.11601891, 0.08997165, 0.09475704, 0.09690497],
       [0.10268528, 0.0970133 , 0.10437334, 0.09871385, 0.08917442,
        0.11045294, 0.1160096 , 0.08996806, 0.09476309, 0.09684609],
       [0.10265462, 0.09698213, 0.10441468, 0.09873136, 0.08911644,
        0.1104462 , 0.11605607, 0.08990762, 0.09475164, 0.09693924],
       [0.10272592, 0.09695638, 0.10443249, 0.09877756, 0.08915027,
        0.1104805 , 0.1160489 , 0.08981936, 0.09471747, 0.09689111],
       [0.10265537, 0.09699152, 0.10443045, 0.09873138, 0.08910485,
        0.11043829, 0.11603524, 0.08990853, 0.09476124, 0.09694318],
       [0.10267518, 0.09696589, 0.10441116, 0.09874541, 0.08916063,
        0.11045223, 0.11601214, 0.08991739, 0.09474778, 0.09691209],
       [0.10271488, 0.09703455, 0.1043732

In [29]:
# Jacobian of the first particle as last written by the update loop
jacobian_dict["model_1"]

array([[ -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -0.       ,  -0.       ,  -0.       ,  -9.815036 ],
       [ -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -8.263171 ,  -0.       ,  -0.       ,  -0.       ],
       [ -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -0.       ,  -0.       ,  -0.       ,  -9.818737 ],
       [ -9.288964 ,  -0.       ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ],
       [ -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -0.       , -10.546369 ,  -0.       ,  -0.       ],
       [ -0.       ,  -9.804737 ,  -0.       ,  -0.       ,  -0.       ,
         -0.       ,  -0.       ,  -0.       ,  -0.       ,  -0.       ],
       [ -0.       ,  -0.       ,  -0.       ,  -0.       , -10.62015  ,
         -0.       ,  -0.       ,  -0.       