In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import signal
import sys
!{sys.executable} -m pip install scikit-dsp-comm
import sk_dsp_comm.sigsys as ss 
from sklearn.model_selection import train_test_split
import mcfly
import tensorflow as tf
import sys
import os
import pandas as pd
# Seed NumPy's global random state so that code drawing from np.random
# (e.g. the np.random.uniform / np.random.randint calls in the data generator
# below) yields the same values on every fresh Restart & Run All.
np.random.seed(20)

In [None]:
def make_patterns_ndim(
    number_of_datapoints,
    signal_length=200
):
    """Generate a labelled set of synthetic 1-D sequences.

    Every sequence is one of two templates, circularly shifted (``np.roll``)
    by a random delay in ``[0, signal_length)``:

    * "normal"  - a sine wave with a period of 50 time units;
    * "anomaly" - ``x1 - x2 + x3 * sine``, where ``x1``, ``x2`` and ``x3`` are
      built from ``ss.rect`` windows (presumably rectangular pulses; see the
      scikit-dsp-comm documentation for the exact definition).

    The class of each sequence is chosen with probability 1/2 using NumPy's
    global random state, so call ``np.random.seed`` beforehand for repeatable
    output.

    Parameters
    ----------
    number_of_datapoints : int
        Number of sequences to generate.
    signal_length : int, optional
        Number of samples per sequence (must be positive). Defaults to 200,
        the value that used to be hard-coded. The sine period and the window
        positions/widths are absolute and do not scale with this value.

    Returns
    -------
    X : numpy.ndarray, shape (number_of_datapoints, signal_length, 1)
        The generated sequences, one per row, with a trailing channel axis.
    Y : numpy.ndarray, shape (number_of_datapoints, 2)
        One-hot labels: column 0 marks "normal", column 1 marks "anomaly".
    S : numpy.ndarray, shape (number_of_datapoints * signal_length,)
        All sequences concatenated end to end in generation order.
    """
    t = np.linspace(0, signal_length, signal_length)

    # Both templates are independent of the loop index, so build them once.
    normal_template = np.sin(2*np.pi*t/50)
    x1 = ss.rect(t, 50)
    x2 = ss.rect(t - 37.5, 25)
    x3 = 1 - ss.rect(t - 25, 50)
    anomaly_template = x1 - x2 + x3*normal_template

    X = np.zeros([number_of_datapoints, signal_length, 1])
    S = np.zeros(number_of_datapoints*signal_length)
    Y = np.zeros([number_of_datapoints, 2])

    for i in range(number_of_datapoints):
        # Keep the draw order (class first, then delay) so that a seeded run
        # produces exactly the same data as before.
        is_normal = np.random.uniform(-1, 1) >= 0
        delay = np.random.randint(signal_length)

        template = normal_template if is_normal else anomaly_template
        q = np.roll(template, delay)

        Y[i, 0 if is_normal else 1] = 1
        X[i, :, 0] = q
        S[i*signal_length:(i + 1)*signal_length] = q

    return X, Y, S





In [None]:
X, y, S = make_patterns_ndim(number_of_datapoints=1000)

train_ratio = 0.70
validation_ratio = 0.1
test_ratio = 0.2
assert abs(train_ratio + validation_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Work with absolute sample counts instead of float fractions: 1 - 0.70
# evaluates to 0.30000000000000004, which scikit-learn's ceil-based sizing
# turns into 301 hold-out samples out of 1000 (a 699/100/201 split).
n_samples = X.shape[0]
n_val = int(round(validation_ratio * n_samples))
n_test = int(round(test_ratio * n_samples))

# First carve off everything that is not training data, then divide that
# hold-out set into validation and test parts.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=n_val + n_test)

X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, test_size=n_test)


In [None]:
# Visual sanity check: draw one of the generated sequences over its time axis.
t = np.linspace(0,200,200)
sample_index = 9
example_signal = X[sample_index,:,0]
plt.plot(t, example_signal)
plt.grid()
plt.xlabel('Time (s)')

In [None]:
# Ask mcfly for four candidate networks matching the training-data shape,
# each with two output units (one per class).
models = mcfly.modelgen.generate_models(
    X_train.shape,
    number_of_output_dimensions=2,
    number_of_models=4,
)

In [None]:
# Print type, hyperparameters and layer summary for every selected candidate.
# Each entry of `models` is a (model, params, model_type) triple.
models_to_print = range(len(models))
for i in models_to_print:
    model, params, model_types = models[i]
    print("-------------------------------------------------------------------------------------------------------")
    print("Model " + str(i), '\n')
    print("Model type:", model_types, '\n')
    print("Hyperparameters:")
    print(params)
    print(" ")
    print("Model description:")
    model.summary()
    print(" ")

In [None]:
# Specify in which directory you want to store the data:
directory_to_extract_to = 'put here your path'
# Define directory where the results, e.g. json file, will be stored.
# The path components are passed separately so os.path.join inserts the
# separator of the current platform; a literal 'data\\models' would create a
# single directory named "data\models" on Linux/macOS.
resultpath = os.path.join(directory_to_extract_to, 'data', 'models')
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(resultpath, exist_ok=True)

In [None]:
# Train every candidate briefly on a subset of the training data and record
# the per-model histories; details are also written to a JSON file.
outputfile = os.path.join(resultpath, 'modelcomparison.json')
histories, val_accuracies, val_losses = mcfly.find_architecture.train_models_on_samples(
    X_train, y_train,
    X_val, y_val,
    models,
    nr_epochs=5,
    subset_size=300,
    verbose=True,
    outputfile=outputfile,
)
print('Details of the training process were stored in ',outputfile)

In [None]:
# Collect the last-epoch training/validation figures of every candidate in one
# table, store it as CSV next to the other results and display it.
metric = 'accuracy'

# Output column name -> key in the Keras history dict (order = column order).
history_keys = {
    'train_{}'.format(metric): metric,
    'train_loss': 'loss',
    'val_{}'.format(metric): 'val_{}'.format(metric),
    'val_loss': 'val_loss',
}

comparison_columns = {'model': [str(params) for _, params, _ in models]}
for column_name, history_key in history_keys.items():
    comparison_columns[column_name] = [
        history.history[history_key][-1] for history in histories
    ]

modelcomparisons = pd.DataFrame(comparison_columns)
modelcomparisons.to_csv(os.path.join(resultpath, 'modelcomparisons.csv'))

modelcomparisons

In [None]:
# Pick the candidate with the highest validation accuracy and report it.
best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print(
    'Model type and parameters of the best model:',
    best_model_types,
    best_params,
    sep='\n',
)

In [None]:
# Train the selected model on the complete training set.
# NOTE(review): no copy is made here - best_model is the very object taken
# from `models`, so it presumably keeps whatever weights the earlier
# architecture search left in it; confirm whether a fresh start is wanted.
nr_epochs = 5
datasize = X_train.shape[0]  # use every training sample
history = best_model.fit(
    X_train[:datasize,:,:],
    y_train[:datasize,:],
    epochs=nr_epochs,
    validation_data=(X_val, y_val),
)

In [None]:
# Location of the saved model file inside the results directory.
modelname = 'my_bestmodel.h5'
model_path = os.path.join(resultpath, modelname)

In [None]:
# Write the trained model to model_path so it can be reloaded later
# (the file name ends in .h5, so presumably Keras' HDF5 format is used).
best_model.save(model_path)

In [None]:
# Round-trip check: load the saved file and verify that every weight array is
# element-wise equal to the one held by the in-memory model.
model_reloaded = tf.keras.models.load_model(model_path)
weight_pairs = zip(best_model.get_weights(), model_reloaded.get_weights())
np.all([np.all(saved == loaded) for saved, loaded in weight_pairs])


In [None]:
## Inspect model predictions on validation data
# Class scores for every validation sequence, predicted one sample per batch.
datasize = X_val.shape[0]
val_inputs = X_val[:datasize,:,:]
probs = model_reloaded.predict(val_inputs, batch_size=1)

In [None]:
# Confusion matrix on the validation set.
# Columns are predicted classes, rows are true classes.
labels = ['normal', 'anomaly']
predicted = probs.argmax(axis=1)
y_index = y_val.argmax(axis=1)
confusion_matrix = pd.crosstab(pd.Series(y_index), pd.Series(predicted))
# crosstab only contains the class indices that actually occur; translate
# them to their names.
confusion_matrix.index = [labels[i] for i in confusion_matrix.index]
confusion_matrix.columns = [labels[i] for i in confusion_matrix.columns]
# reindex returns a new frame, so the result has to be assigned back.
# Any class missing from the truth or the predictions becomes a zero row/column.
confusion_matrix = confusion_matrix.reindex(index=labels, columns=labels, fill_value=0)
confusion_matrix


In [None]:
## Test on Testset
## Inspect model predictions on the held-out test data
datasize = X_test.shape[0]
probs = model_reloaded.predict(X_test[:datasize,:,:],batch_size=1)
# Columns are predicted classes, rows are true classes.
labels = ['normal', 'anomaly']
predicted = probs.argmax(axis=1)
y_index = y_test.argmax(axis=1)
confusion_matrix = pd.crosstab(pd.Series(y_index), pd.Series(predicted))
# crosstab only contains the class indices that actually occur; translate
# them to their names.
confusion_matrix.index = [labels[i] for i in confusion_matrix.index]
confusion_matrix.columns = [labels[i] for i in confusion_matrix.columns]
# reindex returns a new frame, so the result has to be assigned back.
# Any class missing from the truth or the predictions becomes a zero row/column.
confusion_matrix = confusion_matrix.reindex(index=labels, columns=labels, fill_value=0)
confusion_matrix
