In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

# Mount Google Drive at /content/gdrive. Every data file read and every
# figure/text file written by this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

# Pin the Keras / TensorFlow versions used by this notebook.
# NOTE(review): these installs run after the runtime has started; if a
# different TensorFlow was already imported in this session the runtime must be
# restarted for the pinned version to take effect - confirm on a fresh runtime.
!pip install keras==2.4.3
!pip install tensorflow==2.3.0

  
#import python packages
import os
import os.path
import tensorflow as tf
import scipy.io as sio
from tensorflow import keras
tf.keras.backend.clear_session()
import numpy as np
import time
from tensorflow.python.keras.layers import Input, Dense, Conv1D, Conv1DTranspose, MaxPool1D, UpSampling1D, concatenate,Flatten,Reshape,Cropping1D
from tensorflow.python.keras.models import Model
import matplotlib.pyplot as plt



In [None]:
# Load the three comma-separated text files of the "parallel" dataset:
#   1. the time axis,
#   2. the noise-free curves,
#   3. the noisy curves.
# To switch to the "sequential" dataset, use the same three file names under
#   /content/gdrive/MyDrive/analytic_training_data_seqpar/7/
time_data, curve_data, noisy_data = [
    np.loadtxt(txt_path, delimiter=',')
    for txt_path in (
        '/content/gdrive/MyDrive/analytic_training_data/44/time_analytic.txt',
        '/content/gdrive/MyDrive/analytic_training_data/44/clean_data_analytic.txt',
        '/content/gdrive/MyDrive/analytic_training_data/44/noisy_data_analytic.txt',
    )
]

# Give each array a leading singleton axis, e.g. (rows, cols) -> (1, rows, cols).
# Wrapping the array in a list and converting it does exactly that; the
# transpose followed by swapaxes(0, -1) that used to be here cancelled out for
# the 1-D / 2-D arrays np.loadtxt returns.
curve_data = np.array([curve_data])
time_data = np.array([time_data])
noisy_data = np.array([noisy_data])

# Drop the singleton axes again to get plain 2-D (row-per-curve) arrays.
# NOTE(review): squeeze removes *every* length-1 axis, so a file holding a
# single curve would collapse to 1-D here.
curve_data_squeezed = curve_data.squeeze()
noisy_data_squeezed = noisy_data.squeeze()

# Fraction of the curves held back for validation ...
percent_validation = 0.1

# ... and the corresponding number of curves (axis 1 of the 3-D array).
amount_validation = round(percent_validation * curve_data.shape[1])

# Training pairs for noise removal: the network sees the noisy curve and is
# asked to reproduce the noise-free one. The validation rows are the last
# `amount_validation` rows, so training uses everything before them.
training_input = noisy_data_squeezed[:noisy_data_squeezed.shape[0] - amount_validation]
training_output = curve_data_squeezed[:curve_data_squeezed.shape[0] - amount_validation]

# Reset the global state Keras has accumulated, so that re-running this cell
# builds the model from scratch instead of on top of a previous run.
tf.keras.backend.clear_session()  # see https://www.tensorflow.org/api_docs/python/tf/keras/backend/clear_session

#assign the number of epochs
n_epochs = 3000

#assign dimensions of convolutional filter
# s: number of filters of the first convolution (the second uses 2*s)
# w: kernel width used by every (de)convolution
s = 40
w = 40

#include biasing?
# This has to be a real boolean. The previous value was the *string* 'False',
# which is non-empty and therefore truthy in Python, so every layer given
# use_bias=UB actually kept its bias term.
UB = False

#autoencoder convolutional neural network

# Options shared by every convolution / transposed convolution below.
conv_options = dict(padding='same', activation=tf.nn.relu, use_bias=UB)

# One input channel per sample point: (points per curve, 1).
# Named net_input / net_output so the Python builtin `input` is not shadowed
# for the rest of the notebook session.
net_input = Input((training_input.shape[1], 1))

# --- encoder: two convolution + max-pooling stages -------------------------
conv1 = Conv1D(s, w, **conv_options)(net_input)
pool1 = MaxPool1D(2, padding='same')(conv1)

conv2 = Conv1D(2*s, w, **conv_options)(pool1)
pool2 = MaxPool1D(2, padding='same')(conv2)

# --- decoder ----------------------------------------------------------------
# NOTE(review): the concatenations are along axis=1, which for Conv1D tensors
# (batch, steps, channels) is the *steps* axis, not the channel axis. Together
# with the stride-1 transposed convolutions this is what brings the length back
# up (for a 40-point curve: 40 -> 20 -> 10, then 10+10 = 20 and 20+20 = 40).
# It presumably only lines up when the curve length is a multiple of 4 -
# confirm before changing the input size.
deconv1 = Conv1DTranspose(2*s, w, **conv_options)(pool2)
concat1 = concatenate([pool2, deconv1], axis=1)

deconv2 = Conv1DTranspose(s, w, **conv_options)(concat1)
concat2 = concatenate([pool1, deconv2], axis=1)

# Single-filter convolution producing one value per sample point.
net_output = Conv1D(1, w, **conv_options)(concat2)

#create model
model = Model(net_input, net_output)

#show a summary of dimension manipulation
model.summary()

#configure the model for training use
# This is a regression problem (MSE loss on continuous curves), so the metric
# reported alongside the loss is the mean absolute error; classification
# accuracy carries no meaning for real-valued targets.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00010),
    loss="mean_squared_error",
    metrics=["mean_absolute_error"],
)

# Where the weights are checkpointed. The same string is used later to save
# the final weights and to load them back.
# NOTE(review): this is a bare directory (trailing slash, no file name), so the
# checkpoint files presumably end up directly in the Drive root with an empty
# base name - consider a dedicated sub-folder and file prefix.
checkpoint_path = '/content/gdrive/MyDrive/'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Early stopping is currently disabled; the callback that was tried:
#tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)

# Periodically write the weights (not the whole model) during training.
# An integer save_freq is counted in batches, not epochs.
weights_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    save_freq=1000,
)
cp_callback = [weights_checkpoint]

# The validation set is the tail of the dataset: the last `amount_validation`
# rows. Both arrays are sliced with the bounds taken from the clean data.
validation_stop = curve_data_squeezed.shape[0]
validation_start = validation_stop - amount_validation

validation_clean = curve_data_squeezed[validation_start:validation_stop]
validation_noisy = noisy_data_squeezed[validation_start:validation_stop]

# Same convention as for training: noisy in, noise-free out.
validation_input, validation_output = validation_noisy, validation_clean

# Fit the network on the noisy -> clean pairs, monitoring the held-out curves
# and checkpointing through cp_callback. Mini-batches of 32 curves.
history = model.fit(
    training_input,
    training_output,
    validation_data=(validation_input, validation_output),
    batch_size=32,
    epochs=n_epochs,
    callbacks=cp_callback,
)

# Store the final weights under the checkpoint path (format() is a no-op
# unless the path contains an {epoch} placeholder).
final_weights_path = checkpoint_path.format(epoch=0)
model.save_weights(final_weights_path)



In [None]:
#load checkpoint
# Restores the weights stored under checkpoint_path by the training cell.
model.load_weights(checkpoint_path)

#convert to 2 dimensional arrays
time_data = np.squeeze(time_data)
training_input = np.squeeze(training_input)

#set lengths of arrays
length_curve_data = curve_data.shape[1]
# Not used in the cells visible here; kept in case other cells rely on them.
length_difference = length_curve_data-amount_validation
count = 0

#stop error messages
# (the plotting code further down opens many figures without closing them)
plt.rcParams.update({'figure.max_open_warning': 0})

#entire dataset
# Denoise every curve in ONE batched predict call. Two things changed here:
#   * the network is fed the NOISY curves. It was trained to map noisy ->
#     clean, but this loop used to pass it the clean `curve_data`, so the
#     saved "predictions" never exercised the denoising at all;
#   * one predict call per curve plus np.append in a loop (which re-copies the
#     whole result every iteration) is replaced by a single vectorised pass.
noisy_curves = noisy_data[0]                      # (curves, points)
all_predictions = model.predict(noisy_curves)     # (curves, points, 1)
all_predictions = all_predictions[:, :, 0]        # (curves, points)

# Normalise each predicted curve to its own maximum. A curve that is zero
# everywhere (possible with a ReLU output) is left as zeros instead of
# becoming NaN through 0/0.
peak_per_curve = all_predictions.max(axis=1, keepdims=True)
peak_per_curve[peak_per_curve == 0] = 1

# Kept as one flat float64 vector (curve after curve), which is the layout the
# original np.append loop produced and what gets written to disk later;
# reshape(-1, points) recovers the matrix.
training_predict_matrix = (all_predictions / peak_per_curve).astype(np.float64).ravel()

# Last raw (un-normalised) prediction, under the name the old loop left behind.
training_predict = all_predictions[-1]

#small amount of figures
def _scale_to_peak(curve):
    """Return `curve` divided by its maximum; an all-zero curve is returned as is."""
    peak = np.amax(curve)
    return curve if peak == 0 else curve / peak


def _save_line_figure(x_values, traces, title, path, with_legend=False):
    """Plot every (y_values, label) pair in `traces` against `x_values` on a
    fresh figure, title it, save it to `path` and return the figure.

    The figure is deliberately left open so the notebook still shows it inline.
    """
    fig, ax = plt.subplots()
    for y_values, label in traces:
        ax.plot(x_values, y_values, label=label)
    ax.set_title(title)
    if with_legend:
        ax.legend()
    fig.savefig(path)
    return fig


# Plot the first 10 curves (fewer if the training set is smaller, which used
# to raise an IndexError).
n_example_figures = min(10, len(training_input))

for n in range(n_example_figures):
  # Denoise the NOISY curve n. The clean curve used to be passed in here, so
  # the "prediction" was never a denoising result.
  training_predict = np.squeeze(model.predict(noisy_data[:,n,:]))

  predicted_curve = _scale_to_peak(training_predict)
  true_curve = _scale_to_peak(training_output[n,:])

  # network input (noisy curve); title and file name kept as before
  _save_line_figure(time_data, [(training_input[n,:], None)],
                    "Validation"+" at n= "+str(n),
                    "/content/gdrive/MyDrive/validation"+str(n))

  # network output, normalised to its peak
  _save_line_figure(time_data, [(predicted_curve, None)],
                    "Prediction"+" at n= "+str(n),
                    "/content/gdrive/MyDrive/prediction"+str(n))

  # noise-free target, normalised to its peak
  _save_line_figure(time_data, [(true_curve, None)],
                    "Truth"+" at n= "+str(n),
                    "/content/gdrive/MyDrive/expected"+str(n))

  # prediction minus truth
  _save_line_figure(time_data, [(predicted_curve-true_curve, None)],
                    "Residual"+" at n= " +str(n),
                    "/content/gdrive/MyDrive/residual"+str(n))

  # both curves overlaid
  _save_line_figure(time_data, [(true_curve, "Truth"), (predicted_curve, "Prediction")],
                    "Prediction Plotted on top of Truth",
                    "/content/gdrive/MyDrive/preditionexpected"+str(n),
                    with_legend=True)

#investigate loss
# Training and validation loss per epoch, as recorded by model.fit.
# Uses an explicit figure/axes pair; the figure used to be stored as an
# attribute on the pyplot module itself (plt.plot6 = ...), which only
# pollutes that module's namespace.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'test'], loc='upper left')
# Fixed y-window; anything outside 1e-5 .. 1e-3 is clipped from view.
loss_ax.set_ylim([1e-5, 1e-3])

# Write the arrays used above to Google Drive as plain-text files, in the
# same order as before.
exports = (
    ("/content/gdrive/MyDrive/training_input.txt", training_input),
    ("/content/gdrive/MyDrive/training_output.txt", training_output),
    ("/content/gdrive/MyDrive/time_data.txt", time_data),
    ("/content/gdrive/MyDrive/training_predict_matrix.txt", training_predict_matrix),
)
for export_path, export_array in exports:
    np.savetxt(export_path, export_array)