In [None]:
# import used tools


import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from scipy import stats
from scipy.stats import norm



# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

In [None]:
# Load the data for dim = 8.
# Column '0' is the target (robustness of coherence); the remaining
# dim * dim columns are the matrix elements.
# The file names can be changed in the MATLAB program that generates the data.

DIM = 8

dataset_paths = [
    'realEntries8.csv',
    'realEntries8-1.csv',
    'realEntries8-2.csv',
    'realEntries8-3.csv',
    'realEntries8-4.csv',
    'realEntries8-5.csv',
    'small_coherence_8.csv',
    'realEntriesPure8.csv',
]

# Column labels '0' .. '64': coherence + dim * dim matrix entries.
column_names = [str(i) for i in range(DIM * DIM + 1)]


def read_entries(path):
  """Read one CSV file, labelling its columns with the shared `column_names`.

  "?" is parsed as a missing value, exactly as for every other input file.
  """
  return pd.read_csv(path, names=column_names,
                     na_values="?", comment='\t',
                     skipinitialspace=True)


# Stack all files into a single frame with a fresh 0..n-1 index.
raw_dataset = pd.concat([read_entries(path) for path in dataset_paths],
                        ignore_index=True)

# Shuffle the rows. `sample` returns a new frame, so `raw_dataset` is left
# untouched; the fixed seed makes the shuffle repeatable between runs.
dataset = raw_dataset.sample(frac=1.0, random_state=0).reset_index(drop=True)

dataset.head()
#dataset.shape

In [None]:
# (rows, columns) of the combined, shuffled dataset.
dataset.shape

In [None]:
# Check for bad (missing) values in the data and remove the affected rows.

# Count the missing values per column before dropping, so the check is
# actually visible in the cell output.
na_counts = dataset.isna().sum()
dataset = dataset.dropna()

# Only the columns that had missing values; an empty result means none.
na_counts[na_counts > 0]

In [None]:
# Split the data: a seeded random 80% of the rows is used for training and
# every row that was not picked becomes the test set.

train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

In [None]:
# (rows, columns) of the held-out test set.
test_dataset.shape

In [None]:
# Summary statistics of the matrix elements in the training set.
# The target column '0' is excluded; after the transpose each row describes
# one matrix element.
train_stats = train_dataset.describe().drop(columns="0").transpose()
train_stats

In [None]:
# Separate the target (column '0') from the features.
# `pop` removes the column from each frame in place, so afterwards the two
# frames hold only the matrix elements.
train_labels, test_labels = (
    frame.pop('0') for frame in (train_dataset, test_dataset)
)

In [None]:
# To normalize the data.
def norm(x, stats=None):
  """Standardize features using per-column mean and standard deviation.

  Note: this definition shadows `norm` imported from `scipy.stats` at the
  top of the notebook.

  Args:
    x: DataFrame whose columns match the index of `stats`.
    stats: DataFrame indexed by feature name with 'mean' and 'std' columns.
      Defaults to the global `train_stats`, so both the training and the
      test data are scaled with training-set statistics.

  Returns:
    DataFrame of the same shape as `x` holding (x - mean) / std.
  """
  if stats is None:
    stats = train_stats
  # A constant feature has std == 0; dividing by it yields NaN (0/0) or
  # +/-inf. Use a unit scale instead so such a feature maps to x - mean.
  scale = stats['std'].replace(0, 1.0)
  return (x - stats['mean']) / scale
# Scale both splits with `norm`; any NaN left after scaling is replaced by 0.
scaled_train = norm(train_dataset)
scaled_test = norm(test_dataset)
normed_train_data = scaled_train.fillna(0)
normed_test_data = scaled_test.fillna(0)

In [None]:
# To build the model.
def build_model(input_dim=None):
  """Build and compile the fully connected regression network.

  Three hidden Dense layers of 256 ReLU units feed a single linear output
  unit. The model is compiled with MSE loss, the Adam optimizer
  (learning rate 0.001) and MAE / MSE metrics.

  Args:
    input_dim: number of input features. Defaults to the number of columns
      currently in the global `train_dataset`.

  Returns:
    The compiled `keras.Sequential` model.
  """
  if input_dim is None:
    input_dim = len(train_dataset.keys())

  model = keras.Sequential([
    # Declare the input explicitly instead of passing `input_shape` to the
    # first Dense layer.
    keras.Input(shape=(input_dim,)),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [None]:
# Build a model instance so its architecture can be inspected.
model = build_model()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
def plot_history(history):
  """Plot training and validation error curves of a Keras training run.

  Draws two figures: MAE against epoch, which is also written to
  'd8MAE.eps', and MSE against epoch, which is only shown.

  Args:
    history: object returned by `model.fit`. Its `history` dict must hold
      the keys 'mae', 'val_mae', 'mse' and 'val_mse', and its `epoch`
      attribute the matching epoch numbers.
  """
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  # Matplotlib 3.6 renamed the bundled seaborn style to 'seaborn-v0_8' and
  # 3.8 removed the old name, so use whichever this installation provides.
  if 'seaborn-v0_8' in plt.style.available:
    seaborn_style = 'seaborn-v0_8'
  else:
    seaborn_style = 'seaborn'
  plt.style.use(seaborn_style)

  # Figure 1: mean absolute error.
  plt.figure(figsize=(4,3),dpi=120)
  plt.xlabel('Epoch')
  plt.ylabel('MAE')
  plt.plot(hist['epoch'], hist['mae'],
           label='Train Error', color='#00468C')
  plt.plot(hist['epoch'], hist['val_mae'],
           label = 'Validation Error', color='#E95C4B')
  plt.ylim([0,0.14])
  plt.legend()
  plt.savefig('d8MAE.eps',bbox_inches='tight')

  # Figure 2: mean squared error.
  plt.figure(figsize=(4,3),dpi=120)
  plt.xlabel('Epoch')
  plt.ylabel('Mean Square Error')
  plt.plot(hist['epoch'], hist['mse'],
           label='Train Error',color='#00468C')
  plt.plot(hist['epoch'], hist['val_mse'],
           label = 'Val Error',color='#E95C4B')
  plt.ylim([0,0.3])
  plt.legend()
  plt.grid(True)
  plt.show()


In [None]:
# To train the model

class PrintDot(keras.callbacks.Callback):
  """Progress indicator: one dot per epoch, a new line every 100 epochs."""

  def on_epoch_end(self, epoch, logs=None):
    """Print the progress dot for `epoch`.

    `logs` is not used; it defaults to None to match the signature of
    `keras.callbacks.Callback.on_epoch_end`.
    """
    if epoch % 100 == 0:
      print('')
    print('.', end='')
    
EPOCHS = 1000

model = build_model()

# Stop once the validation loss has not improved for 10 consecutive epochs.
# `restore_best_weights=True` rolls the model back to the epoch with the
# lowest validation loss; without it the model keeps the weights of the
# last epoch, i.e. after 10 epochs without improvement.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                           restore_best_weights=True)

# 20% of the training data is held back by Keras for validation.
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split=0.2, verbose=0,
                    callbacks=[early_stop, PrintDot()])

plot_history(history)

In [None]:
# Evaluate the trained model on the held-out test set.
# `evaluate` returns the loss followed by the compiled metrics (MAE, MSE).

test_metrics = model.evaluate(normed_test_data, test_labels, verbose=2)
loss, mae, mse = test_metrics

print("Testing set Mean Abs Error: {:5.2f}".format(mae))

In [None]:
# Check the prediction performance on the test set.

test_predictions = model.predict(normed_test_data).flatten()

# Coefficient of determination: R^2 = 1 - SS_res / SS_tot.
residual_ss = np.sum((test_labels - test_predictions) ** 2)
total_ss = np.sum((test_labels - np.mean(test_labels)) ** 2)
r2 = round(1 - residual_ss / total_ss, 4)

plt.figure(figsize=(4,4),dpi=120)
plt.style.use('classic')
# This notebook handles the dim = 8 data, so the annotation must say d = 8.
plt.text(1.5,0.75, "d = 8",fontsize=16, fontweight='bold',fontfamily='Times New Roman')
plt.text(1.5,0.5,"R$^2$ = " + str(r2) + "",fontsize=16, fontweight='bold',fontfamily='Times New Roman')
# Only the first 1000 test points are drawn to keep the figure readable.
plt.scatter(test_labels[:1000], test_predictions[:1000],
            s=20,c='none',edgecolor=(20/255,81/255,124/255),marker="o",alpha=1,linewidth=0.3)
plt.xlabel('Actual ROC')
plt.ylabel('Predicted ROC')
plt.axis('equal')
plt.axis('square')
# NOTE(review): the [0, 4] limits may have been carried over from a smaller
# dimension - confirm they cover the range of the d = 8 targets.
plt.xlim([-0.,4.])
plt.ylim([-0.,4.])
# Reference line y = x (perfect prediction).
plt.plot([0, 10], [0, 10],c='r',lw=1.5,alpha=1)
plt.xticks(fontsize=12, fontname='Times New Roman')
plt.yticks(fontsize=12, fontname='Times New Roman')


plt.savefig('d8.eps',bbox_inches='tight')
plt.show()

In [None]:
# Save the trained model to the file 'd8.keras'.

model.save('d8.keras')

In [None]:
# Reload the saved model from 'd8.keras', replacing the in-memory `model`.

model = keras.models.load_model('d8.keras')