# Imports

In [1]:
#Required packages to run the code
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import signal
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
import keras
from keras.callbacks import EarlyStopping
from sklearn.mixture import GaussianMixture
from keras.models import *
from keras.layers import *
import sklearn
from sklearn import svm
import pickle
import seaborn as sns
import geopandas
import random
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.manifold import TSNE
%store -r
%matplotlib tk

Unable to restore variable 'history_100_125', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_25', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_375', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_50', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_625', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_75', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_100_875', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_150_125', ignoring (use %store -d to forget!)
The error was: <class 'KeyError'>
Unable to restore variable 'history_150_25', ignoring (use %store -d to for

# Save and load Models

In [2]:
#Save or load a trained model (Keras CNN via keras, SVM via pickle)
def save_model_cnn(model,name):
    '''
    Persists a trained CNN by delegating to the model's own ``save`` method.

        Parameters:
                model: Object exposing ``save(path)`` (in this notebook, a Keras model)
                name (str): Destination forwarded unchanged to ``model.save``

        Returns:
                None
    '''
    destination = name
    model.save(destination)

def load_model_cnn(name):
    '''
    Restores a CNN previously written to disk.

        Parameters:
                name (str): Location forwarded unchanged to ``keras.models.load_model``

        Returns:
                The object returned by ``keras.models.load_model``
    '''
    restored = keras.models.load_model(name)
    return restored

def save_model_svm(name,model):
    '''
    Serialises a model to disk with pickle.

    Note the argument order (name first) differs from ``save_model_cnn``.

        Parameters:
                name (str): Path of the file to write (opened in binary mode, overwritten if present)
                model: Any picklable object

        Returns:
                None
    '''
    # The context manager guarantees the handle is flushed and closed,
    # even if pickling raises half-way through.
    with open(name, 'wb') as handle:
        pickle.dump(model, handle)
    
def load_model_svm(name):
    '''
    Loads a model previously written with pickle.

        Parameters:
                name (str): Path of the pickle file to read

        Returns:
                The unpickled object
    '''
    # SECURITY: unpickling executes arbitrary code embedded in the file;
    # only load files that were produced locally / come from a trusted source.
    with open(name, 'rb') as handle:
        return pickle.load(handle)

# Gen A and Graph

In [3]:
def get_adjacency(sz,p,undirected):
    '''
    Generates a realization of an Erdős–Rényi Graph Model, undirected or directed.
    -First generates a matrix of random floating point numbers in the range [0.0, 1.0).
    -An entry <= p marks an edge between the corresponding pair of nodes
    -Self-loops are always removed (zero diagonal)
    -Undirected: only the upper triangle is kept and then mirrored, so the matrix is symmetric
    -Directed: every ordered pair (i,j), i != j, keeps its own independent draw

        Parameters:
                sz (int): Number of nodes
                p (float): Probability of existing an edge between each pair of nodes
                undirected (bool): True for a symmetric graph, False for a directed one

        Returns:
                adj (2darray): sz x sz adjacency matrix with integer entries in {0, 1}
    '''
    # One Bernoulli(p) draw per ordered pair of nodes.
    adj = (np.random.random((sz, sz)) <= p).astype(int)
    if(undirected):
        # Strict upper triangle (k=1 also drops the diagonal), then mirror it.
        adj = np.triu(adj, k=1)
        adj = adj + adj.T
    else:
        # Keep both triangles; previously the lower one was discarded, which
        # produced an upper-triangular (acyclic) graph instead of a directed ER graph.
        np.fill_diagonal(adj,0)
    return adj

def get_A(adj,c,rho):
    '''
    Generates the connectivity matrix (interaction weights) from the adjacency matrix according to the laplacian rule

        A = rho * (I - (c / Dmax) * L),   L = D - adj

    where D is the diagonal matrix of row sums of adj and Dmax its largest entry.
    Because every row of L sums to zero, every row of A sums to rho.

        Parameters:
                adj (2darray): Adjacency matrix
                c,rho (float): Scaling factors of the rule above (the notebook uses values between 0 and 1)

        Returns:
                A (2darray): Connectivity matrix
    '''
    sz = len(adj)
    Dvec = np.sum(adj, axis=1)
    Dmax = np.max(Dvec)
    if Dmax == 0:
        # Graph without edges: L is the zero matrix, so the rule reduces to rho * I.
        # Without this guard c/Dmax is inf and inf * 0 fills A with NaN.
        return rho * np.eye(sz)
    ccc = c*1/Dmax
    D = np.diag(Dvec)
    L = D - adj
    Ap = np.eye(sz) - ccc*L
    A = rho * Ap
    return A

# Noise

In [4]:
def generate_noise(N, n_samples, alpha, beta):
    '''
    Draws n_samples noise vectors, one per row:

        z[i] = alpha * x1 + x2 @ (beta/sqrt(N) * ones(N,N))

    with x1, x2 independent standard-normal row vectors of length N drawn
    (in that order) from the global numpy random state.

        Parameters:
                N (int): number of nodes
                n_samples (int): number of samples
                alpha (float): Scale applied to the per-node noise x1
                beta (float): Scale applied (divided by sqrt(N)) to the term shared by all nodes

        Returns:
                z (2darray): n_samples x N noise matrix
    '''
    # Constant N x N mixing matrix: every entry equals beta / sqrt(N).
    common_mix = (beta / np.sqrt(N)) * np.ones((N, N))

    z = np.zeros((n_samples, N))

    # Each out_row is a view into z, so writing through it fills z in place.
    for out_row in z:
        independent = np.random.normal(size=(1, N))
        shared = np.random.normal(size=(1, N))
        out_row[:] = np.matmul(shared, common_mix) + alpha * independent

    return z

# Non-Linear

In [5]:
def nonlinear_fun(x):
    '''
    Transforms the given input according to the following function: arctan (element-wise)
            
            Parameters:
                x (float or ndarray): Value(s) to be transformed
                
            Returns:
                (float or ndarray): arctan(x), same shape as x
    ''' 
    return np.arctan(x)

def gen_time_series_non_linear(A,tsize,x0,noise):
    '''
    Generates the synthetic time series data given the connectivity matrix and the initial condition x(0), 
    according to the dynamical rule x(n + 1) = f(A x(n) + noise(n + 1)), with f = nonlinear_fun

        Parameters:
                A (2darray): Connectivity matrix (sz x sz)
                tsize (int): Time series size - number of samples
                x0 (float): Initial condition, assigned to every node at n = 0
                noise (2darray): tsize x sz noise samples; row 0 is never read

        Returns:
                x (2darray): tsize x sz time series data of the graph
    ''' 
    sz = len(A)
    x = np.zeros((tsize,sz))
    x[0,:] = np.ones((1,sz))*x0
    for i in range(1,tsize):
        # Full matrix-vector product: node j receives A[j,:] . x[i-1,:].
        # The previous per-row loop overwrote the whole of x[i,:] on every
        # pass, so only the LAST row of A ever influenced the series.
        nxt = np.dot(A, x[i-1,:]) + noise[i,:]
        x[i,:] = nonlinear_fun(nxt)
    return x

# Create Dataset

In [6]:
def get_rs(time_series,k):
    '''
    Computes the inverse of the lag-0 Gram matrix and k "power" lag-1 matrices of a time series

        r0s    = time_series.T @ time_series
        r1s[i] = (time_series[1:].T ** i) @ time_series[:-1]      for i = 0 .. k-1

    (note that i = 0 raises every sample to the power 0, i.e. uses a matrix of ones)

        Parameters:
            time_series (2darray): samples x nodes matrix
            k (int): number of features (powers 0 .. k-1 of the one-step-ahead samples)
        
        Returns:
            r0s_inverted (2darray): inverse of r0s (np.linalg.inv raises LinAlgError if r0s is singular)
            r1s (3darray): k x nodes x nodes stack of the matrices r1s[i] defined above
    '''

    sz1,sz2 = time_series.shape

    r1s = np.zeros((k,sz2,sz2))

    aux1 = time_series[0:(sz1-1),:]   # samples 0 .. T-2
    aux2 = time_series[1:(sz1),:]     # samples 1 .. T-1 (one step ahead of aux1)

    r0s = np.matmul(time_series.T,time_series)
    r0s_inverted = np.linalg.inv(r0s)

    for i in range(k):
        aux = np.matmul(np.power(aux2.T,i),aux1)

        r1s[i,:,:] = aux

    return r0s_inverted,r1s

def create_dataset(sz,tsize,undirected,A,time_series,n_features):
    '''
    Extracts one feature vector per unordered pair of nodes and the matching ground truth

    For every pair (i, j) with i < j, enumerated row by row over the upper triangle:
        data[:, n]   = r1s[:, i, j] * r0s_inverted[i, j]      (r1s, r0s_inverted from get_rs)
        target[0, n] = A[i, j]

        Parameters:
                sz (int): Number of nodes
                tsize (int): Unused; kept so existing calls keep working
                undirected (bool): Must be True; the directed case is not implemented
                A (2darray): Ground-Truth matrix A
                time_series (2darray): samples x nodes time series
                n_features (int): Number of features per pair (forwarded to get_rs as k)

        Returns:
                data (2darray): n_features x sz*(sz-1)/2 matrix of feature vectors (one column per pair)
                target (2darray): 1 x sz*(sz-1)/2 matrix with the entries A[i, j] of each pair

        Raises:
                NotImplementedError: if undirected is False
    '''

    if not undirected:
        # Previously this path fell through to `return data,target` with both
        # names unbound and died with an opaque UnboundLocalError.
        raise NotImplementedError("create_dataset only supports undirected graphs")

    r0s_inverted,r1s = get_rs(time_series,n_features)

    # Strict upper triangle, enumerated in the same order as the nested
    # `for i in range(sz): for j in range(i+1, sz)` loops.
    rows, cols = np.triu_indices(sz, k=1)

    # (n_features, n_pairs) * (n_pairs,) broadcasts the per-pair scale over all features.
    data = r1s[:, rows, cols] * r0s_inverted[rows, cols]

    target = np.zeros((1, rows.size))
    target[0,:] = np.asarray(A)[rows, cols]

    return data,target

# Train CNN

In [7]:
def train_model(X_train,y_train,n_features):
    '''
    Builds, compiles and fits the 1-D CNN regressor used throughout the notebook.

    The network ends in a single linear unit and is trained with the MSE loss
    and the Adam optimizer for at most 200 epochs; 10% of the training data is
    held out for validation and training stops early once `val_loss` has not
    improved for 7 epochs.

        Parameters:
                X_train (3darray): Training inputs; the first layer declares input_shape=(n_features, 1)
                y_train (ndarray): Training targets
                n_features (int): Length of each input sequence

        Returns:
                model: The fitted keras Sequential model
    '''
    stopper = EarlyStopping(monitor='val_loss', mode='min',patience=7)

    #CNN architecture (layers are applied in list order)
    layer_stack = [
        Conv1D(filters=64, kernel_size=2, strides=2,activation='relu', input_shape=(n_features,1)),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        Conv1D(filters=256, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        Conv1D(filters=128, kernel_size=2, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=64, kernel_size=1, activation='relu'),
        GlobalMaxPooling1D(),
        Dropout(0.2),
        Dense(200, activation='tanh'),
        Dense(100, activation='tanh'),
        Dense(1,activation='linear'),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer='adam', loss='mse')

    # The training history is not needed by any caller, so it is not kept.
    model.fit(X_train, y_train, epochs=200, validation_split=0.1, callbacks=[stopper], verbose = 0)
    return model

def train_cnn_new_model(X_train,y_train,n_features):
    '''
    Builds, compiles and fits an alternative 1-D CNN binary classifier.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy (on probabilities, not logits) and the Adam optimizer for at
    most 200 epochs; 10% of the training data is held out for validation and
    training stops early once `val_loss` has not improved for 20 epochs.

        Parameters:
                X_train (3darray): Training inputs; the first layer declares input_shape=(n_features, 1)
                y_train (ndarray): Binary training labels
                n_features (int): Length of each input sequence

        Returns:
                model: The fitted keras Sequential model
    '''
    cb = EarlyStopping(monitor='val_loss', mode='min',patience=20)

    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_features, 1)))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # `tf` is never imported in this notebook, so the former
    # tf.keras.losses.BinaryCrossentropy reference raised NameError.
    # The same loss is reachable through the already-imported `keras` package.
    model.compile(optimizer='adam',loss=keras.losses.BinaryCrossentropy(from_logits=False),metrics=['accuracy'])

    # The training history is not needed by any caller, so it is not kept.
    model.fit(X_train, y_train, epochs=200, validation_split=0.1, callbacks=[cb], verbose = 0)
    return model

# Parameters and Gen Data

In [38]:
#Parameters of the synthetic training set.
#NOTE(review): no random seed is set, so every execution of this cell draws a
#different graph and a different noise realization.
sz = 40     #Number of nodes
p = 0.50  #Probability of nodes being connected (Erdős–Rényi)
c = 0.9     #Laplacian scaling passed to get_A
rho = 0.75  #Global scaling passed to get_A (each row of A sums to rho)

#Noise scales passed to generate_noise
alpha = 0.1   #scale of the per-node (independent) noise term
beta = 0      #scale of the term shared by all nodes; 0 disables it
tsize = 1000000    #Number of samples (time series size)
x0 = 0            #Initial condition
k = 500           #Number of features per node pair (powers 0..k-1 in get_rs)

undirected = True #graph undirected or not

#Random adjacency matrix of the graph
adj = get_adjacency(sz,p,undirected)

#Connectivity (interaction-weight) matrix derived from the adjacency matrix
A = get_A(adj,c,rho)

#tsize x sz noise matrix driving the dynamics
noise = generate_noise(sz,tsize,alpha,beta)

#get time series with diagonal noise
time_series = gen_time_series_non_linear(A,tsize,x0,noise)

#data: k x n_pairs feature matrix, target: 1 x n_pairs entries of A
data,target = create_dataset(sz,tsize,undirected,A,time_series,k)

#Binary labels: 1 where the ground-truth weight is strictly positive, 0 otherwise
y = target>0
y = y.astype(int)

#Persist both arrays with IPython's %store so other sessions can restore them with `%store -r`
%store data
%store y

Stored 'data' (ndarray)
Stored 'y' (ndarray)


In [8]:
# Each data_cnn_N keeps N consecutive rows of `data`; row r holds the feature
# built from power r in get_rs, so a slice selects a contiguous range of powers.
# NOTE(review): the windows are not all centred on the same row. Sizes 4-40 and
# 60-90 are centred around row 149, while 50 and 100-450 are centred around row
# 249 -- confirm this is intentional before comparing models across sizes.
data_cnn_4 = data[147:151,:]

data_cnn_10 = data[144:154,:]

data_cnn_24 = data[137:161,:]

data_cnn_30 = data[134:164,:]

data_cnn_40 = data[129:169,:]

data_cnn_50 = data[224:274,:]

data_cnn_60 = data[119:179,:]

data_cnn_70 = data[114:184,:]

data_cnn_80 = data[109:189,:]

data_cnn_90 = data[104:194,:]

data_cnn_100 = data[199:299,:]

data_cnn_150 = data[174:324,:]

data_cnn_200 = data[149:349,:]

data_cnn_250 = data[124:374,:]

data_cnn_300 = data[99:399,:]

data_cnn_350 = data[74:424,:]

data_cnn_400 = data[49:449,:]

data_cnn_450 = data[24:474,:]

# Full feature set: same object as `data` (no copy is made).
data_cnn_500 = data

# TSNE

In [21]:
# Boolean masks over the node pairs: connected (label 1) / disconnected (label 0).
idx_c = y[0] > 0
idx_d = y[0] < 1

# Fit ONE embedding on all pairs and split it by label afterwards.
# Two separate TSNE fits (one per class) live in unrelated coordinate systems,
# so overlaying them on the same axes says nothing about class separability.
X_tsne = TSNE(learning_rate=100).fit_transform(data.T)
X_tsne_c = X_tsne[idx_c,:]
X_tsne_d = X_tsne[idx_d,:]

plt.figure()
plt.scatter(X_tsne_c[:, 0], X_tsne_c[:, 1],c = 'green')
plt.scatter(X_tsne_d[:, 0], X_tsne_d[:, 1],c = 'red')
plt.title("Feature Reduction to 2d using t-SNE")
plt.legend(['Connected','Disconected'], loc = 4)

(780, 500)




<matplotlib.legend.Legend at 0x2521f8802b0>

# MI Between Features and A

In [40]:
# Mutual information between every feature (one per power index) and the edge labels.
# SelectKBest is used only as a convenient way to obtain the per-feature scores;
# the k=1 selection itself is never applied.
skb = SelectKBest(mutual_info_classif, k=1)
skb.fit(data.T, y.T.flatten())
mi = skb.scores_

# x axis: feature (power) indices 0 .. k-1
ks = np.arange(k)

plt.figure()
plt.plot(ks, mi)
plt.xlabel("k values")
plt.title("Mutual-Info between A and Non-Linear Features")
plt.ylabel("Mutual-Information")

Text(0, 0.5, 'Mutual-Information')

# Train CNN

## 50 features

In [10]:
# Train `nruns` CNNs on the 50-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_50.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i+1))

    #Train the model
    model = train_model(X_train,y_train,50)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_50features_0.50p_sergioArchitecture')

Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50




INFO:tensorflow:Assets written to: cnn_nonlinear_50features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_50features_0.50p_sergioArchitecture\assets


## 100 Features

In [9]:
# Train `nruns` CNNs on the 100-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_100.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,100)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_100features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49




INFO:tensorflow:Assets written to: cnn_nonlinear_100features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_100features_0.50p_sergioArchitecture\assets


## 150 Features

In [9]:
# Train `nruns` CNNs on the 150-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_150.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,150)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_150features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49




INFO:tensorflow:Assets written to: cnn_nonlinear_150features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_150features_0.50p_sergioArchitecture\assets


## 200 Features

In [9]:
# Train `nruns` CNNs on the 200-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_200.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,200)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_200features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49




INFO:tensorflow:Assets written to: cnn_nonlinear_200features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_200features_0.50p_sergioArchitecture\assets


## 250 Features

In [9]:
# Train `nruns` CNNs on the 250-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_250.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,250)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_250features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49




INFO:tensorflow:Assets written to: cnn_nonlinear_250features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_250features_0.50p_sergioArchitecture\assets


## 300 Features

In [9]:
# Train `nruns` CNNs on the 300-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_300.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,300)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_300features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49




INFO:tensorflow:Assets written to: cnn_nonlinear_300features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_300features_0.50p_sergioArchitecture\assets


## 350 Features

In [10]:
# Train `nruns` CNNs on the 350-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_350.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,350)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_350features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9




INFO:tensorflow:Assets written to: cnn_nonlinear_350features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_350features_0.50p_sergioArchitecture\assets


## 400 Features

In [9]:
# Train `nruns` CNNs on the 400-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_400.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,400)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_400features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9




INFO:tensorflow:Assets written to: cnn_nonlinear_400features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_400features_0.50p_sergioArchitecture\assets


## 450 Features

In [9]:
# Train `nruns` CNNs on the 450-feature window and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_450.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,450)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_450features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9




INFO:tensorflow:Assets written to: cnn_nonlinear_450features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_450features_0.50p_sergioArchitecture\assets


## 500 Features

In [10]:
# Train `nruns` CNNs on the full 500-feature set and save the most accurate one.
nruns = 10
models_list = []
performance_list = []

# random_state is fixed, so the split is identical in every run: compute it once.
X_train, X_test, y_train, y_test = train_test_split(data_cnn_500.T,y.T, test_size=0.2, random_state=42)

# Add the trailing channel axis required by train_model (input_shape=(n_features, 1)).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

#Disconnected samples
idd = np.squeeze(y_test < 1)

#Connected samples
idc = np.squeeze(y_test > 0)

for i in range(nruns):
    print("Run " + str(i))

    #Train the model
    model = train_model(X_train,y_train,500)
    models_list.append(model)

    #Predict the samples separately
    dpred = model.predict(X_test[idd,:,:],verbose = 0)
    cpred = model.predict(X_test[idc,:,:],verbose = 0)

    # Midpoint between the largest "disconnected" and the smallest "connected" score.
    # NOTE(review): the threshold is derived from the test labels, so `acc` is optimistic.
    threshold = (np.max(dpred) + np.min(cpred)) / 2

    trued = np.sum(dpred<threshold)
    truec = np.sum(cpred>threshold)
    true = trued + truec
    acc = true/(len(dpred)+len(cpred))*100

    performance_list.append(acc)

#Select the best model
idx = performance_list.index(max(performance_list))
best_model = models_list[idx]
save_model_cnn(best_model,'cnn_nonlinear_500features_0.50p_sergioArchitecture')

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9




INFO:tensorflow:Assets written to: cnn_nonlinear_500features_0.50p_sergioArchitecture\assets


INFO:tensorflow:Assets written to: cnn_nonlinear_500features_0.50p_sergioArchitecture\assets


# Test

## 50% Prob

In [23]:
#Parameters
t_samples = 100000
step = 5000
n_runs = 10

sz = 40     #Number of nodes
p = 0.50  #Probability of nodes being connected (Erdős–Rényi)
c = 0.9
rho = 0.75
undirected = True

#Define the range of noise variance
alpha = 0.1
beta = 0
x0 = 0            #Initial condition
k = 500

x = np.arange(step,t_samples,step)

s1 = "C:/Users/seabr/Desktop/investigação/Code/models/NonLinear/cnn_nonlinear_"
s2 = "features_0.50p_sergioArchitecture"

models = ["500"]
models_performance = np.zeros((len(models),len(x)))

count = 0
for i in models:
    models[count] = s1 + i + s2
    count +=1


count_2 = 0

for samples in x:
    print("Samples: " + str(samples))
    performance_list = np.zeros((len(models),n_runs))

    for i in range(n_runs):

        adj = get_adjacency(sz,p,undirected)

        A = get_A(adj,c,rho)

        noise = generate_noise(sz,samples,alpha,beta)

        #get time series with diagonal noise
        time_series = gen_time_series_non_linear(A,samples,x0,noise)

        data,target = create_dataset(sz,samples,undirected,A,time_series,k)

        data_cnn_50 = data[224:274,:]
        data_cnn_50 = data_cnn_50.T.reshape((data_cnn_50.T.shape[0], data_cnn_50.T.shape[1], 1))
        
        data_cnn_100 = data[199:299,:]
        data_cnn_100 = data_cnn_100.T.reshape((data_cnn_100.T.shape[0], data_cnn_100.T.shape[1], 1))

        data_cnn_150 = data[174:324,:]
        data_cnn_150 = data_cnn_150.T.reshape((data_cnn_150.T.shape[0], data_cnn_150.T.shape[1], 1))

        data_cnn_200 = data[149:349,:]
        data_cnn_200 = data_cnn_200.T.reshape((data_cnn_200.T.shape[0], data_cnn_200.T.shape[1], 1))

        data_cnn_250 = data[124:374,:]
        data_cnn_250 = data_cnn_250.T.reshape((data_cnn_250.T.shape[0], data_cnn_250.T.shape[1], 1))

        data_cnn_300 = data[99:399,:]
        data_cnn_300 = data_cnn_300.T.reshape((data_cnn_300.T.shape[0], data_cnn_300.T.shape[1], 1))

        data_cnn_350 = data[74:424,:]
        data_cnn_350 = data_cnn_350.T.reshape((data_cnn_350.T.shape[0], data_cnn_350.T.shape[1], 1))

        data_cnn_400 = data[49:449,:]
        data_cnn_400 = data_cnn_400.T.reshape((data_cnn_400.T.shape[0], data_cnn_400.T.shape[1], 1))

        data_cnn_450 = data[24:474,:]
        data_cnn_450 = data_cnn_450.T.reshape((data_cnn_450.T.shape[0], data_cnn_450.T.shape[1], 1))

        data_cnn_500 = data
        data_cnn_500 = data_cnn_500.reshape((data_cnn_500.T.shape[0], data_cnn_500.T.shape[1], 1))

        #datas = [data_cnn_50,data_cnn_100,data_cnn_150,data_cnn_200,data_cnn_250,data_cnn_300,data_cnn_350,data_cnn_400,data_cnn_450,data_cnn_500]
        datas = [data_cnn_500]

        y = target>0
        y = y.astype(int)

        #get predicts
        count = 0
        for m in models:
            model = load_model_cnn(m)

            pred = model.predict(datas[count],verbose = 0)

            y_pred = pred.T > np.mean(pred)
            y_pred = y_pred.astype(int)
            trues = np.sum(y == y_pred)
            acc = (trues/len(y_pred[0]))*100

            performance_list[count][i] = acc

            count += 1

    for j in range(len(models)):
        models_performance[j][count_2] = np.mean(performance_list[j])
    
    count_2 +=1

%store models_performance


Samples: 5000
Samples: 10000
Samples: 15000
Samples: 20000
Samples: 25000
Samples: 30000
Samples: 35000
Samples: 40000
Samples: 45000
Samples: 50000
Samples: 55000
Samples: 60000
Samples: 65000
Samples: 70000
Samples: 75000
Samples: 80000
Samples: 85000
Samples: 90000
Samples: 95000
Stored 'models_performance' (ndarray)


# PLOT

In [9]:
# Rebuild the sample-count axis used during evaluation (step, 2*step, ... < t_samples).
step = 5000
t_samples = 100000
x = np.arange(step, t_samples, step)

# Mean accuracy of the first (and only) evaluated model versus number of samples.
accuracy_curve = models_performance[0]

plt.figure()
plt.plot(x, accuracy_curve)
plt.xlabel('Samples')
plt.ylabel('Accuracy')
plt.title("500 features results on non-linear system")
plt.legend(['500 features model'],loc = 4)

[[50.25641026 49.30769231 50.07692308 49.21794872 50.6025641  50.3974359
  49.64102564 48.98717949 49.67948718 50.15384615 49.64102564 49.96153846
  50.28205128 50.84615385 50.43589744 50.61538462 50.12820513 50.15384615
  50.03846154]]


<matplotlib.legend.Legend at 0x1dc9ec82e00>