# Data analysis of the results from the regular graph network

This notebook generates the plots and analyses the results of the direct memory and 3-bit parity tasks performed on the regular graph network defined in the "Reservoir network training" notebook.

In [None]:
#import packages

import sklearn as skl
import matplotlib.pyplot as plt
import numpy as np
import sys
from scipy import stats
from scipy.sparse import csr_matrix
import warnings
warnings.filterwarnings('ignore')

from scipy.io import loadmat

import math
import random

# 1. Define the parameters and helper functions

Choose the parameters of the given simulation you want to study and define functions for plotting the results and quantifying their significance. The results of the simulations are generated in the "Reservoir network training" notebook.

### 1.1 Define the parameters

In [None]:
# Settings of the simulation to analyse. They have to match a run that was already
# produced, because they are used below to build the names of the result files.

K, n_spins = 8, 300                 # network connectivity, number of neurons
var_e = 5                           # encoder variance
perturbations, it = 10, 50          # number of var_w values, "_iterations" field of the file names
n_tau = 9
tau = np.arange(1, n_tau + 1)       # delays 1..n_tau
sigma_e_critical = np.sqrt(np.array([var_e]))

### 1.2 Helper function to plot the simulation results

In [None]:
import matplotlib.colors as mcolors

def plot_results(X, tau, var_w, var_w_critical, data_type, tau1, tau2, var_idx):
    '''
    Plot the sample mean of a score against the delay, with standard-error bars.

    Parameters:
    X (np.3darray): scores indexed as X[var_w index, tau index, sample]; the mean and the
        standard error are taken over the last axis
    tau (np.array): delay values, indexed like the second axis of X
    var_w (np.array): 1-D array of var_w values, indexed like the first axis of X (legend labels)
    var_w_critical: critical value of var_w, only shown (rounded) in the title
    data_type (str): "MI" or "acc", selects the y-label and the title; any other value
        leaves both unset
    tau1, tau2 (int): the slice tau[tau1:tau2] of delays that is drawn
    var_idx (iterable of int): indices along the first axis of X to draw, one curve each
    '''
    from itertools import cycle  # local import so the function does not rely on another cell

    X_mean = np.mean(X, axis = 2)
    X_ste = np.std(X, axis = 2)*(1/np.sqrt(X.shape[2])) #standard error calculation (standard deviation/sqrt(N))
    delays = tau[tau1:tau2]

    plt.figure(figsize=(10,8))
    # cycle() re-uses the palette when more curves than colours are requested; a plain zip()
    # with the ten-entry palette would silently drop every curve after the tenth.
    for i, c in zip(var_idx, cycle(mcolors.TABLEAU_COLORS)):
        # A single errorbar call draws both the line and the bars in the same colour.
        plt.errorbar(delays, X_mean[i,tau1:tau2], yerr = X_ste[i,tau1:tau2], color = c,
                     label = "var_w {}".format(np.round(var_w[i],3)))

    plt.xlabel("tau", fontsize = 15)

    if data_type == "MI":
        plt.ylabel("MI", fontsize = 15)
        plt.title("Mutual Information for varying distances from the critical line: var_w = "
                  +str(np.round(var_w_critical,3)), fontsize = 15)
    elif data_type == "acc":
        plt.ylabel("Accuracy", fontsize = 15)
        plt.title("Mean accuracy for varying distances from the critical line: var_w = "
                  +str(np.round(var_w_critical,3)), fontsize = 15)
    plt.legend(bbox_to_anchor=(0.7, 1), loc='upper left', prop = {'size' : 15})

### 1.3 Helper functions to quantify the statistical significance of the results

In [None]:
from scipy import stats
import pandas as pd


def p_value_matrix(X, idx_var):
    '''
    One-sided Welch t-test of every var_w row of X against the row at criticality.

    Parameters:
    X (np.3darray): numpy array containing the given score (MI or accuracy) in (n_var, n_tau, n_samples)
    idx_var (int): index along the first axis of X where the network is at criticality

    Returns:
    np.2darray of shape (n_var, n_tau): p-values, rounded to 4 decimals, of the test that the
    mean over samples at (var_w, tau) is greater than the mean at (critical var_w, tau).
    '''
    n_var, n_tau, _ = X.shape
    critical_scores = X[idx_var,:,:]

    pvals = np.zeros((n_var, n_tau))
    for row, scores in enumerate(X):
        # Welch's test (no equal-variance assumption), one tail: "row mean > critical mean";
        # axis=1 runs one test per tau over the samples.
        outcome = stats.ttest_ind(scores, critical_scores, axis = 1,
                                  equal_var = False, alternative = "greater")
        pvals[row,:] = np.round(outcome[1], 4)

    return pvals

def highlighter(cell_value):
    '''Styler callback: yellow background CSS for values below 0.05, otherwise None (no style).'''
    is_significant = cell_value < 0.05
    return "background-color: yellow" if is_significant else None
    
def p_val_dataframe(X, data_type, task_type, var_w_array = None, var_w_crit = None,
                    K = K, n_spins = n_spins, var_e = var_e):

    '''
    Generates a LaTeX table of p-values representing whether the MI or mean accuracy are higher than
    that at the critical line for a range of var_w and tau values.

    Parameters:
    X (np.3darray): numpy array containing the given score (MI or accuracy) in (n_var, n_tau, n_samples)
    data_type (str): name of the score used in the caption, e.g. "MI" or "mean accuracy"
    task_type (str): name of the task used in the caption, e.g. "direct memory" or "3-bit parity"
    var_w_array (np.array): the var_w values, one per row of X (any shape, it is flattened).
        If None, the notebook-level `var_w_perturbed` is used.
    var_w_crit (np.array): array with one value (float) denoting the critical value of var_w.
        If None, the notebook-level `var_w_critical` is used.
    K (int): connectivity of the network
    n_spins (int): number of neurons in the network
    var_e (float): variance of the encoder

    Returns:
    str: LaTeX source of the table. Column labels are taken from the notebook-level `tau`.

    Raises:
    ValueError: if the critical value is not (numerically close to) an entry of var_w_array
    '''

    # Resolve the notebook-level arrays at call time. As default arguments they would be
    # evaluated when this cell is run, i.e. before the data-loading cells have created them.
    if var_w_array is None:
        var_w_array = var_w_perturbed
    if var_w_crit is None:
        var_w_crit = var_w_critical

    # Flatten both inputs so neither the stored array shape nor a global length matters
    var_w_flat = np.ravel(var_w_array)
    crit_value = np.ravel(var_w_crit)[0]

    #find the index of the critical var_w
    matches = np.flatnonzero(np.isclose(var_w_flat, crit_value))
    if matches.size == 0:
        raise ValueError("var_w_crit is not one of the values in var_w_array")
    idx_var_crit = matches[0]

    #generate the matrix of p-values, either for MI or accuracy score
    p_val_mat = p_value_matrix(X, idx_var_crit)

    #define the rows of the dataframe as the values of var_w
    rows = np.round(var_w_flat, 3).astype(str)

    #generate the dataframe
    df = pd.DataFrame(p_val_mat, index = rows, columns = tau)
    df = df.rename_axis("tau", axis="columns")
    df = df.rename_axis("var_w", axis = "index")

    properties = {"border": "1px solid black", "width": "65px", "text-align": "center"}

    # Raw strings for every piece containing LaTeX: in a normal string "\t" (in $\tau$) and
    # "\a" (in $\alpha$) are TAB and BEL escapes, which corrupts the caption.
    # NOTE(review): the last sentence promises a red highlight of the critical var_w, but
    # nothing in this function applies one - confirm whether it should be added or dropped.
    caption = (
        "p-value for " + str(data_type) + " difference on the " + str(task_type)
        + r" task with $\sigma_e^2$ = " + str(var_e) + ", K = " + str(K)
        + " and " + str(n_spins) + " neurons. "
        + r"Marked in yellow are the respective values of $\sigma_w^2$ and $\tau$ "
        + "that are significantly greater than the " + str(data_type)
        + r" at criticality at the $\alpha = 0.05$ confidence level. "
        + r"The critical value for $\sigma_w^2$ is highlighted in red."
    )

    # Styler.applymap was renamed to Styler.map in pandas 2.1; use whichever exists
    styler = df.style
    apply_elementwise = styler.map if hasattr(styler, "map") else styler.applymap

    #export to latex
    df_latex = apply_elementwise(highlighter)\
      .set_properties(**properties).format(precision = 3).to_latex(
        caption = caption,
        position = "!h",
        clines = "all;index",
        convert_css = True,
        position_float = "centering",
        multicol_align = "|c|",
        hrules = True)

    return df_latex


# 2. Results for direct memory task

Plot the results for MI and accuracy on the direct memory task for a range of network parameters as a function of delay time. Quantify the statistical significance of the results at the 95% confidence level. In particular, this code tests whether the MI and the mean accuracy are statistically significantly higher than their values at criticality.

In [None]:
# Directory holding the saved .npy results of the direct memory task
path = "/home/elosegui/MSc_thesis_project/numpy_results/direct_memory_task/"

# Every file name is "<quantity>_K<K>_iterations<it>_N<n_spins>_var_e<var_e>.npy"
mean_accuracy = np.load(f"{path}accuracy_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
MI_score = np.load(f"{path}MI_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
var_w_perturbed = np.load(f"{path}var_w_perturbed_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
var_w_critical = np.load(f"{path}var_w_critical_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")





### 2.1 Plot the results for Mutual Information and Accuracy

In [None]:
# Zoomed view of the MI: a few var_w values over a window of delays

path_plots = "/home/elosegui/MSc_thesis_project/numpy_results/direct_memory_task/plots/"

X, data_type = MI_score, "MI"
tau1, tau2 = 3, 8               # slice tau[tau1:tau2] that is drawn
var1_idx, var2_idx = 4, 9       # not used by this cell
var_idx = [3,8,9]               # rows of X (var_w values) that are drawn

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical,
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}zoom")



In [None]:
# Zoomed view of the accuracy: a few var_w values over a window of delays

X, data_type = mean_accuracy, "acc"
tau1, tau2 = 3, 7               # slice tau[tau1:tau2] that is drawn
var_idx = [4,8,9]               # rows of X (var_w values) that are drawn

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical[0],
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}zoom")

In [None]:
#all MI results

# Select the MI data explicitly. The preceding cell leaves X / data_type pointing at the
# accuracy results, so without these two lines this cell would plot the accuracy and
# save it under the "acc" file name.
X = MI_score
data_type = "MI"
tau1 = 0
tau2 = len(tau)                     # all available delays
var_idx = np.arange(perturbations)  # every var_w value

plot_results(X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical[0],
             data_type, tau1, tau2, var_idx)

plt.savefig(path_plots+data_type+"K" +str(K)+"_iterations"+str(it)+"_N"+str(n_spins)+"_var_e"+str(var_e))

In [None]:
# LaTeX table with the significance test of the MI (direct memory task)

print(
    p_val_dataframe(
        MI_score, "MI", "direct memory",
        var_w_array = var_w_perturbed,
        var_w_crit = var_w_critical,
        K = K, n_spins = n_spins, var_e = var_e,
    )
)


In [None]:
# LaTeX table with the significance test of the mean accuracy (direct memory task)

print(
    p_val_dataframe(
        mean_accuracy, "mean accuracy", "direct memory",
        var_w_array = var_w_perturbed,
        var_w_crit = var_w_critical,
        K = K, n_spins = n_spins, var_e = var_e,
    )
)





# Results for 3-bit parity task

Plot the results for MI and accuracy on the 3-bit parity task for a range of network parameters as a function of delay time. Quantify the statistical significance of the results at the 95% confidence level. In particular, this code tests whether the MI and the mean accuracy are statistically significantly higher than their values at criticality.

In [None]:
# Directory holding the saved .npy results of the 3-bit parity task
path = "/home/elosegui/MSc_thesis_project/numpy_results/3bit_parity/"

# Every file name is "<quantity>_K<K>_iterations<it>_N<n_spins>_var_e<var_e>.npy"
mean_accuracy = np.load(f"{path}accuracy_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
MI_score = np.load(f"{path}MI_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
var_w_perturbed = np.load(f"{path}var_w_perturbed_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")
var_w_critical = np.load(f"{path}var_w_critical_K{K}_iterations{it}_N{n_spins}_var_e{var_e}.npy")

In [None]:
# Zoomed view of the MI: a few var_w values over the first delays

path_plots = "/home/elosegui/MSc_thesis_project/numpy_results/3bit_parity/plots/"

X, data_type = MI_score, "MI"
tau1, tau2 = 0, 4               # slice tau[tau1:tau2] that is drawn
var1_idx, var2_idx = 4, 9       # not used by this cell
var_idx = [4,7,9]               # rows of X (var_w values) that are drawn

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical,
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}zoom")

In [None]:
# Full view: every var_w value over all delays.
# X and data_type are whatever the previous plotting cell left behind (the MI data when
# the notebook is run top to bottom).

tau1, tau2 = 0, 10
var_idx = np.arange(10)

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical,
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}")

In [None]:
# Zoomed view of the accuracy: a few var_w values over the first delays

X, data_type = mean_accuracy, "acc"
tau1, tau2 = 0, 4               # slice tau[tau1:tau2] that is drawn
var_idx = [4,7,9]               # rows of X (var_w values) that are drawn

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical,
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}zoom")

In [None]:
# Full view: every var_w value over all delays.
# X and data_type are whatever the previous plotting cell left behind (the accuracy data
# when the notebook is run top to bottom).

tau1, tau2 = 0, 10
var_idx = np.arange(10)

plot_results(
    X, tau, var_w_perturbed.reshape(perturbations,), var_w_critical,
    data_type, tau1, tau2, var_idx,
)

plt.savefig(f"{path_plots}{data_type}K{K}_iterations{it}_N{n_spins}_var_e{var_e}")

In [None]:
# LaTeX table with the significance test of the MI (3-bit parity task)

print(
    p_val_dataframe(
        MI_score, "MI", "3-bit parity",
        var_w_array = var_w_perturbed,
        var_w_crit = var_w_critical,
        K = K, n_spins = n_spins, var_e = var_e,
    )
)

In [None]:
# LaTeX table with the significance test of the mean accuracy (3-bit parity task)

print(
    p_val_dataframe(
        mean_accuracy, "mean accuracy", "3-bit parity",
        var_w_array = var_w_perturbed,
        var_w_crit = var_w_critical,
        K = K, n_spins = n_spins, var_e = var_e,
    )
)

# 3. Paff and Naff

Calculate the theoretical probability of the input affecting a neuron (Paff) and the expected number of neurons affected by the input (Naff) as a function of the delay time, for a range of network parameters (sub-critical, critical and super-critical).

### 3.1 Helper functions for calculating theoretical values of Paff and Naff

In [None]:
#Look at the evolution of Paff in time

from scipy.stats import binom

def Paff_tau(var_w, var_e, K, s_spins, dec, t):
    '''
    Probability of a spin being affected by the input, given how many of its K inputs
    were themselves affected at the previous time step.

    Parameters:
    var_w (float): variance of the weights
    var_e (float): encoder variance
    K (int): number of inputs of a spin
    s_spins (int): number of those inputs affected by the input in the previous time step
    dec, t: not used by the current formula (the decaying encoder term is disabled)

    Returns:
    2*arctan(sqrt(var_b/var_a))/pi with var_b = s_spins*var_w and
    var_a = (K-s_spins)*var_w + var_e
    '''
    affected_variance = s_spins*var_w #+ (dec**t)*var_e
    remaining_variance = (K-s_spins)*var_w+var_e
    variance_ratio = affected_variance/remaining_variance

    return 2*np.arctan(np.sqrt(variance_ratio))/(np.pi)

def Paff_tau2(var_w, var_e, K, p):
    '''
    Variant of the Paff formula written in terms of a fraction p instead of a count:
    2*arctan(sqrt(var_b/var_a))/pi with var_b = p*var_w and var_a = (1-p)*var_w + var_e/K.
    '''
    affected_variance = p*var_w
    remaining_variance = (1-p)*var_w+(var_e/K)
    variance_ratio = affected_variance/remaining_variance

    return 2*np.arctan(np.sqrt(variance_ratio))/(np.pi)

def Paff_tau_binom(var_w, var_e, K, p, dec, t):
    '''
    Probability of a spin being affected by the input, averaged over the number of its
    K inputs that were affected at the previous time step.

    The number of affected inputs is weighted with the Binomial(K, p) probability mass
    function and the conditional probability is given by Paff_tau.

    Parameters:
    var_w (float): variance of the weights
    var_e (float): encoder variance
    K (int): number of inputs of a spin
    p (float): probability of being affected at the previous time step
    dec, t: forwarded unchanged to Paff_tau

    Returns:
    sum over i = 0..K of binom.pmf(i, K, p) * Paff_tau(var_w, var_e, K, i, dec, t)
    '''
    # A Binomial(K, p) count takes the values 0..K inclusive, so the range must run to K+1;
    # range(K) would leave out the case in which all K inputs are affected.
    return sum(binom.pmf(i, K, p)*Paff_tau(var_w, var_e, K, i, dec, t)
               for i in range(K + 1))
    

def Paff_evolution_binom(tau, var_w, var_e, K, N):
    '''
    Iterate Paff over `tau` time steps and derive the affected-spin statistics.

    Parameters:
    tau (int): number of time steps (length of every returned array)
    var_w (float): variance of the weights
    var_e (float): encoder variance
    K (int): number of inputs of a spin (cast to int for the binomial step)
    N (int): number of spins

    Returns:
    (Paff_array, s_array_mean, s_array_var, n_affected), four arrays of length tau:
    the probability of being affected, a running mean and variance of the number of
    affected spins, and N times the probability of being affected.
    '''
    Paff_array = np.zeros(tau)
    s_array_mean = np.zeros(tau)
    s_array_var = np.zeros(tau)
    n_affected = np.zeros(tau)

    # Step 0: the encoder variance plays the "affected" role against all K weights
    p_start = 2*np.arctan(np.sqrt(var_e/(K*var_w)))/(np.pi)

    Paff_array[0] = p_start
    s_array_mean[0] = N*p_start
    s_array_var[0] = N*p_start*(1-p_start)
    n_affected[0] = N*p_start

    for step in range(1, tau):
        p_previous = Paff_array[step-1]
        # 0.3 is passed as `dec`, together with the step number as `t`
        Paff_array[step] = Paff_tau_binom(var_w, var_e, int(K), p_previous, 0.3, step)
        s_array_mean[step] = Paff_array[step]*s_array_mean[step-1]
        s_array_var[step] = s_array_mean[step-1]*(1-Paff_array[step])*Paff_array[step]
        # NOTE(review): uses the probability of the previous step, so n_affected[1] equals
        # n_affected[0] - confirm that this one-step lag is intended.
        n_affected[step] = N*p_previous

    return Paff_array, s_array_mean, s_array_var, n_affected
        

### 3.2 Helper functions for plotting the values of Paff and Naff

In [None]:
def plot_paff_analytical(tau, var_w, var_e, K, N, var1_idx, var2_idx, var_w_critical):
    '''
    Draw the analytical Paff curve over `tau` time steps for var_w[var1_idx:var2_idx].

    Parameters:
    tau (int): number of time steps
    var_w (np.array): var_w values, indexed element-wise
    var_e, K, N: forwarded to Paff_evolution_binom
    var1_idx, var2_idx (int): first and one-past-last index of var_w to draw
    var_w_critical: critical var_w, only shown (rounded) in the title
    '''
    for idx in range(var1_idx, var2_idx):
        # Only the first returned array (the Paff values) is needed here
        paff_curve = Paff_evolution_binom(tau, var_w[idx], var_e, K, N)[0]
        curve_label = "var_w =  {}".format(np.round(var_w[idx],3))
        plt.plot(np.arange(tau), paff_curve, label = curve_label)

    heading = ("Paff for varying distances from the critical line: var_w = "
               + str(np.round(var_w_critical,3)) + ", K = " + str(K))
    plt.title(heading)
    plt.xlabel("tau")
    plt.legend(bbox_to_anchor=(0.65, 1), loc='upper left')

def plot_paff_analytical_naff(tau, var_w, var_e, K, N, var1_idx, var2_idx, var_w_critical):
    '''
    Draw the analytical Naff curve over `tau` time steps for var_w[var1_idx:var2_idx].
    The value at time step 0 is left out of the plot.

    Parameters:
    tau (int): number of time steps
    var_w (np.array): var_w values, indexed element-wise
    var_e, K, N: forwarded to Paff_evolution_binom
    var1_idx, var2_idx (int): first and one-past-last index of var_w to draw
    var_w_critical: critical var_w, only shown (rounded) in the title
    '''
    for idx in range(var1_idx, var2_idx):
        # Only the last returned array (the number of affected spins) is needed here
        naff_curve = Paff_evolution_binom(tau, var_w[idx], var_e, K, N)[3]
        curve_label = "var_w {}".format(np.round(var_w[idx],3))
        plt.plot(np.arange(tau)[1:], naff_curve[1:], label = curve_label)

    heading = ("Naff for varying distances from the critical line: var_w = "
               + str(np.round(var_w_critical,3)))
    plt.title(heading)
    plt.xlabel("tau")
    plt.legend(bbox_to_anchor=(0.67, 1), loc='upper left')



### 3.3 Plots for the Paff and Naff

In [None]:
# Analytical Paff curves for a hand-picked set of var_w values
K, n_spins, var_e = 8, 300, 5
n_tau = 10                          # number of time steps passed to the helper
var1_idx, var2_idx = 0, 4           # draw var_w[0:4], i.e. every entry below

var_w = np.array([0.1,0.2,0.3,0.4])
path_plots = "/home/elosegui/MSc_thesis_project/numpy_results/paff_plots/"

plot_paff_analytical(
    n_tau, var_w, var_e, K, n_spins,
    var1_idx, var2_idx, var_w_critical,
)

# Saving of this figure is currently disabled:
#plt.savefig(path_plots+"Paff_K" +str(K)+"_var_e"+str(var_e))

In [None]:
# Analytical Naff curves for the var_w values of the loaded simulation
n_tau = 10

# Flatten var_w_perturbed exactly as the other plotting cells do: the helper indexes it
# element by element and needs one scalar var_w per curve.
plot_paff_analytical_naff(n_tau, var_w_perturbed.reshape(perturbations,), var_e, K, n_spins,
                          var1_idx, var2_idx, var_w_critical)

plt.savefig(path_plots+"Naff_K" +str(K)+"_iterations"+str(it)+"_N"+str(n_spins)+"_var_e"+str(var_e))