In [36]:
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
import matplotlib.cm as cm
from matplotlib.colors import Normalize

In [72]:
# data loading

act_path = os.getcwd()
weights = np.zeros((96,78))
delay = np.zeros((96,78))
fitness= np.zeros((96))
range_weights = 20.
range_delay = np.array([1.,10.])
range_fitness = 10000.

for data_number in range(96):
    file_name = "individual_99_" + str(data_number) + "_result.csv"
    data_path = os.path.join(act_path, "csv_files_data1", file_name)
    df = pd.read_csv(data_path, delimiter=',', header=None)
    df = df.to_numpy()
    weights[data_number,:] = df[0,:]
    delay[data_number,:] = df[1,:]
    fitness[data_number] = df[2,0] 
    
fit_mean = np.mean(fitness) # to do: min, max, mean, standerd dev.
# print(fit_mean)

# numpy arrays
weights = np.clip(weights, -20., 20.)
delay = np.clip(delay, 1., 10.)
fitness = np.clip(fitness, -10000., 10000.)

# data frames for all indiviuals together
weights_df = pd.DataFrame(weights)
delay_df = pd.DataFrame(delay)
fitness_df = pd.DataFrame(fitness)

# data frames for each individual (weights + delay)
df_list = []
weights_delay = np.zeros((78,2))
for data_number in range(96):
    weights_delay[:,0] = weights[data_number, :]
    weights_delay[:,1] = delay[data_number, :]
    df_list.append(pd.DataFrame(weights_delay))


In [99]:
# PCA
'''
We suggest to plot the sub-space that
is defined by the first three (and subsequent) principal components of the data combined with a color
coded fitness (see [7]). Is there a relation between principal components and the fitness of the data?
'''

scaler = MinMaxScaler()
transformed_weights = scaler.fit_transform(weights)
transformed_delay = scaler.fit_transform(delay)
data_merged = np.concatenate((transformed_weights, transformed_delay), axis=1)
data_merged = pd.DataFrame(data_merged)

pca = PCA(n_components=3)
pca_result = pca.fit_transform(data_merged)

data_merged['pca-one'] = pca_result[:,0]
data_merged['pca-two'] = pca_result[:,1] 
data_merged['pca-three'] = pca_result[:,2] 
data_merged['fitness'] = fitness

for v in pca.explained_variance_ratio_:
    print('Explained variation per principal component: {}%'.format(round(v*100,2)))

Explained variation per principal component: 12.74%
Explained variation per principal component: 10.17%
Explained variation per principal component: 6.09%


In [102]:
# plot pca results

fig = plt.figure(1)
ax = fig.add_subplot(111, projection='3d')

'''
# Let's set up the first dataset of weights_df. 
p1 = ax.plot(data_merged['pca-one'], 
             data_merged['pca-two'], 
             data_merged['pca-three'], 
             'o', c='b',                                                 
             alpha = 0.6,                           
             markersize=3, 
             markeredgecolor='black',
             markeredgewidth=0.1)

'''
colors = fitness.astype(int)
ax.scatter(data_merged['pca-one'], data_merged['pca-two'], data_merged['pca-three'], c=colors, cmap = 'PuO', alpha=0.5)


# We will then label the three axes using the percentages explained for each major component.
ax.set_xlabel('PCA-1, ' +  str(round(pca.explained_variance_ratio_[0]*100,2)) + '% Explained', fontsize=7)
ax.set_ylabel('PCA-2, ' +  str(round(pca.explained_variance_ratio_[1]*100,2)) + '% Explained', fontsize=7)
ax.set_zlabel('PCA-3, ' +  str(round(pca.explained_variance_ratio_[2]*100,2)) + '% Explained', fontsize=7)


fig.legend(fontsize = 'x-small', loc='upper center', markerscale=2)
plt.autoscale()
plt.rcParams["figure.dpi"] = 1000                                   # set the figure resolution dpi value to 1000
plt.show()

fig_name = '3D_scatterplot_PCA.png'
fig.savefig(fig_name)

ValueError: 0     37
1     33
2     25
3     28
4    -93
      ..
91    33
92    15
93    50
94    43
95    37
Name: fitness, Length: 96, dtype: int32 is not a valid value for color

In [104]:
'''
Extract the mean weights of the three different
connection types of the network (layer 1 to layer 2, within layer 2, layer 2 to layer 3; see Fig. 1a) and
plot them in an analogous manner to the PCA plot.
'''
# layer 1 to layer 2: 6x6=36 weights
# within layer 2: 5x6 = 30 weights
# layer 2 to layer 3: 6x2 = 12 weights
weights1 = weights[:,0:35]
weights2 = weights[:,36:65]
weights3 = weights[:,65:77]
mean_weights1=np.zeros((weights.shape[0]))
mean_weights2=np.zeros((weights.shape[0]))
mean_weights3=np.zeros((weights.shape[0]))

for data_number in range(weights.shape[0]):
    mean_weights1[data_number] = np.mean(weights1[data_number,:])
    mean_weights2[data_number] = np.mean(weights2[data_number,:])
    mean_weights3[data_number] = np.mean(weights3[data_number,:])


In [106]:
# plot mean weights

fig = plt.figure(1)
ax = fig.add_subplot(111, projection='3d')

colors = fitness.astype(int)
ax.scatter(mean_weights1, mean_weights2, mean_weights3, c=colors, cmap = 'PuOr', alpha=0.5)


# We will then label the three axes using the percentages explained for each major component.
ax.set_xlabel('Mean Weights 1->2, ', fontsize=7)
ax.set_ylabel('Mean Weights 2->2, ', fontsize=7)
ax.set_zlabel('Mean Weights 2->3, ', fontsize=7)


# fig.legend(fontsize = 'x-small', loc='upper center', markerscale=2)
plt.autoscale()
plt.rcParams["figure.dpi"] = 1000                                   # set the figure resolution dpi value to 1000
plt.show()

fig_name = '3D_scatterplot_mean.png'
fig.savefig(fig_name)

NameError: name 'mean_weigths1' is not defined