In [None]:
# Import plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="white")
import numpy as np
import pandas as pd
%matplotlib notebook

# Bokeh for interactive plots
from bokeh.io import push_notebook, output_notebook, show as bokeh_show
from bokeh.plotting import figure as bokeh_figure
output_notebook()

# Import our analysis modules
%load_ext autoreload
%autoreload 1
%aimport optimize.bpop_analysis_stn
%aimport optimize.bpop_analysis_pop

# Short aliases for the two analysis modules registered with %aimport above,
# so that edits to those modules are picked up by autoreload.
resp_analysis = optimize.bpop_analysis_stn
pop_analysis = optimize.bpop_analysis_pop

# Pretty-printer (2-space indent) used to dump nested settings structures
import pprint
pp = pprint.PrettyPrinter(indent=2)

In [None]:
# Catalogue of optimisation runs (100 individuals, 100 generations each):
# one IBEA run and one NSGA2 run.
import os

# Single place to change when the simulation data lives somewhere else.
SIMDATA_ROOT = '/home/luye/cloudstore_m/simdata/marasco_folding'


def _checkpoint_entry(name, run_dir, run_hash):
    """
    Build one catalogue row for an optimisation run.

    @param  name      label used to select the run later on
    @param  run_dir   sub-directory of SIMDATA_ROOT holding the run's output
    @param  run_hash  hash suffix shared by the run's checkpoint/settings files

    @return           [name, checkpoints_file, settings_file]
    """
    stem = os.path.join(SIMDATA_ROOT, run_dir, 'opt_checkpoints_' + run_hash)
    return [name, stem + '.pkl', stem + '_settings.pkl']


checkpoint_files = [
    _checkpoint_entry('IBEA_100gen', 'optimization_run_20171103_2', 'cdf893c2'),
    _checkpoint_entry('NSGA2_100gen', 'optimization_run_20171103_1', '3210b868'),
]

opt_data = pd.DataFrame(checkpoint_files, columns=['name', 'checkpoints_file', 'settings_file'])

## Load data

In [None]:
# Pick the optimisation run to analyse by its label in `opt_data`
# (a row could also be addressed directly by position, e.g. row 0).
opt_name = 'IBEA_100gen'

# Index label of the first row whose 'name' field equals `opt_name`
idx = opt_data.index[opt_data['name'].eq(opt_name)][0]

# File locations belonging to the selected run
cp_file = opt_data.loc[idx, 'checkpoints_file']
settings_file = opt_data.loc[idx, 'settings_file']

print("Analysing data from files:\n{}\n{}".format(cp_file, settings_file))

In [None]:
# Load the most recent optimisation checkpoint from `cp_file`.
#
# NOTE: this cell must not run under %%timeit. A cell magic is only valid on
# the first line of a cell, and %%timeit discards the cell's assignments, so
# `checkpoint`, `hof`, `log` and `pareto_front` would be missing afterwards.

# cPickle only exists on Python 2; fall back to the standard module otherwise.
try:
    import cPickle as pickle
except ImportError:
    import pickle

# SECURITY: unpickling executes arbitrary code -- only load trusted files.

# Checkpoints are appended to the file as consecutive pickles; read until the
# end of the file so that `checkpoint` ends up holding the last one written.
checkpoint = None
with open(cp_file, "rb") as f:
    while True:
        try:
            checkpoint = pickle.load(f)
        except EOFError:
            break

if checkpoint is None:
    raise ValueError("No checkpoint could be read from file: {}".format(cp_file))

# Get variables
hof = checkpoint['halloffame']
log = checkpoint['logbook']
pareto_front = checkpoint['paretofront']

In [None]:
# Load the optimisation settings saved next to the checkpoints.
# Pickle data is binary: the file has to be opened in 'rb' mode (text mode
# corrupts or rejects the stream on Python 3 and on Windows).
# SECURITY: unpickling executes arbitrary code -- only load trusted files.
with open(settings_file, 'rb') as f:
    opt_settings = pickle.load(f)

pp.pprint(opt_settings)

## Plot optimisation logs

In [None]:
# Plot evolution of fitness values from the optimisation logbook.
# plot_log is called twice with different `ymax` settings ('std', then 'min').
# NOTE(review): presumably `ymax` selects which logbook statistic bounds the
# y-axis -- confirm against optimize.bpop_analysis_pop.plot_log.
fig, ax = pop_analysis.plot_log(log, ymax='std')  # keep handles of the first plot
pop_analysis.plot_log(log, ymax='min')

# Plot parameter diversity

The individual format included in the hall of fame is as follows:

```python
Individual = {
    'fitness': {
        'wvalues':         <list(float)> fitness scores,
        'sum':             <float> sum of fitness scores,
        'neg_squared_sum': <float> negated sum of squared fitness scores},
    'obj_size':           int,
    'param_size':         int,
    'get_param_values()': <list(float)> parameter values,
}
```

## Range of fitness scores

IMPORTANT NOTES:
- for selection into the hall of fame, the sum of squares of fitness scores (distances) is used
- however, during evolutionary selection, the selector of the MOO (IBEA or NSGA2 selector) determines how fitness values are compared

In [None]:
# Print the sum of squared errors of every hall-of-fame individual.
print("\nSum of squared error for best individuals:\n")

# The fitness stores the *negated* squared sum, so flip the sign to get costs.
costs = np.array([-ind.fitness.neg_squared_sum for ind in hof])

# Lay the costs out column-wise in a table with size // 5 rows.
# Floor division keeps the row count an integer on Python 3 as well; fall back
# to the flat array when the costs cannot be arranged into such a table.
num_rows = costs.size // 5
if num_rows > 0 and costs.size % num_rows == 0:
    print(costs.reshape((num_rows, -1), order='F'))
else:
    print(costs)

## Range and variance of parameters

Take 50 best individuals, plot total range of their parameters, standard deviation of each parameter.

In [None]:
# Stack the parameter vectors of the hall of fame into a matrix:
# one row per individual, one column per parameter.
ind_as_rows = np.array([ind.get_param_values() for ind in hof])
print("Found {} individuals with {} parameters each".format(*ind_as_rows.shape[:2]))

# Parameter names are stored in the same order as the values in an individual
param_names = opt_settings['opt_param_names']
ind_data = pd.DataFrame(data=ind_as_rows, columns=param_names)

In [None]:
# Covariance between parameters, estimated over the hall-of-fame individuals.
# np.cov expects one variable per row, hence the transpose.
ind_cov = np.cov(np.transpose(ind_as_rows))

# Rows are labelled with parameter names, columns with their integer position
param_cov_data = pd.DataFrame(
    data=ind_cov,
    index=param_names,
    columns=range(ind_cov.shape[1]),
)

pd.set_option('display.precision', 3)
print("Parameter covariance matrix:\n")
print(param_cov_data)

In [None]:
# Plot the parameter covariance matrix as a heatmap.

# Mask for the upper triangle, diagonal included (triu covers the diagonal).
# It is NOT applied below, so the full matrix is shown; pass `mask=mask` to
# sns.heatmap to hide the redundant upper half.
# `bool` replaces the `np.bool` alias, which was removed from NumPy.
mask = np.zeros_like(param_cov_data, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots()

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the unmasked heatmap on the axes created above, colours centred on 0
# and square cells.
sns.heatmap(param_cov_data,
            mask=None, cmap=cmap, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5},
            ax=ax)

# Keep the parameter names on the y-axis horizontal
plt.setp(ax.get_yticklabels(), rotation=0)
ax.set_title("Covariance matrix")

In [None]:
# Standard deviation of every parameter (square root of the covariance
# diagonal), ranked from the most to the least variable parameter.
params_std = np.sqrt(ind_cov.diagonal())
sorted_index = np.argsort(params_std)[::-1]  # descending order
sorted_std = params_std[sorted_index]
sorted_params = [param_names[i] for i in sorted_index]
std_data = pd.DataFrame(data=sorted_std, columns=['std'], index=sorted_params)

print("Parameters with larges standard deviation:\n")
print(std_data)

# Fitness VS Parameters

- TODO: make plots of fitness VS each param value as in https://github.com/BlueBrain/BluePyOpt
- TODO: see possible figures in `/home/luye/cloudstore_m/notes/publications_lkmn/SfN_2017/example_figures`
- TODO: use bokeh for glyphs
    + use 'ray' for param values with colored 'wedge' or 'annular_wedge' for fitness scores,
    + XY axis: can use params with largest variance
    + XY axis: can use force layout according to cartesian vector distance

In [None]:
# Interactive Bokeh graph of the hall-of-fame individuals.
# Every individual is a node; a force-directed layout places individuals with
# similar parameter vectors close together. Nodes are coloured by summed
# fitness and show fitness + parameter values on hover.
from bokeh.models import Circle, Line, MultiLine, HoverTool, TapTool, BoxSelectTool
from bokeh.models.graphs import from_networkx, NodesOnly, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.palettes import Spectral4
import matplotlib.colors
import networkx as nx

# Plotting graphs with Bokeh tutorials:
# - https://bokeh.pydata.org/en/latest/docs/user_guide/graph.html#networkx-integration
# - https://github.com/bokeh/bokeh/blob/master/examples/plotting/file/graphs.py

num_ind, num_par = ind_data.shape

# Summed fitness per individual (computed once, reused for nodes and colours)
ind_fitness = [ind.fitness.sum for ind in hof]

# One node per individual
ind_graph = nx.Graph()
for i, fitness in enumerate(ind_fitness):
    ind_graph.add_node(i, fitness=fitness)

# Pairwise euclidean distances between parameter vectors (lower triangle).
# spring_layout treats the edge weight as an ATTRACTION strength (larger means
# pulled closer), so the raw distance cannot be used as weight: it would draw
# the most dissimilar individuals together. Each edge therefore also stores a
# bounded inverse distance in (0, 1] that is used for the layout.
ind_dists = np.zeros((num_ind, num_ind))
for i in range(num_ind):
    for j in range(i):
        norm2 = np.sqrt(np.sum(np.square(ind_as_rows[i] - ind_as_rows[j])))
        ind_dists[i][j] = norm2
        ind_graph.add_edge(i, j, dist=norm2, attraction=1.0 / (1.0 + norm2))

# Draw graph using Bokeh.
# The notebook imports the Bokeh entry points under the aliases
# `bokeh_figure` / `bokeh_show`; bare `figure` / `show` are not defined.
xscale, yscale = 2.0, 2.0
f1 = bokeh_figure(width=800, height=600,
                  x_range=(-.6*xscale, .6*xscale),
                  y_range=(-.6*yscale, .6*yscale))
f1.title.text = "Individuals cartesian distance"

# Add interactivity: tooltips with index, fitness and all parameter values.
# The @{...} form also works for column names containing special characters.
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("fitness", "@fitness")] + [(pname, "@{" + pname + "}") for pname in param_names]
)
tap = TapTool()
f1.add_tools(hover, tap)

# Make graph renderer. The layout (random initial positions) is computed once,
# here; a separate nx.spring_layout call beforehand would be discarded.
graph_renderer = from_networkx(ind_graph, nx.spring_layout, weight='attraction',
                               iterations=100, scale=xscale, center=(0, 0))

# Add fitness values to datasource
node_source = graph_renderer.node_renderer.data_source
node_source.add(ind_fitness, 'fitness')

# Add color values to datasource: fitness normalised to [0, 1] -> 'coolwarm'
cm = plt.get_cmap('coolwarm')
fit_scales = np.interp(ind_fitness, [min(ind_fitness), max(ind_fitness)], [0., 1.])
fit_colors = [matplotlib.colors.rgb2hex(cm(scale)) for scale in fit_scales]
node_source.add(fit_colors, 'fit_color')

# One datasource column per parameter, referenced by the hover tooltips
for i, pname in enumerate(param_names):
    node_source.add(ind_as_rows[:, i], pname)

# Control node and edge rendering
graph_renderer.node_renderer.glyph = Circle(size=8, fill_color='fit_color')
graph_renderer.node_renderer.selection_glyph = Circle(size=5, fill_color=Spectral4[2])
graph_renderer.node_renderer.hover_glyph = Circle(size=5, fill_color=Spectral4[1])
# Graph edges have to be drawn with a MultiLine glyph; fully transparent here
# because the graph is complete and the edges would only add clutter.
graph_renderer.edge_renderer.glyph = MultiLine(line_alpha=0.0)

# Only nodes react to selection and hovering
graph_renderer.selection_policy = NodesOnly()
graph_renderer.inspection_policy = NodesOnly()

f1.renderers.append(graph_renderer)

# TODO: draw one 'ray' glyph per parameter on every node (angle 360/num_par * j,
#       length given by the parameter value) once node coordinates are extracted.

h1 = bokeh_show(f1, notebook_handle=True) # handle can be used for figure interaction

In [None]:
# Summed fitness score of the first hall-of-fame individual
# (presumably the best one -- confirm the hall-of-fame ordering).
print(hof[0].fitness.sum)

In [None]:
# Values of the second parameter (column 1) across all hall-of-fame
# individuals, followed by the (individuals, parameters) shape of the matrix.
print(ind_as_rows[:,1])
ind_as_rows.shape