## Setup imports

In [None]:
import os
import sys
import itertools
import warnings
from IPython.core.interactiveshell import InteractiveShell

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.metrics import normalized_mutual_info_score 

# We need to be able to access the files/directories outside of the `Notebooks` directory.
sys.path.append(os.path.abspath(os.path.join("..")))

from analysis import visualize
from analysis.resampling import Resampler
from analysis.graph_analysis import NeuronNetwork
from analysis.place_cell_analysis import apply_cantor_pairing
from analysis.clustering import affinity_propagation, extract_clusters
from analysis.analysis_utils import Mouse, find_file, extract_epochs, filter_epochs, downsample_dataframe

In [None]:
%matplotlib inline
InteractiveShell.ast_node_interactivity = "all"

# This is to supress future warnings about sklearn's nmi function.
warnings.simplefilter(action='ignore', category=FutureWarning)

##### We load & preprocess the data for a mouse.

In [None]:
# Read the raw tables: `s` is later stored as the mouse's spikes, `c` as its
# cell transients, and `behavior` as its behavior data.
s = pd.read_csv("data/artificial_epm_s.csv", header=0)
c = pd.read_csv("data/artificial_epm_c.csv", header=0)
behavior = pd.read_csv("data/epm_behavior.csv", header=0)

# For convenience, make every neuron column label an int.
s.columns = list(map(int, s.columns))
c.columns = list(map(int, c.columns))

# Columns that are not needed for the analysis; they are named here only so
# that they can be dropped again right after the rename below.
unused_columns = ["Hardware_command",
                  "Hardware_command_2",
                  "Hardware_command_3",
                  "Hardware_command_4",
                  "Result_1"]

behavior_column_names = ['X_center',
                         'Y_center',
                         'Area',
                         'Areachange',
                         'Elongation',
                         'Distance_moved',
                         'Velocity',
                         'Arena_centerpoint',
                         'Open1_centerpoint',
                         'Open2_centerpoint',
                         'Closed1_centerpoint',
                         'Closed2_centerpoint',
                         'OpenArms_centerpoint',
                         'ClosedArms_centerpoint'] + unused_columns

# Downsample the behavior dataframe by a factor of 3.
# NOTE(review): which rows are kept is decided by downsample_dataframe -- confirm there.
behavior = downsample_dataframe(behavior, 3)

# Give the behavior columns readable names, then discard the unused ones.
behavior.columns = behavior_column_names
behavior = behavior.drop(unused_columns, axis=1)

# Add a time series column built from Timedelta, with one period per frame
# currently in the dataframe. 10 fps implies 100 milliseconds (ms) per frame,
# so each period is 100 ms long.
frame_count = len(behavior.index)
trial_time = pd.timedelta_range(0, periods=frame_count, freq="100ms")
behavior = behavior.assign(Trial_time=trial_time)

# Bundle all of the data for this particular mouse into a single Mouse object.
mickey = Mouse(
    name="Mickey",
    age=1,
    sex='M',
    cell_transients=c,
    spikes=s,
    behavior=behavior,
)

# Optional: convert the spikes to a boolean matrix, where spikes := 1 and no spike := 0
# mickey.spikes = mickey.spikes.where(mickey.spikes==0, 1)

# Add a "Center" column to the concatenated dataframe: it is 1 minus the sum of
# the open-arms and closed-arms indicator columns.
in_any_arm = mickey.spikes_and_beh["OpenArms_centerpoint"] + mickey.spikes_and_beh["ClosedArms_centerpoint"]
center = 1 - in_any_arm
mickey.spikes_and_beh["Center"] = center

## Heatmaps

#### Plot a heatmap and a "true" scatterplot for a single neuron.
#### The heatmap below does *not* take "time spent in bins" or anything else into account. If you want to account for such things, the onus is on you to set the weights and/or "bin" your data as you see fit. `plot_heatmap` was implemented to be as useful as possible for as many people as possible. Thus, this is a feature, not a bug.

In [None]:
neuron = 35
title = "{}, neuron {}".format(mickey.name, neuron)

# Work on copies of the coordinate Series so that the original data is preserved.
x_coords = mickey.spikes_and_beh["X_center"].copy()
y_coords = mickey.spikes_and_beh["Y_center"].copy()

# Integer-valued coordinates are what is handed to the heatmap.
x = x_coords.astype(int)
y = y_coords.astype(int)

# Boolean mask of the frames in which the chosen neuron fired. It is reused
# for the heatmap weights and for the scatterplot overlay further down.
fired = mickey.spikes_and_beh[neuron] != 0

# 0/1 weights passed to plot_heatmap so that the heatmap is only based on
# where the neuron fired.
weights = pd.Series(fired.astype(int))

visualize.plot_heatmap(x, y, sigma=2, title=title, bins=50, figsize=(6, 6), weights=weights, savefig=False);

# -----------------------

# Scatterplot of every visited position, to compare the heatmap with the
# precise firing positions of the neuron.
plt.figure(figsize=(6, 6));
plt.scatter(x_coords, y_coords, marker='o');

# Coordinates where the neuron actually fired, overlaid in red on top of the
# first scatterplot.
x_1 = x_coords.loc[fired]
y_1 = y_coords.loc[fired]
plt.scatter(x_1, y_1, marker='x', color='red');

plt.title(title);

## Place Cell Analysis

In [None]:
# Pull out the X and Y coordinate column vectors as integers.
x_coords = mickey.spikes_and_beh["X_center"].astype(int)
y_coords = mickey.spikes_and_beh["Y_center"].astype(int)

# Shift each coordinate vector upwards by the absolute value of its minimum.
# This is necessary in order to apply the cantor pairing function, since the
# cantor pairing function is only defined on the natural numbers,
# i.e., {0, 1, 2, 3, ...}.
x_coords = x_coords + abs(x_coords.min())
y_coords = y_coords + abs(y_coords.min())

# Collapse each (x, y) pair into a single integer, since sklearn's mutual
# information function only allows you to compute the NMI between two arrays.
paired = apply_cantor_pairing(x_coords.tolist(), y_coords.tolist())
z_coords = pd.Series(data=paired).astype(int)

In [None]:
def nmi_wrapper(dataframe, beh_col_vec):
    """Score every column of `dataframe` against `beh_col_vec` with sklearn's NMI.

    Each column vector of `dataframe` is passed, together with the given
    behavior column vector, to `normalized_mutual_info_score`. The per-column
    scores are returned in whatever form `DataFrame.apply` produces.
    """
    def score_column(neuron_col_vec):
        return normalized_mutual_info_score(neuron_col_vec, beh_col_vec)

    return dataframe.apply(score_column)

In [None]:
# Build the permutation distributions for the place cell analysis: Resampler.shuffle
# is handed the spike matrix, the nmi_wrapper statistic and the paired coordinates.
# NOTE(review): 10000 is presumably the number of resamples and flip_roll a shuffling
# option -- confirm against Resampler.shuffle. %time reports how long the call took.
%time permutation_distributions = Resampler.shuffle(10000, mickey.spikes, nmi_wrapper, z_coords, flip_roll=True)

In [None]:
# Plot the permutation distributions as histograms (presumably one per neuron -- confirm
# the shape returned by Resampler.shuffle). The trailing semicolon hides the cell output.
permutation_distributions.hist(alpha=0.5, color="pink", bins="auto", figsize=(23, 10));

In [None]:
# Observed (unshuffled) NMI between each neuron and the paired coordinates.
# The statistic must be computed on the same spike matrix that was passed to
# Resampler.shuffle, i.e., mickey.spikes.
original_statistics = nmi_wrapper(mickey.spikes, z_coords)
original_statistics = original_statistics.to_dict()

# Report every neuron whose observed statistic has a p-value below 0.05 with
# respect to that neuron's permutation distribution.
for neuron, original_stat in original_statistics.items():
    p_value = Resampler.p_value(original_stat, permutation_distributions[neuron])
    if p_value < 0.05:
        # The value printed is the p-value itself, hence "p=" rather than "p<".
        print("neuron {}: p={}".format(neuron, p_value))

In [None]:
# Run the two-tailed test of each neuron's observed statistic against that
# neuron's permutation distribution, and print the outcome.
for cell_id, observed in original_statistics.items():
    outcome = Resampler.two_tailed_test(observed, permutation_distributions[cell_id])
    print("neuron {}: result={}".format(cell_id, outcome))

## Cell Selectivity

##### Use the `shuffle` function in order to create a permutation distribution, for each neuron, of the difference of means.

In [None]:
# Build the permutation distributions for cell selectivity: Resampler.shuffle is handed
# the spike matrix, the Resampler.diff_of_mean_rate statistic and the open-arms and
# closed-arms indicator columns.
# NOTE(review): 10000 is presumably the number of resamples -- confirm against Resampler.shuffle.
%time permutation_distr = Resampler.shuffle(10000, mickey.spikes, Resampler.diff_of_mean_rate, mickey.spikes_and_beh["OpenArms_centerpoint"], mickey.spikes_and_beh["ClosedArms_centerpoint"])

##### Plot the permutation distributions

In [None]:
# Plot the permutation distributions as histograms (presumably one per neuron -- confirm
# the shape returned by Resampler.shuffle). The trailing semicolon hides the cell output.
permutation_distr.hist(alpha=0.5, color="green", bins="auto", figsize=(23, 10));

##### Classify cells by the behavior for which they are selective.

In [None]:
# Observed difference of mean rates for every neuron, keyed by neuron label.
open_arms = mickey.spikes_and_beh["OpenArms_centerpoint"]
closed_arms = mickey.spikes_and_beh["ClosedArms_centerpoint"]
observed_diffs = Resampler.diff_of_mean_rate(mickey.spikes, open_arms, closed_arms)
original_diff_of_means = dict(zip(mickey.spikes.columns, observed_diffs))

# p-value of each observed difference with respect to that neuron's
# permutation distribution.
p_values = {}
for neuron, original_stat in original_diff_of_means.items():
    p_values[neuron] = Resampler.p_value(original_stat, permutation_distr[neuron])
    print("neuron {}: p={}".format(neuron, p_values[neuron]))

In [None]:
# Label every neuron by the behavior it is selective for. A neuron is selective
# when its p-value is below 0.05; the sign of its observed difference of means
# then picks the behavior (positive -> open arms, negative -> closed arms).
# NOTE(review): this presumes diff_of_mean_rate computes "open minus closed",
# matching the order in which the two behaviors were passed -- confirm.
classified_cells = {}

for neuron, p_value in p_values.items():
    diff_of_means = original_diff_of_means[neuron]

    if p_value < 0.05 and diff_of_means > 0:
        classified_cells[neuron] = "OpenArms_centerpoint"
    elif p_value < 0.05 and diff_of_means < 0:
        classified_cells[neuron] = "ClosedArms_centerpoint"
    else:
        # Catch-all, so that no neuron is left out of the dictionary: this also
        # covers a significant p-value with a difference of exactly 0, and a
        # NaN p-value (for which every comparison is False).
        classified_cells[neuron] = "Not-selective"

classified_cells

##### Plot a pie chart in order to visualize the proportions of cells that are selective for behaviors of interest.

In [None]:
# Tally how many cells received each classification.
classifications = list(classified_cells.values())
open_selective = classifications.count("OpenArms_centerpoint")
closed_selective = classifications.count("ClosedArms_centerpoint")
not_selective = classifications.count("Not-selective")

# Plot a pie chart; the slice sizes are listed in the same order as the labels.
sizes = [open_selective, closed_selective, not_selective]
visualize.pie_chart(sizes, "open selective", "closed selective", "not selective")

## Clustering

##### The functions below are stand-alone functions meant to help with clustering analysis.

In [None]:
def plot_traces(clusters, mouse, all_beh_intervals=None, **kwargs):
    """Plot one trace per neuron for every cluster that holds more than one neuron.

    The traces are stacked vertically, one subplot per neuron, and each trace
    is colored by the cluster its neuron belongs to. Clusters with a single
    neuron are skipped. If no cluster has more than one neuron, nothing is
    drawn and the function returns immediately.

    Args:
        clusters: dict that maps an integer cluster label to the sequence of
            neuron (column) labels in that cluster.
        mouse: object whose `cell_transients` attribute is a dataframe with
            one column per neuron; its index is used as the x-axis.
        all_beh_intervals: optional sequence with one collection of intervals
            per behavior. The first and last element of every interval delimit
            a shaded background span. Each behavior gets its own background
            color; the four available colors repeat for a fifth behavior and
            beyond.
        **kwargs: optional settings:
            figsize: figure size, default (15, 6).
            hspace: vertical space between subplots, default 0.0.
            save: if truthy, save the figure as "<title>.pdf".
            title: only used for the name of the saved file.
    """
    figsize = kwargs.get("figsize", (15, 6))
    hspace = kwargs.get("hspace", 0.0)
    title = kwargs.get("title", "Title Goes Here")

    # Flatten the clusters into (cluster label, neuron) pairs, skipping every
    # cluster that only holds a single neuron.
    traces = []
    for cluster, members in clusters.items():
        if len(members) > 1:
            traces.extend((cluster, neuron) for neuron in members)

    if not traces:
        # plt.subplots cannot build a grid with zero rows; nothing to draw.
        return

    cmap = get_cmap(max(clusters.keys()) + 1, name="Dark2")

    # squeeze=False always yields a 2-D array of axes. Without it, a single
    # subplot comes back as a bare Axes object that cannot be indexed.
    _, axes = plt.subplots(len(traces), 1, figsize=figsize, squeeze=False)

    dataframe = mouse.cell_transients
    bg_colors = ["blue", "orange", "red", "green"]

    for axis, (cluster, neuron) in zip(axes[:, 0], traces):
        axis.plot(dataframe.index, dataframe[neuron], c=cmap(cluster), lw=1)

        if all_beh_intervals:
            for i, behavior_intervals in enumerate(all_beh_intervals):
                # Cycle through the palette so that more than four behaviors
                # cannot index past its end.
                color = bg_colors[i % len(bg_colors)]

                for interval in behavior_intervals:
                    axis.axvspan(interval[0], interval[-1], alpha=0.1, color=color)
                    # The axis decorations are hidden only on subplots that
                    # actually received a shaded span.
                    axis.axis("off")

    plt.subplots_adjust(wspace=0, hspace=hspace)

    if kwargs.get("save", False):
        plt.savefig(title + ".pdf", dpi=600)

    plt.show()
    
def get_cmap(n, name="tab20"):
    """ Returns a colormap, resampled to n entries, that maps each index
        in 0,1,...,n-1 to an RGBA color; the keyword argument name must
        be a standard mpl colormap name.

        NOTE(review): for a qualitative colormap with fewer than n colors,
        the n entries are presumably not all distinct -- confirm if that
        matters for the number of clusters being plotted.
    """
    # matplotlib.cm.get_cmap (reached via plt.cm.get_cmap) was deprecated in
    # Matplotlib 3.7 and removed in 3.9; pyplot.get_cmap accepts the same
    # (name, lut) arguments and is available in old and new versions alike.
    return plt.get_cmap(name, n)

##### We find all possible clusters of neurons for the mouse in EPM during the overall session.

In [None]:
from analysis.clustering import compute_connections

In [None]:
# Build the neuron network from the spike matrix's column labels and the
# connections returned by compute_connections.
connections = compute_connections(mickey.spikes)
neuron_network = NeuronNetwork(mickey.spikes.columns, connections)

# Cluster the neurons by running affinity propagation on a similarity matrix
# computed from the spike matrix.
# NOTE(review): nmi_matrix is neither imported nor defined in any of the cells
# above, so this line raises a NameError unless it is brought into scope first
# -- confirm which module provides it (possibly analysis.clustering).
sim_matrix = nmi_matrix(mickey.spikes)
clusters = affinity_propagation(sim_matrix)

##### We plot the imaged neurons by their actual positions, and we color code each neuron by its respective cluster.

In [None]:
# One cluster label per neuron, in sorted key order, used to color the nodes.
ordered_keys = sorted(clusters.keys())
node_colors = [clusters[key] for key in ordered_keys]

# The "weight" attribute of every edge, used to color the edges.
edge_weights = nx.get_edge_attributes(neuron_network.network, "weight")
_, weights = zip(*edge_weights.items())

_ = neuron_network.plot(
    node_color=node_colors,
    figsize=(10, 10),
    node_size=600,
    edge_color=weights,
    edge_cmap=plt.cm.Pastel1,
    save=False,
)

##### We plot the continuous time series for each neuron that was assigned to a cluster with $2$ or more neurons. Each time series plot is color coded by its respective cluster.

In [None]:
# Collect the intervals of each behavior of interest, one entry per behavior,
# in the order in which the behaviors are listed.
# The loop variable is deliberately not called `behavior`, so that it does not
# overwrite the `behavior` dataframe that was loaded at the top of the notebook.
all_behavior_intervals = []
for behavior_name in ["OpenArms_centerpoint", "ClosedArms_centerpoint", "Center"]:
    epochs = extract_epochs(mickey, behavior_name)
    # NOTE(review): epochs[1] presumably holds the epochs during which the
    # behavior indicator equals 1 -- confirm against extract_epochs.
    behavior_intervals = filter_epochs(epochs[1], framerate=1, seconds=1)
    all_behavior_intervals.append(behavior_intervals)

In [None]:
# Convert the clustering result with extract_clusters, then plot the traces of the
# clustered neurons with the behavior intervals shaded in the background.
# NOTE(review): `clusters` is rebound to the extracted result, so this cell is not
# idempotent: running it a second time passes the already-extracted value back into
# extract_clusters, and the network plot cell above then no longer sees the
# affinity propagation output. Re-run the clustering cell first if needed.
clusters = extract_clusters(clusters)
plot_traces(clusters, mickey, all_beh_intervals=all_behavior_intervals, figsize=(25, 20), hspace=0.0, save=False, title="traces")

## Graph Theoretical Analysis

##### Compute the mean betweenness centrality of the mouse's neuron network.

In [None]:
# The returned value is displayed as the cell's output; mean_betw_cent is a method of
# NeuronNetwork (imported from analysis.graph_analysis).
neuron_network.mean_betw_cent()

##### Compute the average clustering coefficient for the mouse's network of neurons.

In [None]:
# Average clustering coefficient over the whole network, using the "weight" edge
# attribute as the edge weight.
nx.average_clustering(neuron_network.network, weight="weight")

##### Compute the average clustering coefficient for each extracted cluster.

In [None]:
# For every extracted cluster, average the weighted clustering coefficients of
# the nodes that belong to it.
for cluster in clusters.values():
    coefficients = nx.clustering(neuron_network.network, weight="weight", nodes=cluster)
    avg_cluster_coeff = np.mean(list(coefficients.values()))
    print("avg clustering coefficient of {} = {}".format(cluster, avg_cluster_coeff))

##### Compute the mean shortest path length of the mouse's network of neurons. Recall: "random and complex networks have short mean path lengths (high global efficiency of parallel information transfer)" (Bullmore & Sporns 2009)

In [None]:
# The returned value is displayed as the cell's output; avg_shortest_path_len is a
# method of NeuronNetwork (imported from analysis.graph_analysis).
neuron_network.avg_shortest_path_len()

##### We then compute the degree assortativity of the neuron network.

In [None]:
# Degree assortativity of the neuron network, using the "weight" edge attribute.
# The network object in this notebook is `neuron_network`; no `drd87_graph` is
# ever defined.
nx.degree_pearson_correlation_coefficient(neuron_network.network, weight="weight")