## Analyze Neuron Loadings - Geographical Distributions
This workflow will analyze the loadings of each neuron on all components and create visualizations thereof to better understand the components being worked with for each fish. Then, neurons with loadings on components will be binned and a ratios of components for each bin will be analyzed.

In [1]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import Patch
import numpy as np
import pandas as pd
import math
from sklearn import preprocessing
from util_functions import list_subjects, load_file, pickle_file, starting_run, finished_run

### Notebook Parameters

In [2]:
model_df = load_file('data_meta_pickled/pca_models_pickled_df.pickle')
subject_coord_dict = {}
plotting_list = []
neuron_xyz_dir = 'data_raw_pickled/'
minimum_neuron_count_in_bin = 50
num_bins = {'x': 40, 'y':20, 'z':2}
color_map = ['blue','red','green','orange','pink','black','brown','yellow','grey','purple']
legend_data=[Patch(facecolor=color_map[index], edgecolor='black',label='Component '+ str(index)) for index in range(0,len(color_map))]

### Notebook Functions

#### Data Manipulation Functions

In [3]:
def calc_component_loadings(model):
    #Note that we take the absolute value of all eigenvalues. This is because we want
    #to capture the relationship between components and neurons, not necessarily the
    #direction of that relationship.
    dataset = np.absolute(model.components_)
    neuron_sums = dataset.sum(axis=0, keepdims=True)
    return pd.DataFrame(dataset / neuron_sums).transpose()

In [4]:
def normalize_subject_coordinates(subject_coords):
    norm_coord_df = pd.DataFrame(index=range(0,subject_coords.shape[0]))
    for axis in num_bins:
        n_bins = num_bins[axis]
        axis_data = subject_coords[[axis]]
        data_min = axis_data.min()
        data_max = axis_data.max()
        data_step_size = (data_max - data_min)/n_bins
        norm_coord_df[axis]=axis_data.div(data_step_size)
    return norm_coord_df

In [5]:
def bin_coords(norm_coords):
    norm_coords['bin'] = norm_coords['x'].apply('ceil').add(norm_coords['y'].apply('ceil').mul(num_bins['x']).add(norm_coords['z'].apply('ceil').mul(num_bins['x']).mul(num_bins['y'])))

In [6]:
def calc_bin_scores(bin_labels, loadings):
    loadings['bin'] = bin_labels
    bins = loadings.groupby(['bin'])
    bin_neuron_counts = bins.count()[0]
    bin_scores = bins.sum()
    valid_bin_scores = bin_scores[bin_neuron_counts > minimum_neuron_count_in_bin]
    return pd.DataFrame(preprocessing.StandardScaler().fit_transform(valid_bin_scores)).set_index(valid_bin_scores.index)

#### Plotting Functions

In [7]:
def plot_zebrafish(x,y,z,colors,title,fig,subplot_num):
    ax = fig.add_subplot(13,1,subplot_num, projection='3d')
    ax.scatter(x, y, z, c=colors, s=.1)
    ax.set_xlim3d(0,2000)
    ax.set_ylim3d(0,1500)
    ax.set_zlim3d(0,250)
    ax.view_init(elev=70, azim=135)
    ax.legend(handles=legend_data, loc='upper left')
    ax.set_title(title)
    return fig

In [8]:
def plot_neuron_loadings(x,y,z,loadings,subject,stimulus,fig):
    loading_colors = loadings.iloc[:, 0:10].apply(np.argmax,axis=1).apply(lambda comp: color_map[comp])
    title = 'Mapping of Zebrafish Neuron Component Loadings in ' + subject + (' stimulus_' + stimulus if stimulus != None else ' Across All Stimuli')
    return plot_zebrafish(x,y,z,loading_colors,title,fig,1)

In [9]:
def plot_bin_loadings(x,y,z,bins,bin_scores,subject,stimulus,fig):
    bins_main_component = bin_scores.apply(np.argmax,axis=1)
    loading_colors=bins.map(bins_main_component)
    title = 'Mapping of Zebrafish Neuron Bin Loadings in ' + subject + (' stimulus_' + stimulus if stimulus != None else ' Across All Stimuli')
    return plot_zebrafish(x,y,z,loading_colors,title,fig,2)

In [10]:
def plot_components(component_fxns, subject, stimulus, fig):
    component_functions = component_fxns.T
    
    #Plotting Aggregate Figure
    agg_ax = fig.add_subplot(13,1,3)
    for component_index in range(0,component_fxns.T.shape[0]):
        component = component_functions[component_index]
        agg_ax.plot(component, c=color_map[component_index])
    agg_ax.set_xlabel('Timepoint')
    agg_ax.set_ylabel('Component Value')
    agg_ax.set_title('Aggregate Plot of All Component Values over Stimulus Time for ' + subject + (' stimulus_' + stimulus if stimulus != None else ' Across All Stimuli'))
    
    #Plotting individual Figures
    for component_index in range(0,component_fxns.T.shape[0]):
        ax = fig.add_subplot(13,1,4+component_index)
        component = component_functions[component_index]
        ax.plot(component, c=color_map[component_index])
        ax.set_xlabel('Timepoint')
        ax.set_ylabel('Component Value')
        ax.set_title('Component ' +str(component_index) + ' Values over Stimulus Time for ' + subject + (' stimulus_' + stimulus if stimulus != None else ' Across All Stimuli'))
    return fig

### Main Code Execution

In [11]:
#Populate subject_coord_dict
starting_run('Computations')
for subject in list_subjects():
    subject_coords = pd.DataFrame(load_file(neuron_xyz_dir + subject + '_mat_dict.pickle')['cell_XYZ'], columns=['x','y','z'])
    # normalize coordinates
    norm_coords = normalize_subject_coordinates(subject_coords)
    bin_coords(norm_coords)
    #assign bins to each coordinate
    subject_coord_dict[subject] = {
        'xyz': subject_coords,
        'xyz_norm': norm_coords[['x','y','z']],
        'bin_labels': norm_coords[['bin']]
    }

Starting Computations 19:30:19.656533


In [12]:
bin_scores = []
for row in model_df.itertuples():
    model = load_file(row.model_path)
    loadings = calc_component_loadings(model)
    bin_labels = subject_coord_dict[row.subject]['bin_labels'][:loadings.shape[0]].loc[:, 'bin']
    bin_scores = calc_bin_scores(bin_labels, loadings)
    plotting_list.append({
        'subject': row.subject,
        'stimulus': row.stimulus,
        'x': subject_coord_dict[row.subject]['xyz'][['x']][:loadings.shape[0]],
        'y': subject_coord_dict[row.subject]['xyz'][['y']][:loadings.shape[0]],
        'z': subject_coord_dict[row.subject]['xyz'][['z']][:loadings.shape[0]],
        'loadings': loadings,
        'bins': bin_labels,
        'bin_scores': bin_scores,
        'component_fxns': model.transform(load_file(row.orig_data_path)),
        'save_path': 'figures/'+str(row.subject)+('_stimulus_' + row.stimulus if row.stimulus != None else '')+'_component_loading_plots.PNG'
    })
pickle_file('data_synthesized_pickled/geo_plotting_data.pickle', plotting_list)
finished_run('Computations')

Finished Computations 19:31:42.452432


In [24]:
plotting_list[0]['bin_scores']

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
859.0,-1.225337,-1.686673,-1.670435,-1.749318,-1.278705,-1.600191,-1.347184,-1.499326,-1.261266,-0.736511
860.0,0.068027,-0.461696,-0.593528,0.177862,-0.237793,-0.735562,1.093486,-0.219474,0.342483,1.340553
861.0,0.819674,0.046434,-0.152355,0.447555,0.329137,-0.009518,1.458340,0.451164,1.015309,2.387946
862.0,0.377580,0.085747,-0.423961,0.688114,0.030047,-0.043670,0.823522,-0.314502,0.723282,1.427799
863.0,-1.060103,-1.801966,-1.483616,-1.427408,-1.158318,-1.752293,-1.157734,-1.530487,-1.353661,-0.917071
...,...,...,...,...,...,...,...,...,...,...
2345.0,-0.835749,-1.033544,-0.937831,-0.676775,-0.786645,-0.542524,-1.095558,-0.807885,-1.081916,-0.843764
2346.0,-0.793740,-1.005180,-0.504489,-0.316411,-0.633489,-0.749396,-1.046807,-0.868710,-0.454954,-0.580370
2382.0,-0.978108,-1.013960,-0.564578,-0.917332,-0.666027,-0.879910,-1.096505,-0.982513,-0.695547,-0.838296
2383.0,-0.283861,0.055718,0.851395,1.034270,0.698694,-0.082625,-0.191481,-0.568307,0.442090,0.094276


### Figures
Figures below are organized as a specific set of figures repeated serially for each subject-stimulus pair:
##### Figure 1. Loadings of Components on Individual Neurons
Each point plotted corresponds to a neuron, located at the X, Y, and Z coordinates provided in original data. The plotted dataset creates a visualization of the fish facing leftwards into the page. Each neuron is colored based on the component on which it loads most heavily over the observed time-period. **Note that colors assigned to components remain constant throughout these figures**
##### Figure 2. Loadings of Components on Standardized Neuron Bins
Here each neuron is plotted according to the X, Y, and Z values given for it, but is colored according to the component loaded most heavily for its particular bin. Bins containing less than 50 neurons were not included in further analyses, and neurons corresponding to those bins are not shown in this figure.
##### Figure 3. Aggregate Graph of Component Functions
A graph of the values predicted by each component at each timepoint during the observed time period. These component "functions" are all placed on the same graph for comparison of their values over time relative to one another.
##### Figures 4 -> 13. Individual Graphs of Component Functions
Each component's predicted values over the timeseries are plotted individually.

In [13]:
for plot_dict in plotting_list:
    fig = plt.figure(figsize=(20, 50))
    fig = plot_neuron_loadings(
        plot_dict['x'],
        plot_dict['y'],
        plot_dict['z'],
        plot_dict['loadings'],
        plot_dict['subject'],
        plot_dict['stimulus'],
        fig
    )
    fig = plot_bin_loadings(
        plot_dict['x'],
        plot_dict['y'],
        plot_dict['z'],
        plot_dict['bins'],
        plot_dict['bin_scores'],
        plot_dict['subject'],
        plot_dict['stimulus'],
        fig
    )
    fig = plot_components(
        plot_dict['component_fxns'],
        plot_dict['subject'],
        plot_dict['stimulus'],
        fig
    )
    fig.tight_layout()
    plt.savefig('../data/' + plot_dict['save_path'])
    plt.close()