In [6]:
%%file utility.py

import math
import numpy as np

def get_entorpy(f_rate0, f_rate1):
    """Average a bias-adjusted relative-entropy term over NaN-free rate pairs.

    For every position where both rates are non-NaN the contribution is

    * ``r0 * log((r0 - bias) / (r1 + bias)) - r0 + r1`` when ``r0 > bias``
    * ``r1`` otherwise

    with ``bias = 1 / (2 * par_m * par_lambda)``. The contributions are summed
    and divided by the number of contributing pairs.

    :param f_rate0: first sequence of rates (may contain NaN).
    :param f_rate1: second sequence of rates, same length as ``f_rate0``.
    :return: mean contribution over the valid pairs.
    :raises AssertionError: if the two sequences differ in length.
    """
    # NOTE(review): meaning of these two constants is not visible here -- confirm.
    par_m = 24
    par_lambda = 250
    bias = 1/(2*par_m*par_lambda)

    assert len(f_rate0) == len(f_rate1)

    contributions = []
    for rate0, rate1 in zip(f_rate0, f_rate1):
        # A pair with a missing value on either side is ignored entirely.
        if np.isnan(rate0) or np.isnan(rate1):
            continue
        if rate0 > bias:
            term = rate0*math.log((rate0 - bias)/(rate1 + bias)) - rate0 + rate1
        else:
            # Rates at or below the bias contribute the second rate only.
            term = rate1
        contributions.append(term)

    # Every valid pair added exactly one contribution, so the list length is
    # the valid-pair count.
    return np.sum(contributions)/len(contributions)


def get_dist_l1(f_rate0, f_rate1):
    """Return the mean absolute difference over NaN-free rate pairs.

    Positions where either rate is NaN are excluded from both the sum and the
    divisor, so the result is the average over the valid pairs only (the same
    convention ``get_entorpy`` uses).

    :param f_rate0: first sequence of rates (may contain NaN).
    :param f_rate1: second sequence of rates, same length as ``f_rate0``.
    :return: mean of ``|f_rate0[i] - f_rate1[i]|`` over the valid pairs, or
        NaN when no pair is valid.
    :raises AssertionError: if the two sequences differ in length.
    """
    assert len(f_rate0) == len(f_rate1)

    dist_l1 = []
    for rate0, rate1 in zip(f_rate0, f_rate1):
        if np.isnan(rate0) or np.isnan(rate1):
            continue
        dist_l1.append(abs(rate0 - rate1))

    count_valid_f_rate = len(dist_l1)
    if count_valid_f_rate == 0:
        # Nothing to average; report "undefined" instead of dividing by zero.
        return float('nan')

    # Divide by the number of pairs actually summed, not by the raw length,
    # otherwise skipped NaN pairs drag the average towards zero.
    return np.sum(dist_l1)/count_valid_f_rate


def get_avg_search_times(react_times, base_line):
    """Return the NaN-ignoring mean of ``react_times`` minus ``base_line``.

    :param react_times: sequence of reaction times; NaN entries are ignored.
    :param base_line: value subtracted from the mean.
    :return: ``nanmean(react_times) - base_line``.
    """
    mean_react_time = np.nanmean(react_times)
    return mean_react_time - base_line


Overwriting utility.py


In [7]:
%%file gamma_fit.py

import random
import numpy as np
import math
from matplotlib import pyplot as plt
from random import shuffle
import itertools
from scipy import stats


class Gamma_Dist_Fitter:
    """Estimate gamma shape/rate parameters from per-group search times.

    Intended call order (each step relies on state set by the previous ones):

    1. ``select_grps_randomly``   -- pick half of the columns at random
    2. ``mean_sd_rand_grps``      -- mean / std-dev of each picked column
    3. ``mean_vs_sd_plot``        -- scatter plot of those statistics
    4. ``find_shape_para_values`` -- shape from the std-dev-vs-mean slope
    5. ``find_rate_para_and_kolmogorov_stat`` -- rate from the remaining
       columns, CDF plot, and a two-sample Kolmogorov-Smirnov test

    ``look_up_time_data`` must expose ``.shape`` and ``.values`` (e.g. a pandas
    DataFrame); each column is one group. The first two rows of every column
    are skipped -- presumably header/metadata rows, confirm against the data.
    """

    def __init__(self, look_up_time_data):
        self._look_up_time_data = look_up_time_data
        self._grps_count = look_up_time_data.shape[1]
        self._rand_grps = None
        self._nors_mean_list = []
        self._nor_stddev_list = []
        self._shape_val = None
        self._rate_val = None

    def _group_search_times(self, idx):
        """Return column ``idx`` as a float64 array, skipping its first two rows."""
        return np.array(self._look_up_time_data.values[:, idx][2:]).astype(np.float64)

    def select_grps_randomly(self):
        """Pick ``grps_count // 2`` distinct column indices uniformly at random."""
        # random.sample draws without replacement, replacing the manual
        # draw-and-reject loop.
        random_list = random.sample(range(self._grps_count), self._grps_count // 2)
        print('Random Numbers group :: ', random_list)
        self._rand_grps = random_list

    def mean_sd_rand_grps(self):
        """Compute NaN-ignoring mean and std-dev for every selected column.

        :raises ValueError: if ``select_grps_randomly`` has not been called.
        """
        if self._rand_grps is None:
            raise ValueError('select_grps_randomly() must be called first')

        # Start from empty lists so that calling this method again does not
        # append a second copy of the statistics.
        self._nors_mean_list = []
        self._nor_stddev_list = []
        for idx in self._rand_grps:
            srch_time_coli = self._group_search_times(idx)
            mean_ = np.nanmean(srch_time_coli)
            stddev_ = math.sqrt(np.nanvar(srch_time_coli))
            self._nors_mean_list.append(mean_)
            self._nor_stddev_list.append(stddev_)
        print('Means :: ', self._nors_mean_list)
        print('Standard Deviations :: ', self._nor_stddev_list)

    def mean_vs_sd_plot(self):
        """Save a scatter plot of std-dev against mean to ``../plots/gamma_sd_mean.png``."""
        ax = plt.subplot(111)
        plt.xlabel('Mean')
        plt.ylabel('Standard Deviation (sd)')
        ax.scatter(self._nors_mean_list, self._nor_stddev_list, color='r')
        plt.savefig('../plots/gamma_sd_mean.png')
        print("Plot Gamma std dev is saved.\n")
        plt.close()

    def find_shape_para_values(self):
        """Set the shape parameter to ``1 / slope**2`` of the sd-vs-mean line.

        The slope comes from a degree-1 least-squares fit (with intercept) of
        the std-devs against the means computed by ``mean_sd_rand_grps``.

        :raises ValueError: if ``mean_sd_rand_grps`` has not produced any data.
        """
        if not self._nors_mean_list:
            raise ValueError('mean_sd_rand_grps() must be called first')

        coefficients = np.polyfit(self._nors_mean_list, self._nor_stddev_list, deg=1, full=True)[0]
        slope = coefficients[0]
        self._shape_val = 1/math.pow(slope, 2)
        print('Shape parameter :: ', self._shape_val)

    def find_rate_para_and_kolmogorov_stat(self):
        """Estimate the rate parameter and compare the fit with held-out data.

        For every column that was *not* randomly selected, the non-NaN values
        are shuffled and split in half: the first half contributes a
        (mean, variance) point, the second half is pooled as held-out data.
        The rate is ``1 / slope`` of the variance-vs-mean line. The pooled
        held-out values are plotted as an empirical CDF next to the fitted
        gamma CDF (saved to ``../plots/gamma_dist.png``) and compared with a
        random gamma sample of equal size via a two-sample KS test.

        :raises ValueError: if the groups have not been selected or the shape
            parameter has not been computed yet.
        """
        if self._rand_grps is None:
            raise ValueError('select_grps_randomly() must be called first')
        if self._shape_val is None:
            raise ValueError('find_shape_para_values() must be called first')

        groups_left_outs = [idx for idx in range(self._grps_count) if idx not in self._rand_grps]

        nors_mean_list = []
        var_value_list = []
        cdf_values = []
        for idx in groups_left_outs:
            srch_time_coli = self._group_search_times(idx)
            # Drop missing observations with a numeric mask rather than by
            # comparing string representations.
            clean_srch_time_coli = list(srch_time_coli[~np.isnan(srch_time_coli)])
            shuffle(clean_srch_time_coli)
            half = len(clean_srch_time_coli)//2
            rdmized_search_times = clean_srch_time_coli[:half]
            cdf_values.extend(clean_srch_time_coli[half:])
            nors_mean_list.append(np.nanmean(rdmized_search_times))
            var_value_list.append(np.nanvar(rdmized_search_times))

        slope = np.polyfit(nors_mean_list, var_value_list, deg=1, full=True)[0][0]
        self._rate_val = 1/slope
        print('Rate parameter :: ', self._rate_val)

        sorted_cdf_value = np.sort(cdf_values)

        # Plot empirical gamma distribution
        # max(..., 1) keeps the single-observation case from dividing by zero.
        y_cdf_emp_value = np.arange(len(sorted_cdf_value)) / float(max(len(sorted_cdf_value) - 1, 1))
        plt.plot(sorted_cdf_value, y_cdf_emp_value)

        # Plot fitted gamma distribution
        x_gamma_val = np.linspace(0, sorted_cdf_value[-1], 200)
        y_gamma_val = stats.gamma.cdf(x_gamma_val, a=self._shape_val, scale=1/self._rate_val)
        plt.plot(x_gamma_val, y_gamma_val, color='r')
        plt.savefig('../plots/gamma_dist.png')
        plt.close()

        # The reference sample is drawn at random, so the reported statistic
        # varies from run to run unless the RNG is seeded by the caller.
        y_pdf = stats.gamma.rvs(size=len(cdf_values), a=self._shape_val, scale=1 / self._rate_val)
        kst_test = stats.ks_2samp(sorted_cdf_value, y_pdf)
        print('Kolmogorov statistic :: ', kst_test)


Overwriting gamma_fit.py


In [8]:
%%file line_fit.py

import numpy as np
import utility
from scipy.stats.mstats import gmean
from matplotlib import pyplot as plt


class LineFit:
    """Relate average search times to firing-rate distances between image pairs.

    ``srch_time_data`` and ``fire_rate_data`` must expose ``.shape`` and
    ``.values`` (e.g. pandas DataFrames). The first two rows of every column
    are skipped -- presumably header/metadata rows, confirm against the data.

    ``find_avg_search_time`` and ``calc_entropy_and_l1_dist`` must run before
    the plotting methods and ``calc_am_gm_spread``, which read their results.
    """

    def __init__(self, srch_time_data, fire_rate_data):
        self._srch_time_data = srch_time_data
        self._fire_rate_data = fire_rate_data
        self._avg_search_times = []
        self._entropy_relative_data = []
        self._l1_dist_data = []
        self._inv_srch_time = []
        self._amGmSearchEntropyRatio = None
        self._amGmSearchL1DistanceRation = None

    @staticmethod
    def _column_as_float(table, col_idx):
        """Return column ``col_idx`` of ``table`` as float64, skipping its first two rows."""
        return np.array(table.values[:, col_idx][2:]).astype(np.float64)

    def find_avg_search_time(self, base_line=328):
        """Compute the baseline-corrected mean search time of every column.

        Also stores ``1000 / t`` for each resulting time ``t`` for use by the
        plotting methods.

        :param base_line: value subtracted from each column mean (default 328,
            the value previously hard-coded here).
        :return: list with one average search time per column.
        """
        len_search_data = self._srch_time_data.shape[1]
        print('Size of search data list :: ' + str(len_search_data))
        # Build fresh lists so a repeated call does not append duplicates.
        self._avg_search_times = [
            utility.get_avg_search_times(self._column_as_float(self._srch_time_data, i), base_line)
            for i in range(len_search_data)
        ]
        # Computed once after all averages are known instead of on every
        # loop iteration.
        self._inv_srch_time = [1000 / search_time for search_time in self._avg_search_times]
        return self._avg_search_times

    def _pair_measures(self, col_a, col_b):
        """Return (entropy a->b, L1 a->b, entropy b->a, L1 b->a) for two firing-rate columns."""
        f_rate_0 = self._column_as_float(self._fire_rate_data, col_a)
        f_rate_1 = self._column_as_float(self._fire_rate_data, col_b)
        return (utility.get_entorpy(f_rate_0, f_rate_1),
                utility.get_dist_l1(f_rate_0, f_rate_1),
                utility.get_entorpy(f_rate_1, f_rate_0),
                utility.get_dist_l1(f_rate_1, f_rate_0))

    def calc_entropy_and_l1_dist(self):
        """Compute relative entropy and L1 distance for every image pair.

        Sets 0-2: each pair is two adjacent columns ``(c, c + 1)``.
        Set 3: each entry is the mean over the four column combinations
        ``(c, c + 2), (c, c + 3), (c + 1, c + 2), (c + 1, c + 3)``.
        For every pair both directions are appended, forward first.

        :return: ``(relative_entropy_list, l1_distance_list)``.
        """
        set_count = 4
        col_cnt_per_set = 6
        # Column offsets combined for the last set.
        # NOTE(review): with a stride of 2 between entries these combinations
        # overlap across consecutive j (and reach column 25) -- confirm this
        # matches the column layout of the firing-rate file.
        last_set_offsets = ((0, 2), (0, 3), (1, 2), (1, 3))

        # Start from empty lists so a repeated call does not append duplicates.
        self._entropy_relative_data = []
        self._l1_dist_data = []

        for i in range(set_count):
            for j in range(col_cnt_per_set // 2):
                col_idx = i * col_cnt_per_set + 2 * j
                if i != 3:
                    ij_relative_entropy, ij_l1_distance, ji_relative_entropy, ji_l1_distance = \
                        self._pair_measures(col_idx, col_idx + 1)
                else:
                    per_combination = [self._pair_measures(col_idx + a, col_idx + b)
                                       for a, b in last_set_offsets]
                    # zip(*...) regroups the 4-tuples by measure so each
                    # measure is averaged over the four combinations.
                    ij_relative_entropy, ij_l1_distance, ji_relative_entropy, ji_l1_distance = \
                        (np.mean(values) for values in zip(*per_combination))

                self._entropy_relative_data.append(ij_relative_entropy)
                self._l1_dist_data.append(ij_l1_distance)
                self._entropy_relative_data.append(ji_relative_entropy)
                self._l1_dist_data.append(ji_l1_distance)

        print('Size of relative entropy list :: ' + str(len(self._entropy_relative_data)))
        print('Size of L1 distance list :: ' + str(len(self._l1_dist_data)))
        return self._entropy_relative_data, self._l1_dist_data

    @staticmethod
    def _fit_straight_line_through_origin(x_, y_):
        """Least-squares fit of ``y = a * x`` (no intercept).

        :param x_: 1-D array of x values.
        :param y_: 1-D array of y values, same length.
        :return: ``(a, residuals)`` exactly as returned by ``np.linalg.lstsq``;
            ``a`` has one element.
        """
        x_ = np.asarray(x_)[:, np.newaxis]
        _a, _residuals, _, _ = np.linalg.lstsq(x_, y_, rcond=None)
        return _a, _residuals

    def _plot_inverse_search_fit(self, x_values, x_label, quantity, out_path):
        """Scatter ``x_values`` against inverse search time, overlay the through-origin fit, save.

        ``quantity`` is the wording inserted into the two printed summary lines.
        """
        x_arr = np.array(x_values)
        y_arr = np.array(self._inv_srch_time)

        ax = plt.subplot(111)
        plt.xlabel(x_label)
        plt.gca().set_ylabel(r'$s^{-1}$')
        ax.scatter(x_arr, y_arr, c='red')
        slope_, residual_error_ = LineFit._fit_straight_line_through_origin(x_arr, y_arr)
        print('Slope for ' + quantity + ' vs. inverse search time curve :: ', slope_[0])
        print('Residual error for the straight line fit for ' + quantity + ' :: ', residual_error_[0])
        ax.plot(x_arr, slope_ * x_arr)
        plt.savefig(out_path)
        plt.close()

    def plot_srch_vs_entropy(self):
        """Plot inverse search time against relative entropy; saves ``../plots/relative_entropy.png``."""
        self._plot_inverse_search_fit(self._entropy_relative_data, 'Relative Entropy distance',
                                      'relative entropy', '../plots/relative_entropy.png')

    def plot_srch_vs_l1_dist(self):
        """Plot inverse search time against L1 distance; saves ``../plots/l1_distance_plot.png``."""
        self._plot_inverse_search_fit(self._l1_dist_data, 'L1 distance',
                                      'l1 distance', '../plots/l1_distance_plot.png')

    def calc_am_gm_spread(self):
        """Compare arithmetic and geometric means of (search time x distance).

        :return: ``(AM/GM for search * relative entropy, AM/GM for search * L1 distance)``.
        """
        search_entropy_product = np.multiply(self._avg_search_times, self._entropy_relative_data)
        search_l1_distance_product = np.multiply(self._avg_search_times, self._l1_dist_data)

        AmProductSearchEntropy = np.mean(search_entropy_product)
        GmProductSearchEntropy = gmean(search_entropy_product)
        print('Arithmetic mean for search * relative entropy :: ' + str(AmProductSearchEntropy))
        print('Geometric mean for search * relative entropy :: ' + str(GmProductSearchEntropy))

        AmProductSearchL1Distance = np.mean(search_l1_distance_product)
        GmProductSearchL1Distance = gmean(search_l1_distance_product)
        print('Arithmetic mean for search * L1 distance :: ' + str(AmProductSearchL1Distance))
        print('Geometric mean for search * L1 distance :: ' + str(GmProductSearchL1Distance))

        self._amGmSearchEntropyRatio = AmProductSearchEntropy / GmProductSearchEntropy
        print('Ratio of AM and GM for search * relative entropy :: ' + str(self._amGmSearchEntropyRatio))

        self._amGmSearchL1DistanceRation = AmProductSearchL1Distance / GmProductSearchL1Distance
        print('Ratio of AM and GM for search * L1 distance :: ' + str(self._amGmSearchL1DistanceRation))

        return self._amGmSearchEntropyRatio, self._amGmSearchL1DistanceRation


Overwriting line_fit.py


In [9]:
%%file main.py

import numpy as np
from scipy.stats.mstats import gmean
import pandas as pd
import line_fit as lf
import gamma_fit as gd


def main():
    """Run the straight-line analysis, then the gamma-distribution fit.

    Reads the firing-rate and search-time CSV files from ``../data`` and
    delegates to ``LineFit`` and ``Gamma_Dist_Fitter``; each step prints its
    own results.
    """
    print('Visual Neuroscience!')
    firing_rates = pd.read_csv('../data/02_data_visual_neuroscience_firingrates.csv')
    search_times = pd.read_csv('../data/02_data_visual_neuroscience_searchtimes.csv')

    # Straight-line fit of inverse search time against each distance measure.
    # The averages and distances must be computed before plotting.
    line_fitter = lf.LineFit(search_times, firing_rates)
    line_fitter.find_avg_search_time()
    line_fitter.calc_entropy_and_l1_dist()
    line_fitter.plot_srch_vs_l1_dist()
    line_fitter.plot_srch_vs_entropy()
    line_fitter.calc_am_gm_spread()

    # Gamma-distribution fit; the steps are called in the same fixed order
    # as before.
    gamma_fitter = gd.Gamma_Dist_Fitter(search_times)
    gamma_fitter.select_grps_randomly()
    gamma_fitter.mean_sd_rand_grps()
    gamma_fitter.mean_vs_sd_plot()
    gamma_fitter.find_shape_para_values()
    gamma_fitter.find_rate_para_and_kolmogorov_stat()


if __name__ == '__main__':
    main()


Overwriting main.py


In [10]:
!python3 main.py

Visual Neuroscience!
Size of search data list :: 24
Size of relative entropy list :: 24
Size of L1 distance list :: 24
Slope for l1 distance vs. inverse search time curve ::  0.8394302855531001
Residual error for the straight line fit for l1 distance ::  25.873007527365438
Slope for relative entropy vs. inverse search time curve ::  2.1338921372658994
Residual error for the straight line fit for relative entropy ::  7.54286309707597
Arithmetic mean for search * relative entropy :: 518.8512594696418
Geometric mean for search * relative entropy :: 497.861183243305
Arithmetic mean for search * L1 distance :: 1518.6035208675921
Geometric mean for search * L1 distance :: 1345.392274970629
Ratio of AM and GM for search * relative entropy :: 1.0421604996187843
Ratio of AM and GM for search * L1 distance :: 1.1287440467136207
Random Numbers group ::  [15, 9, 16, 19, 18, 14, 20, 6, 11, 3, 2, 1]
Means ::  [669.9305555555555, 635.5277777777778, 567.8472222222222, 1400.3472222222222, 1198.33333333