In [16]:
# Version 2.2 (automated removal experiments) - new PN Activity representation (binomial dist.)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy import random
import pickle
import scipy.stats as st
import copy

# read csv 
df = pd.read_csv('connectivity matrix table 1.csv') # matrix of connections between neurons

# FUNCTIONS

# Filters dataframe based on neuron type. 
# Establishes a synapse threshold of greater than 1
# data - the dataframe
# col_substr - regex to select for substrings that represent neuron type in columns
# row_substr - regex to select for substrings that represent neuron type in rows
# row_header - header for indices
def filter_neuron_types(data, col_substr, row_substr, idx_header):
    first_column = df.iloc[:, 0] # gets first column to use as row indices
    # columns
    new_df = data.filter(regex=col_substr, axis='columns') 
    
    # rows
    new_df.insert(0, idx_header, first_column, True) # inserts column with row labels into the new dataframe
    new_df.set_index(idx_header, inplace=True) # sets inserted column as dataframe indicies
    new_df = new_df.filter(regex=row_substr, axis='index')
    
    # establishing a threshold of num synapses > 1 to count as a connection
    num_rows = len(new_df)
    # loops thru rows of new_df
    for i in range(num_rows):
        row = new_df.iloc[i] # gets row
        new_row = np.where(row == 1, 0, row) # if synaspse num = 1, change to 0
        new_df.iloc[i] = new_row
    
    return new_df

# Normalizes weights by column's sum in a pandas df
# df - dataframe
# sum_of_w - the sum of the weights
def normalize_weights_df(df, sum_of_w):
    col = len(df.columns)
    #df.iloc[:,0:col] = df.iloc[:,0:col].apply(lambda x: x / np.linalg.norm(x), axis=0)
    #df = normalize(df, axis=0, norm='l1') # normalize by column to sum of 1
    for c in range(col):
        w = df.iloc[:,c]
        s = sum(abs(w)) # sum of column
        if (s != 0):
            df.iloc[:,c] = [i/s for i in w]
    return sum_of_w * df

# Normalizes weights by column's sum in a numpy array
# arr - numpy array
# sum_of_w - the sum of the weights
def normalize_weights_np(arr, sum_of_w):
    col = len(arr[0])
    #df.iloc[:,0:col] = df.iloc[:,0:col].apply(lambda x: x / np.linalg.norm(x), axis=0)
    #df = normalize(df, axis=0, norm='l1') # normalize by column to sum of 1
    for c in range(col):
        w = arr[:,c]
        s = sum(abs(w)) # sum of column
        if (s != 0):
            arr[:,c] = [i/s for i in w]
    return sum_of_w * arr

# normalizes weights by the row's sum in a pandas df
def normalize_kc_weights_df(df, sum_of_w):
    row = len(df.index)
    for r in range(row):
        w = df.iloc[r]
        s = sum(abs(w)) # sum of row
        if (s != 0):
            df.iloc[r] = [i/s for i in w]
    return sum_of_w * df

# normalizes weights by the row's sum in a numpy array
def normalize_kc_weights_np(arr, sum_of_w):
    row = len(arr)
    for r in range(row):
        w = arr[r]
        s = sum(abs(w)) # sum of row
        if (s != 0):
            arr[r] = [i/s for i in w]
    return sum_of_w * arr

# transforms weight matrix into doubly stochastic matrix (rows and columns both sum to 1)
def transform_doubly_stochastic_df(df):
#     df = df.abs()
    row = len(df.index)
    col = len(df.columns)
    tol = 0.005
    done = False
    sum_r = np.zeros(row)
    sum_c = np.zeros(col)
    while not done:
        # normalize rows
        for r in range(row):
            w = df.iloc[r]
            s = sum(abs(w)) # sum of row
            if (s != 0):
                df.iloc[r] = [i/s for i in w]
                
        # normalize columns
        for c in range(col):
            w = df.iloc[:,c]
            s = sum(abs(w)) # sum of column
            if (s != 0):
                df.iloc[:,c] = [i/s for i in w]
                
        # check if rows and columns sum to 1
        sum_r = df.sum(axis=1)
        c1 = np.all(abs(sum_r - 1) <= tol)
        
        sum_c = df.sum(axis=0)
        c2 = np.all(abs(sum_c - 1) <= tol)
            
        if c1 == True and c2 == True:
            done = True
    return df

# Sets the bias for a single KC so that the KC fires 5% of 
# the time in response to any odor
# inputs - array of all points from all odors
# weights - array of weights (aka synapses) for a single KC
def set_kc_bias(inputs, weights):
    I = len(inputs)
    bias = 0
    
    kc_output = np.zeros((I,)) # array to store output values
    
    # loops thru all inputs and calculates and stores the KC output value
    for i in range(I):
        point = inputs[i]
        #print("point:\n{}, length: {}".format(point, len(point)))

        # equation
        x = np.dot(point, weights) # no bias yet (theta=0)
        #print("output value: {}".format(x))

        kc_output[i] = x  # add output value to output array
    
    kc_output.sort() # sort output values in ascending order
    #print("output values of single KC after sorting:\n{} \nlength: {}".format(kc_output, len(kc_output)))
    idx = int(0.05 * len(kc_output)) # getting index for the threshold that'll provide a 5% firing rate (out of 5000, this is the 251th output value with index of 250)
    #print("index: ", idx)
    bias = kc_output[idx]
    #print("bias: ", bias)
    
    return bias

# Calculates output of KC layer
# weights - weights (num of synapses) for connections from PNs to KCs
# biases - biases
# inputs - inputs from PNs
def build_pn_to_kc(inputs, weights, biases):
    
    # equation
    x1 = np.dot(inputs, weights) + biases # matrix multiplication to multiply inputs by weights
    #print("after matrix multiplication:\n{}, \nlength: {}".format(x1, len(x1)))

    # thresholding
    x1 = np.where(x1 > 0, 1, 0)
    #print("after thresholding:\n{}, \nlength: {}".format(x1, len(x1)))
    
    return x1

# Calculates output of MBON layer
# weights - weights (num of synapses) from KC to MBON
# inputs - inputs from KCs
# bias - biases
def build_kc_to_mbon(inputs, weights, bias):
    
    # equation
    x2 = np.dot(inputs, weights) + bias
    #print("after matrix multiplication:\n{}, \nnum of rows: {}".format(x2, len(x2)))

    # thresholding
    x2 = np.where(x2 > 0, 1, 0)
    #print("after thresholding:\n{}, \nlength: {}".format(x2, len(x2)))
    
    return x2

# Randomly prunes a certain number of pre-synaptic KCs and returns the new weight table (df) and the names of KCs with
# connections set to zero in each MBON
# df - synapse table
# num - number of KCs to remove
def prune_rand_kcs(df, num):
    num_mbons = len(df.columns)
    kc_names = []
    
    # loop thru each column = MBON
    for m in range(num_mbons):        
        idxs_with_conn = np.asarray(np.where(df.iloc[:,m] > 0)).flatten() # gets indicies where there is a connection to a KC
        
        # randomly select KC connections to be removed
        s = num
        if (len(idxs_with_conn) < num): # if less KC connections than num to remove, then remove all 
            s = len(idxs_with_conn)
        indicies = np.random.choice(a=idxs_with_conn, size=s, replace=False)
        
        kc_names.append(df.index[indicies]) # record names of KCs
        
        for i in indicies:
            df.iloc[i, m] = 0
    
    df = df.to_numpy()
    return df, kc_names

# ORIGINAL prune_top_kcs function
# Targeted pruning prunes KC connections for each MBON based on their synaptic weight value
# Returns the new weight table (df) and the names of KCs with connections set to zero in each MBON
# df - synapse table
# num - number of KCs to remove
def prune_top_kcs(prev_w, curr_w, num, run):
    num_mbons = 19
    kc_names = []
#     print(prev_w.shape)
    
    # loop thru each column = MBON
    for m in range(0,num_mbons):
        vals = abs(prev_w[run,:,m]) # absolute value of column
        idxs_with_conn = np.asarray(np.where(vals > 0)).flatten() # gets indicies where there is a connection to a KC
        
        # find KC connections with highest weight values to remove
        s = num
        if (len(idxs_with_conn) < num): # if less KC connections than num to remove, then remove all 
            s = len(idxs_with_conn)
        top_weight_idx = np.argpartition(vals, -1*s)[-1*s:] # gets indicies of top #num KC connections
        
        kc_names = list(curr_w.index[top_weight_idx]) # names of KC connections pruned
    
#         curr_w = curr_w.to_numpy()
        curr_w.iloc[top_weight_idx, m] = 0 # sets weight values of KC connections to be removed to zero

    return curr_w.to_numpy(), kc_names
        

# Randomly ablates a certain number of pre-synaptic KCs and returns the new weight table (df) and the names of KCs with
# connections set to zero in each MBON
# df - synapse table
# num - number of KCs to remove
def ablate_rand_kcs(df, num):
    num_mbons = len(df.columns)
    num_kcs = len(df)   
        
    # randomly select KC connections to be removed
    indicies = random.choice(num_kcs, size=num, replace=False)
    kc_names = list(df.index[indicies]) # record names of KCs
    # sets each row (= each KC) in indices to zero
    for i in indicies:
        df.iloc[i, :] = 0
    
    df = df.to_numpy()
    return df, kc_names

# Sets the all synaptic weight values of the top certain number of KCs to zero. Returns new weight table with
# KCs removed and a list of names of the KCs removed
# prev_w - weight table from previous run (np arr)
# curr_w - current weight table (df as input, return as np array)
# num - the number of KCs' connections to remove
# run - the realization (eg. 0, 1, 2...)
def ablate_top_kcs(prev_w, curr_w, num, run):
    sum_weights = abs(prev_w[run]).sum(axis=1) # total absolute value of final weights for each KC 
    top_weight_idx = np.argpartition(sum_weights, -1*num)[-1*num:]
    
    # record which KCs are removed
    kc_names = list(curr_w.index[top_weight_idx])
    
    curr_w = curr_w.to_numpy()
    curr_w[top_weight_idx,:] = 0 # sets weight values of KCs to be removed to zero
    
    return curr_w, kc_names

# Sets the all synaptic weight values of the top certain number (relative to the number of connections) of KCs to zero. Returns new weight table with
# KCs removed and a list of names of the KCs removed
# prev_w - weight table from previous run (np arr)
# curr_w - current weight table (df as input, return as np array)
# num - the number of KCs' connections to remove
# run - the realization (eg. 0, 1, 2...)
def ablate_top_kcs_rel(prev_w, curr_w, num, run, kc_mbon):
    sum_weights = abs(prev_w[run]).sum(axis=1) # total absolute value of final weights for each KC 
    num_nonzeros = (kc_mbon != 0).astype(int).sum(axis=1)
    rel_sum_weights = sum_weights.astype(float) / num_nonzeros.astype(float)
    top_weight_idx = np.argpartition(rel_sum_weights, -1*num)[-1*num:]
    
    # record which KCs are removed
    kc_names = list(curr_w.index[top_weight_idx])
    
    curr_w = curr_w.to_numpy()
    curr_w[top_weight_idx,:] = 0 # sets weight values of KCs to be removed to zero
    
    return curr_w, kc_names

In [18]:
# FILTER DATA FOR KCs TO MBONs CONNECTIONS -> kc_to_mbon
kc_mbon = filter_neuron_types(df, "(?=.*MBON)(?=.*left)", "(?=.*KC)(?=.*left)", "Presynaptic KCs") # filter w/ regex for KCs and MBON lefts

# take out MBONs with no KC connections
kc_mbon = kc_mbon.drop(kc_mbon.loc[:, (kc_mbon.sum(axis=0) == 0)], axis=1)

# adding a fictional MBON
fict_weights = np.full(len(kc_mbon), 0.1) # initializing all weights at 0.1
kc_mbon['MBON-fict'] = fict_weights

# normalizing weights
kc_mbon = transform_doubly_stochastic_df(kc_mbon)
# kc_mbon = kc_mbon.fillna(0)

# # labelling KCs
# for i in range(len(kc_mbon.index)):
#     kc_mbon.index.values[i] = kc_mbon.index.values[i] + ' {}'.format(i)
# # kc_mbon.rename(index=idx + ' {}'.format(0), inplace=True)

# kc_mbon

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Programs\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-18-e67b8364b865>", line 12, in <module>
    kc_mbon = transform_doubly_stochastic_df(kc_mbon)
  File "<ipython-input-16-80fb8f3b393c>", line 111, in transform_doubly_stochastic_df
    df.iloc[:,c] = [i/s for i in w]
  File "C:\Programs\anaconda3\lib\site-packages\pandas\core\indexing.py", line 692, in __setitem__
    iloc._setitem_with_indexer(indexer, value, self.name)
  File "C:\Programs\anaconda3\lib\site-packages\pandas\core\indexing.py", line 1637, in _setitem_with_indexer
    self._setitem_single_block(indexer, value, name)
  File "C:\Programs\anaconda3\lib\site-packages\pandas\core\indexing.py", line 1843, in _setitem_single_block
    self.obj[item_labels[indexer[info_axis]]] = value
  File "C:\Programs\anaconda3\lib\site-packages\pandas\core\frame.py", line 3163, 

TypeError: object of type 'NoneType' has no len()

In [None]:
kc_mbon

In [15]:
kc_mbon.sum(axis=1).to_numpy()

array([0.27269318, 0.24282569, 0.31546958, 0.23599444, 0.24357171,
       0.30614983, 0.27071439, 0.25623998, 0.24486724, 0.52731221,
       0.26183797, 0.25626968, 0.29474774, 0.34490174, 0.21793435,
       0.22655267, 0.28949145, 0.26391619, 0.26030507, 0.22744075,
       0.24379965, 0.19632163, 0.29310705, 0.27486326, 0.2456968 ,
       0.25573616, 0.31532432, 0.27866246, 0.14371549, 0.19772276,
       0.27259057, 0.19112358, 0.235331  , 0.6076217 , 0.20288129,
       0.62175066, 0.21301584, 0.19218737, 0.26192642, 0.26240715,
       0.2051365 , 0.39597467, 0.19978401, 0.11691821, 0.20917748,
       0.2136352 , 0.24241267, 0.16757324, 0.23785013, 0.26512284,
       0.20153689, 0.21675817, 0.25382077, 0.2783158 , 0.17552772,
       0.25450533, 0.22556028, 0.23126695, 0.23800786, 0.25890302,
       0.02540233, 0.19977348, 0.2169424 , 0.25855065, 0.22154557,
       0.25752809, 0.1723222 , 0.24502612, 0.02540233, 0.14781326,
       0.14781326, 0.19793822, 0.29124725, 0.02540233, 0.02540