# Random Forest Clustering.

### GOAL: To find the significant passive and kinetics parameters which contribute in differentiation of different sub-types of neurons.
### Data: npz file generated by executing save_params function in neuroRD ajustador project.

###### STEP-1: Data preparation.

In [4]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
data_arky_1 = np.load('data/fitgp-arky-chan_arky120938.npz')
data_arky_2 = np.load('data/fitgp-arky-chan_arky1382938.npz')
data_arky_3 = np.load('data/fitgp-arky-chan_arky140938.npz')
data_proto_1 = np.load('data/fitgp-proto-chan_proto0792938.npz')
data_proto_2 = np.load('data/fitgp-proto-chan_proto1542938.npz')

In [6]:
print(data_arky_1['paramnames'].shape, data_arky_2['paramnames'].shape, data_arky_3['paramnames'].shape, 
      data_proto_1['paramnames'].shape, data_proto_2['paramnames'].shape) # Verify shapes of datasets

(48,) (48,) (48,) (48,) (48,)


In [7]:
list(data_arky_1.keys())

['params', 'paramnames', 'fitvals', 'features']

In [8]:
data_arky_1['fitvals'][0].shape, data_arky_1['features'].shape

((13,), (15,))

In [9]:
def get_feature_df(np_data):
    features = [text.partition('=')[0] for text in np_data['features'][:-3]]
    features.append(np_data['features'][-3].split(':')[0])
    df = pd.DataFrame(np_data['fitvals'], columns=features)
    df['neuron'] = np_data['features'][-1].split('=')[-1]
    df['model'] = np_data['features'][-2].split('=')[-1]
    return df

In [10]:
arky_features_1 = get_feature_df(data_arky_1)
arky_features_2 = get_feature_df(data_arky_2)
arky_features_3 = get_feature_df(data_arky_3)
proto_features_1 = get_feature_df(data_proto_1)
proto_features_2 = get_feature_df(data_proto_2)

In [11]:
arky_params_1 = pd.DataFrame(data_arky_1['params'], columns = data_arky_1['paramnames'])
arky_params_2 = pd.DataFrame(data_arky_2['params'], columns = data_arky_2['paramnames'])
arky_params_3 = pd.DataFrame(data_arky_3['params'], columns = data_arky_3['paramnames'])
proto_params_1 = pd.DataFrame(data_arky_1['params'], columns = data_arky_1['paramnames'])
proto_params_2 = pd.DataFrame(data_arky_2['params'], columns = data_arky_2['paramnames'])

In [12]:
arky_df_1 = pd.concat([arky_params_1, arky_features_1], axis=1, sort=False)
arky_df_2 = pd.concat([arky_params_2, arky_features_2], axis=1, sort=False)
arky_df_3 = pd.concat([arky_params_3, arky_features_3], axis=1, sort=False)
proto_df_1 = pd.concat([proto_params_1, proto_features_1], axis=1, sort=False)
proto_df_2 = pd.concat([proto_params_2, proto_features_2], axis=1, sort=False)

In [13]:
proto_features_2.shape

(5600, 15)

In [14]:
proto_params_2.shape

(6000, 48)

In [15]:
df = arky_df_1.append(arky_df_2)
df = df.append(arky_df_3)
df = df.append(proto_df_1)
#df = df.append(proto_df_2) # features and parameters individual rows didn't match.
df.shape

(25200, 63)

###### STEP-2: Random forest classification model training.

In [16]:
df

Unnamed: 0,junction_potential,RA,RM,CM,Eleak,Cond_KDr_0,Cond_KDr_1,Cond_KDr_2,Cond_Kv3_0,Cond_Kv3_1,...,spike_time_fitness,spike_width_fitness,spike_height_fitness,spike_count_fitness,spike_ahp_fitness,ahp_curve_fitness,spike_range_y_histogram_fitness,total,neuron,model
0,-0.014233,0.103845,0.734922,0.010075,-0.050792,49.409317,6.840888,175.201521,397.959202,118.260811,...,0.147647,0.174661,0.001113,0.188789,0.152621,0.652962,1.231985,2.495253,arky,gp


In [20]:
print('Model: ',pd.unique(df['model']),'\n','Neuron_types: ', pd.unique(df['neuron']))

Model:  ['gp'] 
 Neuron_types:  ['arky' 'proto']
