# Intrinsic properties - Clustering with KMeans

### K-Means Clustering with scikit-learn

<img src="images/Cluster_KMeans.png" alt="Drawing" style="width: 350px;"/>

In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt


# Load the raw table of intrinsic electrophysiological properties.
data = pd.read_csv("../datasets/Project_datasets/intrinsic_cluster_analysis.csv")

# Build the cleaned frame in one chained, re-runnable step:
#   * drop the 'Unnamed: 0' column (not used as a feature),
#   * give the unnamed trailing column a meaningful name.
cleaned_data = (
    data
    .drop(columns=['Unnamed: 0'])
    .rename(columns={'Unnamed: 6': 'Putative Cell Type'})
)

# Putative cell types are assigned by row position. `.loc` slices are
# label-based and inclusive on both ends, so these three ranges cover every row.
# NOTE(review): the boundaries are hard-coded and assume the CSV row order
# never changes — confirm against the source data.
cleaned_data.loc[0:16, 'Putative Cell Type'] = 'VIP+'
cleaned_data.loc[17:45, 'Putative Cell Type'] = 'NDNF-Like'
cleaned_data.loc[46:, 'Putative Cell Type'] = 'Jini Cells'

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
cleaned_data.info()
cleaned_data.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Delay               67 non-null     float64
 1   Rheobase            67 non-null     float64
 2   Rin                 67 non-null     float64
 3   tau                 67 non-null     float64
 4   Max                 67 non-null     float64
 5   Putative Cell Type  67 non-null     object 
dtypes: float64(5), object(1)
memory usage: 3.3+ KB
None


Unnamed: 0,Delay,Rheobase,Rin,tau,Max,Putative Cell Type
0,27.9,75.8,258.0,12.3,143.0,VIP+
1,331.9,44.8,412.0,13.4708,66.0,VIP+
2,134.8,36.0,536.5,23.9807,64.0,VIP+
3,176.8,26.6,692.0,27.1597,25.0,VIP+
4,41.42,104.0,318.632,13.0589,30.0,VIP+


In [2]:
from sklearn.preprocessing import StandardScaler

# Keep only the first five columns (the numeric features); the trailing
# 'Putative Cell Type' column holds text labels and must not be scaled.
df = pd.DataFrame(cleaned_data.iloc[:, :5])
print(df.head())

# Standardize every feature column, fitting the scaler on all rows.
data_scaler = StandardScaler()
scaled_data = data_scaler.fit(df).transform(df)
print('Scaled data shape is', scaled_data.shape)

    Delay  Rheobase      Rin      tau    Max
0   27.90      75.8  258.000  12.3000  143.0
1  331.90      44.8  412.000  13.4708   66.0
2  134.80      36.0  536.500  23.9807   64.0
3  176.80      26.6  692.000  27.1597   25.0
4   41.42     104.0  318.632  13.0589   30.0
Scaled data shape is (67, 5)


In [3]:
# Wrap the scaled array in a DataFrame, naming its five columns
# (in positional order) directly at construction time.
df_scaled_data = pd.DataFrame(
    scaled_data,
    columns=['Delay', 'Rheobase', 'Input Resistance', 'Decay Tau', 'Max Firing'],
)
df_scaled_data.head()

Unnamed: 0,Delay,Rheobase,Input Resistance,Decay Tau,Max Firing
0,-1.597525,-0.532602,-0.435153,-0.318385,4.578711
1,0.701864,-0.944776,0.31213,-0.156236,1.002232
2,-0.788957,-1.06178,0.916265,1.299325,0.909337
3,-0.471278,-1.186761,1.670826,1.739598,-0.902127
4,-1.495263,-0.157658,-0.140937,-0.213282,-0.669888


In [4]:
import os

# A bare Python variable called OMP_NUM_THREADS is invisible to OpenMP: the
# thread limit is read from the process environment, so export it there.
# Environment values must be strings.
# NOTE(review): native thread pools usually read this variable when they are
# first loaded, so for a guaranteed effect this cell should run before
# numpy / scikit-learn are imported — confirm on the target platform.
OMP_NUM_THREADS = 1  # Python name kept so any existing reference still works
os.environ["OMP_NUM_THREADS"] = str(OMP_NUM_THREADS)

In [5]:
from sklearn.cluster import KMeans
# Training subset: the first 46 scaled rows, i.e. the rows labelled
# 'VIP+' and 'NDNF-Like' when the data was loaded (rows 0-45).
X = df_scaled_data.iloc[:46]

# Two clusters; the fixed seed keeps the centroids (and therefore the
# cluster numbering) reproducible between runs.
kmeans = KMeans(
    n_clusters=2,
    n_init="auto",
    random_state=42,
)
kmeans.fit(X)



In [6]:
# Assign every row of the scaled data to one of the two fitted clusters.
# This covers the full frame, including the rows beyond the first 46 that
# were not used when fitting the model.
X1 = df_scaled_data
cluster_labels = kmeans.predict(X1)
cluster_labels  # last expression: display the predicted label array

array([0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0])

In [7]:
# Attach the predicted cluster id to the unscaled measurements as a new
# column; both objects share the same default 0..n-1 index.
cluster_label_column = pd.Series(cluster_labels, name='KMeans_ncluster_2')
df = pd.concat([cleaned_data, cluster_label_column], axis=1)
df.tail()  # Output cluster classification

Unnamed: 0,Delay,Rheobase,Rin,tau,Max,Putative Cell Type,KMeans_ncluster_2
62,332.633343,90.67,192.53,5.62,56.0,Jini Cells,0
63,193.566657,146.67,270.96,11.89,31.0,Jini Cells,0
64,48.46667,92.67,231.92,9.09,38.4,Jini Cells,0
65,21.16667,412.67,127.94,12.01,0.0,Jini Cells,0
66,198.9,80.0,217.55,8.43,33.2,Jini Cells,0


In [8]:
# Translate numeric cluster ids into cell-type names.
# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on `df[col]`: that form is chained assignment
# through an in-place method, which warns in recent pandas and leaves `df`
# unchanged once Copy-on-Write is enabled.
# The id -> name mapping relies on the cluster numbering produced by the
# seeded KMeans fit (cluster 0 holds the rows labelled 'NDNF-Like').
df['KMeans_ncluster_2'] = df['KMeans_ncluster_2'].replace({0: 'NDNF-Like', 1: 'VIP+'})
df['KMeans_ncluster_2'].head()

0    NDNF-Like
1    NDNF-Like
2         VIP+
3         VIP+
4    NDNF-Like
Name: KMeans_ncluster_2, dtype: object

# 2-D view of the clustering: rheobase against input resistance,
# coloured by the K-Means assignment.
sns.scatterplot(
    data=df,
    x='Rheobase',
    y='Rin',
    hue='KMeans_ncluster_2',
    palette='viridis',
)

In [13]:
import plotly.express as px

# Styling shared by all three scene axes; only the background colour differs.
_axis_style = dict(
    nticks=4,
    gridcolor="white",
    showbackground=True,
    zerolinecolor="white",
)

# 3-D scatter of the unscaled measurements, coloured by K-Means assignment.
fig1 = px.scatter_3d(
    df,
    x='Max',
    y='Delay',
    z='Rin',
    color='KMeans_ncluster_2',
    color_discrete_sequence=['#221150', '#d3436e'],
    labels={
        'Max': 'Max Firing (# of APs)',
        'Delay': 'Latency to 1st AP (ms)',
        'Rin': 'Input Resistance (MOhms)',
    },
)
fig1.update_layout(
    width=700,
    margin=dict(r=20, l=10, b=10, t=10),
    font=dict(family="Times New Roman", size=12),
    scene=dict(
        xaxis=dict(_axis_style, backgroundcolor='rgb(254,217,166)'),
        yaxis=dict(_axis_style, backgroundcolor='rgb(253,218,236)'),
        zaxis=dict(_axis_style, backgroundcolor='rgb(222,203,228)'),
    ),
)
fig1.update_traces(marker=dict(size=5))

fig1.show()

# Save a static copy of the figure next to the other notebook images.
fig1.write_image("images/fig1.png")

In [14]:
# List the column names available in `df`; these are the names passed as
# x / y / z / color arguments to the scatter plots.
df.columns

Index(['Delay', 'Rheobase', 'Rin', 'tau', 'Max', 'Putative Cell Type',
       'KMeans_ncluster_2'],
      dtype='object')

In [28]:
# Second 3-D view of the same clustering: decay tau, rheobase and input
# resistance. Built as one chained expression; each update_* call returns
# the figure it modified.
fig1 = (
    px.scatter_3d(
        df,
        x='tau',
        y='Rheobase',
        z='Rin',
        color='KMeans_ncluster_2',
        color_discrete_sequence=['#221150', '#d3436e'],
        labels={
            'tau': 'Decay Tau',
            'Rheobase': 'Rheobase',
            'Rin': 'Input Resistance (MOhms)',
        },
    )
    .update_layout(
        scene={
            'xaxis': {
                'nticks': 4,
                'backgroundcolor': 'rgb(254,217,166)',
                'gridcolor': "white",
                'showbackground': True,
                'zerolinecolor': "white",
            },
            'yaxis': {
                'nticks': 4,
                'backgroundcolor': 'rgb(253,218,236)',
                'gridcolor': "white",
                'showbackground': True,
                'zerolinecolor': "white",
            },
            'zaxis': {
                'nticks': 4,
                'backgroundcolor': 'rgb(222,203,228)',
                'gridcolor': "white",
                'showbackground': True,
                'zerolinecolor': "white",
            },
        },
        font={'family': "Times New Roman", 'size': 12},
        width=700,
        margin={'r': 20, 'l': 10, 'b': 10, 't': 10},
    )
    .update_traces(marker={'size': 5})
)

fig1.show()

In [10]:
# Print the RGB strings of plotly's qualitative Pastel1 palette. Three of them
# ('rgb(254,217,166)', 'rgb(253,218,236)', 'rgb(222,203,228)') are the axis
# background colours used in the 3-D scatter plots.
print(px.colors.qualitative.Pastel1)

['rgb(251,180,174)', 'rgb(179,205,227)', 'rgb(204,235,197)', 'rgb(222,203,228)', 'rgb(254,217,166)', 'rgb(255,255,204)', 'rgb(229,216,189)', 'rgb(253,218,236)', 'rgb(242,242,242)']
