## Y79 Retinoblastoma Cells Clustering
* Agglomerative Clustering
* K-Means Clustering
* Affinity Propagation Clustering
* Gaussian Mixture Model Clustering

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
from sklearn.mixture import GaussianMixture 
import os

In [None]:
# List the contents of the dataset input directory.
os.listdir("../input/y79-retinoblastoma-cells")

In [None]:
# Read the Y79 measurements into a DataFrame and preview its first two rows.
data = pd.read_csv("../input/y79-retinoblastoma-cells/Y79_data.csv")
data.head(2)

In [None]:
# Print a summary of the frame: one line per column with its dtype and non-null count.
data.info()

In [None]:
# Keep only the two columns used as clustering features.
data2=data[['diameter','Vmb']]

# Agglomerative Clustering

In [None]:
def doAgglomerative(X, nclust=2):
    """Cluster the rows of ``X`` with Ward-linkage agglomerative clustering.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, handed unchanged to scikit-learn.
    nclust : int, default 2
        Number of clusters to produce.

    Returns
    -------
    The cluster label of every row, as returned by ``fit_predict``.
    """
    # The distance keyword is deliberately left out.  It is spelled `affinity`
    # in older scikit-learn releases and `metric` in newer ones (the old
    # spelling was deprecated and then removed), so naming it breaks on one
    # side or the other.  Euclidean is the default in both and is the only
    # distance Ward linkage accepts, so the result is unchanged.
    model = AgglomerativeClustering(n_clusters=nclust, linkage='ward')
    return model.fit_predict(X)

# Ask for 8 clusters, then append the labels to the feature frame as its
# rightmost column.
clust_labels1 = doAgglomerative(data2, nclust=8)
agglomerative = pd.DataFrame(clust_labels1)
data2.insert(len(data2.columns), 'agglomerative', agglomerative)

In [None]:
# Mean of each feature within every agglomerative cluster.
data2.groupby('agglomerative').mean()

In [None]:
# Scatter the two features, colouring each point by its agglomerative label.
fig, ax = plt.subplots()
scatter = ax.scatter(
    data2['diameter'],
    data2['Vmb'],
    c=agglomerative[0],
    s=50,
)
ax.set(title='Agglomerative Clustering', xlabel='diameter', ylabel='Vmb')
plt.colorbar(scatter)

# K-Means Clustering

In [None]:
# Start the section from a fresh two-feature frame.  Rebinding the name is
# enough to discard the label column added by the previous section; an
# explicit `del data2` would fail with NameError if this cell is run before
# `data2` exists (e.g. after a kernel restart).
data2 = data[['diameter', 'Vmb']]

In [None]:
def doKmeans(X, nclust=2, random_state=None):
    """Cluster the rows of ``X`` with k-means.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, handed unchanged to scikit-learn.
    nclust : int, default 2
        Number of clusters.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``KMeans``.  ``None`` keeps the previous unseeded
        behaviour; pass an integer to get the same clustering on every run.

    Returns
    -------
    tuple
        ``(labels, centers)``: the predicted cluster label of every row and
        the fitted ``cluster_centers_`` array.
    """
    # n_clusters is passed by keyword so the call does not depend on the
    # positional order of the KMeans constructor.
    model = KMeans(n_clusters=nclust, random_state=random_state)
    model.fit(X)
    return model.predict(X), model.cluster_centers_

# Ask for 8 clusters, then append the labels to the feature frame as its
# rightmost column.
clust_labels, cent = doKmeans(data2, nclust=8)
kmeans = pd.DataFrame(clust_labels)
data2.insert(len(data2.columns), 'kmeans', kmeans)

In [None]:
# Mean of each feature within every k-means cluster.
data2.groupby('kmeans').mean()

In [None]:
# Scatter the two features, colouring each point by its k-means label.
fig, ax = plt.subplots()
scatter = ax.scatter(
    data2['diameter'],
    data2['Vmb'],
    c=kmeans[0],
    s=50,
)
ax.set(title='K-Means Clustering', xlabel='diameter', ylabel='Vmb')
plt.colorbar(scatter)

# Affinity Propagation Clustering

In [None]:
# Start the section from a fresh two-feature frame.  Rebinding the name is
# enough to discard the label column added by the previous section; an
# explicit `del data2` would fail with NameError if this cell is run before
# `data2` exists (e.g. after a kernel restart).
data2 = data[['diameter', 'Vmb']]

In [None]:
def doAffinity(X):
    """Cluster the rows of ``X`` with affinity propagation.

    The caller does not choose a cluster count here; the estimator is
    configured only with damping 0.5, at most 250 iterations and Euclidean
    affinity.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, handed unchanged to scikit-learn.

    Returns
    -------
    tuple
        ``(labels, centers)``: the predicted cluster label of every row and
        the fitted ``cluster_centers_`` array.
    """
    propagation = AffinityPropagation(
        affinity='euclidean',
        damping=0.5,
        max_iter=250,
    )
    propagation.fit(X)
    return propagation.predict(X), propagation.cluster_centers_

# Run affinity propagation, then append the labels to the feature frame as
# its rightmost column.
clust_labels2, cent2 = doAffinity(data2)
affinity = pd.DataFrame(clust_labels2)
data2.insert(len(data2.columns), 'affinity', affinity)

In [None]:
# Mean of each feature within every affinity-propagation cluster.
data2.groupby('affinity').mean()

In [None]:
# Scatter the two features, colouring each point by its affinity-propagation
# label.
fig, ax = plt.subplots()
scatter = ax.scatter(
    data2['diameter'],
    data2['Vmb'],
    c=affinity[0],
    s=50,
)
ax.set(title='Affinity Clustering', xlabel='diameter', ylabel='Vmb')
plt.colorbar(scatter)

# Gaussian Mixture Model Clustering

In [None]:
# Start the section from a fresh two-feature frame.  Rebinding the name is
# enough to discard the label column added by the previous section; an
# explicit `del data2` would fail with NameError if this cell is run before
# `data2` exists (e.g. after a kernel restart).
data2 = data[['diameter', 'Vmb']]

In [None]:
def doGMM(X, nclust=2, random_state=None):
    """Cluster the rows of ``X`` with a Gaussian mixture model.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, handed unchanged to scikit-learn.
    nclust : int, default 2
        Number of mixture components.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``GaussianMixture``.  ``None`` keeps the previous
        unseeded behaviour; pass an integer to get the same assignment on
        every run.

    Returns
    -------
    The predicted component label of every row.
    """
    model = GaussianMixture(
        n_components=nclust,
        init_params='kmeans',
        random_state=random_state,
    )
    model.fit(X)
    return model.predict(X)

# Fit 8 mixture components, then append the labels to the feature frame as
# its rightmost column.
clust_labels3 = doGMM(data2, nclust=8)
gmm = pd.DataFrame(clust_labels3)
data2.insert(len(data2.columns), 'Guassian', gmm)

In [None]:
# Mean of each feature within every Gaussian-mixture cluster.
data2.groupby('Guassian').mean()

In [None]:
# Scatter the two features, colouring each point by its Gaussian-mixture label.
fig = plt.figure()
ax = fig.add_subplot(111)
scatter = ax.scatter(data2['diameter'], data2['Vmb'], c=gmm[0], s=50)
# The title names the model actually plotted here (the labels come from `gmm`),
# so this figure is not mistaken for the affinity-propagation one.
ax.set_title('Gaussian Mixture Model Clustering')
ax.set_xlabel('diameter')
ax.set_ylabel('Vmb')
plt.colorbar(scatter)