### Importing Libraries

In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime); the
# dataset is read later from a path underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import os
import time
import math
import seaborn as sns
import matplotlib.pyplot as plt
import random
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch 
from math import sqrt
from statistics import mean
# Seed Python's built-in `random` module so any use of it is repeatable.
random.seed(10)

# Global seaborn styling: "white" style with color_codes enabled.
sns.set(style="white", color_codes=True)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

### Defining Distances

In [None]:
def EuclideanDistance(x,y):
    """Return the straight-line (L2) distance between points ``x`` and ``y``.

    Both arguments are coordinate sequences of equal length; the result is a
    Python float.
    """
    delta = np.array(x) - np.array(y)
    squared_total = (delta ** 2).sum()
    return sqrt(squared_total)

def ManhattanDistance(x,y):
    """Return the city-block (L1) distance between points ``x`` and ``y``.

    Both arguments are coordinate sequences of equal length; the result is
    the sum of the absolute coordinate differences.
    """
    delta = np.array(x) - np.array(y)
    return np.abs(delta).sum()

### Defining Linkages

In [None]:
def SingleLinkage(x, y, distance=None):
    """Single-linkage distance: the smallest pairwise distance between two clusters.

    Parameters
    ----------
    x, y : sequences of points
        The two clusters. Each element is one point (a sequence of coordinates).
    distance : callable, optional
        ``distance(p, q)`` returning the distance between two points.
        When omitted, ``EuclideanDistance`` from this notebook is used, which
        matches the previous hard-coded behaviour.

    Returns
    -------
    The minimum of ``distance(p, q)`` over every ``p`` in ``x`` and ``q`` in ``y``.

    Raises
    ------
    ValueError
        If either cluster is empty (there is no pair to take the minimum of).
    """
    if distance is None:
        # Resolved at call time so the default stays the notebook's Euclidean metric.
        distance = EuclideanDistance
    return min(distance(p, q) for p in x for q in y)

def CompleteLinkage(x, y, distance=None):
    """Complete-linkage distance: the largest pairwise distance between two clusters.

    Parameters
    ----------
    x, y : sequences of points
        The two clusters. Each element is one point (a sequence of coordinates).
    distance : callable, optional
        ``distance(p, q)`` returning the distance between two points.
        When omitted, ``EuclideanDistance`` from this notebook is used, which
        matches the previous hard-coded behaviour.

    Returns
    -------
    The maximum of ``distance(p, q)`` over every ``p`` in ``x`` and ``q`` in ``y``.

    Raises
    ------
    ValueError
        If either cluster is empty (there is no pair to take the maximum of).
    """
    if distance is None:
        # Resolved at call time so the default stays the notebook's Euclidean metric.
        distance = EuclideanDistance
    return max(distance(p, q) for p in x for q in y)

def AverageLinkage(x, y, distance=None):
    """Average-linkage distance: the mean pairwise distance between two clusters.

    Parameters
    ----------
    x, y : sequences of points
        The two clusters. Each element is one point (a sequence of coordinates).
    distance : callable, optional
        ``distance(p, q)`` returning the distance between two points.
        When omitted, ``EuclideanDistance`` from this notebook is used, which
        matches the previous hard-coded behaviour.

    Returns
    -------
    The arithmetic mean of ``distance(p, q)`` over every ``p`` in ``x`` and
    ``q`` in ``y``.

    Raises
    ------
    statistics.StatisticsError
        If either cluster is empty (a subclass of ValueError).
    """
    if distance is None:
        # Resolved at call time so the default stays the notebook's Euclidean metric.
        distance = EuclideanDistance
    return mean([distance(p, q) for p in x for q in y])

### Loading data

In [None]:
# Read the dataset from the mounted Drive and discard the three columns that
# are not used as clustering features.
df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Assg-1/cancer.csv'
).drop(columns=['id', 'Unnamed: 32', 'diagnosis'])


### Initializing clusters and points as nested lists for single linkage

In [None]:
# Every row starts as its own cluster: `clusters` holds the row labels and
# `points` holds the matching feature vectors, kept in the same order.
clusters = [[row_id] for row_id in df.index]
points = [[list(features)] for features in df.values]

### Plotting Dendrogram for single linkage

In [None]:
# Single-linkage dendrogram of the full feature matrix, computed with SciPy.
# NOTE: the assignment below rebinds the name `dendrogram`, which the import
# cell also binds to the scipy.cluster.hierarchy function of the same name.
plt.figure(figsize=(20, 15))
plt.title("Dendrogram")
dendrogram = sch.dendrogram(
    sch.linkage(df.values, method='single')
)
# Dashed red reference line drawn at height 12.
plt.axhline(y=12, color='red', linestyle='--')
plt.show()

In [None]:
n = 2  # number of clusters: the merge loop stops once only this many remain

In [None]:
#Applying Clustering
#
# Agglomerative merge loop: on every pass, find the two closest clusters under
# single linkage and fuse them, until only `n` clusters remain.
# `clusters` (row labels) and `points` (feature vectors) are parallel lists and
# are always modified at the same positions so they stay aligned.

# max(n, 1): merging needs at least two clusters, so never try to go below one.
while len(clusters) > max(n, 1):

    #Distance Matrix Calculation
    size = len(clusters)
    # Fill with +inf so the diagonal can never be chosen as the minimum. A finite
    # sentinel such as 1000 becomes the smallest entry as soon as every
    # inter-cluster distance exceeds it, which would select the diagonal and
    # merge every cluster in a single step.
    dm = np.full((size, size), np.inf)

    for i in range(size):
        for j in range(i + 1, size):
            # The linkage is symmetric, so each pair is evaluated only once.
            dm[i][j] = dm[j][i] = SingleLinkage(points[i], points[j])

    # Pick exactly one closest pair; on ties argmin returns the first one in
    # row-major order, so each pass merges two clusters and never overshoots n.
    row, col = np.unravel_index(np.argmin(dm), dm.shape)
    keep, drop = sorted((int(row), int(col)))

    merged_cluster = clusters[keep] + clusters[drop]
    merged_points = points[keep] + points[drop]

    # Delete by position (higher index first so the lower one stays valid).
    # Deleting by value could remove the wrong entry when two clusters hold
    # identical feature vectors, leaving the two lists misaligned.
    for idx in (drop, keep):
        del clusters[idx]
        del points[idx]

    clusters.append(merged_cluster)
    points.append(merged_points)

    print(len(clusters),clusters)

In [None]:
#Labeling of data points
# Clusters are numbered 1, 2, ... in list order; every row index stored in a
# cluster receives that cluster's number.
label = np.zeros(len(df.index), dtype=int)
category = 1
for members in clusters:
    label[members] = category
    category += 1

### Number of points in each cluster

In [None]:
from collections import Counter, defaultdict
# Tally how many rows carry each cluster label and print it under a caption.
print("Number of points in the cluster", Counter(label), sep="\n")

In [None]:
# Scatter of radius_mean against texture_mean, coloured by cluster label.
fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title("Single Linkage")
ax.scatter(df['radius_mean'], df['texture_mean'], c=label)
plt.show()

### Re-initializing clusters and points as nested lists for complete linkage

In [None]:
# Reset to one singleton cluster per row: `clusters` holds the row labels and
# `points` holds the matching feature vectors, kept in the same order.
clusters = [[row_id] for row_id in df.index]
points = [[list(features)] for features in df.values]

### Plotting Dendrogram for complete linkage

In [None]:
# Complete-linkage dendrogram of the full feature matrix, computed with SciPy.
# NOTE: the assignment below rebinds the name `dendrogram`, which the import
# cell also binds to the scipy.cluster.hierarchy function of the same name.
plt.figure(figsize=(10, 8))
plt.title("Dendrogram")
dendrogram = sch.dendrogram(
    sch.linkage(df.values, method='complete')
)
# Dashed red reference line drawn at height 130.
plt.axhline(y=130, color='red', linestyle='--')
plt.show()

In [None]:
n=2  # number of clusters: the merge loop stops once only this many remain

In [None]:
#Application of clustering
#
# Agglomerative merge loop: on every pass, find the two closest clusters under
# complete linkage and fuse them, until only `n` clusters remain.
# `clusters` (row labels) and `points` (feature vectors) are parallel lists and
# are always modified at the same positions so they stay aligned.

# max(n, 1): merging needs at least two clusters, so never try to go below one.
while len(clusters) > max(n, 1):

    #Calculation of Distance Matrix
    size = len(clusters)
    # Fill with +inf so the diagonal can never be chosen as the minimum. A finite
    # sentinel such as 1000 becomes the smallest entry as soon as every
    # inter-cluster distance exceeds it, which would select the diagonal and
    # merge every cluster in a single step.
    dm = np.full((size, size), np.inf)

    for i in range(size):
        for j in range(i + 1, size):
            # The linkage is symmetric, so each pair is evaluated only once.
            dm[i][j] = dm[j][i] = CompleteLinkage(points[i], points[j])

    # Pick exactly one closest pair; on ties argmin returns the first one in
    # row-major order, so each pass merges two clusters and never overshoots n.
    row, col = np.unravel_index(np.argmin(dm), dm.shape)
    keep, drop = sorted((int(row), int(col)))

    merged_cluster = clusters[keep] + clusters[drop]
    merged_points = points[keep] + points[drop]

    # Delete by position (higher index first so the lower one stays valid).
    # Deleting by value could remove the wrong entry when two clusters hold
    # identical feature vectors, leaving the two lists misaligned.
    for idx in (drop, keep):
        del clusters[idx]
        del points[idx]

    clusters.append(merged_cluster)
    points.append(merged_points)

    print(len(clusters),clusters)

In [None]:
#Labeling of data points
# Clusters are numbered 1, 2, ... in list order; every row index stored in a
# cluster receives that cluster's number.
label = np.zeros(len(df.index), dtype=int)
category = 1
for members in clusters:
    label[members] = category
    category += 1

### Number of points in each cluster

In [None]:
from collections import Counter, defaultdict
# Tally how many rows carry each cluster label and print it under a caption.
print("Number of points in the cluster", Counter(label), sep="\n")

Number of points in the cluster
Counter({1: 569})


In [None]:
# Scatter of radius_mean against texture_mean, coloured by cluster label.
fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title("Complete Linkage")
ax.scatter(df['radius_mean'], df['texture_mean'], c=label)
plt.show()

### Re-initializing clusters and points as nested lists for average linkage

In [None]:
# Reset to one singleton cluster per row: `clusters` holds the row labels and
# `points` holds the matching feature vectors, kept in the same order.
clusters = [[row_id] for row_id in df.index]
points = [[list(features)] for features in df.values]

### Plotting Dendrogram for average linkage

In [None]:
# Average-linkage dendrogram of the full feature matrix, computed with SciPy.
# NOTE: the assignment below rebinds the name `dendrogram`, which the import
# cell also binds to the scipy.cluster.hierarchy function of the same name.
plt.figure(figsize=(10, 8))
plt.title("Dendrogram")
dendrogram = sch.dendrogram(
    sch.linkage(df.values, method='average')
)
# Dashed red reference line drawn at height 35.
plt.axhline(y=35, color='red', linestyle='--')
plt.show()

In [None]:
n=2  # number of clusters: the merge loop stops once only this many remain

In [None]:
#Application of clustering
#
# Agglomerative merge loop: on every pass, find the two closest clusters under
# average linkage and fuse them, until only `n` clusters remain.
# `clusters` (row labels) and `points` (feature vectors) are parallel lists and
# are always modified at the same positions so they stay aligned.

# max(n, 1): merging needs at least two clusters, so never try to go below one.
while len(clusters) > max(n, 1):

    #Calculation of Distance Matrix
    size = len(clusters)
    # Fill with +inf so the diagonal can never be chosen as the minimum. A finite
    # sentinel such as 1000 becomes the smallest entry as soon as every
    # inter-cluster distance exceeds it, which would select the diagonal and
    # merge every cluster in a single step.
    dm = np.full((size, size), np.inf)

    for i in range(size):
        for j in range(i + 1, size):
            # The linkage is symmetric, so each pair is evaluated only once.
            dm[i][j] = dm[j][i] = AverageLinkage(points[i], points[j])

    # Pick exactly one closest pair; on ties argmin returns the first one in
    # row-major order, so each pass merges two clusters and never overshoots n.
    row, col = np.unravel_index(np.argmin(dm), dm.shape)
    keep, drop = sorted((int(row), int(col)))

    merged_cluster = clusters[keep] + clusters[drop]
    merged_points = points[keep] + points[drop]

    # Delete by position (higher index first so the lower one stays valid).
    # Deleting by value could remove the wrong entry when two clusters hold
    # identical feature vectors, leaving the two lists misaligned.
    for idx in (drop, keep):
        del clusters[idx]
        del points[idx]

    clusters.append(merged_cluster)
    points.append(merged_points)

    print(len(clusters),clusters)

In [None]:
#Labeling of data points
# Clusters are numbered 1, 2, ... in list order; every row index stored in a
# cluster receives that cluster's number.
label = np.zeros(len(df.index), dtype=int)
category = 1
for members in clusters:
    label[members] = category
    category += 1

In [None]:
# Scatter of radius_mean against texture_mean, coloured by cluster label.
fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title("Average Linkage")
ax.scatter(df['radius_mean'], df['texture_mean'], c=label)
plt.show()

### Number of points in each cluster

In [None]:
from collections import Counter, defaultdict
# Tally how many rows carry each cluster label and print it under a caption.
print("Number of points in the cluster", Counter(label), sep="\n")

Number of points in the cluster
Counter({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1, 64: 1, 65: 1, 66: 1, 67: 1, 68: 1, 69: 1, 70: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 1, 76: 1, 77: 1, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 1, 91: 1, 92: 1, 93: 1, 94: 1, 95: 1, 96: 1, 97: 1, 98: 1, 99: 1, 100: 1, 101: 1, 102: 1, 103: 1, 104: 1, 105: 1, 106: 1, 107: 1, 108: 1, 109: 1, 110: 1, 111: 1, 112: 1, 113: 1, 114: 1, 115: 1, 116: 1, 117: 1, 118: 1, 119: 1, 120: 1, 121: 1, 122: 1, 123: 1, 124: 1, 125: 1, 126: 1, 127: 1, 128: 1, 129: 1, 130: 1, 131: 1, 132: 1, 133: 1, 134