## Minor Project
### Crowd Classification using Deep Learning, Computer Vision and Decision Tree
### Guide: Dr. Sunil Kumar
### Students: Ananya Agrawal (199303010) & Hardik Srivastava (199303069)

## Part 1: Training a CNN Model to create crowd heatmap

Handling Imports

In [14]:
import keras
import os
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D, Reshape, Concatenate
from keras.optimizers import Adam
import tensorflow as tf
import sys
import cv2
import keras.backend as K
import math
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

Setting up path variables

In [15]:
# Which ShanghaiTech part the paths below point at.
dataset = "B"

# Patch directories used for training/validation, plus their "_den" CSV folders.
_patch_root = f'./data/formatted_trainval/shanghaitech_part_{dataset}_patches_9/'
train_path = _patch_root + 'train/'
train_den_path = _patch_root + 'train_den/'
val_path = _patch_root + 'val/'
val_den_path = _patch_root + 'val_den/'

# Original test images and their ground-truth CSV files.
_test_root = f'./data/original/shanghaitech/part_{dataset}_final/test_data/'
test_path = _test_root + 'images/'
test_den_path = _test_root + 'ground_truth_csv/'

Load Training Data

In [16]:
# Load every training patch together with its density map, reduced to a
# quarter of the resolution in each dimension.
training_images = os.listdir(train_path)
num_training_images = len(training_images)

train_data = []
for idx, name in enumerate(training_images):
    if idx % 100 == 0:
        print(idx, '/', num_training_images, "Loaded")
    # Flag 0 reads the image as single-channel grayscale.
    img = cv2.imread(train_path + name, 0)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        raise FileNotFoundError('Could not read training image: ' + train_path + name)
    img = (np.array(img) - 127.5) / 128
    # The density CSV shares the image's base name (the 4-character extension is dropped).
    with open(train_den_path + name[:-4] + '.csv') as den_file:
        den = np.loadtxt(den_file, delimiter = ",")
    # Sum each 4x4 block so the total of the map is preserved while the
    # resolution drops by 4 per axis; trailing rows/columns that do not fill a
    # whole block are ignored, as in the original element-wise loop.
    rows, cols = den.shape[0] // 4, den.shape[1] // 4
    den_quarter = den[:rows * 4, :cols * 4].reshape(rows, 4, cols, 4).sum(axis = (1, 3))
    train_data.append([img, den_quarter])
print('Training Data Loaded')

0 / 3249 Loaded
100 / 3249 Loaded
200 / 3249 Loaded
300 / 3249 Loaded
400 / 3249 Loaded
500 / 3249 Loaded
600 / 3249 Loaded
700 / 3249 Loaded
800 / 3249 Loaded
900 / 3249 Loaded
1000 / 3249 Loaded
1100 / 3249 Loaded
1200 / 3249 Loaded
1300 / 3249 Loaded
1400 / 3249 Loaded
1500 / 3249 Loaded
1600 / 3249 Loaded
1700 / 3249 Loaded
1800 / 3249 Loaded
1900 / 3249 Loaded
2000 / 3249 Loaded
2100 / 3249 Loaded
2200 / 3249 Loaded
2300 / 3249 Loaded
2400 / 3249 Loaded
2500 / 3249 Loaded
2600 / 3249 Loaded
2700 / 3249 Loaded
2800 / 3249 Loaded
2900 / 3249 Loaded
3000 / 3249 Loaded
3100 / 3249 Loaded
3200 / 3249 Loaded
Training Data Loaded


Loading Test Data

In [17]:
# Load the test images and their density maps. Files are addressed as
# IMG_1.jpg ... IMG_<n>.jpg, where n is the number of entries in test_path.
test_images = os.listdir(test_path)
num_test_images = len(test_images)

test_data = []
for idx in range(num_test_images):
    if idx % 50 == 0:
        print(idx, '/', num_test_images)
    name = 'IMG_' + str(idx + 1) + '.jpg'
    # Flag 0 reads the image as single-channel grayscale.
    img = cv2.imread(test_path + name, 0)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        raise FileNotFoundError('Could not read test image: ' + test_path + name)
    img = (np.array(img) - 127.5) / 128
    with open(test_den_path + name[:-4] + '.csv') as den_file:
        den = np.loadtxt(den_file, delimiter = ",")
    # Sum each 4x4 block so the total of the map is preserved while the
    # resolution drops by 4 per axis; trailing rows/columns that do not fill a
    # whole block are ignored, as in the original element-wise loop.
    rows, cols = den.shape[0] // 4, den.shape[1] // 4
    den_quarter = den[:rows * 4, :cols * 4].reshape(rows, 4, cols, 4).sum(axis = (1, 3))
    test_data.append([img, den_quarter])

print('Test data Loaded')

0 / 316
50 / 316
100 / 316
150 / 316
200 / 316
250 / 316
300 / 316
Test data Loaded


Shuffling data around

In [18]:
# Shuffle the [image, density] pairs in place. No seed is set here, so the
# resulting order differs from run to run.
np.random.shuffle(train_data)

Generate X, Y training and testing data lists

In [19]:
def _add_channel_axis(arr):
    """Reshape ``arr`` to (arr.shape[0], arr.shape[1], 1), adding a trailing axis."""
    height, width = arr.shape[0], arr.shape[1]
    return np.reshape(arr, (height, width, 1))


# Every entry of train_data / test_data is an [image, quarter-size density] pair;
# index 0 feeds the model input and index 1 the regression target.
X_train = np.array([_add_channel_axis(sample[0]) for sample in train_data])
y_train = np.array([_add_channel_axis(sample[1]) for sample in train_data])

X_test = np.array([_add_channel_axis(sample[0]) for sample in test_data])
y_test = np.array([_add_channel_axis(sample[1]) for sample in test_data])

Setup helper methods to calculate Mean Absolute Error, Mean Squared Error

In [21]:
def calc_mae(y_real, y_pred):
    """Absolute difference between the summed target and the summed prediction.

    Both tensors are reduced with ``K.sum`` over all of their elements, so the
    value covers everything passed in a single call.
    """
    total_real = K.sum(y_real)
    total_pred = K.sum(y_pred)
    return abs(total_real - total_pred)

def calc_mse(y_real, y_pred):
    """Squared difference between the summed target and the summed prediction.

    Both tensors are reduced with ``K.sum`` over all of their elements before
    the difference is squared.
    """
    count_error = K.sum(y_real) - K.sum(y_pred)
    return count_error * count_error

Setting up Model Architecture

In [22]:
def _conv_column(x, filters, first_kernel, kernel):
    """Build one convolutional column on top of tensor ``x``.

    Layout: Conv(filters, first_kernel) -> 2x2 max-pool -> Conv(2 * filters, kernel)
    -> 2x2 max-pool -> Conv(filters, kernel) -> Conv(filters // 2, kernel).
    All convolutions use 'same' padding and ReLU, so only the two pooling
    layers change the spatial size (a factor of 4 per axis overall).
    """
    x = Conv2D(filters, (first_kernel, first_kernel), padding = 'same', activation = 'relu')(x)
    x = MaxPooling2D(pool_size = (2, 2))(x)
    x = Conv2D(filters * 2, (kernel, kernel), padding = 'same', activation = 'relu')(x)
    x = MaxPooling2D(pool_size = (2, 2))(x)
    x = Conv2D(filters, (kernel, kernel), padding = 'same', activation = 'relu')(x)
    x = Conv2D(filters // 2, (kernel, kernel), padding = 'same', activation = 'relu')(x)
    return x


# Single-channel input with unconstrained height and width.
inputs = Input(shape = (None, None, 1))

# Three parallel columns with medium, small and large kernels. They are built in
# this order so that layers are created in the same sequence as before.
conv_m = _conv_column(inputs, 20, 7, 5)
conv_s = _conv_column(inputs, 24, 5, 3)
conv_l = _conv_column(inputs, 16, 9, 7)

# Stack the column outputs along the channel axis and collapse them to a
# one-channel map with a 1x1 convolution (no activation).
conv_merge = Concatenate(axis = 3)([conv_m, conv_s, conv_l])
result = Conv2D(1, (1, 1), padding = 'same')(conv_merge)

model = Model(inputs = inputs, outputs = result)

# `learning_rate` replaces the deprecated `lr` keyword.
adam = Adam(learning_rate = 1e-4)
model.compile(loss = 'mse', optimizer = adam, metrics = [calc_mae, calc_mse])

2022-05-20 06:11:53.605409: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-05-20 06:11:53.605599: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-05-20 06:11:53.606000: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (bigfloppa): /proc/driver/nvidia/version does not exist
2022-05-20 06:11:53.607157: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super(Adam, self).__init__(name, **kwargs)


Training the model (saving the model and its weights to disk whenever the MAE on the test set improves, for checkpointing)

In [23]:
# Best scores seen so far; each "best" value is stored together with the other
# metric measured in the same round.
best_mae = 10000
best_mae_mse = 10000
best_mse = 10000
best_mse_mae = 10000

# 200 rounds of 3 epochs each, evaluating after every round.
for i in range(200):
    model.fit(X_train, y_train, epochs = 3, batch_size = 1, validation_split = 0.2)

    # score = [loss, calc_mae, calc_mse]; the third entry is turned into a root value.
    # NOTE(review): checkpoints are selected on X_test/y_test, so the reported
    # best scores are not an unbiased test estimate.
    score = model.evaluate(X_test, y_test, batch_size = 1)
    score[2] = math.sqrt(score[2])
    print(score)
    if score[1] < best_mae:
        best_mae = score[1]
        best_mae_mse = score[2]
        # Checkpoint architecture and weights; the context manager guarantees
        # the JSON file is flushed and closed.
        json_string = model.to_json()
        with open('model.json', 'w') as model_file:
            model_file.write(json_string)
        model.save_weights('weights.h5')
    if score[2] < best_mse:
        best_mse = score[2]
        best_mse_mae = score[1]

    print('Best mae: ', best_mae, '(', best_mae_mse, ')')
    print('Best mse: ', '(', best_mse_mae, ')', best_mse)

Epoch 1/3
 556/2599 [=====>........................] - ETA: 1:04 - loss: 2.3923e-04 - calc_mae: 7.9498 - calc_mse: 194.0198

KeyboardInterrupt: 

==================

## Part 2: Generating CSV dataset for training a Decision Tree Classifier for classifying images into Sparse, Medium, Dense

Setting source data directory, along with classes and other variables

In [24]:
# Root folder that holds one sub-directory per class; handle_class() lists
# source_dir + "/" + <class name> and reads the .mat files it finds there.
source_dir = "./data_subset"
# Class names; they are used both as sub-directory names and as TARGET labels.
classes = ["dense", "medium", "sparse"]

Handling imports

In [25]:
import numpy as np
import pandas as pd
from mat4py import loadmat
from sklearn.cluster import KMeans
import os
import math

Defining methods to handle clustering logic

In [26]:
# Number of clusters passed to KMeans in make_clusters().
K_CONSTANT = 5

In [27]:
# Column accumulators for the exported dataset, one list per column:
# imgnum, source_dataset, ab, ac, ad, ae, bc, bd, be, cd, ce, de, target.
# handle_class() appends one value to every list per processed .mat file, so
# re-running that cell without re-running this one appends duplicate rows.
IMG_NUM = []
SOURCE_DATASET = []
AB = []
AC = []
AD = []
AE = []
BC = []
BD = []
BE = []
CD = []
CE = []
DE = []
TARGET = []

In [28]:
# reads mat file and returns formatted list of points in it
def read_pts(file):
    """Return the annotated locations stored in a ``.mat`` file.

    The file is parsed with ``loadmat`` and the entries under
    ``['image_info']['location']`` are converted to 2-tuples of ints (each
    coordinate is truncated with ``int``).
    """
    locations = loadmat(file)['image_info']['location']
    return [(int(loc[0]), int(loc[1])) for loc in locations]

# creates clusters of pts list
def make_clusters(pts, n_clusters=None, random_state=None):
    """Partition ``pts`` into K-means clusters.

    Args:
        pts: sequence of 2-D points.
        n_clusters: number of clusters; defaults to ``K_CONSTANT`` when omitted.
        random_state: optional seed forwarded to ``KMeans``. Without it the
            cluster numbering (and possibly the assignment) can change between runs.

    Returns:
        A list with ``n_clusters`` lists; entry ``k`` holds the points that
        KMeans assigned to label ``k``. A bucket may be empty.
    """
    if n_clusters is None:
        n_clusters = K_CONSTANT
    est = KMeans(n_clusters, random_state=random_state)
    est.fit(pts)
    y_kmeans = est.predict(pts)
    # One bucket per cluster, sized from n_clusters instead of a fixed 5.
    cluster_list = [[] for _ in range(n_clusters)]
    for pt, label in zip(pts, y_kmeans):
        cluster_list[label].append(pt)
    return cluster_list

# returns dist between 2 points
def pt_dist(p1, p2):
    """Euclidean distance between two points given as (x, y)-indexable pairs."""
    dx, dy = p1[0] - p2[0], p1[1] - p2[1]
    squared = dx * dx + dy * dy
    return math.sqrt(squared)

# returns min dist between cluster 1 and cluster 2
def min_dist(c1, c2):
    """Smallest point-to-point distance between clusters ``c1`` and ``c2``.

    Returns the sentinel 9999999 when either cluster is empty or when no pair
    is closer than that value.
    """
    closest = 9999999
    for first in c1:
        for second in c2:
            gap = pt_dist(first, second)
            if gap < closest:
                closest = gap
    return closest

def handle_class(classname, source_dataset):
    """Append one dataset row per ``.mat`` file found in the class directory.

    For every ``.mat`` file under ``source_dir/classname`` the points are read,
    clustered, and the minimum distance between each pair of the five clusters
    is appended to the matching module-level column list (AB ... DE), together
    with the image id, ``source_dataset`` and ``classname`` as the target.
    """
    class_dir = source_dir + "/" + classname
    # (column list, index of first cluster, index of second cluster)
    pair_columns = (
        (AB, 0, 1), (AC, 0, 2), (AD, 0, 3), (AE, 0, 4),
        (BC, 1, 2), (BD, 1, 3), (BE, 1, 4),
        (CD, 2, 3), (CE, 2, 4),
        (DE, 3, 4),
    )
    for filename in os.listdir(class_dir):
        if not filename.endswith(".mat"):
            continue
        # Drops the first 7 characters and the 4-character extension —
        # presumably a fixed file-name prefix; verify against the data folder.
        IMG_NUM.append(str(filename)[7:-4])
        SOURCE_DATASET.append(source_dataset)
        TARGET.append(classname)
        all_clusters = make_clusters(read_pts(class_dir + "/" + filename))
        for column, first, second in pair_columns:
            column.append(min_dist(all_clusters[first], all_clusters[second]))


# Fill the column lists: each class directory is tagged with the dataset part
# recorded in the SOURCE_DATASET column.
for class_name, part in zip(classes, ("A", "B", "B")):
    handle_class(class_name, part)

# Assemble the columns into one frame, in export order.
dataset = pd.DataFrame({
    "IMG_NUM": IMG_NUM,
    "SOURCE_DATASET": SOURCE_DATASET,
    "AB": AB,
    "AC": AC,
    "AD": AD,
    "AE": AE,
    "BC": BC,
    "BD": BD,
    "BE": BE,
    "CD": CD,
    "CE": CE,
    "DE": DE,
    "TARGET": TARGET,
})
display(dataset)

Unnamed: 0,IMG_NUM,SOURCE_DATASET,AB,AC,AD,AE,BC,BD,BE,CD,CE,DE,TARGET
0,28,A,105.095195,94.810337,9.055385,11.661904,138.311243,7.211103,8.062258,161.251357,3.162278,7.615773,dense
1,42,A,339.676317,62.801274,152.947703,59.3043,42.059482,57.45433,336.154726,84.504438,31.622777,385.149322,dense
2,341,A,307.483333,71.168813,52.239832,322.076078,557.288076,43.680659,542.86094,306.778422,234.326695,486.864458,dense
3,378,A,323.36048,32.015621,590.37107,294.183616,37.48333,21.095023,493.852205,302.828334,243.895469,728.715308,dense
4,14,A,215.520301,29.732137,33.136083,40.607881,31.622777,368.98916,35.22783,26.172505,41.629317,287.14108,dense
5,21,A,63.600314,41.868843,285.91782,69.46222,31.575307,23.769729,42.544095,347.416177,295.854694,38.013156,dense
6,275,A,464.474972,866.810821,705.01773,223.18154,249.777901,15.811388,25.0,273.248971,494.417839,260.37665,dense
7,395,A,59.908263,72.138755,377.690349,444.567205,171.283391,41.303753,494.725176,521.92145,272.442655,779.133493,dense
8,19,A,170.839106,515.139787,331.29594,5.0,198.365824,4.472136,5.0,18.11077,348.0,166.003012,dense
9,43,A,252.097203,32.388269,22.847319,15.231546,516.468779,431.885401,11.045361,44.407207,278.145645,216.92395,dense


In [29]:
# Write the frame, including its index column, to exported.csv in the working directory.
dataset.to_csv("exported.csv")

=============================

## Part 3: Using the CSV dataset for training a Decision Tree Classifier for classifying images into Sparse, Medium, Dense

Handling Imports

In [30]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

Loading Data from CSV

In [40]:
# Read the exported features back; the first CSV column is the saved row index.
cluster_data = pd.read_csv("exported.csv", index_col=0)
cluster_data.head()

Unnamed: 0,IMG_NUM,SOURCE_DATASET,AB,AC,AD,AE,BC,BD,BE,CD,CE,DE,TARGET
0,28,A,105.095195,94.810337,9.055385,11.661904,138.311243,7.211103,8.062258,161.251357,3.162278,7.615773,dense
1,42,A,339.676317,62.801274,152.947703,59.3043,42.059482,57.45433,336.154726,84.504438,31.622777,385.149322,dense
2,341,A,307.483333,71.168813,52.239832,322.076078,557.288076,43.680659,542.86094,306.778422,234.326695,486.864458,dense
3,378,A,323.36048,32.015621,590.37107,294.183616,37.48333,21.095023,493.852205,302.828334,243.895469,728.715308,dense
4,14,A,215.520301,29.732137,33.136083,40.607881,31.622777,368.98916,35.22783,26.172505,41.629317,287.14108,dense


Preparing X and Y slices of data, where X is our source data and Y contains the Target Class

In [41]:
# Columns 0 and 1 are IMG_NUM and SOURCE_DATASET, so 2:-1 selects the ten
# pairwise-distance features AB ... DE.
X = cluster_data.values[:, 2:-1]
# The last column is the TARGET class label.
Y = cluster_data.values[:, -1]

Splitting data into Train and Test

In [77]:
# Hold out 20% of the rows for testing. No random_state is passed, so the split
# (and the scores reported below) changes on every run.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

Setting up functions to create decision tree

In [78]:
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
    """Fit and return a Gini-criterion decision tree on the training data.

    ``X_test`` is accepted for call compatibility but is not used.
    """
    tree_params = {"criterion": "gini", "random_state": 100, "max_depth": None}
    tree = DecisionTreeClassifier(**tree_params)
    tree.fit(X_train, y_train)
    return tree

# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with ``clf_object``, print and return them."""
    predicted = clf_object.predict(X_test)
    # Header and values go on separate lines, as two consecutive prints would.
    print("Predicted values:", predicted, sep="\n")
    return predicted

# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (in percent) and classification report."""
    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix: ", matrix)
    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("Accuracy : ", accuracy_pct)
    report = classification_report(y_test, y_pred)
    print("Report : ", report)

In [79]:
# Fit the tree on the training split (X_test is passed but not used for fitting).
clf_gini = train_using_gini(X_train, X_test, y_train)

# Predict the held-out rows, then print confusion matrix, accuracy and report.
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)

Predicted values:
['medium' 'medium' 'medium' 'medium' 'medium' 'medium' 'sparse' 'sparse'
 'sparse' 'dense' 'medium' 'medium']
Confusion Matrix:  [[0 2 0]
 [1 2 0]
 [0 4 3]]
Accuracy :  41.66666666666667
Report :                precision    recall  f1-score   support

       dense       0.00      0.00      0.00         2
      medium       0.25      0.67      0.36         3
      sparse       1.00      0.43      0.60         7

    accuracy                           0.42        12
   macro avg       0.42      0.37      0.32        12
weighted avg       0.65      0.42      0.44        12



======================================

Handling Imports

In [1]:
import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
import os
import random
import math
import sys
from pyheatmap.heatmap import HeatMap

2022-05-20 03:57:36.530766: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-05-20 03:57:36.538928: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-20 03:57:36.538945: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Loading Model and Weights from disk

In [2]:
# Rebuild the network from its saved JSON architecture; the context manager
# closes the file even if reading fails.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = keras.models.model_from_json(loaded_model_json)

# Restore the trained weights saved alongside the architecture.
model.load_weights("weights.h5")

print("Loaded model from disk successfully")

Loaded model from disk successfully


2022-05-20 03:57:41.206210: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-05-20 03:57:41.206316: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-05-20 03:57:41.206337: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (bigfloppa): /proc/driver/nvidia/version does not exist
2022-05-20 03:57:41.206566: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Setting the path of the input image

In [3]:
# Image to analyse. NOTE(review): this is an absolute path on one machine;
# point it at a local file before running.
input_image = "/home/hardik/Projects/Minor-Project/new/IMG_233.jpg"

Loading and preprocessing input image to correspond to input layer size

In [4]:
# Read the image as single-channel grayscale (flag 0).
img = cv2.imread(input_image, 0)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read input image: " + input_image)
# Same normalisation as used for the training images.
img = (np.array(img) - 127.5) / 128
# Add batch and channel axes using the image's own size rather than a fixed
# 768x1024, so images of other dimensions are accepted too.
inputs = np.reshape(img, [1, img.shape[0], img.shape[1], 1])

Inferencing through model

In [5]:
# Run the network on the single-image batch.
outputs = model.predict(inputs)
# The predicted head count is the sum over the whole output map.
predicted_count = np.sum(outputs)



Printing predicted count

In [6]:
# Report the summed output map as the estimated number of people.
print("Predicted Count:", predicted_count)

Predicted Count: 27.358072


Generating density numpy array

In [7]:
# Drop the batch and channel axes, leaving a 2-D (outputs.shape[1], outputs.shape[2]) map.
y_p_den = np.reshape(outputs, (outputs.shape[1], outputs.shape[2]))

Generating Heatmap

In [8]:
# Upsample the map by 4 in each direction: every output cell is copied into a
# 4x4 block and divided by 16, so the total of the map is unchanged.
den_resized = np.repeat(np.repeat(y_p_den, 4, axis=0), 4, axis=1).astype(np.float64) / 16
den = den_resized
count = np.sum(den)

# Rescale so the strongest cell becomes 10. A non-positive (or NaN) maximum
# cannot be rescaled meaningfully, so fail with a clear message instead.
peak = np.max(den)
if not peak > 0:
    raise ValueError("Predicted density map has no positive values; cannot build a heatmap")
den = den * 10 / peak

# Kept for parity with the original cell; not used further in this cell.
crowd_img = cv2.imread(input_image, 1)

# Emit each pixel int(den) times (0-10) as a 1-based (x, y) point, scanning
# row by row. Values that truncate to zero or below contribute no points.
repeats = np.maximum(den.astype(int), 0)
row_idx, col_idx = np.nonzero(repeats)
per_pixel = repeats[row_idx, col_idx]
xs = np.repeat(col_idx + 1, per_pixel).tolist()
ys = np.repeat(row_idx + 1, per_pixel).tolist()

pts = list(zip(xs, ys))             # tuples, reused later for clustering
data = [[x, y] for x, y in pts]     # lists, as passed to HeatMap

hm = HeatMap(data, base=input_image)
hm.heatmap(save_as = 'output.png')
print("Heatmap Generated to output.png")

Heatmap Generated to output.png


### Step 2: Load data and create Decision Tree

Classification (Target Label Generation) using Decision Tree

Handling Imports

In [27]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

Loading Data from CSV

In [28]:
# Read the exported features; the first CSV column is the saved row index.
cluster_data = pd.read_csv("exported.csv", index_col=0)
cluster_data.head()

Unnamed: 0,IMG_NUM,SOURCE_DATASET,AB,AC,AD,AE,BC,BD,BE,CD,CE,DE,TARGET
0,28,A,226.2145,6.403124,174.287119,12.041595,86.815897,9.219544,6.324555,9.486833,7.071068,8.485281,dense
1,42,A,339.676317,62.801274,152.947703,70.710678,42.059482,57.45433,354.407957,84.504438,49.091751,385.149322,dense
2,341,A,71.168813,322.076078,307.483333,52.239832,234.326695,557.288076,306.778422,542.86094,486.864458,43.680659,dense
3,378,A,302.828334,21.633308,243.895469,37.48333,550.905618,728.715308,21.095023,294.183616,283.44488,493.852205,dense
4,14,A,26.172505,41.10961,287.14108,368.98916,29.732137,41.629317,31.622777,40.607881,215.520301,25.495098,dense


Preparing X and Y slices of data, where X is our source data and Y contains the Target Class

In [29]:
# Columns 0 and 1 are IMG_NUM and SOURCE_DATASET, so 2:-1 selects the ten
# pairwise-distance features AB ... DE.
X = cluster_data.values[:, 2:-1]
# The last column is the TARGET class label.
Y = cluster_data.values[:, -1]

Splitting data into Train and Test (skipped in this part: the classifier below is trained on the full dataset)

Setting up functions to create decision tree

In [34]:
# Function to perform training with giniIndex.
def train_using_gini(X_train, y_train):
    """Fit and return a Gini-criterion decision tree with at least 5 samples per leaf."""
    tree_params = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": None,
        "min_samples_leaf": 5,
    }
    tree = DecisionTreeClassifier(**tree_params)
    tree.fit(X_train, y_train)
    return tree

# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with ``clf_object``, print and return them."""
    predicted = clf_object.predict(X_test)
    # Header and values go on separate lines, as two consecutive prints would.
    print("Predicted values:", predicted, sep="\n")
    return predicted

In [35]:
# Train on every row of the CSV: no train/test split is made in this part, so
# no hold-out accuracy is computed for this classifier.
clf_gini = train_using_gini(X, Y)

In [24]:
import numpy as np
import pandas as pd
from mat4py import loadmat
from sklearn.cluster import KMeans
import os
import math

In [25]:

# creates clusters of pts list
def make_clusters(pts, n_clusters=5, random_state=None):
    """Partition ``pts`` into K-means clusters.

    Args:
        pts: sequence of 2-D points.
        n_clusters: number of clusters (5 by default).
        random_state: optional seed forwarded to ``KMeans``. Without it the
            cluster numbering (and possibly the assignment) can change between runs.

    Returns:
        A list with ``n_clusters`` lists; entry ``k`` holds the points that
        KMeans assigned to label ``k``. A bucket may be empty.
    """
    est = KMeans(n_clusters, random_state=random_state)
    est.fit(pts)
    y_kmeans = est.predict(pts)
    # One bucket per cluster, sized from n_clusters instead of a fixed 5.
    cluster_list = [[] for _ in range(n_clusters)]
    for pt, label in zip(pts, y_kmeans):
        cluster_list[label].append(pt)
    return cluster_list

# returns dist between 2 points
def pt_dist(p1, p2):
    """Euclidean distance between two points given as (x, y)-indexable pairs."""
    dx, dy = p1[0] - p2[0], p1[1] - p2[1]
    squared = dx * dx + dy * dy
    return math.sqrt(squared)

# returns min dist between cluster 1 and cluster 2
def min_dist(c1, c2):
    """Smallest point-to-point distance between clusters ``c1`` and ``c2``.

    Returns the sentinel 9999999 when either cluster is empty or when no pair
    is closer than that value.
    """
    closest = 9999999
    for first in c1:
        for second in c2:
            gap = pt_dist(first, second)
            if gap < closest:
                closest = gap
    return closest

# Cluster the heatmap points, then take the minimum distance between every
# pair of the five clusters, in the same AB ... DE order as the CSV columns.
# NOTE(review): pts come from heatmap pixels here, whereas the CSV rows were
# built from .mat annotations — confirm the two feature sets are comparable.
all_clusters = make_clusters(pts)

ab, ac, ad, ae, bc, bd, be, cd, ce, de = [
    min_dist(all_clusters[first], all_clusters[second])
    for first, second in (
        (0, 1), (0, 2), (0, 3), (0, 4),
        (1, 2), (1, 3), (1, 4),
        (2, 3), (2, 4),
        (3, 4),
    )
]

In [26]:
# Feature vector in the same column order as the training CSV (AB ... DE).
# The last slot must be `de`; it previously repeated `ce`, which fed the CE
# distance to the classifier as the DE feature.
check_entry = [ab, ac, ad, ae, bc, bd, be, cd, ce, de]
print(check_entry)

[563.4696087634186, 81.00617260431454, 301.24076749337894, 1.0, 282.71894170713074, 1.0, 373.5906851087163, 1.0, 1.0, 1.0]


In [36]:
# Classify the single feature vector; it is wrapped in a list to form a one-row batch.
y_pred_gini = prediction([check_entry], clf_gini)

Predicted values:
['dense']
