In [1]:
import pandas as pd
import numpy as np
import random
import math
data_frame = pd.read_csv('overdoses.csv', delimiter = ',')

data_frame['Population'] = data_frame['Population'].str.replace(',', '')
data_frame['Deaths'] = data_frame['Deaths'].str.replace(',', '')
data_frame[data_frame.columns[1:3]] = data_frame[data_frame.columns[1:3]].astype(float)

population_deaths_data = data_frame[['Population', 'Deaths']]


In [2]:
class K_Means:
    
    def __init__(self, k, tol=0.001, max_iter=300):
        self.k = k
        self.tol = tol
        self.max_iter = max_iter
    
    def find_centroids(self,data_frame_passed):
        self.centroids = {}
        self.classifications = {}
        shuffled_data_frame = data_frame_passed.sample(frac=1)
        for i in range(self.k):
            self.centroids[i] = shuffled_data_frame.iloc[0,i] 
            self.classifications[i] = []
        print(self.centroids)
    
    def classify(self,point):
        distance =[]
        class_to_which_it_belongs = 0
        for i in range(self.k):
            centroid_value = self.centroids.get(i)
            distance_calc = np.linalg.norm(point-centroid_value)
            if (i==0):
                min_distance = distance_calc
            elif (distance_calc < min_distance):
                min_distance = distance_calc
                class_to_which_it_belongs = i
        return class_to_which_it_belongs
             
    
    def find_centroid_again(self):
        for i in range(self.k):
            classified_list_points = self.classifications[i]
            centroid_val = sum(classified_list_points)/len(classified_list_points)
            self.centroids[i] = centroid_val
        print("New Centroids are")
        print(self.centroids)
        
    def classify_data_points_data_frame_first_time(self, data_frame_passed):    
        for row in range(len(data_frame_passed)):
            for col_num in data_frame_passed.columns:
                point = data_frame_passed.loc[row,col_num]
                classified_class_number = self.classify(point)
                self.classifications[classified_class_number].append(point)
        
    def clear_previous_classified_values_in_map(self):
         for k in range(self.k):
            self.classifications[k] = []
                        
    def initialise_new_ds(self):
        new_classified_points = {}
        for i in range(self.k):
            new_classified_points[i] = []
        return new_classified_points
    
    def fit(self,data_frame_passed):
        self.find_centroids(data_frame_passed)
        self.classify_data_points_data_frame_first_time(data_frame_passed) 
    
        for iter_number in range(500): 
            self.find_centroid_again()
            new_data_structure = self.initialise_new_ds()
            for class_number in range(self.k):
                list_of_points = self.classifications[class_number] 
                for index in range(len(list_of_points)):
                    class_to_which_it_belongs = self.classify(list_of_points[index])
                    new_data_structure[class_to_which_it_belongs].append(list_of_points[index])
            self.classifications = new_data_structure
            new_data_structure= None
            

                

In [3]:
k_means_object = K_Means(2)
k_means_object.fit(population_deaths_data)

{0: 1868516.0, 1: 125.0}
New Centroids are
{0: 7069180.704545454, 1: 80096.39285714286}
New Centroids are
{0: 9650870.896551725, 1: 502170.323943662}
New Centroids are
{0: 11357691.681818182, 1: 841796.5641025641}
New Centroids are
{0: 13027339.529411765, 1: 1133308.156626506}
New Centroids are
{0: 15675586.5, 1: 1447980.8068181819}
New Centroids are
{0: 16349693.909090908, 1: 1524524.8988764044}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729

New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.

New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.4, 1: 1606467.2777777778}
New Centroids are
{0: 17094729.