# KNN

In [1]:
import pandas as pd
from math import sqrt

In [2]:
# Toy training set: one row per person, with two numeric features (height and
# weight) and the T-shirt size label that the classifier has to predict.
# The first 7 rows are labelled 'M', the remaining 11 are labelled 'L'.
data = pd.DataFrame(
    {
        'Height': [158, 158, 158, 160, 160, 163, 163, 160, 163,
                   165, 165, 165, 168, 168, 168, 170, 170, 170],
        'Weight': [58, 59, 63, 59, 60, 60, 61, 64, 64,
                   61, 62, 65, 62, 63, 66, 63, 64, 68],
        'Size': ['M'] * 7 + ['L'] * 11,
    }
)
data

Unnamed: 0,Height,Weight,Size
0,158,58,M
1,158,59,M
2,158,63,M
3,160,59,M
4,160,60,M
5,163,60,M
6,163,61,M
7,160,64,L
8,163,64,L
9,165,61,L


In [3]:
def dist(test_h, test_w, train_h, train_w):
    """Return the Euclidean distance between two (height, weight) points.

    Args:
        test_h: Height of the query point.
        test_w: Weight of the query point.
        train_h: Height of the training point.
        train_w: Weight of the training point.

    Returns:
        The straight-line distance sqrt(dh**2 + dw**2) as a float.
    """
    delta_h = test_h - train_h
    delta_w = test_w - train_w
    return sqrt(delta_h ** 2 + delta_w ** 2)

## Without Standardization

In [4]:
# Query point to classify, as [height, weight].
test_data = [161, 61]

# Distance from the query point to every training row, computed on the raw
# (unscaled) feature values.
distances = [
    dist(test_data[0], test_data[1], height, weight)
    for height, weight in zip(data['Height'], data['Weight'])
]
data['Distance'] = distances

# Keep the five training rows closest to the query point.
sorted_data = data.sort_values('Distance').head(5)
sorted_data

Unnamed: 0,Height,Weight,Size,Distance
4,160,60,M,1.414214
6,163,61,M,2.0
3,160,59,M,2.236068
5,163,60,M,2.236068
7,160,64,L,3.162278


In [5]:
# Majority vote among the nearest neighbours held in `sorted_data`.
# The previous `sorted_data.max()['Size']` took the lexicographically largest
# label ('M' > 'L') instead of the most frequent one, so any neighbourhood
# containing even a single 'M' row was classified as 'M'.
# `mode()` returns the most frequent label(s) in sorted order; on a tie the
# first (alphabetically smallest) label is used.
predicted_size = sorted_data['Size'].mode().iloc[0]

print(f'{test_data[0]}cm height and {test_data[1]}kg weight person belongs to "{predicted_size}" size.')

161cm height and 61kg weight person belongs to "M" size.


## With Standardization (Min-Max Scaling to [0, 1])

In [6]:
def standard(x, min, max):
    """Min-max scale `x` relative to the interval [min, max].

    Args:
        x: Value to rescale.
        min: Lower bound of the interval (maps to 0).
        max: Upper bound of the interval (maps to 1).

    Returns:
        (x - min) / (max - min).

    Note:
        The parameter names shadow the `min`/`max` builtins inside this
        function; they are kept because they are part of the signature.
    """
    offset = x - min
    span = max - min
    return offset / span

In [7]:
# Smallest and largest value of each feature column. These are the bounds
# passed to `standard` below, so they have to be taken while `data` still
# holds the raw values (a later cell overwrites both columns with scaled ones).
min_h, max_h = min(data['Height']), max(data['Height'])
min_w, max_w = min(data['Weight']), max(data['Weight'])

print(f'Min Height: {min_h}')
print(f'Max Height: {max_h}\n')
print(f'Min Weight: {min_w}')
print(f'Max Weight: {max_w}')

Min Height: 158
Max Height: 170

Min Weight: 58
Max Weight: 68


In [8]:
# Rescale every height and weight with `standard`, using the column bounds
# computed earlier (min_h/max_h, min_w/max_w).
new_heights = [standard(height, min_h, max_h) for height in data['Height']]
new_weights = [standard(weight, min_w, max_w) for weight in data['Weight']]

# NOTE: the raw columns are overwritten in place, so this cell is not
# idempotent: running it a second time would rescale already-scaled values.
# The 'Distance' column still holds the distances computed on the raw values
# until it is recomputed.
data['Height'] = new_heights
data['Weight'] = new_weights
data

Unnamed: 0,Height,Weight,Size,Distance
0,0.0,0.0,M,4.242641
1,0.0,0.1,M,3.605551
2,0.0,0.5,M,3.605551
3,0.166667,0.1,M,2.236068
4,0.166667,0.2,M,1.414214
5,0.416667,0.2,M,2.236068
6,0.416667,0.3,M,2.0
7,0.166667,0.6,L,3.162278
8,0.416667,0.6,L,3.605551
9,0.583333,0.3,L,4.0


In [9]:
# Rescale the query point (height 161, weight 61) with the same bounds that
# were used for the training columns, so both live on the same scale.
test_data = [
    standard(161, min_h, max_h),  # scaled height
    standard(61, min_w, max_w),   # scaled weight
]
test_data

[0.25, 0.3]

In [10]:
# Recompute the distance from the (scaled) query point to every training row,
# now that the 'Height' and 'Weight' columns hold scaled values; this replaces
# the 'Distance' column that was computed on the raw values.
distances = [
    dist(test_data[0], test_data[1], height, weight)
    for height, weight in zip(data['Height'], data['Weight'])
]
data['Distance'] = distances
data

Unnamed: 0,Height,Weight,Size,Distance
0,0.0,0.0,M,0.390512
1,0.0,0.1,M,0.320156
2,0.0,0.5,M,0.320156
3,0.166667,0.1,M,0.216667
4,0.166667,0.2,M,0.130171
5,0.416667,0.2,M,0.194365
6,0.416667,0.3,M,0.166667
7,0.166667,0.6,L,0.311359
8,0.416667,0.6,L,0.343188
9,0.583333,0.3,L,0.333333


In [11]:
# Five training rows closest to the query point, nearest first.
sorted_data = data.sort_values('Distance').head(5)
sorted_data

Unnamed: 0,Height,Weight,Size,Distance
4,0.166667,0.2,M,0.130171
6,0.416667,0.3,M,0.166667
5,0.416667,0.2,M,0.194365
3,0.166667,0.1,M,0.216667
7,0.166667,0.6,L,0.311359


In [12]:
# Majority vote among the nearest neighbours held in `sorted_data`.
# The previous `sorted_data.max()['Size']` took the lexicographically largest
# label ('M' > 'L') instead of the most frequent one, so any neighbourhood
# containing even a single 'M' row was classified as 'M'.
# `mode()` returns the most frequent label(s) in sorted order; on a tie the
# first (alphabetically smallest) label is used.
predicted_size = sorted_data['Size'].mode().iloc[0]

# The raw query values are written out literally because `test_data` holds the
# scaled values at this point.
print(f'After standardization, 161cm height and 61kg weight person belongs to "{predicted_size}" size.')

After standardization, 161cm height and 61kg weight person belongs to "M" size.
