Refression dengan KNN (K Nearest Neighbours)

In [2]:
import pandas as pd
sensus = {'tinggi': [158, 170, 183, 191, 155, 163, 180, 158, 170], 
         'jk': ['pria', 'pria', 'pria', 'pria', 'wanita', 'wanita', 'wanita', 'wanita', 'wanita'],
         'berat': [64, 86, 84, 80, 49, 59, 67, 54, 67]}
sensus_df = pd.DataFrame(sensus)
sensus_df

Unnamed: 0,tinggi,jk,berat
0,158,pria,64
1,170,pria,86
2,183,pria,84
3,191,pria,80
4,155,wanita,49
5,163,wanita,59
6,180,wanita,67
7,158,wanita,54
8,170,wanita,67


In [3]:
import numpy as np
x_train = np.array(sensus_df[['tinggi', 'jk']])
y_train = np.array(sensus_df['berat'])
print(f'x_train:\n{x_train}\n')
print(f'y_train: {y_train}')

x_train:
[[158 'pria']
 [170 'pria']
 [183 'pria']
 [191 'pria']
 [155 'wanita']
 [163 'wanita']
 [180 'wanita']
 [158 'wanita']
 [170 'wanita']]

y_train: [64 86 84 80 49 59 67 54 67]


In [8]:
x_train_transposed = np.transpose(x_train)
print(f'x_train:\n{x_train}\n')
print(f'x_train_transposed: {x_train_transposed}')

x_train:
[[158 'pria']
 [170 'pria']
 [183 'pria']
 [191 'pria']
 [155 'wanita']
 [163 'wanita']
 [180 'wanita']
 [158 'wanita']
 [170 'wanita']]

x_train_transposed: [[158 170 183 191 155 163 180 158 170]
 ['pria' 'pria' 'pria' 'pria' 'wanita' 'wanita' 'wanita' 'wanita'
  'wanita']]


In [9]:
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
jk_binarised = lb.fit_transform(x_train_transposed[1])
print(f'jk: {x_train_transposed[1]}\n')
print(f'jk_binarised:\n{jk_binarised}')

jk: ['pria' 'pria' 'pria' 'pria' 'wanita' 'wanita' 'wanita' 'wanita' 'wanita']

jk_binarised:
[[0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]]


In [10]:
jk_binarised = jk_binarised.flatten()
jk_binarised

array([0, 0, 0, 0, 1, 1, 1, 1, 1])

In [11]:
x_train_transposed[1] = jk_binarised
x_train = x_train_transposed.transpose()
print(f'x_train_transposed:\n{x_train_transposed}\n')
print(f'x_train:\n{x_train}')

x_train_transposed:
[[158 170 183 191 155 163 180 158 170]
 [0 0 0 0 1 1 1 1 1]]

x_train:
[[158 0]
 [170 0]
 [183 0]
 [191 0]
 [155 1]
 [163 1]
 [180 1]
 [158 1]
 [170 1]]


In [12]:
from sklearn.neighbors import KNeighborsRegressor
K = 3
model = KNeighborsRegressor(n_neighbors=K)
model.fit(x_train, y_train)

KNeighborsRegressor(n_neighbors=3)

In [13]:
x_new = np.array([[155, 1]])
x_new

array([[155,   1]])

In [14]:
y_pred = model.predict(x_new)
y_pred

array([55.66666667])

In [15]:
x_test = np.array([[168, 0], [180, 0], [160, 1], [169, 1]])
y_test = np.array([65, 96, 52, 67])
print(f'x_test:\n{x_test}\n')
print(f'y_test: {y_test}')

x_test:
[[168   0]
 [180   0]
 [160   1]
 [169   1]]

y_test: [65 96 52 67]


In [16]:
y_pred = model.predict(x_test)
y_pred

array([70.66666667, 79.        , 59.        , 70.66666667])

In [19]:
from sklearn.metrics import r2_score
r_squared = r2_score(y_test, y_pred)
print(f'R-square: {r_squared}')

R-square: 0.6290565226735438


In [20]:
from sklearn.metrics import mean_absolute_error
MAE = mean_absolute_error(y_test, y_pred)
print(f'MAE: {MAE}')

MAE: 8.333333333333336


In [21]:
from sklearn.metrics import mean_squared_error
MSE = mean_squared_error(y_test, y_pred)
print(f'MSE: {MSE}')

MSE: 95.8888888888889


In [29]:
from scipy.spatial.distance import euclidean

# tinggi dalam milimeter
x_train = np.array([[1700, 0], [1600, 1]])
x_new = np.array([[1640, 0]])
[euclidean(x_new[0], d) for d in x_train]

[60.0, 40.01249804748511]

In [30]:
#tinggi dalam meter
x_train = np.array([[1.7, 0], [1.6, 1]])
x_new = np.array([[1.64, 0]])
[euclidean(x_new[0], d) for d in x_train]

[0.06000000000000005, 1.0007996802557444]

In [31]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()

In [32]:
#tinggi dalam milimeter
x_train = np.array([[1700, 0], [1600, 1]])
x_train_scaled = ss.fit_transform(x_train)
print(f'x_train_scaled:\n{x_train_scaled}\n')

x_new = np.array([[1640, 0]])
x_new_scaled = ss.transform(x_new)
print(f'x_new_scaled: {x_new_scaled}\n')

jarak = [euclidean(x_new_scaled[0], d) for d in x_train_scaled]
print(f'jarak: {jarak}')

x_train_scaled:
[[ 1. -1.]
 [-1.  1.]]

x_new_scaled: [[-0.2 -1. ]]

jarak: [1.2, 2.154065922853802]


In [33]:
#tinggi dalam meter
x_train = np.array([[1.7, 0], [1.6, 1]])
x_train_scaled = ss.fit_transform(x_train)
print(f'x_train_scaled:\n{x_train_scaled}\n')

x_new = np.array([[1.64, 0]])
x_new_scaled = ss.transform(x_new)
print(f'x_new_scaled: {x_new_scaled}\n')

jarak = [euclidean(x_new_scaled[0], d) for d in x_train_scaled]
print(f'jarak: {jarak}')

x_train_scaled:
[[ 1. -1.]
 [-1.  1.]]

x_new_scaled: [[-0.2 -1. ]]

jarak: [1.2000000000000026, 2.1540659228538006]


In [34]:
#training set
x_train = np.array([[158, 0], [170, 0], [183, 0], [191, 0], [155, 1], [163, 1], [180, 1], [158, 1], [170, 1]])
y_train = np.array([64, 86, 84, 80, 49, 59, 67, 54, 67])

#test set
x_test = np.array([[168, 0], [180, 0], [160, 1], [169, 1]])
y_test = np.array([65, 96, 52, 67])

In [35]:
x_train_scaled = ss.fit_transform(x_train)
x_test_scaled = ss.fit_transform(x_test)
print(f'x_train_scaled:\n{x_train_scaled}\n')
print(f'x_test_scaled:\n{x_test_scaled}\n')

x_train_scaled:
[[-0.9908706  -1.11803399]
 [ 0.01869567 -1.11803399]
 [ 1.11239246 -1.11803399]
 [ 1.78543664 -1.11803399]
 [-1.24326216  0.89442719]
 [-0.57021798  0.89442719]
 [ 0.86000089  0.89442719]
 [-0.9908706   0.89442719]
 [ 0.01869567  0.89442719]]

x_test_scaled:
[[-0.17557375 -1.        ]
 [ 1.50993422 -1.        ]
 [-1.29924573  1.        ]
 [-0.03511475  1.        ]]



In [37]:
model.fit(x_train_scaled, y_train)
y_pred = model.predict(x_test_scaled)

MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)

print(f'MAE: {MAE}')
print(f'MSE: {MSE}')

MAE: 7.583333333333336
MSE: 85.13888888888893
