**mounting drive**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd


**read the dataset**

In [3]:
realestate_dataset=pd.read_csv("/content/drive/MyDrive/ML_TRAINING/Dataset/Real estate valuation data set.csv")

In [4]:
realestate_dataset.head(10)

Unnamed: 0,No,transaction date,house age,distance to the nearest MRT station,number of convenience stores,latitude,longitude,house price of unit area
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1
5,6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1
6,7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3
7,8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7
8,9,2013.5,31.7,5512.038,1,24.95095,121.48458,18.8
9,10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1


**Check null values**

In [5]:
realestate_dataset.isnull().any()

No                                     False
transaction date                       False
house age                              False
distance to the nearest MRT station    False
number of convenience stores           False
latitude                               False
longitude                              False
house price of unit area               False
dtype: bool

In [6]:
realestate_dataset.isnull().sum()

No                                     0
transaction date                       0
house age                              0
distance to the nearest MRT station    0
number of convenience stores           0
latitude                               0
longitude                              0
house price of unit area               0
dtype: int64

In [7]:
x=realestate_dataset.iloc[:,2:5].values
y=realestate_dataset.iloc[:,7:8].values

In [8]:
x

array([[ 32.     ,  84.87882,  10.     ],
       [ 19.5    , 306.5947 ,   9.     ],
       [ 13.3    , 561.9845 ,   5.     ],
       ...,
       [ 18.8    , 390.9696 ,   7.     ],
       [  8.1    , 104.8101 ,   5.     ],
       [  6.5    ,  90.45606,   9.     ]])

In [9]:
y

array([[ 37.9],
       [ 42.2],
       [ 47.3],
       [ 54.8],
       [ 43.1],
       [ 32.1],
       [ 40.3],
       [ 46.7],
       [ 18.8],
       [ 22.1],
       [ 41.4],
       [ 58.1],
       [ 39.3],
       [ 23.8],
       [ 34.3],
       [ 50.5],
       [ 70.1],
       [ 37.4],
       [ 42.3],
       [ 47.7],
       [ 29.3],
       [ 51.6],
       [ 24.6],
       [ 47.9],
       [ 38.8],
       [ 27. ],
       [ 56.2],
       [ 33.6],
       [ 47. ],
       [ 57.1],
       [ 22.1],
       [ 25. ],
       [ 34.2],
       [ 49.3],
       [ 55.1],
       [ 27.3],
       [ 22.9],
       [ 25.3],
       [ 47.7],
       [ 46.2],
       [ 15.9],
       [ 18.2],
       [ 34.7],
       [ 34.1],
       [ 53.9],
       [ 38.3],
       [ 42. ],
       [ 61.5],
       [ 13.4],
       [ 13.2],
       [ 44.2],
       [ 20.7],
       [ 27. ],
       [ 38.9],
       [ 51.7],
       [ 13.7],
       [ 41.9],
       [ 53.5],
       [ 22.6],
       [ 42.4],
       [ 21.3],
       [ 63.2],
       [

In [10]:
x.shape

(414, 3)

In [11]:
y.shape

(414, 1)

**split the dataset into train set and test set**

In [12]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.2,random_state=0)

In [13]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(x_test.shape)

(331, 3)
(331, 1)
(83, 3)
(83, 3)


**feature scaling**

In [14]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.fit_transform(x_test)

In [15]:
from sklearn.neighbors import KNeighborsRegressor
knn=KNeighborsRegressor()

In [16]:
knn.fit(x_train,y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                    weights='uniform')

In [17]:
y_pred=knn.predict(x_test)

In [18]:
y_pred

array([[50.02],
       [19.02],
       [34.68],
       [18.74],
       [31.82],
       [39.06],
       [36.46],
       [36.56],
       [49.24],
       [42.52],
       [45.46],
       [42.24],
       [50.02],
       [38.14],
       [50.7 ],
       [45.64],
       [34.82],
       [48.62],
       [42.24],
       [41.12],
       [50.82],
       [30.88],
       [39.88],
       [43.96],
       [53.98],
       [36.52],
       [44.24],
       [24.66],
       [54.98],
       [27.48],
       [54.62],
       [27.02],
       [49.74],
       [41.6 ],
       [44.24],
       [30.88],
       [41.02],
       [30.7 ],
       [50.24],
       [14.6 ],
       [47.42],
       [35.9 ],
       [26.6 ],
       [41.02],
       [17.84],
       [51.06],
       [37.54],
       [18.84],
       [30.1 ],
       [49.48],
       [54.72],
       [31.9 ],
       [43.8 ],
       [16.18],
       [30.88],
       [46.88],
       [54.62],
       [40.36],
       [45.46],
       [27.02],
       [36.58],
       [37.8 ],
       [

In [19]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.6410732944875639

In [20]:
y_pred=knn.predict(sc.transform(([[7.0,390.56840,10]])))
y_pred

array([[50.24]])