1. Membuat Data Frame

In [1172]:
import pandas as pd

df = pd.read_csv('./resource/toyota.csv')
features = ['year', 'transmission', 'mileage', 'fuelType', 'tax', 'mpg', 'engineSize']

df.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,GT86,2016,16000,Manual,24089,Petrol,265,36.2,2.0
1,GT86,2017,15995,Manual,18615,Petrol,145,36.2,2.0
2,GT86,2015,13998,Manual,27469,Petrol,265,36.2,2.0
3,GT86,2017,18998,Manual,14736,Petrol,150,36.2,2.0
4,GT86,2017,17498,Manual,36284,Petrol,145,36.2,2.0


2. Melihat Data yang Sama

In [1173]:
for column in [features[0], features[1], features[3], features[6]]:
    print(f'{column} ({df[column].nunique()}/{df[column].count()}): \n{df[column].unique()}\n')

year (23/6738): 
[2016 2017 2015 2020 2013 2019 2018 2014 2012 2005 2003 2004 2001 2008
 2007 2010 2011 2006 2009 2002 1999 2000 1998]

transmission (4/6738): 
['Manual' 'Automatic' 'Semi-Auto' 'Other']

fuelType (4/6738): 
['Petrol' 'Other' 'Hybrid' 'Diesel']

engineSize (16/6738): 
[2.  1.8 1.2 1.6 1.4 2.5 2.2 1.5 1.  1.3 0.  2.4 3.  2.8 4.2 4.5]



3. Pemisahan Variabel

In [1174]:
X = df[features]
y = df['price']

In [1175]:
X.head()

Unnamed: 0,year,transmission,mileage,fuelType,tax,mpg,engineSize
0,2016,Manual,24089,Petrol,265,36.2,2.0
1,2017,Manual,18615,Petrol,145,36.2,2.0
2,2015,Manual,27469,Petrol,265,36.2,2.0
3,2017,Manual,14736,Petrol,150,36.2,2.0
4,2017,Manual,36284,Petrol,145,36.2,2.0


In [1176]:
y.head()

0    16000
1    15995
2    13998
3    18998
4    17498
Name: price, dtype: int64

4. Konversi Data Kategorial Menjadi Numerik

In [1177]:
tranmission_convert = pd.get_dummies(X['transmission']).astype(int)

fultype_convert = pd.get_dummies(X['fuelType']).astype(int)

In [1178]:
tranmission_convert.head()

Unnamed: 0,Automatic,Manual,Other,Semi-Auto
0,0,1,0,0
1,0,1,0,0
2,0,1,0,0
3,0,1,0,0
4,0,1,0,0


In [1179]:
fultype_convert.head()

Unnamed: 0,Diesel,Hybrid,Other,Petrol
0,0,0,0,1
1,0,0,0,1
2,0,0,0,1
3,0,0,0,1
4,0,0,0,1


5. Menghapus Kolom yang Sudah Tidak Diperlukan

In [1180]:
X = X.drop(['transmission', 'fuelType'], axis=1)
X.head()

Unnamed: 0,year,mileage,tax,mpg,engineSize
0,2016,24089,265,36.2,2.0
1,2017,18615,145,36.2,2.0
2,2015,27469,265,36.2,2.0
3,2017,14736,150,36.2,2.0
4,2017,36284,145,36.2,2.0


6. Mengabungkan Data yang Sudah Dikonversi

In [1181]:
X = pd.concat([X, tranmission_convert, fultype_convert], axis=1)
X.head()

Unnamed: 0,year,mileage,tax,mpg,engineSize,Automatic,Manual,Other,Semi-Auto,Diesel,Hybrid,Other.1,Petrol
0,2016,24089,265,36.2,2.0,0,1,0,0,0,0,0,1
1,2017,18615,145,36.2,2.0,0,1,0,0,0,0,0,1
2,2015,27469,265,36.2,2.0,0,1,0,0,0,0,0,1
3,2017,14736,150,36.2,2.0,0,1,0,0,0,0,0,1
4,2017,36284,145,36.2,2.0,0,1,0,0,0,0,0,1


7. Mengubah data

In [1182]:
import numpy as np

X = np.array(X)
y = np.array(y)

pd.DataFrame({
    'year': X[:, 0],
    'mileage': X[:, 1],
    'tax': X[:, 2],
    'mpg': X[:, 3],
    'engineSize': X[:, 4],
    'Automatic': X[:, 5],
    'Manual': X[:, 6],
    'Other': X[:, 7],
    'Semi-Auto': X[:, 8],
    'Diesel': X[:, 9],
    'Hybrid': X[:, 10],
    'Other': X[:, 11],
    'Petrol': X[:, 12]
}).head()

Unnamed: 0,year,mileage,tax,mpg,engineSize,Automatic,Manual,Other,Semi-Auto,Diesel,Hybrid,Petrol
0,2016.0,24089.0,265.0,36.2,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,2017.0,18615.0,145.0,36.2,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,2015.0,27469.0,265.0,36.2,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,2017.0,14736.0,150.0,36.2,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,2017.0,36284.0,145.0,36.2,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


8. Inisiasi Model dan Training Data

In [1183]:
from sklearn.neighbors import KNeighborsRegressor as KNN

K = 1

model = KNN(n_neighbors=K)
model.fit(X, y)

10. Membuat Dataset Training dan Test

In [1184]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

11. Evaluasi Model

In [1185]:
from sklearn.metrics import r2_score

y_pred = model.predict(X_test)
print(f'R-Squared: {r2_score(y_test, y_pred)}')

estimasi = float("{:.2f}".format(y_pred[0]))
estimasi_rupiah = '{:,.0f}'.format(estimasi * 19281)

print('\nHasil Prediksi:')
print(f'£ {estimasi}')
print(f'Rp. {estimasi_rupiah}')

R-Squared: 0.999429086897992

Hasil Prediksi:
£ 11295.0
Rp. 217,778,895
