In [1]:
# 1. True
# 2. Model 3

In [3]:
import pandas as pd 
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Reading the csv file 
autos = pd.read_csv('autos.csv')
autos.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


# Defining X and Y

In [4]:
X = autos[['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration']]
Y = autos['mpg']

# Standardize the Input Data

In [5]:
scaler = StandardScaler().set_output(transform = 'pandas')

X = scaler.fit_transform(X)
X.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration
0,1.483947,1.07729,0.664133,0.62054,-1.285258
1,1.483947,1.488732,1.574594,0.843334,-1.466724
2,1.483947,1.182542,1.184397,0.540382,-1.648189
3,1.483947,1.048584,1.184397,0.536845,-1.285258
4,1.483947,1.029447,0.924265,0.555706,-1.829655


# Splitting the data

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

# K-NN

In [9]:
knn_md = KNeighborsRegressor(n_neighbors = 5).fit(X_train, Y_train)

# Predicting on test
knn_pred = knn_md.predict(X_test)

print(f"The RMSE of the k-nn model is {mean_squared_error(Y_test, knn_pred, squared = False)}")
print(f"The MAE of the k-nn model is {mean_absolute_error(Y_test, knn_pred)}")

The RMSE of the k-nn model is 4.499244381004437
The MAE of the k-nn model is 3.292658227848101


# Random Forest

In [11]:
RF_md = RandomForestRegressor(n_estimators = 500, max_depth = 3).fit(X_train, Y_train)

# Predicting on test
RF_pred = RF_md.predict(X_test)

print(f"The RMSE of the RF model is {mean_squared_error(Y_test, RF_pred, squared = False)}")
print(f"The MAE of the RF model is {mean_absolute_error(Y_test, RF_pred)}")

The RMSE of the RF model is 4.155093941182465
The MAE of the RF model is 2.908718201092137


# Gradient Boosting 

In [12]:
GB_md = GradientBoostingRegressor(n_estimators = 500, max_depth = 3).fit(X_train, Y_train)

# Predicting on test
GB_pred = GB_md.predict(X_test)

print(f"The RMSE of the GB model is {mean_squared_error(Y_test, GB_pred, squared = False)}")
print(f"The MAE of the GB model is {mean_absolute_error(Y_test, GB_pred)}")

The RMSE of the GB model is 4.215199999267491
The MAE of the GB model is 2.834522922288351
