# Car Price Prediction with Multiple Linear Regression

In [None]:
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
# Walk the Kaggle input directory and print the full path of every file found,
# so the dataset location can be confirmed before loading it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

In [None]:
# Location of the dataset and the only three columns this notebook works with:
# two predictors (horsepower, enginesize) and the target (price).
csv_path = '/kaggle/input/car-price-prediction/CarPrice_Assignment.csv'
selected_columns = ['horsepower', 'enginesize', 'price']

car_price = pd.read_csv(csv_path, usecols=selected_columns)

# Preview the first rows of the loaded frame.
car_price.head()

In [None]:
# Summary statistics, transposed so each column of the data becomes one row.
summary_stats = car_price.describe()
summary_stats.T

In [None]:
# Count the missing values in each column.
missing_mask = car_price.isnull()
missing_mask.sum()

In [None]:
# Side-by-side scatter plots of each predictor against price.
sns.set_style('whitegrid')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel: (axis, feature column, panel title, x-axis label).
panels = (
    (ax1, 'enginesize', 'Enginesize - Price', 'Enginesize'),
    (ax2, 'horsepower', 'Horsepower - Price', 'Horsepower'),
)

for axis, feature, panel_title, x_label in panels:
    axis.scatter(
        x=car_price[feature],
        y=car_price['price'],
        marker='+',
        color='darkorange',
    )
    axis.set_title(panel_title, size=12)
    axis.set(xlabel=x_label, ylabel='Price')

fig.tight_layout()

## Multiple Linear Regression

In [None]:
# Independent variables: every column except the target (horsepower, enginesize).
x = car_price.drop(columns='price')
# Dependent variable: the price column.
y = car_price['price']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

# Report the shape of each resulting partition.
print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')

In [None]:
# Fit an ordinary least-squares model on the training partition.
multiple_lr = LinearRegression()
multiple_lr.fit(X_train, y_train)

# Collect the pieces of the fitted equation: intercept, then one
# (title-cased column name, coefficient) pair for each of the first two features.
intercept = multiple_lr.intercept_
coefficients = multiple_lr.coef_
first_feature = X_train.columns[0].title()
second_feature = X_train.columns[1].title()

print(' Model Equation '.center(70, '#'), '\n')
print(
    'Price =', intercept,
    '+', first_feature, '*', coefficients[0],
    '+', second_feature, '*', coefficients[1],
)

In [None]:
# Predict prices for the held-out test rows and report the mean absolute error
# between the true and predicted prices, rounded to 5 decimal places.
y_pred = multiple_lr.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# The label previously read "Muliple"; the spelling is corrected here.
print('Multiple Linear Regression MAE: {}'.format(round(mae, 5)))