In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import os

In [None]:
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Load the car-price CSV from the Kaggle input directory into a DataFrame.
dataset = pd.read_csv('/kaggle/input/car-price-prediction/CarPrice_Assignment.csv')

In [None]:
# Preview the first rows of the loaded data.
dataset.head()

In [None]:
# Print the frame's column names, dtypes and non-null counts.
dataset.info()

In [None]:
# Descriptive statistics for the frame's columns.
dataset.describe()

In [None]:
# Count missing values per column.
dataset.isnull().sum()

In [None]:
# Annotated correlation heatmap over the numeric columns only.
# The columns that are label-encoded in a later cell are presumably still
# strings here, and DataFrame.corr() raises on non-numeric columns under
# pandas >= 2.0, so the frame is filtered with select_dtypes first (this
# also works on older pandas versions).
plt.figure(figsize=(15,8))
sns.heatmap(dataset.select_dtypes(include='number').corr(), annot=True, cmap='viridis');

In [None]:
# Pairwise correlation table for the numeric columns only; calling corr() on a
# frame that still contains string columns raises under pandas >= 2.0.
dataset.select_dtypes(include='number').corr()

In [None]:
# Single encoder instance; it is refit for each categorical column it is applied to.
labelencoder = LabelEncoder()

In [None]:
# Replace each categorical column with integer codes, in place.
# The encoder is refit per column, so every column gets its own code mapping.
# Each column must be encoded from itself: encoding 'fuelsystem' from
# 'enginelocation' would silently turn it into a duplicate of that column.
# NOTE(review): the codes are arbitrary integers, so a linear model will treat
# them as ordered magnitudes — confirm that is acceptable for these features.
for column in ('fueltype', 'aspiration', 'carbody', 'drivewheel', 'enginelocation', 'fuelsystem'):
    dataset[column] = labelencoder.fit_transform(dataset[column])

In [None]:
# Display the frame after label encoding.
dataset

## Feature Selection based on Correlation Coefficients

In [None]:
# Feature subset used for the first model (see the section heading: chosen
# from the correlation coefficients).
selected_columns = [
    'wheelbase',
    'fueltype',
    'carlength',
    'carwidth',
    'curbweight',
    'enginesize',
    'boreratio',
    'horsepower',
    'carbody',
]
X = dataset[selected_columns]

In [None]:
# Regression target: the 'price' column.
y = dataset['price']

In [None]:
# Display the selected feature columns.
X

In [None]:
# Display the target column.
y

## Feature Scaling

In [None]:
# Min-max scale the selected features; X is rebound to the scaler's output.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so rows that later land in the test set influence the scaling — confirm
# this is acceptable.
scaler = MinMaxScaler()
X = scaler.fit(X).transform(X)

In [None]:
# Display the scaled features returned by the scaler.
X

## Linear Regression

In [None]:
# Linear regression estimator; the same object is fitted again later on the
# wider feature set.
model = LinearRegression()

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit the regressor on the training split of the selected features.
model.fit(X_train, y_train)

In [None]:
# Show the model's predictions for the test split.
model.predict(X_test)

In [None]:
# R^2 of the model's predictions against the held-out test targets.
r2_score(y_test, model.predict(X_test))

### Taking More Features

In [None]:
# Wider feature set: everything in the frame except the target and the
# columns listed below.
# NOTE(review): any identifier-like column still present in the frame is kept
# as a feature here — confirm that is intended.
excluded_columns = [
    'CarName',
    'price',
    'doornumber',
    'enginetype',
    'cylindernumber',
]
X1 = dataset.drop(columns=excluded_columns)

In [None]:
# Display the wider feature set before scaling.
X1

In [None]:
# Refit the existing scaler on the wider feature set and rebind X1 to the
# scaled output; the scaler's previous fit is discarded.
X1 = scaler.fit(X1).transform(X1)

In [None]:
# Display the scaled wider feature set.
X1

In [None]:
# Same 70/30 split and seed as before, now on the wider feature set.
# y_train and y_test are rebound by this cell.
wide_split = train_test_split(
    X1,
    y,
    random_state=42,
    test_size=0.3,
)
X1_train, X1_test, y_train, y_test = wide_split

In [None]:
# Refit the same regressor object on the wider feature set; this replaces the
# earlier fit.
model.fit(X1_train, y_train)

In [None]:
# Show the refitted model's predictions for the test split.
model.predict(X1_test)

In [None]:
# R^2 of the refitted model on the held-out test targets.
r2_score(y_test, model.predict(X1_test))

### Recursive Feature Elimination

In [None]:
# Recursive feature elimination around a fresh linear regressor: keep 15
# features, removing 5 per elimination round.
a = RFE(
    LinearRegression(),
    n_features_to_select=15,
    step=5,
)

In [None]:
# Run the feature elimination and fit on the training split.
a.fit(X1_train, y_train)

In [None]:
# Boolean mask marking which input features the selector kept.
a.support_

In [None]:
# Show the RFE model's predictions for the test split.
a.predict(X1_test)

In [None]:
# R^2 of the RFE model on the held-out test targets.
r2_score(y_test, a.predict(X1_test))

In [None]:
# Keep the RFE model's test-split predictions for plotting.
y_pred = a.predict(X1_test)

In [None]:
# Overlay actual and predicted prices for the test split, plotted against
# each row's position within the split. An explicit figure/axes pair is used
# so the title and axis labels can be set and the figure stands on its own.
fig, ax = plt.subplots(figsize=(10, 7), dpi=100)
sns.scatterplot(x=range(len(y_test)), y=y_test, color='blue', label='Test Value', ax=ax)
sns.scatterplot(x=range(len(y_pred)), y=y_pred, color='red', label='Predicted Value', ax=ax)
ax.set(
    title='Test vs. predicted price (linear regression with RFE)',
    xlabel='Position in test split',
    ylabel='Price',
);

In [None]:
# Print a short summary of the RFE model. The figure reported as "accuracy"
# is the R^2 score on the held-out split, expressed as a percentage.
rfe_r2 = r2_score(y_test, a.predict(X1_test))
print('Regression Model: Linear')
print('Feature Selection with Recursive Feature Elimination (RFE)')
# Format with a format spec rather than calling .round() on the score: the
# score may be a plain Python float (which has no .round method), and
# multiplying a rounded value by 100 can print float artifacts.
print('Model Accuracy: {:.1f}%'.format(100 * rfe_r2))