## Importing libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

## Reading data

In [1]:
data=pd.read_csv('../input/used-car-dataset-ford-and-mercedes/audi.csv')
data.head()

In [1]:
data.describe()

In [1]:
data.info()

In [1]:
data.isna().sum()

## EDA

In [1]:
data.duplicated().sum()

In [1]:
data.drop_duplicates(inplace=True)
data.duplicated().sum()

In [1]:
data.plot(kind='box',subplots=True,layout=(3,2),figsize=(15,7))

### Model

In [1]:
data['model'].nunique()

In [1]:
data['model'].unique()

In [1]:
data['model'].value_counts()

In [1]:
plt.figure(figsize=(15,7))
sns.countplot(data=data,x='model')

In [1]:
plt.figure(figsize=(15,7))
sns.boxplot(data=data,x='model',y='price')

### Year

In [1]:
data['year'].nunique()

In [1]:
data['year'].unique()

In [1]:
data['year'].value_counts()

In [1]:
sns.distplot(data['year'])

In [1]:
sns.regplot(data=data,x='year',y='price')

In [1]:
plt.figure(figsize=(15,7))
sns.boxplot(data=data,x='year',y='price')

### Price

In [1]:
data['price'].nunique()

In [1]:
sns.distplot(data['price'])

In [1]:
sns.boxplot(data=data,y='price')

### Transmission

In [1]:
data['transmission'].nunique()

In [1]:
data['transmission'].unique()

In [1]:
data['transmission'].value_counts()

In [1]:
sns.countplot(data=data,x='transmission')

In [1]:
sns.boxplot(data=data,x='transmission',y='price')

### Mileage

In [1]:
data['mileage'].nunique()

In [1]:
sns.distplot(data['mileage'])

In [1]:
sns.boxplot(data=data,y='mileage')

In [1]:
sns.regplot(data=data,x='mileage',y='price')

### Fuel Type

In [1]:
data['fuelType'].nunique()

In [1]:
data['fuelType'].unique()

In [1]:
data['fuelType'].value_counts()

In [1]:
sns.countplot(data=data,x='fuelType')

In [1]:
sns.boxplot(data=data,x='fuelType',y='price')

### Tax

In [1]:
data['tax'].nunique()

In [1]:
sns.distplot(data['tax'])

In [1]:
sns.boxplot(data=data,y='tax')

In [1]:
sns.regplot(data=data,y='price',x='tax')

### MPG

In [1]:
data['mpg'].nunique()

In [1]:
sns.distplot(data['mpg'])

In [1]:
sns.boxplot(y=data['mpg'])

In [1]:
sns.regplot(data=data,x='mpg',y='price')

### EngineSize

In [1]:
data['engineSize'].nunique()

In [1]:
data['engineSize'].value_counts()

In [1]:
sns.countplot(data=data,x='engineSize')

In [1]:
sns.boxplot(y=data['engineSize'])

In [1]:
sns.regplot(data=data,x='engineSize',y='price')

## Feature Engineering

In [1]:
from sklearn.preprocessing import LabelEncoder,MinMaxScaler,StandardScaler

In [1]:
minmax=MinMaxScaler()
le=LabelEncoder()
ss=StandardScaler()

In [1]:
cols=data.select_dtypes(include=['object']).columns
cols

In [1]:
data[cols]=data[cols].apply(le.fit_transform)

In [1]:
sns.heatmap(data.corr(),annot=True,fmt='.2f')

In [1]:
from sklearn.feature_selection import SelectKBest,f_regression

In [1]:
# Univariate F-regression score of every non-target column against price.
# k='all' keeps every column; only the fitted scores are used here.
features_x = data.drop('price', axis=1)
selector = SelectKBest(score_func=f_regression, k='all').fit(features_x, data['price'])
# Index by the column names directly: wrapping the Index in a list would
# build a one-level MultiIndex instead of a plain label index.
feature = pd.DataFrame(data=selector.scores_, index=features_x.columns)
feature.sort_values(by=0, ascending=False)

In [1]:
# Model inputs: every column except fuelType, transmission and the target.
# 'price' is excluded by name rather than by list position, so the selection
# stays correct if the column order of the frame ever changes.
cols = list(data.drop(['fuelType', 'transmission', 'price'], axis=1).columns)
cols

In [1]:
# Rescale the selected feature columns with the min-max scaler (default
# range 0..1), write them back into `data`, and re-check the summary.
# NOTE(review): the scaler is fit on the full frame before the train/test
# split, so the test rows' min/max influence the scaling.
scaled_values = minmax.fit_transform(data[cols])
data[cols] = scaled_values
data.describe()

## Modeling

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error,mean_squared_error

In [1]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    data[cols],
    data['price'],
    test_size=0.2,
    random_state=504,
)

In [1]:
from sklearn.ensemble import RandomForestRegressor

In [1]:
# Random forest baseline. `criterion` is left at its default (squared error):
# the 'mse' spelling is rejected by newer scikit-learn releases, while the
# default gives the same criterion on every version. The seed (same value as
# the train/test split) makes the fitted forest repeatable across runs.
rfr = RandomForestRegressor(random_state=504)
rfr.fit(train_x, train_y)
predict = rfr.predict(test_x)
# scikit-learn metrics take (y_true, y_pred). R2 is not symmetric, so the
# order matters; the score is rounded instead of being pushed up by ceil().
print('MAE', mean_absolute_error(test_y, predict))
print('R2', round(r2_score(test_y, predict) * 100, 2))

In [1]:
from xgboost import XGBRegressor

In [1]:
# Gradient-boosted trees with the library's default hyper-parameters.
xgr = XGBRegressor()
xgr.fit(train_x, train_y)
predict = xgr.predict(test_x)
# scikit-learn metrics take (y_true, y_pred). R2 is not symmetric, so the
# order matters; the score is rounded instead of being pushed up by ceil().
print('MAE', mean_absolute_error(test_y, predict))
print('R2', round(r2_score(test_y, predict) * 100, 2))

These models give very good results on the held-out test set (see the MAE and R2 values printed above).