### Import the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

### Read dataset

In [None]:
# Load the CarDekho vehicle listing CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path looks like a Kaggle input
# directory — adjust it when running elsewhere.
car_details = pd.read_csv('../input/vehicle-dataset-from-cardekho/Car details v3.csv')
# Show (rows, columns) as the cell output.
car_details.shape

## EDA

In [None]:
car_details.head()

In [None]:
car_details.describe()

In [None]:
car_details.columns

In [None]:
car_details.info()

### Data visualization

#### uni-variate

In [None]:
sns.countplot(x='transmission',data=car_details)

In [None]:
sns.countplot(x='fuel',data=car_details)

In [None]:
sns.countplot(x='owner',data=car_details)

In [None]:
sns.countplot(x='seller_type',data=car_details)

In [None]:
def remove_unit(df,column):
    """Strip the trailing unit from a column and return the numbers as floats.

    Each value is converted to ``str``, split on a single space, and the
    first token is parsed with ``float`` (e.g. ``"1248 CC"`` -> ``1248.0``).
    A NaN cell stringifies to ``"nan"`` and therefore comes back as
    ``float('nan')``.

    Args:
        df: DataFrame holding the column.
        column: Label of the column to convert.

    Returns:
        A list of floats, one per row, in row order.

    Raises:
        ValueError: If the first token of a value is not a valid float.
    """
    # Iterate over the values themselves rather than looking rows up with
    # ``df[column][i]``: that lookup is label-based, so it raised KeyError
    # (or returned rows in the wrong order) whenever the index was not
    # 0..n-1, e.g. after filtering or dropna().
    return [float(str(value).split(' ')[0]) for value in df[column]]
    

In [None]:
# Replace the 'engine' and 'mileage' columns with the float values that
# remove_unit extracts from them (the text after the first space is dropped).
car_details['engine'] = remove_unit(car_details,'engine')

car_details['mileage'] = remove_unit(car_details,'mileage')


In [None]:
# Remove the 'torque', 'max_power' and 'name' columns from the frame in place.
car_details.drop(['torque','max_power','name'],axis=1, inplace=True)

In [None]:
car_details.head()

In [None]:
car_details.info()

#### uni-variate (box plots of numeric features)

In [None]:
# Numeric columns inspected in the box plots and the quantile check below.
columns = ['selling_price','km_driven','mileage','engine','seats']

# Side-by-side box plots of the first two numeric columns.
# The stray ``plt.plot([1, 2, 3])`` call that used to follow the figure
# creation was removed: it drew an unrelated line on a full-figure axes
# before the subplots were created.
figure = plt.figure(figsize=[13,3])
plt.subplot(121)
sns.boxplot(x=columns[0],data=car_details)
plt.subplot(122)
sns.boxplot(x=columns[1],data=car_details)
plt.show()

In [None]:
# Box plots of columns[2] and columns[3], drawn next to each other on a
# single 1x2 grid.
figure, axes = plt.subplots(1, 2, figsize=[13,3])
for axis, name in zip(axes, columns[2:4]):
    sns.boxplot(x=name, data=car_details, ax=axis)
plt.show()

In [None]:
# Report, for each inspected column, how many rows lie above its 99th
# percentile.  The filtered frame used to be built and immediately thrown
# away, so the cell neither displayed nor changed anything.
for column in columns:
    threshold = car_details[column].quantile(0.99)
    outliers = car_details[car_details[column] > threshold]
    print(column, '- rows above the 99th percentile:', len(outliers))


#### Multivariate

In [None]:
# Correlate only the numeric columns: the frame still holds non-numeric
# columns at this point (e.g. fuel, seller_type), and DataFrame.corr()
# raises on those in pandas >= 2.0 instead of silently skipping them.
numeric_corr = car_details.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap="viridis")
plt.show()

In [None]:
# Correlation of every numeric column with the target.  Non-numeric columns
# are excluded first because DataFrame.corr() raises on them in
# pandas >= 2.0.
car_details.select_dtypes(include='number').corr()['selling_price']

In [None]:
# Selling price broken down by seller type (rows) and fuel (columns);
# no aggfunc is passed, so pivot_table's default aggregation (mean) is used.
car_details.pivot_table(values='selling_price', index = 'seller_type', columns= 'fuel')

# Data Preprocessing

### Categorical variable

In [None]:
# Names of the columns whose dtype is object ('O'); these are the ones
# passed on for one-hot encoding.
columns = [name for name, dtype in car_details.dtypes.items() if dtype == 'O']
columns

In [None]:
def categorical_variable(df,columns):
    """One-hot encode the given columns and return the resulting frame.

    For every name in ``columns`` the column is expanded with
    ``pd.get_dummies`` (prefixed with the column name, first level dropped),
    the indicator columns are appended on the right, and the source column
    is removed.  The frame passed in is not modified.
    """
    encoded = df
    for name in columns:
        indicators = pd.get_dummies(encoded[name], prefix=name, drop_first=True)
        encoded = pd.concat([encoded, indicators], axis=1).drop(name, axis=1)
    return encoded

In [None]:
# Replace the object-dtype columns collected in `columns` with their
# one-hot indicator columns.
car_details = categorical_variable(car_details,columns)

In [None]:
car_details.head()

In [None]:
car_details.info()

In [None]:
car_details.isnull().sum()

In [None]:
# Drop, in place, every row that contains at least one missing value.
car_details.dropna(inplace=True)

In [None]:
car_details.isnull().sum()

In [None]:
from sklearn.preprocessing import StandardScaler
# NOTE(review): this scaler is created but never fitted or applied in the
# visible code — confirm whether it is still needed.
scalar = StandardScaler()

In [None]:
# Target: the selling price.  It is selected by name rather than with
# ``iloc[:, 1]``, which only pointed at selling_price as long as the column
# order never changed; this also matches how the column is dropped below.
y = car_details['selling_price']
# Features: every remaining column.
X = car_details.drop('selling_price', axis=1)

In [None]:
# Convert the feature frame to a plain NumPy array.
X = X.to_numpy()


In [None]:
# Convert the target series to a plain NumPy array.
y = y.to_numpy()

### Split the dataset

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; random_state=0 keeps the split
# the same on every run.
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.3, random_state=0)

In [None]:
X_train.shape

In [None]:
X_test.shape

### Train - Decision tree regressor

In [None]:
from sklearn.tree import DecisionTreeRegressor
# Fix the seed so repeated runs build the same tree and report the same
# scores, matching the random_state=0 used for the train/test split.
reg = DecisionTreeRegressor(random_state=0)

In [None]:
# Fit the regressor on the training split.
reg.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out test rows.
y_pred=reg.predict(X_test)

### Evaluation Metrics

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.metrics import r2_score

In [None]:
# Coefficient of determination of the predictions on the test split.
r2 = r2_score(y_test, y_pred)
print('R2 Score', r2)

In [None]:
mean_squared_error(y_test,y_pred)

In [None]:
mean_absolute_error(y_test,y_pred)