In [None]:

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model  import LinearRegression,Ridge,Lasso,RidgeCV, LassoCV, ElasticNet, ElasticNetCV
from sklearn.model_selection import train_test_split

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)


In [None]:
# Load the CarDekho vehicle CSV into `df` and preview the first rows.
df=pd.read_csv('/kaggle/input/vehicle-dataset-from-cardekho/car data.csv')
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count; used to check for missing values.
df.info()

**No null values**

In [None]:
# Distinct values that occur in the Owner column.
df['Owner'].unique()

In [None]:
# Derive the car's age in years, using 2020 as the fixed reference year.
# Note: this adds the column to `df` in place.
df['Number_of_Year']=2020-df['Year'] #age of the car

In [None]:
# Preview the frame again to confirm the new Number_of_Year column.
df.head()

# Exploratory Data Analysis

In [None]:
# Pairwise relationship grid across the columns of df.
sns.pairplot(df)

In [None]:
# Selling price against model year, with a fitted regression line.
# x / y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises a TypeError there.
sns.regplot(x='Year', y='Selling_Price', data=df)

In [None]:
# Selling price against car age (Number_of_Year), with a fitted regression line.
# x / y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises a TypeError there.
sns.regplot(x='Number_of_Year', y='Selling_Price', data=df)

**The older the car, the lower the selling price.**

In [None]:
# Bar chart of Selling_Price grouped by Seller_Type.
ax = sns.barplot(x="Seller_Type", y="Selling_Price", data=df)

**Cars sold by dealers have a higher selling price.**

In [None]:
# Bar chart of Selling_Price grouped by Fuel_Type.
ax = sns.barplot(x="Fuel_Type", y="Selling_Price", data=df)

**Diesel cars are the most expensive.**

In [None]:
# Bar chart of Selling_Price grouped by Transmission.
ax = sns.barplot(x="Transmission", y="Selling_Price", data=df)

**Automatic cars have a higher selling price.**

In [None]:
# Bar chart of Selling_Price grouped by the Owner column.
ax = sns.barplot(x="Owner", y="Selling_Price", data=df)

**A newer car (one with fewer previous owners) is more expensive.**

In [None]:
# Kilometres driven against selling price, with a fitted regression line.
# x / y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises a TypeError there.
sns.regplot(x='Selling_Price', y='Kms_Driven', data=df)

In [None]:
# Column names currently available in df.
df.columns

In [None]:
# Keep the target plus the modelling features, then let get_dummies expand
# the non-numeric columns into indicator columns (drop_first=True omits the
# first level of each encoded column).
model_columns = [
    'Selling_Price', 'Present_Price', 'Kms_Driven',
    'Fuel_Type', 'Seller_Type', 'Transmission',
    'Owner', 'Number_of_Year',
]
final = df[model_columns]
final_df = pd.get_dummies(final, drop_first=True)

In [None]:
# Preview the encoded modelling frame.
final_df.head()

In [None]:
# Annotated heatmap of the pairwise correlations in the encoded frame,
# drawn on a 10x10 inch figure.
corr_matrix = final_df.corr()
plt.figure(figsize=(10, 10))
ax = sns.heatmap(corr_matrix, annot=True)

In [None]:
# Feature matrix: every encoded column except the target.
x = final_df.drop(columns=['Selling_Price'])
# Target vector: the selling price taken from the original frame.
y = df['Selling_Price']

In [None]:
# Preview the feature matrix.
x.head()

In [None]:
# Preview the target vector.
y.head()

# Linear Regression

In [None]:
# Hold out 25% of the rows as a test set; random_state=0 fixes the shuffle
# so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

In [None]:
# Ordinary least-squares baseline fitted on the training split.
# `fit` returns the estimator itself, so `reg` is the fitted model.
reg = LinearRegression().fit(X_train, y_train)
reg

In [None]:
# Show the fitted intercept followed by the coefficient vector, one per line.
print(reg.intercept_, reg.coef_, sep='\n')

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the linear model on the training split
# (y_train vs. predictions for X_train), i.e. the in-sample error.
mean_squared_error(y_true=y_train, y_pred=reg.predict(X_train))

In [None]:
from sklearn.metrics import r2_score
# R^2 of the linear model on both splits. The training value alone cannot be
# compared with the `.score(X_test, y_test)` results reported for the
# regularised models, so the held-out value is shown next to it.
pd.Series(
    {
        'train': r2_score(y_true=y_train, y_pred=reg.predict(X_train)),
        'test': r2_score(y_true=y_test, y_pred=reg.predict(X_test)),
    },
    name='r2',
)

# Lasso Regression

In [None]:
# 10-fold cross-validated search for the Lasso penalty strength.
# `normalize=True` is intentionally not passed: the argument was removed in
# scikit-learn 1.2, and the alpha chosen here is reused below in a plain
# (un-normalised) Lasso, so it must be tuned on the same feature scale.
# `alphas` is left at its default, which generates the candidate grid
# automatically.
lasscv = LassoCV(cv=10, max_iter=100000)
lasscv.fit(X_train, y_train)

In [None]:
# Penalty strength selected by the cross-validated Lasso search.
alpha = lasscv.alpha_
alpha

In [None]:
# Refit a Lasso on the training split using the cross-validated alpha.
# `fit` returns the estimator itself, so `lasso_reg` is the fitted model.
lasso_reg = Lasso(alpha=alpha).fit(X_train, y_train)
lasso_reg

In [None]:
# Score of the Lasso model on the held-out test split.
lasso_reg.score(X_test, y_test)

# Ridge Regression

In [None]:
# Candidate penalties: 50 values drawn uniformly from [0, 10). A seeded
# generator is used so the candidate set, and therefore the selected alpha
# and every downstream score, is identical on each run.
alpha_rng = np.random.default_rng(0)
alphas = alpha_rng.uniform(low=0, high=10, size=(50,))
# 10-fold cross-validated Ridge over those candidates.
# `normalize=True` is intentionally not passed: the argument was removed in
# scikit-learn 1.2, and the alpha chosen here is reused below in a plain
# (un-normalised) Ridge, so it must be tuned on the same feature scale.
ridgecv = RidgeCV(alphas=alphas, cv=10)
ridgecv.fit(X_train, y_train)

In [None]:
# Penalty strength selected by the cross-validated Ridge search.
ridgecv.alpha_

In [None]:
# Refit a Ridge model on the training split using the cross-validated alpha.
# `fit` returns the estimator itself, so `ridge_model` is the fitted model.
best_ridge_alpha = ridgecv.alpha_
ridge_model = Ridge(alpha=best_ridge_alpha).fit(X_train, y_train)
ridge_model

In [None]:
# Score of the Ridge model on the held-out test split.
ridge_model.score(X_test, y_test)

# Elastic Net Regression

In [None]:
# 10-fold cross-validated search for the Elastic Net penalty strength.
# `alphas` is left at its default (automatic candidate grid) instead of
# passing `alphas=None` explicitly, which is redundant and is flagged as
# deprecated by recent scikit-learn releases.
elasticCV = ElasticNetCV(cv=10)

elasticCV.fit(X_train, y_train)

In [None]:
# Penalty strength selected by the cross-validated Elastic Net search.
elasticCV.alpha_

In [None]:
# L1/L2 mixing ratio selected by cross-validation. The fitted attribute
# `l1_ratio_` is read (matching `alpha_` above) rather than the constructor
# argument `l1_ratio`, which only echoes what was passed in.
elasticCV.l1_ratio_

In [None]:
# Refit an Elastic Net on the training split with the cross-validated
# hyper-parameters. The mixing ratio is taken from the CV result instead of
# a hard-coded 0.5, so this stays correct if the CV search is later given a
# grid of l1_ratio values.
elasticnet_reg = ElasticNet(alpha=elasticCV.alpha_, l1_ratio=elasticCV.l1_ratio_)
elasticnet_reg.fit(X_train, y_train)

In [None]:
# Score of the Elastic Net model on the held-out test split.
elasticnet_reg.score(X_test, y_test)