In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import dash_html_components as html
import dash_core_components as dcc
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to figures drawn after this point.
mpl.style.use('ggplot')


In [2]:
# Load the cleaned car listings from a CSV file given by a relative path
# (resolved against the kernel's current working directory).
car=pd.read_csv('Cleaned_Car_data.csv')

In [3]:
# Display the loaded frame; the rendered output elides the middle rows.
car

Unnamed: 0.1,Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,0,Hyundai Santro Xing,Hyundai,2007,80000,45000,Petrol
1,1,Mahindra Jeep CL550,Mahindra,2006,425000,40,Diesel
2,2,Hyundai Grand i10,Hyundai,2014,325000,28000,Petrol
3,3,Ford EcoSport Titanium,Ford,2014,575000,36000,Diesel
4,4,Ford Figo,Ford,2012,175000,41000,Diesel
...,...,...,...,...,...,...,...
811,811,Maruti Suzuki Ritz,Maruti,2011,270000,50000,Petrol
812,812,Tata Indica V2,Tata,2009,110000,30000,Diesel
813,813,Toyota Corolla Altis,Toyota,2009,300000,132000,Petrol
814,814,Tata Zest XM,Tata,2018,260000,27000,Diesel


In [6]:
# Keep only the rows whose Price is below 6,000,000.
# `car` is rebound to the filtered frame, so the unfiltered data is only
# recoverable by re-reading the CSV.
car=car[car['Price']<6000000]

In [7]:
# Feature matrix: the five predictor columns. Target vector: the Price column.
X = car.loc[:, ['name', 'company', 'year', 'kms_driven', 'fuel_type']]
y = car.loc[:, 'Price']

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score
# Hold out 20% of the rows as a test split.
# NOTE: no random_state is passed, so the rows that land in each split (and any
# score computed from them) can differ from one run to the next.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)


In [12]:
# Fit a one-hot encoder on the three categorical columns of the full feature
# frame X (not only a training split). Its learned ohe.categories_ are reused
# when the column transformer is built.
ohe=OneHotEncoder()
ohe.fit(X[['name','company','fuel_type']])

OneHotEncoder()

In [14]:
# One-hot encode the three categorical columns with the category lists already
# learned by `ohe`; every remaining column is passed through untouched.
column_trans = make_column_transformer(
    (
        OneHotEncoder(categories=ohe.categories_),
        ['name', 'company', 'fuel_type'],
    ),
    remainder='passthrough',
)

In [16]:
# Chain the column transformer and a linear regression into a single pipeline
# and fit it on the training split. The fitted pipeline is the cell's displayed value.
lr=LinearRegression()
pipe=make_pipeline(column_trans,lr)
pipe.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(categories=[array(['Audi A3 Cabriolet', 'Audi A4 1.8', 'Audi A4 2.0', 'Audi A6 2.0',
       'Audi A8', 'Audi Q3 2.0', 'Audi Q5 2.0', 'Audi Q7', 'BMW 3 Series',
       'BMW 5 Series', 'BMW 7 Series', 'BMW X1', 'BMW X1 sDrive20d',
       'BMW X1 xDrive20d', 'Chevrolet Beat', 'Chevrolet Beat...
                                                                            array(['Audi', 'BMW', 'Chevrolet', 'Datsun', 'Fiat', 'Force', 'Ford',
       'Hindustan', 'Honda', 'Hyundai', 'Jaguar', 'Jeep', 'Land',
       'Mahindra', 'Maruti', 'Mercedes', 'Mini', 'Mitsubishi', 'Nissan',
       'Renault', 'Skoda', 'Tata', 'Toyota', 'Volkswagen', 'Volvo'],
      dtype=object),
                                                                            array(['Diesel', 'LPG', 'Pe

In [18]:
# Predict on the held-out split and report the R^2 score against the true prices.
y_pred=pipe.predict(X_test)
r2_score(y_test,y_pred)

0.5833328364287172

In [20]:
# Repeat the split / fit / score cycle for 1000 seeded splits (10% held out each
# time) and record every R^2; scores[i] is the score obtained with random_state=i.
# Every name assigned inside the loop is a notebook-level variable, so once the
# loop finishes `pipe`, `X_test`, `y_pred`, etc. hold the values from the last
# iteration (random_state=999), not from the best-scoring split.
scores=[]
for i in range(1000):
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.1,random_state=i)
    lr=LinearRegression()
    # The same column_trans object is handed to make_pipeline on every iteration.
    pipe=make_pipeline(column_trans,lr)
    pipe.fit(X_train,y_train)
    y_pred=pipe.predict(X_test)
    scores.append(r2_score(y_test,y_pred))

In [22]:
# Highest recorded R^2. np.argmax returns its position in `scores`, which is also
# the random_state of that split when `scores` was filled by the loop over range(1000).
scores[np.argmax(scores)]

0.9200894544056878

In [26]:
import pickle


In [27]:
# Persist the fitted pipeline to disk. The `with` block closes (and therefore
# flushes) the file handle even if pickling raises; the bare open(...) call used
# previously left the handle open.
# NOTE(review): `pipe` is whichever pipeline was fitted most recently. In the code
# visible in this notebook that is the final iteration of the 1000-split loop
# (random_state=999), not necessarily the split with the highest score - confirm
# that this is the model that should be saved.
with open('LinearRegressionModel.pkl','wb') as model_file:
    pickle.dump(pipe,model_file)

In [16]:
import dash
from dash.dependencies import Input,Output
# Create the Dash application object; the layout and the callback are attached to it.
app = dash.Dash(__name__)

In [21]:
# Dropdown options: one {"label", "value"} entry per distinct car name,
# in alphabetical order. The list is the cell's displayed value.
name = sorted(set(car['name']))
model = [{"label": car_name, "value": car_name} for car_name in name]
model

[{'label': 'Audi A3 Cabriolet', 'value': 'Audi A3 Cabriolet'},
 {'label': 'Audi A4 1.8', 'value': 'Audi A4 1.8'},
 {'label': 'Audi A4 2.0', 'value': 'Audi A4 2.0'},
 {'label': 'Audi A6 2.0', 'value': 'Audi A6 2.0'},
 {'label': 'Audi A8', 'value': 'Audi A8'},
 {'label': 'Audi Q3 2.0', 'value': 'Audi Q3 2.0'},
 {'label': 'Audi Q5 2.0', 'value': 'Audi Q5 2.0'},
 {'label': 'Audi Q7', 'value': 'Audi Q7'},
 {'label': 'BMW 3 Series', 'value': 'BMW 3 Series'},
 {'label': 'BMW 5 Series', 'value': 'BMW 5 Series'},
 {'label': 'BMW 7 Series', 'value': 'BMW 7 Series'},
 {'label': 'BMW X1', 'value': 'BMW X1'},
 {'label': 'BMW X1 sDrive20d', 'value': 'BMW X1 sDrive20d'},
 {'label': 'BMW X1 xDrive20d', 'value': 'BMW X1 xDrive20d'},
 {'label': 'Chevrolet Beat', 'value': 'Chevrolet Beat'},
 {'label': 'Chevrolet Beat Diesel', 'value': 'Chevrolet Beat Diesel'},
 {'label': 'Chevrolet Beat LS', 'value': 'Chevrolet Beat LS'},
 {'label': 'Chevrolet Beat LT', 'value': 'Chevrolet Beat LT'},
 {'label': 'Chevrole

In [35]:

# Page layout: a title, an image and a rule, followed by one labelled input per
# feature (model dropdown, brand, year, kms driven, fuel type) and an H1 with
# id "output" that the callback writes into.
app.layout = html.Div([
    html.H2("Sales Analysis"),
    html.Img(src='/assets/data.jpg'),
    html.Hr(),
    html.Div([
        html.H4('Choose Model   '),
        html.Br(),
        dcc.Dropdown(
            id="model_id",
            options=model,
            # The Dropdown's `value` must be one of the option *values* (a string),
            # not the whole {"label", "value"} option dict. Fall back to no
            # selection when the options list is empty.
            value=model[0]['value'] if model else None,
        ),
    ], className='dropdown'),
    html.Div([
        html.H4('Enter Brand'),
        html.Br(),
        dcc.Input(
            id="brand", type='text', placeholder='Enter Brand'
        ),
    ]),
    html.Div([
        html.H4('Enter Year'),
        html.Br(),
        dcc.Input(
            id="year", type='number', placeholder='Enter model Year'
        ),
    ]),
    html.Div([
        html.H4('Enter Kms Driven'),
        html.Br(),
        dcc.Input(
            id="kms", type='number', placeholder='Enter Kilometer-Driven'
        ),
    ]),
    html.Div([
        html.H4('Enter fuel-type'),
        html.Br(),
        dcc.Input(
            id="fuel", type='text', placeholder='Enter Fuel-Type'
        ),
    ]),
    html.Div(
        html.H1(id="output")
    ),
], className='banner')

@app.callback(
    Output("output", "children"),
    Input("model_id", "value"),
    Input("brand", "value"),
)
def update_output(model_id, brand):
    """Echo the current dropdown selection and brand text into the "output" element.

    Args:
        model_id: current value of the "model_id" dropdown.
        brand: current value of the "brand" input.

    Returns:
        A string containing both values, used as the children of "output".
    """
    template = 'Input 1 {} and Input 2 {}'
    return template.format(model_id, brand)

In [None]:
# Start the Dash server when this code runs as the main module, with debug mode
# explicitly off. The recorded log below shows it serving on
# http://127.0.0.1:8050/ until interrupted (CTRL+C).
if __name__ == "__main__":
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [29/May/2021 00:15:00] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [29/May/2021 00:15:01] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [29/May/2021 00:15:01] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [29/May/2021 00:15:01] "GET /_favicon.ico?v=1.20.0 HTTP/1.1" 200 -


In [28]:
# Predict the price for a single hand-written listing.
# The frame is built from a list of rows so that year and kms_driven stay
# integers. Wrapping the mixed values in np.array([...]) would coerce every field
# to a string, leaving the numeric columns to rely on implicit conversion later.
pipe.predict(
    pd.DataFrame(
        [['Maruti Suzuki Alto', 'Maruti', 2019, 100, 'Petrol']],
        columns=['name', 'company', 'year', 'kms_driven', 'fuel_type'],
    )
)

array([316609.61993986])