In [18]:
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

In [19]:
# Read the prepared training table and discard its 'Unnamed: 0' column
# (presumably an index column written out with the CSV — confirm).
df = pd.read_csv("clean_train_data_car.csv").drop(columns=['Unnamed: 0'])

In [20]:
from sklearn.model_selection import train_test_split

# Separate the regression target ('Price') from the predictor columns.
y = df['Price']
x = df.drop(columns="Price")
x.shape, y.shape

((5060, 98), (5060,))

In [21]:
# Keep 20% of the rows aside for evaluation; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=51,
)

In [22]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training rows only, then apply that same fitted
# scaler to both the training and the held-out rows.
sc = StandardScaler()
q = sc.fit(x_train)  # q keeps whatever fit() returns; it is not used in this cell
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [7]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor

def rmse(x_test,y_pred):
    """Return the square root of ``mean_squared_error(x_test, y_pred)``.

    Despite its name, ``x_test`` is simply the first argument handed to
    ``mean_squared_error``; the calls in this notebook pass target values
    (``y_test`` / ``y_train``) there and predictions as ``y_pred``.
    """
    mse = mean_squared_error(x_test, y_pred)
    return np.sqrt(mse)

In [23]:
# Baseline random forest with default hyper-parameters.
# random_state is fixed so that the fitted model and the scores printed below
# are repeatable from run to run (the train/test split is seeded the same way).
rfr = RandomForestRegressor(random_state=51)
rfr.fit(x_train,y_train)

# Predictions on the rows the model was fitted on and on the held-out rows.
y_train_pred_rfr = rfr.predict(x_train)
y_test_pred_rfr = rfr.predict(x_test)

print("Train data r2 Score:",r2_score(y_train,y_train_pred_rfr))
print("Test data r2 Score:",r2_score(y_test,y_test_pred_rfr))
# The two "Error:" lines are RMSE values: held-out data first, training data second.
print("Error:",rmse(y_test,y_test_pred_rfr))
print("Error:",rmse(y_train,y_train_pred_rfr))

Train data r2 Score: 0.9895238992820591
Test data r2 Score: 0.9458914246013942
Error: 2.007247337043583
Error: 0.9652158702300302


In [27]:
from sklearn.model_selection import GridSearchCV

# Candidate values tried for the number of trees and the tree depth.
parameters = dict(
    n_estimators=[5, 10, 50, 100, 250],
    max_depth=[2, 4, 8, 16, 32, None],
)

# Search every combination of the candidates above with cv=5.
cv = GridSearchCV(estimator=rfr, param_grid=parameters, cv=5)
cv.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestRegressor(),
             param_grid={'max_depth': [2, 4, 8, 16, 32, None],
                         'n_estimators': [5, 10, 50, 100, 250]})

In [13]:
def display(results):
    """Print the best parameter set followed by one score line per candidate.

    ``results`` must expose ``best_params_`` and a ``cv_results_`` mapping
    containing ``mean_test_score``, ``std_test_score`` and ``params``.
    Nothing is returned; everything goes to standard output.

    Note: inside a notebook this definition shadows IPython's own ``display``.
    """
    print(f'Best parameters are: {results.best_params_}')
    print("\n")
    table = results.cv_results_
    rows = zip(table['mean_test_score'], table['std_test_score'], table['params'])
    for avg, spread, candidate in rows:
        print(f'{round(avg,3)} + or -{round(spread,3)} for the {candidate}')

In [26]:
# Refit with explicit hyper-parameters: max_depth=32, n_estimators=100.
# NOTE(review): the note previously kept here read "'n_estimators': 25", which
# is not one of the grid-search candidates and does not match the value used
# below — confirm the intended setting against cv.best_params_.
# random_state is fixed so this model (the one saved as final_rfr.pkl) can be
# reproduced exactly.
rfr2 = RandomForestRegressor(max_depth=32,n_estimators=100,random_state=51)
rfr2.fit(x_train,y_train)

# Predictions on the rows the model was fitted on and on the held-out rows.
y_train_pred_rfr2 = rfr2.predict(x_train)
y_test_pred_rfr2 = rfr2.predict(x_test)

print("Train data r2 Score:",r2_score(y_train,y_train_pred_rfr2))
print("Test data r2 Score:",r2_score(y_test,y_test_pred_rfr2))
# The two "Error:" lines are RMSE values: held-out data first, training data second.
print("Error:",rmse(y_test,y_test_pred_rfr2))
print("Error:",rmse(y_train,y_train_pred_rfr2))

Train data r2 Score: 0.9906797798948008
Test data r2 Score: 0.9454275739146332
Error: 2.0158326319861537
Error: 0.910411438991684


In [106]:
import joblib
# Write the rfr2 model to disk; a later cell loads 'final_rfr.pkl' back as `model`.
joblib.dump(value=rfr2, filename='final_rfr.pkl')

['final_rfr.pkl']

In [30]:
import pickle
# Pickle the `rfr` model into the file 'model_pkl'.
# NOTE(review): this stores `rfr`, not the later `rfr2` — confirm that is intended.
with open('model_pkl', 'wb') as sink:
    pickle.Pickler(sink).dump(rfr)

# Data Pkl

In [107]:
# Read the cleaned (not one-hot encoded) car table and discard its
# 'Unnamed: 0' column (presumably a saved index — confirm).
car_data = pd.read_csv("Car_data_clean.csv").drop(columns=['Unnamed: 0'])

In [27]:
# Preview the first five rows of car_data.
car_data.head(n=5)

Unnamed: 0,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Price,Mileage_int,Engine_int,Power_int,Car
0,Mumbai,2010,72000,CNG,Manual,First,5.0,1.75,26.6,998.0,58.16,Maruti WagonR
1,Pune,2015,41000,Diesel,Manual,First,5.0,12.5,19.67,1582.0,126.2,Hyundai Creta
2,Chennai,2011,46000,Petrol,Manual,First,5.0,4.5,18.2,1199.0,88.7,Honda Jazz
3,Chennai,2012,87000,Diesel,Manual,First,7.0,6.0,20.77,1248.0,88.76,Maruti Ertiga
4,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,17.74,15.2,1968.0,140.8,Audi A4


In [52]:
# Distinct values of the 'Car' column, in value_counts() order.
car_names = car_data['Car'].value_counts().index.tolist()

In [53]:
# Distinct values of the 'Location' column, in value_counts() order.
city_names = car_data['Location'].value_counts().index.tolist()

In [54]:
# Distinct values of the 'Fuel_Type' column, in value_counts() order.
fuel_type = car_data['Fuel_Type'].value_counts().index.tolist()
fuel_type

['Diesel', 'Petrol', 'CNG']

In [55]:
# Distinct values of the 'Owner_Type' column, in value_counts() order.
owner_type = car_data['Owner_Type'].value_counts().index.tolist()

In [56]:
# Distinct values of the 'Transmission' column, in value_counts() order.
trans_type = car_data['Transmission'].value_counts().index.tolist()
trans_type

['Manual', 'Automatic']

In [57]:
import joblib
# Save each option list to its own file, in the same order as before.
option_files = {
    'car_names.pkl': car_names,
    'city_names.pkl': city_names,
    'fuel_type.pkl': fuel_type,
    'owner_type.pkl': owner_type,
    'trans_type.pkl': trans_type,
}
for pkl_name, options in option_files.items():
    written = joblib.dump(options, pkl_name)
# Show the return value of the last dump, as the cell did originally.
written

['trans_type.pkl']

In [108]:
import pickle

In [109]:
import joblib

# These files were written with joblib.dump, so they are read back with the
# matching joblib.load. Passing a filename lets joblib open and close each
# file itself, so no handle is left open the way a bare open(...) would.
car_names = joblib.load('car_names.pkl')
city_names = joblib.load('city_names.pkl')
fuel_type = joblib.load('fuel_type.pkl')
owner_type = joblib.load('owner_type.pkl')
trans_type = joblib.load('trans_type.pkl')

In [110]:
# Read the encoded training table again under the name dfx and discard its
# 'Unnamed: 0' column.
dfx = pd.read_csv('clean_train_data_car.csv').drop(columns=["Unnamed: 0"])

In [111]:
# Preview the first five rows of dfx.
dfx.head(n=5)

Unnamed: 0,Year,Kilometers_Driven,Seats,Price,Mileage_int,Engine_int,Power_int,Location_Ahmedabad,Location_Bangalore,Location_Chennai,...,Car_Tata Nano,Car_Tata New Safari,Car_Tata Zest,Car_Toyota Corolla,Car_Toyota Etios,Car_Toyota Fortuner,Car_Toyota Innova,Car_Volkswagen Jetta,Car_Volkswagen Polo,Car_Volkswagen Vento
0,2010,72000,5.0,1.75,26.6,998.0,58.16,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2015,41000,5.0,12.5,19.67,1582.0,126.2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2011,46000,5.0,4.5,18.2,1199.0,88.7,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,2012,87000,7.0,6.0,20.77,1248.0,88.76,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,2013,40670,5.0,17.74,15.2,1968.0,140.8,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [112]:
# import streamlit as st
import pickle
import joblib
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Rebuild the feature frame, the seeded train/test split and the fitted scaler;
# car_price() below reads `x` and `sc` from this cell.
df = pd.read_csv('clean_train_data_car.csv').drop(columns=["Unnamed: 0"])
y = df['Price']
x = df.drop(columns="Price")
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=51,
)
sc = StandardScaler()
sc.fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

def car_price(year,km,seats,mileage,eng,power,location,car,fuel,trans,owner):
    """Predict a price for one car described by the given attributes.

    A single feature row is built with one slot per column of the
    module-level frame ``x``, scaled with the module-level scaler ``sc`` and
    passed to the module-level ``model``.

    Parameters
    ----------
    year, km, seats, mileage, eng, power
        Numeric attributes, written to feature positions 0-5 in that order.
        This assumes the first six columns of ``x`` are laid out the same
        way — confirm if the column order ever changes.
    location, car, fuel, trans, owner
        Category labels (strings). Each is looked up as the column
        ``"Location_<label>"``, ``"Car_<label>"``, ``"Fuel_Type_<label>"``,
        ``"Transmission_<label>"`` or ``"Owner_Type_<label>"``. When that
        column exists its slot is set to 1; a label with no matching column
        leaves the row unchanged.

    Returns
    -------
    The first element of ``model.predict`` for the scaled row.
    """
    columns = x.columns

    # Size the row from the frame itself instead of a hard-coded width, so the
    # function keeps working if the number of encoded columns changes.
    features = np.zeros(len(columns))

    for position, value in enumerate((year, km, seats, mileage, eng, power)):
        features[position] = value

    one_hot_inputs = (
        ("Location_", location),
        ("Car_", car),
        ("Fuel_Type_", fuel),
        ("Transmission_", trans),
        ("Owner_Type_", owner),
    )
    for prefix, label in one_hot_inputs:
        column = prefix + label
        if column in columns:
            features[columns.get_loc(column)] = 1

    # Give the scaler a frame with the same column labels as ``x`` rather
    # than a bare list, so the row is labelled consistently with the data
    # the scaler was fitted on.
    row = pd.DataFrame([features], columns=columns)
    scaled = sc.transform(row)
    return model.predict(scaled)[0]

# Load the option lists and the trained model. All of these files were written
# with joblib.dump, so joblib.load is used throughout; given a filename it
# opens and closes the file itself, so no handle is left open.
car_names = joblib.load('car_names.pkl')
city_names = joblib.load('city_names.pkl')
fuel_type = joblib.load('fuel_type.pkl')
owner_type = joblib.load('owner_type.pkl')
trans_type = joblib.load('trans_type.pkl')
model = joblib.load('final_rfr.pkl')

# st.title("Used Car Price Prediction Model")

# with st.form("my_form"):
#     st.write("Please Enter Following Details To Get Estimated Car Price ")

#     model = st.selectbox("Model",car_names)
#     location = st.selectbox("City",city_names)
#     Fuel = st.selectbox("Fuel-Type",fuel_type)
#     Owner= st.selectbox("Owner",owner_type)
#     trans= st.selectbox("Transmission",trans_type)
#     year = st.text_input("Year Of Manufacture",placeholder="Enter Year")
#     km = st.text_input("Kilometer Driven",placeholder="Enter Total Kilometer Driven")
#     mileage = st.text_input("Mileage",placeholder="Enter Mileage")
#     seats = st.text_input("Seats",placeholder="Enter Seats")
#     engine = st.text_input("Engine(CC)",placeholder="Enter Engine Power in CC")
#     power = st.text_input("Power(BHP)",placeholder="Enter Power in BHP")

    

#     # Every form must have a submit button.
#     submitted = st.form_submit_button("Submit")
#     if submitted:
#         st.write("Submitted")

# st.write("Outside the form")

In [80]:
import joblib
# Load the saved model from disk.
# NOTE(review): the output recorded for this cell is an EOFError — presumably
# the file was empty or incomplete when the cell ran; confirm before relying on it.
model = joblib.load(filename="final_rfr.pkl")

EOFError: 

In [1]:
import pandas as pd
import joblib