In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

In [34]:
# Load the bike listings from the CSV file (path is relative to the working directory).
data = pd.read_csv("bikeDetails.csv")
# Preview the first rows to check which columns were read.
data.head()

Unnamed: 0,name,selling_price,year,seller_type,owner,km_driven,ex_showroom_price
0,Royal Enfield Classic 350,175000,2019,Individual,1st owner,350,
1,Honda Dio,45000,2017,Individual,1st owner,5650,
2,Royal Enfield Classic Gunmetal Grey,150000,2018,Individual,1st owner,12000,148114.0
3,Yamaha Fazer FI V 2.0 [2016-2018],65000,2015,Individual,1st owner,23000,89643.0
4,Yamaha SZ [2013-2014],20000,2011,Individual,2nd owner,21000,


# Grid Search Algorithm
## Just y: the best constant prediction

In [80]:
def sel(y, c):
    '''
    Returns the squared error loss between y and c.

    y and c may be scalars or any objects supporting subtraction and
    exponentiation (e.g. NumPy arrays or pandas Series); the result is
    computed element-wise as (y - c) squared.
    '''
    residual = y - c
    return residual ** 2

In [101]:
def rmse(y, c):
    '''
    Returns the root-mean-squared error (RMSE) of c with respect to y.

    Parameters
    ----------
    y : scalar, NumPy array or pandas Series
        Observed values (anything `sel` can subtract `c` from).
    c : scalar, NumPy array or pandas Series
        Prediction(s) compared against `y`.

    Returns
    -------
    The square root of the mean of the squared errors `sel(y, c)`.

    Raises
    ------
    Any exception raised by `sel` or NumPy for incompatible inputs is
    propagated to the caller.
    '''
    # Errors are intentionally not caught here: printing a message and
    # returning None would only surface later as an unrelated failure when
    # the caller compares the result with "<".
    return np.sqrt(np.mean(sel(y, c)))

In [102]:
def create_grid_for_c(y, median_steps=(0.05, 0.1, 0.2), mean_steps=(0.05, 0.1, 0.2, 0.3, 0.5)):
    '''
    Returns a grid of candidate values for the constant prediction c.

    The grid is built around two centres, the median and the mean of y.
    For each centre the list contains the centre itself followed by
    centre + k*std and centre - k*std for every multiplier k, in order.

    Parameters
    ----------
    y : object exposing .median(), .mean() and .std() (e.g. a pandas Series)
        Values the candidates are derived from.
    median_steps : iterable of numbers, optional
        Multipliers of the standard deviation applied around the median.
    mean_steps : iterable of numbers, optional
        Multipliers of the standard deviation applied around the mean.

    Returns
    -------
    list
        Median-centred candidates first, then mean-centred candidates.
        With the default multipliers the list has 18 entries.
    '''
    # Compute each statistic once instead of once per candidate.
    median, mean, spread = y.median(), y.mean(), y.std()

    result = []
    for center, steps in ((median, median_steps), (mean, mean_steps)):
        result.append(center)
        for k in steps:
            offset = k * spread
            result.append(center + offset)
            result.append(center - offset)
    return result

In [142]:
def grid_search(y):
    '''
    Returns optimal prediction for y by grid search.

    Every candidate produced by `create_grid_for_c(y)` is scored with
    `rmse(y, candidate)` and the candidate with the smallest RMSE is
    returned.

    Parameters
    ----------
    y : values accepted by both `create_grid_for_c` and `rmse`
        (e.g. a pandas Series).

    Returns
    -------
    The candidate value with the lowest RMSE; on ties the earliest
    candidate in the grid wins.

    Raises
    ------
    ValueError
        If the candidate grid is empty (raised by `min`).
    Any exception raised while building the grid or scoring a candidate is
    propagated instead of being printed and replaced by a None result.
    '''
    candidates = create_grid_for_c(y)
    # min() only replaces its current best on a strict "<", so the first of
    # several equally good candidates is kept, as in a manual scan.
    return min(candidates, key=lambda c: rmse(y, c))

In [143]:
# Best constant prediction for selling_price among the candidates tried by grid_search.
optimal_price = grid_search(data.selling_price)
optimal_price

59638.151743638075

In [144]:
# Best constant prediction for km_driven among the candidates tried by grid_search.
opt_km_driven = grid_search(data.km_driven)
opt_km_driven

34359.83317624882

## Both y and x: grid search over intercept b0 and slope b1

In [135]:
# Candidate intercepts: 60000, 60005, ... up to (but excluding) 70000.
b0 = np.arange(60000, 70000, 5)
# Candidate slopes: from -1 up to (but excluding) 1 in steps of 0.01.
b1 = np.arange(-1, 1, 0.01)
# Search grid keyed by coefficient name, as read by grid_search_with_x.
grid = dict(b0=b0, b1=b1)

In [136]:
def grid_search_with_x(grid, x, y):
    '''
    Returns optimal values for b0 and b1 by grid search.

    Every pair (b0, b1) from the grid is scored with
    `rmse(y, b0 + x * b1)` and the pair with the smallest RMSE is kept.
    b0 varies in the outer loop and b1 in the inner loop; on ties the
    pair visited first wins.

    Parameters
    ----------
    grid : mapping with keys "b0" and "b1"
        Iterables of candidate intercepts ("b0") and slopes ("b1").
    x : scalar, NumPy array or pandas Series
        Predictor values.
    y : scalar, NumPy array or pandas Series
        Observed values to be predicted.

    Returns
    -------
    dict
        {"b0": best intercept, "b1": best slope, "rmse": its RMSE}.

    Raises
    ------
    ValueError
        If no grid point yields an RMSE below infinity (empty grid, or
        every score is NaN/inf), since no optimum exists in that case.
    Any exception raised by `rmse` is propagated instead of being printed
    and replaced by a None result.
    '''
    # Start from +inf rather than a finite sentinel so that legitimately
    # large errors can still win; a finite sentinel would silently report
    # b0 = 0, b1 = 0 whenever every score exceeded it.
    result = {"b0": None, "b1": None, "rmse": np.inf}

    for b0 in grid["b0"]:
        for b1 in grid["b1"]:
            score = rmse(y, b0 + x * b1)
            if score < result["rmse"]:
                result = {"b0": b0, "b1": b1, "rmse": score}

    if result["b0"] is None:
        raise ValueError("grid search found no (b0, b1) pair with a finite rmse")
    return result

In [137]:
# Search `grid` for the b0 and b1 that minimise the RMSE of b0 + b1 * km_driven against selling_price.
optimal = grid_search_with_x(grid, data.km_driven, data.selling_price) 

In [138]:
# Show the selected intercept, slope and the RMSE they achieve.
optimal

{'b0': 67540, 'b1': -0.22999999999999932, 'rmse': 54987.1999686615}

In [139]:
# Predicted selling_price from the fitted b0 and b1 for km_driven = 10000.
x = 10000
y = optimal["b0"]+optimal["b1"]*x
y

65240.00000000001

In [140]:
# Predicted selling_price from the fitted b0 and b1 for km_driven = 25000.
x = 25000
y = optimal["b0"]+optimal["b1"]*x
y

61790.000000000015

In [141]:
# Predicted selling_price from the fitted b0 and b1 for km_driven = 35000.
x = 35000
y = optimal["b0"]+optimal["b1"]*x
y

59490.00000000002