## Here we will be making our model

we will begin by importing required libraries

In [104]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


now that we have our libraries imported, we will first define some global variables

In [None]:
ALPHA = 0.00001
ITERATIONS = 1000
W = np.zeros(8)
B = 0

now we will first import and feature scale data using z-scale normalisation

In [106]:
def getData():
    df = pd.read_csv('CleanDataNP.csv')
    X = df.to_numpy()[:,0:-1]
    Y = df.to_numpy()[:,-1]
    mux = np.mean(X, axis=0)
    muy = np.mean(Y, axis=0)
    # print(np.shape(mux))
    sigx = np.std(X, axis=0)
    sigy = np.std(Y, axis=0)
    return (X - mux)/sigx, (Y-muy)/sigy, mux, muy, sigx, sigy

rX, rY, muX, muY, stdX, stdY = getData()

before moving further, let's first divide our data into 3 parts-  
1. Training set (X, Y) - 60%
2. Dev set (dX, dY) - 20%
3. Test set (tX. tY) - 20%

In [107]:
X, Y = rX[:5200, :], rY[:5200]
dX, dY = rX[5200:6950, :], rY[5200:6950]
tX, tY = rX[6950:, :], rY[6950:]
print(np.shape(X), np.shape(Y), np.shape(dX), np.shape(dY), np.shape(tX), np.shape(tY))

(5200, 8) (5200,) (1750, 8) (1750,) (1768, 8) (1768,)


now we have the data which is 100% ready to be fed into model.  
so now let's make a function that calculate cost function

In [108]:
def calculateCost(x, y, w, b):
    cost = 0
    for i in range(len(y)):
        cost += (1/(2*len(y))*(((np.dot(w, x[i]) + b) - y[i])**2))
    return cost
print(calculateCost(X, Y, W, B))

0.5388097996627936


let's also make a function to calculate MOE (mean obsolute error)

In [109]:
def calculateMOE(x, y, w, b):
    moe = 0
    for i in range(len(y)):
        moe += abs(np.dot(w, x[i]) + b - y[i])
    return (1/(2*len(y)))*moe
print(calculateMOE(X, Y, W, B))

0.29882082193745696


Now we will be making a function to calculate gradient for our gradient discent algo

In [110]:
def getGradient(x, y, w, b):
    grw = 0
    grb = 0
    for i in range(len(y)):
        grw += ((np.dot(w, x[i]) + b) - y[i])*x[i]
        grb += ((np.dot(w, x[i]) + b) - y[i])
    return (1/(2*len(y)))*grw, (1/(2*len(y)))*grb

now perhaps we are ready to iterate and get the real values for w and b

In [111]:
def getWB(x, y, w, b):
    global W, B, tX, tY
    for i in range(ITERATIONS):
        grw, grb = getGradient(x, y, w, b)
        tw = W - ALPHA*grw
        tb = B - ALPHA*grb
        W, B = tw, tb
        if i%100 == 0:
            print("At iteration:", i, "Cost:", calculateCost(X, Y, W, B), "MOE:", calculateMOE(X, Y, W, B), "|| Unseen MOE:", calculateMOE(tX, tY, W, B))
getWB(X, Y, W, B)
print(W, B)

At iteration: 0 Cost: 0.5388037978563048 MOE: 0.29881804373085247 || Unseen MOE: 0.28038097494259245
At iteration: 100 Cost: 0.5382039846148009 MOE: 0.29854022307011446 || Unseen MOE: 0.2801057518348282
At iteration: 200 Cost: 0.5376048989120964 MOE: 0.29826240240937774 || Unseen MOE: 0.2798305287270643
At iteration: 300 Cost: 0.5370065407481904 MOE: 0.2979845817486408 || Unseen MOE: 0.2795553056193002
At iteration: 400 Cost: 0.536408910123083 MOE: 0.2977067610879038 || Unseen MOE: 0.2792800825115368
At iteration: 500 Cost: 0.5358120070367739 MOE: 0.2974289404271666 || Unseen MOE: 0.2790048594037725
At iteration: 600 Cost: 0.5352158314892616 MOE: 0.2971511197664298 || Unseen MOE: 0.27872963629600833
At iteration: 700 Cost: 0.5346203834805497 MOE: 0.29687329910569327 || Unseen MOE: 0.27845441318824515
At iteration: 800 Cost: 0.5340256630106393 MOE: 0.29659547844495515 || Unseen MOE: 0.27817919008048136
At iteration: 900 Cost: 0.533431670079524 MOE: 0.2963176577842178 || Unseen MOE: 0.27

In [112]:
prices = np.dot(W, dX[0]) + B
print(prices*stdY + muY, dY[0]*stdY + muY)

1939603.7216208973 1098000.0
