In [3]:
import pandas as pd

data= pd.read_csv("kc_house_data.csv")
data.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [4]:
data = data[['price', 'sqft_living']]
data

Unnamed: 0,price,sqft_living
0,221900.0,1180
1,538000.0,2570
2,180000.0,770
3,604000.0,1960
4,510000.0,1680
...,...,...
21608,360000.0,1530
21609,400000.0,2310
21610,402101.0,1020
21611,400000.0,1600


In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
data = scaler.fit_transform(data)

In [6]:
data= pd.DataFrame(data=data, columns=["price","sqft_living"])
data

Unnamed: 0,price,sqft_living
0,-0.866681,-0.979835
1,-0.005708,0.533634
2,-0.980806,-1.426254
3,0.174059,-0.130550
4,-0.081973,-0.435422
...,...,...
21608,-0.490533,-0.598746
21609,-0.381584,0.250539
21610,-0.375861,-1.154047
21611,-0.381584,-0.522528


In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data.iloc[:, 1], data.iloc[:, 0], test_size = 0.10, random_state = 42)

In [8]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(19451,) (19451,) (2162,) (2162,)


In [9]:
batch_size = 32
n_minibatches = x_train.shape[0] // batch_size

def create_mini_batches(x_tr, y_tr, batch_size):
    mini_batches = [] 
    global n_minibatches
    for i in range(n_minibatches + 1): 
        X_mini = x_tr.iloc[i * batch_size : (i + 1) * batch_size]
        Y_mini = y_tr.iloc[i * batch_size : (i + 1) * batch_size]
        mini_batches.append((X_mini, Y_mini)) 
        if x_tr.shape[0] % batch_size != 0:
            X_mini = x_tr.iloc[i * batch_size: x_tr.shape[0]]
            Y_mini  = y_tr.iloc[i * batch_size: y_tr.shape[0]]
        mini_batches.append((X_mini,Y_mini)) 
    return mini_batches 

mini_batches_all = create_mini_batches(x_train, y_train, batch_size)

In [10]:
m = 1 #Initial value of slope
c = -1 #Initial value of intercept
lr = 0.1 #Learning Rate
delta_m = 1 #Initialising Δm
delta_c = 1 #Initialising Δc
max_iters = 10 #Maximum number of iterations  
iters_count = 0 #Counting Iterations


def deriv(m_f, c_f, datax, datay):
    m_deriv = 0
    c_deriv = 0
    for i in range(datax.shape[0]):
        x, y = datax.iloc[i], datay.iloc[i]
        m_deriv += (y-m_f*x-c_f)*x
        c_deriv += (y-m_f*x-c_f)
        m_deriv = -m_deriv/len(datax)
        c_deriv = -c_deriv/len(datay)
    return m_deriv, c_deriv  


while iters_count < max_iters:
    for i in range(n_minibatches):
        x_b, y_b = mini_batches_all[i]
        delta_m, delta_c = deriv(m, c, x_b, y_b)
        delta_m = -lr * delta_m
        delta_c = -lr * delta_c
        m += delta_m
        c += delta_c
    iters_count += 1
    print(f"Iteration: {iters_count}\tValue of m: {m}, \tValue of c: {c}")

print(f"\nThe local minima occurs at: {m}, {c}")

Iteration: 1	Value of m: 0.7855881817612184, 	Value of c: -0.42621975053772676
Iteration: 2	Value of m: 0.6993881463525335, 	Value of c: -0.1997910290102009
Iteration: 3	Value of m: 0.6647175647317495, 	Value of c: -0.11044252405839473
Iteration: 4	Value of m: 0.6507663708217768, 	Value of c: -0.07518827330361025
Iteration: 5	Value of m: 0.6451499501673154, 	Value of c: -0.06127905872636808
Iteration: 6	Value of m: 0.6428878743633883, 	Value of c: -0.05579175421936221
Iteration: 7	Value of m: 0.6419763777739348, 	Value of c: -0.05362714657452013
Iteration: 8	Value of m: 0.6416089227224974, 	Value of c: -0.05277333657384936
Iteration: 9	Value of m: 0.6414607202644524, 	Value of c: -0.05243658985847545
Iteration: 10	Value of m: 0.6414009191680222, 	Value of c: -0.05230378815595178

The local minima occurs at: 0.6414009191680222, -0.05230378815595178


In [11]:
import numpy as np

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

In [12]:

y_pred_train = []
for i in x_train:
  y_p_tr = (m * i) - c
  y_pred_train.append(y_p_tr)
y_pred_train = np.array(y_pred_train)

In [13]:
y_pred_test = []
for i in x_test:
  y_p_te = (m * i) - c
  y_pred_test.append(y_p_te)
y_pred_test = np.array(y_pred_test)


In [14]:
import math
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error


#Training Accuracies
mse = math.sqrt(mean_squared_error(y_train, y_pred_train)) 
print('Root mean square error', mse) 
mse = (mean_squared_error(y_train, y_pred_train)) 
print('Mean square error', mse) 
mae=mean_absolute_error(y_train, y_pred_train)
print('Mean absolute error', mae)

Root mean square error 0.711374810430384
Mean square error 0.5060541209148648
Mean absolute error 0.47168187994957317


In [15]:
#Testing Accuracies
mse = math.sqrt(mean_squared_error(y_test, y_pred_test)) 
print('Root mean square error', mse) 
mse = (mean_squared_error(y_test, y_pred_test)) 
print('Mean square error', mse) 
mae=mean_absolute_error(y_test, y_pred_test)
print('Mean absolute error', mae)

Root mean square error 0.7621200740860146
Mean square error 0.5808270073248724
Mean absolute error 0.4919983891378863
