In [12]:
import numpy as np
import random
eta = 0.5    # learning rate
epoch = 1000 # iteration
eta *= 0.95

### Neural Network Model for Linear Regression
+ Single layer neural network with linear activation function.
+ In forward processing, it uses MSE (mean square error) loss function.
+ In backward processing, delta = output - target. 
+ Backward processing is called "backprop".

In [13]:
# Linear Regression Model
class LinearRegression:
    def __init__(self, x, w, y):
        self.inputs  = x
        self.weights = w               
        self.target  = y
        self.output  = np.zeros(self.target.shape)
    def forward_proc(self):
        # forward processing of inputs and weights
        self.output = np.dot(self.weights, self.inputs.T)
    def backprop(self):
        # backward processing of appling the chain rule to find derivative of the mean square error function with respect to weights
        dw = (self.output - self.target) * self.inputs
        # update the weights with the derivative of the loss function
        self.weights -= eta * dw
    def predict(self, x):
        # predict the output for a given input x
        return (np.dot(self.weights, x.T))
    def calculate_error(self):
        # calculate error
        error = self.target - self.output
        return abs(error)

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
# load dataset
df = pd.read_csv("kc_house_data.csv")
df.dropna(inplace=True)
# use 300 samples for fast experimentation, for final run, you may
# run the entire dataset
#df = df.iloc[:300, :]
#df = df.iloc[:, :]

In [30]:
df

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,0.0,0.0,...,7,2170,400.0,1951,1991.0,98125,47.7210,-122.319,1690,7639
3,2487200875,12/9/2014,604000.0,4,3.00,1960,5000,1.0,0.0,0.0,...,7,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000
4,1954400510,2/18/2015,510000.0,3,2.00,1680,8080,1.0,0.0,0.0,...,8,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503
5,7237550310,5/12/2014,1230000.0,4,4.50,5420,101930,1.0,0.0,0.0,...,11,3890,1530.0,2001,0.0,98053,47.6561,-122.005,4760,101930
6,1321400060,6/27/2014,257500.0,3,2.25,1715,6819,2.0,0.0,0.0,...,7,1715,?,1995,0.0,98003,47.3097,-122.327,2238,6819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21591,2997800021,2/19/2015,475000.0,3,2.50,1310,1294,2.0,0.0,0.0,...,8,1180,130.0,2008,0.0,98116,47.5773,-122.409,1330,1265
21592,263000018,5/21/2014,360000.0,3,2.50,1530,1131,3.0,0.0,0.0,...,8,1530,0.0,2009,0.0,98103,47.6993,-122.346,1530,1509
21593,6600060120,2/23/2015,400000.0,4,2.50,2310,5813,2.0,0.0,0.0,...,8,2310,0.0,2014,0.0,98146,47.5107,-122.362,1830,7200
21594,1523300141,6/23/2014,402101.0,2,0.75,1020,1350,2.0,0.0,0.0,...,7,1020,0.0,2009,0.0,98144,47.5944,-122.299,1020,2007


In [38]:
df[df['yr_built']==1980]

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
98,722079104,7/11/2014,314000.0,3,1.75,1810,41800,1.0,0.0,0.0,...,7,1210,600.0,1980,0.0,98038,47.4109,-121.958,1650,135036
112,2525310310,9/16/2014,272500.0,3,1.75,1540,12600,1.0,0.0,0.0,...,7,1160,?,1980,0.0,98038,47.3624,-122.031,1540,11656
174,1687900520,9/29/2014,673000.0,4,2.25,2590,8190,2.0,0.0,0.0,...,8,2590,0.0,1980,0.0,98006,47.5619,-122.125,2260,8335
443,822079033,4/22/2015,350000.0,3,1.50,1250,219978,1.0,0.0,0.0,...,6,1250,0.0,1980,0.0,98038,47.4056,-121.955,1930,210394
457,2122039094,11/26/2014,705000.0,3,3.00,1970,20978,2.0,1.0,3.0,...,9,1770,200.0,1980,0.0,98070,47.3844,-122.438,2280,75396
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19357,8802400906,8/29/2014,244000.0,3,1.75,1540,8885,1.0,0.0,0.0,...,7,1440,100.0,1980,0.0,98031,47.4031,-122.201,1540,12734
19400,6804600720,8/1/2014,495000.0,4,2.25,2350,10072,2.0,0.0,0.0,...,8,2350,0.0,1980,0.0,98011,47.7628,-122.168,2210,9687
19408,5089700750,5/9/2014,320000.0,4,2.25,2310,7490,2.0,0.0,0.0,...,8,2310,0.0,1980,0.0,98055,47.4379,-122.192,2310,8480
19470,2722049077,8/28/2014,299500.0,3,1.75,1810,34500,1.0,0.0,0.0,...,8,1230,580.0,1980,0.0,98032,47.3707,-122.275,2090,9735


In [24]:
if __name__ == "__main__":

    # data normalization on number of rooms and age of the house
    # select features and labels for prediction
    features = df[["bedrooms", "yr_built", 'grade']].values
    labels = df[["price"]].values
    # normalize data
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)
    labels = scaler.fit_transform(labels) 
    #weights = np.random.rand(1, 3)
    weights = np.array([ 0.70579101, 0.62761976, 0.17335884 ])
    weights = weights.reshape(1,3)
    
    
    print("Initial Weights:", weights)

    # SGD Optimization
    for i in range(epoch):
   
        if i == 0: w=weights
        else: w=model.weights

        concat_data = np.concatenate((features, labels), axis = 1)
        np.random.shuffle(concat_data) # shuffle the dataset

        X = concat_data[:, 0:3]
        y = concat_data[:, 3:4]
        
        for j in range(len(features)):
         
            model = LinearRegression(X[j], w, y[j])
            model.forward_proc()   # forward processing
            model.backprop()       # backward processing
            w = model.weights 

        if (i % 50) == 0:
             print("Loss: ", model.calculate_error())
        
    print("Output:", model.output)
    print("Adjusted Weights:", model.weights)



Initial Weights: [[0.70579101 0.62761976 0.17335884]]
Loss:  [0.02474668]
Loss:  [0.03841002]
Loss:  [0.02826331]
Loss:  [0.00663606]
Loss:  [0.01768105]
Loss:  [0.04784565]
Loss:  [0.00889533]
Loss:  [0.00706409]
Loss:  [0.02359895]
Loss:  [0.01198917]
Loss:  [0.04125633]
Loss:  [0.05412131]
Loss:  [0.00804049]
Loss:  [0.01382233]
Loss:  [0.03826315]
Loss:  [0.00339904]
Loss:  [0.08100091]
Loss:  [0.00689213]
Loss:  [0.02788278]
Loss:  [0.03186913]
Output: [0.04020242]
Adjusted Weights: [[ 0.01529305 -0.05301453  0.18393075]]


In [27]:
new_features = np.array([[3, 1980, 8]])
new_features = scaler.transform(new_features)
predicted_value = model.predict(new_features)
print("Predicted price for 3 beds, 1980, grade 8 is:", int(scaler.inverse_transform(predicted_value)))  

Predicted price for 3 beds, 1980, grade 8 is: 349536
