In [68]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import random
import tensorflow as tf
import csv

import tensorflow as tf
from torch.utils.data import TensorDataset, DataLoader
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split


## Preprocessing

In [69]:
# Import Housing Data
new_housing_csv = pd.read_csv('../data/new_housing.csv')

# Select houses with a Sale Price over $100,000
mask = (new_housing_csv["SALE PRICE"]) > 100000.0
new_housing_csv = new_housing_csv[mask]

# Only select homes sold between 2011 and 2015
new_housing_csv['SALE DATE'] = pd.to_datetime(new_housing_csv['SALE DATE'])
mask = (new_housing_csv["SALE DATE"]).dt.year > 2011 
new_housing_csv = new_housing_csv[mask]
mask = (new_housing_csv["SALE DATE"]).dt.year < 2015
new_housing_csv = new_housing_csv[mask]


# Only select important rows
new_housing_csv = new_housing_csv[["LAND VAL", "PARCEL VAL", "IMPROVE VAL", "SALE PRICE", "SQFT", "ROOMS", "BEDROOM", "BATH", "LIVING AREA", "GROSS AREA", "YEAR", "PERCENT GOOD", "STORIES"]]

#Remove Nulls and 0s in Data
mask = new_housing_csv.notnull().all(axis=1)
new_housing_csv = new_housing_csv.loc[mask, :]
mask = (new_housing_csv != 0).all(axis=1)
new_housing_csv = new_housing_csv.loc[mask, :]

# Seperate into Data and Labels
housing_data = new_housing_csv[["LAND VAL", "PARCEL VAL", "SQFT", "ROOMS", "BEDROOM", "BATH", "LIVING AREA", "GROSS AREA", "YEAR","PERCENT GOOD", "STORIES"]]
housing_label = new_housing_csv[["SALE PRICE"]]

  new_housing_csv = pd.read_csv('../data/new_housing.csv')


In [70]:
X = housing_data
Y = housing_label

def normalize(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X = (X - mu) / sigma
    return X

# Normalize Data
X = normalize(X)
Y = normalize(Y)

# Convert to Numpy Array
X = pd.DataFrame.to_numpy(X)
Y = pd.DataFrame.to_numpy(Y)

## Model

In [71]:
class LinearRegression(tf.keras.Model):
    def __init__(self):
        super(LinearRegression, self).__init__()
        self.dense1 = tf.keras.layers.Dense(units=128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=64, activation='relu')
        self.dense3 = tf.keras.layers.Dense(units=32, activation='relu')
        self.dense4 = tf.keras.layers.Dense(units=1, kernel_regularizer=tf.keras.regularizers.l2(0.01))
        
    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)

        return x

In [72]:
model = LinearRegression()

# Loss function and Optimizer
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.015)

In [74]:
# Convert data to tensors
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(Y, dtype=tf.float32)

indices = np.arange(len(X))
indices_list = indices.tolist()

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)


# x_train = x[:(data_split * len(X))]
# y_train = y[:(data_split * len(X))]
# x_test = x[(data_split * len(X) + 1):(len(X))]
# y_test = y[(data_split * len(X) + 1):(len(X))]

# Remove all instances where data is empty

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([2157,  876, 1439, ..., 3547, 1062, 1021])

In [61]:
num_epochs = 10000
for epoch in range(num_epochs):
    # Forward pass
    with tf.GradientTape() as tape:
        y_pred = model(x_train)
        loss = loss_fn(y_train, y_pred)
    
    # Backward and optimize
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    
    # Print progress
    if (epoch+1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.numpy():.4f}")

Epoch [100/10000], Loss: 0.4116
Epoch [200/10000], Loss: 0.3776
Epoch [300/10000], Loss: 0.3539
Epoch [400/10000], Loss: 0.3332
Epoch [500/10000], Loss: 0.3141
Epoch [600/10000], Loss: 0.2974
Epoch [700/10000], Loss: 0.2824
Epoch [800/10000], Loss: 0.2688
Epoch [900/10000], Loss: 0.2564
Epoch [1000/10000], Loss: 0.2446
Epoch [1100/10000], Loss: 0.2336
Epoch [1200/10000], Loss: 0.2233
Epoch [1300/10000], Loss: 0.2135
Epoch [1400/10000], Loss: 0.2045
Epoch [1500/10000], Loss: 0.1961
Epoch [1600/10000], Loss: 0.1878
Epoch [1700/10000], Loss: 0.1801
Epoch [1800/10000], Loss: 0.1726
Epoch [1900/10000], Loss: 0.1656
Epoch [2000/10000], Loss: 0.1590
Epoch [2100/10000], Loss: 0.1530
Epoch [2200/10000], Loss: 0.1495
Epoch [2300/10000], Loss: 0.1471
Epoch [2400/10000], Loss: 0.1442
Epoch [2500/10000], Loss: 0.1407
Epoch [2600/10000], Loss: 0.1365
Epoch [2700/10000], Loss: 0.1293
Epoch [2800/10000], Loss: 0.1294
Epoch [2900/10000], Loss: 0.1255
Epoch [3000/10000], Loss: 0.1201
Epoch [3100/10000],

In [50]:
print(model.layers[0].weights)

[<tf.Variable 'linear_regression_4/dense_16/kernel:0' shape=(11, 128) dtype=float32, numpy=
array([[-0.12692076,  0.10856866,  0.04461654, ..., -0.07514493,
        -0.0744973 ,  0.01260479],
       [-0.14683446, -0.14953981,  0.18442567, ..., -0.20339386,
         0.103581  , -0.09291214],
       [ 0.1254563 , -0.11002223, -0.05349731, ..., -0.11784876,
        -0.03736782, -0.16272876],
       ...,
       [-0.25042447,  0.25035843,  0.09431099, ...,  0.13657267,
        -0.22676115,  0.01360488],
       [-0.03390095,  0.18360633,  0.18887787, ...,  0.04897064,
         0.2254355 ,  0.05400588],
       [-0.15032893, -0.04964776, -0.18149091, ..., -0.06873605,
        -0.10752581,  0.31416753]], dtype=float32)>, <tf.Variable 'linear_regression_4/dense_16/bias:0' shape=(128,) dtype=float32, numpy=
array([-3.48075889e-02,  5.71436621e-02, -3.95265818e-02, -1.96170285e-02,
       -5.11795841e-02, -3.94228846e-02, -3.07753831e-02,  4.84897792e-02,
        5.86357899e-03,  3.40636000e-02,  

In [51]:
def accuracy(y_pred, y_true, sigma, mu, threshold=0.2):
    """
    Computes the accuracy of the predictions given the true labels, based on a threshold value.
    
    Parameters:
    y_pred (np.ndarray): An array of predicted labels.
    y_true (np.ndarray): An array of true labels.
    threshold (float): The threshold value to use for measuring accuracy.
    
    Returns:
    float: The accuracy of the predictions as a percentage.
    """
    # Compute the absolute difference between predicted and true values
    
    
    y_pred = (y_pred * sigma) + mu
    y_true = (y_true * sigma) + mu

    diff = np.abs(y_pred - y_true)
    print(y_pred)
    print(y_true)
    
    # Compute accuracy
    acc = np.mean(diff <= threshold * y_pred)
    
    # Convert to percentage
    acc_pct = acc * 100
    
    return acc_pct

In [52]:
pred = model(x_train)
loss = loss_fn(y_train, pred)
print("Training Accuracy:", accuracy(pred, y_train, sigma, mu))

pred = model(x_test)
loss = loss_fn(y_test, pred)
print("Testing Accuracy:", accuracy(pred, y_test, sigma, mu))


tf.Tensor(
[[329669.03]
 [303795.1 ]
 [285692.34]
 ...
 [511830.06]
 [555258.8 ]
 [529761.1 ]], shape=(3000, 1), dtype=float32)
tf.Tensor(
[[309000.  ]
 [299999.97]
 [264999.97]
 ...
 [460000.  ]
 [480000.  ]
 [425000.  ]], shape=(3000, 1), dtype=float32)
Training Accuracy: 90.3
tf.Tensor(
[[ 454911.3 ]
 [ 731288.94]
 [ 664541.9 ]
 [ 464073.12]
 [1351638.4 ]
 [ 518510.5 ]
 [ 509089.  ]
 [ 636483.8 ]
 [ 984983.6 ]
 [ 599913.6 ]
 [1583548.5 ]
 [ 644167.6 ]
 [ 738521.56]
 [ 550295.9 ]
 [ 819021.  ]
 [ 568807.94]
 [ 843509.6 ]
 [ 814123.  ]
 [ 475993.72]
 [ 477301.2 ]
 [ 490294.75]
 [1254651.2 ]
 [1288440.2 ]
 [1337536.8 ]
 [ 508344.1 ]
 [ 376631.22]
 [ 484515.3 ]
 [ 607957.6 ]
 [ 723235.1 ]
 [ 747788.5 ]
 [ 691557.9 ]
 [ 632108.8 ]
 [ 478103.3 ]
 [ 744153.  ]
 [ 656288.1 ]
 [ 897131.4 ]
 [ 684974.25]
 [ 414631.66]
 [ 719559.  ]
 [ 914125.25]
 [ 906084.9 ]
 [ 654080.6 ]
 [ 737468.8 ]
 [ 662071.  ]
 [ 619238.7 ]
 [ 822821.4 ]
 [ 419660.66]
 [ 502268.62]
 [1145174.2 ]
 [ 679544.94]
 [ 465323

In [53]:
weights, bias = model.layers[0].get_weights()
print(weights)


[[-0.12692076  0.10856866  0.04461654 ... -0.07514493 -0.0744973
   0.01260479]
 [-0.14683446 -0.14953981  0.18442567 ... -0.20339386  0.103581
  -0.09291214]
 [ 0.1254563  -0.11002223 -0.05349731 ... -0.11784876 -0.03736782
  -0.16272876]
 ...
 [-0.25042447  0.25035843  0.09431099 ...  0.13657267 -0.22676115
   0.01360488]
 [-0.03390095  0.18360633  0.18887787 ...  0.04897064  0.2254355
   0.05400588]
 [-0.15032893 -0.04964776 -0.18149091 ... -0.06873605 -0.10752581
   0.31416753]]
