In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

from itertools import count
import random
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
epochs = 50  # default number of full-batch passes used by Net.train

class Net(nn.Module):
  """Three-layer fully connected classifier that outputs raw class logits.

  Architecture: Linear(state_dim, 256) -> ReLU -> Linear(256, 256) -> ReLU
  -> Linear(256, output_dim).

  Attributes:
    loss_history: list of float losses, one per epoch of the most recent
      training run (empty until `train(X, y)` has been called).
  """

  def __init__(self, state_dim, output_dim):
    """Build the layers.

    Args:
      state_dim: number of input features per sample.
      output_dim: number of classes (size of the logit vector).
    """
    super().__init__()

    hidden_dim = 256
    self.linear1 = nn.Linear(state_dim, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim, hidden_dim)
    self.linear3 = nn.Linear(hidden_dim, output_dim)
    self.loss_history = []

  def forward(self, x):
    """Return unnormalized class scores (logits) for a float batch `x`."""
    x = F.relu(self.linear1(x))
    x = F.relu(self.linear2(x))
    # No tanh on the output: CrossEntropyLoss applies log-softmax itself and
    # expects unbounded logits. Squashing them into [-1, 1] caps how confident
    # the model can become and flattens the gradients.
    return self.linear3(x)

  def train(self, X=True, y=None, num_epochs=None, lr=0.001, log_every=10, mode=None):
    """Fit the network with full-batch SGD, or switch training mode.

    This method keeps the notebook's `net.train(X, y)` call working while
    still honouring the `nn.Module.train(mode)` contract, which
    `nn.Module.eval()` relies on (it calls `self.train(False)`).

    Args:
      X: feature tensor with one row per sample; cast to float32 before use.
        When `y` is None this argument is interpreted as the boolean `mode`
        of `nn.Module.train`.
      y: one-hot label tensor with one row per sample. Omit it to use the
        method as the standard mode switch.
      num_epochs: number of full-batch passes. Defaults to the module-level
        `epochs` value.
      lr: SGD learning rate.
      log_every: print loss/accuracy every this many epochs; 0 or None
        disables logging.
      mode: keyword-only style alias for the `nn.Module.train(mode=...)` call.

    Returns:
      `self` when used as a mode switch (as `nn.Module.train` does);
      None after a training run. Per-epoch losses are stored in
      `self.loss_history`.
    """
    if mode is not None:
      return super().train(mode)
    if y is None:
      # nn.Module API path, e.g. net.train(), net.train(False), net.eval().
      return super().train(X)

    super().train(True)
    num_epochs = int(epochs if num_epochs is None else num_epochs)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(self.parameters(), lr=lr)

    # Loop-invariant work: cast the inputs and decode the one-hot labels once.
    inputs = X.float()
    labels = y.argmax(dim=1)
    total = y.shape[0]
    self.loss_history = []

    for step in range(1, num_epochs + 1):
      # Without this, .grad accumulates across epochs and every update is
      # the sum of all previous gradients, so the loss never settles.
      optimizer.zero_grad()
      outputs = self(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      self.loss_history.append(loss.item())

      if log_every and step % log_every == 0:
        # Accuracy of the predictions made before this epoch's update.
        predicted = outputs.detach().argmax(dim=1)
        correct = (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print("Iteration: {}. Loss: {}. Correct: {}. Accuracy: {}.".format(step, loss.item(), correct, accuracy))

In [None]:
# Read the listings extract and parse both date columns into datetimes in a
# single chained expression, so re-running the cell always starts from the CSV.
df = (
    pd.read_csv('../data/denver_mean_dom_discretize.csv')
    .assign(
        list_date=lambda frame: pd.to_datetime(frame["list_date"]),
        sale_date=lambda frame: pd.to_datetime(frame["sale_date"]),
    )
)

FileNotFoundError: [Errno 2] No such file or directory: '../data/denver_mean_dom_discretize.csv'

In [None]:
# Give every property type an integer code, numbered in order of first
# appearance in the data.
mapping = {
    ptype: code
    for code, ptype in enumerate(df['property_type'].unique())
}
mapping

{'SINGLE': 0,
 'CONDO': 1,
 'MULTI_FAMILY': 2,
 'LAND': 3,
 'OTHER': 4,
 'TOWNHOUSE': 5}

In [None]:
# Add the integer-coded property type as a new column (re-running is safe:
# the column is simply recomputed from `property_type`).
df = df.assign(property_type_code=df['property_type'].map(mapping))

In [None]:
# Zero-mean / unit-variance scaler for the numeric feature columns
# (arguments spelled out; these are the library defaults).
scaler = StandardScaler(with_mean=True, with_std=True)

In [None]:
# Target: the discretized days-on-market bucket.
y2 = df['dom_bucket'].copy()

# Identifier, date, target-related and unused flag columns that are left out
# of the feature matrix.
columns_to_drop = [
    'property_id', 'list_date', 'sale_date', 'dom',
    'farm_score', 'fips',
    'first_time_homebuyer_score', 'dom_bucket', 'property_type',
    'has_jacuzzi', 'has_pool', 'has_solar', 'has_garage',
]
X2 = df.drop(columns=columns_to_drop)

# The boolean flag becomes 0/1 so the whole frame is numeric.
X2['has_central_air'] = X2['has_central_air'].astype(int)

# Columns that get standardized.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split performed later in the notebook, so test-set statistics
# influence the scaling - confirm whether that is acceptable.
columns_to_scale = [
    'zipcode', 'sqft', 'sale_price_per_sqft', 'median_rental_price',
    'pop_density', 'retirement_friendly_score',
    'college_student_friendly_score', 'violent_crime_rate',
    'young_single_professional_score',
    'property_type_code',
]
X2[columns_to_scale] = scaler.fit_transform(X2[columns_to_scale])

X2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232498 entries, 0 to 232497
Data columns (total 15 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   zipcode                          232498 non-null  float64
 1   sqft                             232498 non-null  float64
 2   has_central_air                  232498 non-null  int64  
 3   sale_price_per_sqft              232498 non-null  float64
 4   median_rental_price              232498 non-null  float64
 5   pop_density                      232498 non-null  float64
 6   retirement_friendly_score        232498 non-null  float64
 7   young_single_professional_score  232498 non-null  float64
 8   college_student_friendly_score   232498 non-null  float64
 9   violent_crime_rate               232498 non-null  float64
 10  walk                             232498 non-null  float64
 11  public_transportation            232498 non-null  float64
 12  ca

In [None]:
# Wrap the label array as a tensor, then expand it to one-hot rows; the
# number of classes is inferred from the largest label present.
y_tensor = torch.from_numpy(y2.to_numpy())
y_one_hot = F.one_hot(y_tensor)

In [None]:
# Feature matrix as a tensor backed by the DataFrame's NumPy array.
X_tensor = torch.from_numpy(X2.to_numpy())

In [None]:
def print_results(model, X_tr=None, y_tr=None, X_te=None, y_te=None):
  """Print train and test accuracy of a fitted model.

  Args:
    model: any object exposing `score(X, y)` that returns an accuracy value.
    X_tr, y_tr: training features/labels. When omitted, the notebook-level
      `X_train` / `y_train` are used.
    X_te, y_te: test features/labels. When omitted, the notebook-level
      `X_test` / `y_test` are used.

  Passing the datasets explicitly avoids depending on which split cell was
  executed last; omitting them keeps the original one-argument call working.
  """
  # Fall back to the notebook globals only for the pieces not supplied.
  if X_tr is None:
    X_tr = X_train
  if y_tr is None:
    y_tr = y_train
  if X_te is None:
    X_te = X_test
  if y_te is None:
    y_te = y_test

  print('Train set accuracy: {}'.format(model.score(X_tr, y_tr)))
  print('Test set accuracy: {}'.format(model.score(X_te, y_te)))

In [None]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_one_hot,
    test_size=0.33,
    random_state=297,
)

In [None]:
# Size the network from the data: one input per feature column, one output
# per one-hot class column.
net = Net(state_dim=X_train.shape[1], output_dim=y_train.shape[1])

In [None]:
# Run the full-batch training loop on the training split.
net.train(X=X_train, y=y_train)

Iteration: 10. Loss: 1.0451730489730835. Correct: 95839. Accuracy: 61.524784088134766.
Iteration: 20. Loss: 1.0425257682800293. Correct: 95839. Accuracy: 61.524784088134766.
Iteration: 30. Loss: 1.0430985689163208. Correct: 95839. Accuracy: 61.524784088134766.
Iteration: 40. Loss: 1.0397984981536865. Correct: 95839. Accuracy: 61.524784088134766.
Iteration: 50. Loss: 1.0537997484207153. Correct: 95839. Accuracy: 61.524784088134766.


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=1c850c61-d934-4c85-b16d-3cb283df0c84' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>