In [1]:
!pip install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio===0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html


In [2]:
import numpy as np
import time
import torch as T
import pandas as pd
import seaborn as sns
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without a CUDA device.
device = T.device("cuda" if T.cuda.is_available() else "cpu")

In [3]:
class HouseDataset(T.utils.data.Dataset):
    """House-price dataset read from a tab-delimited text file.

    Each data line holds nine numeric columns (lines starting with '#'
    are treated as comments):

        AC  sq ft   style  price   school
        -1  0.2500  0 1 0  0.5650  0 1 0
         1  0.1275  1 0 0  0.3710  0 0 1

    air condition: -1 = no, +1 = yes
    style:  art_deco, bungalow, colonial
    school: johnson, kennedy, lincoln

    Column 5 (price) is the target; the remaining eight columns are the
    predictors.

    Attributes:
      df      pandas DataFrame of all nine columns with readable names
              (the price column is named 'result')
      x_data  float32 tensor of predictors, shape (n_rows, 8)
      y_data  float32 tensor of targets, shape (n_rows, 1)

    Both tensors are moved to the notebook-level `device`.
    """

    def __init__(self, src_file, m_rows=None):
        """Load `src_file`, reading at most `m_rows` rows (None = all)."""
        # ndmin=2 keeps the result two-dimensional even when a single row
        # is read (e.g. m_rows=1); without it loadtxt returns a 1-D array
        # and the column slicing below raises IndexError.
        all_xy = np.loadtxt(src_file, max_rows=m_rows,
          usecols=[0,1,2,3,4,5,6,7,8], delimiter="\t",
          comments="#", skiprows=0, dtype=np.float32, ndmin=2)

        # every column except the price (index 5) is a predictor
        tmp_x = all_xy[:,[0,1,2,3,4,6,7,8]]

        # labelled copy of the raw table, for inspection and plotting
        self.df = pd.DataFrame(all_xy).rename(columns={5: 'result', 0: 'AC', 1: 'sq ft', 2: 'style art_deco',3: 'style bungalow', 4: 'style colonial', 6: 'school johnson', 7: 'school kennedy', 8: 'school lincoln'})
        tmp_y = all_xy[:,5].reshape(-1,1)    # 2-D required

        self.x_data = T.tensor(tmp_x, \
          dtype=T.float32).to(device)
        self.y_data = T.tensor(tmp_y, \
          dtype=T.float32).to(device)

    def __len__(self):
        """Number of rows (houses) in the dataset."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return (predictors, price) for `idx`.

        For an integer index both items are 1-D tensors (8 predictors and
        1 price); for a slice they are 2-D tensors with one row per item.
        """
        preds = self.x_data[idx,:]  # or just [idx]
        price = self.y_data[idx,:]
        return (preds, price)       # tuple of two tensors
    


### 0. get started

In [4]:
print("\nBegin predict House price \n")
# Seed the PyTorch and NumPy global random number generators so that
# repeated runs of the notebook give the same results.
T.manual_seed(4)  # representative results
np.random.seed(4)


Begin predict House price 



### 1. create DataLoader objects

In [5]:

print("Creating Houses Dataset objects ")
# Forward slashes are accepted on Windows as well as Linux/macOS, so the
# relative paths below work on every platform (the backslash form is
# Windows-only).
train_file = "./Data/houses_train.txt"
train_ds = HouseDataset(train_file)  # all 200 rows

test_file = "./Data/houses_test.txt"
test_ds = HouseDataset(test_file)  # all 40 rows

# Mini-batches of bat_size items, reshuffled on every pass over the data
bat_size = 10
train_ldr = T.utils.data.DataLoader(train_ds,
    batch_size=bat_size, shuffle=True)

Creating Houses Dataset objects 


In [6]:
# Show the first rows of the labelled training table; the price target is
# the column named 'result'.
train_ds.df.head()

Unnamed: 0,AC,sq ft,style art_deco,style bungalow,style colonial,result,school johnson,school kennedy,school lincoln
0,-1.0,0.1275,0.0,1.0,0.0,0.3,0.0,0.0,1.0
1,1.0,0.11,1.0,0.0,0.0,0.335,1.0,0.0,0.0
2,-1.0,0.1375,0.0,0.0,1.0,0.286,0.0,1.0,0.0
3,1.0,0.1975,0.0,1.0,0.0,0.512,0.0,0.0,1.0
4,-1.0,0.12,0.0,0.0,1.0,0.295,1.0,0.0,0.0


In [7]:
#sns.pairplot(train_ds.df)

### 2. create network

In [8]:
class Net(T.nn.Module):
    """Fully connected regression network, 8-(10-10)-1.

    Eight predictor inputs, two hidden layers of ten ReLU units each and a
    single linear output node (the predicted price). Weights are
    initialised with Xavier-uniform values and all biases start at zero.
    """

    def __init__(self):
        super().__init__()
        self.hid1 = T.nn.Linear(8, 10)
        self.hid2 = T.nn.Linear(10, 10)
        self.oupt = T.nn.Linear(10, 1)

        # initialise layer by layer, in the order the layers were created
        for layer in (self.hid1, self.hid2, self.oupt):
            T.nn.init.xavier_uniform_(layer.weight)
            T.nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Map predictors `x` to a predicted price (no output activation)."""
        first_hidden = T.relu(self.hid1(x))
        second_hidden = T.relu(self.hid2(first_hidden))
        return self.oupt(second_hidden)

In [9]:

# Create the network and move its parameters to the selected device
net = Net().to(device)

### 3. train model

In [10]:

# Training hyperparameters
max_epochs = 500
ep_log_interval = 50   # log the loss and save a checkpoint every N epochs
lrn_rate = 0.005

loss_func = T.nn.MSELoss()
# optimizer = T.optim.SGD(net.parameters(), lr=lrn_rate)
optimizer = T.optim.Adam(net.parameters(), lr=lrn_rate)

print("\nbat_size = %3d " % bat_size)
print("loss = " + str(loss_func))
print("optimizer = Adam")
print("max_epochs = %3d " % max_epochs)
print("lrn_rate = %0.3f " % lrn_rate)

print("\nStarting training with saved checkpoints")
net.train()  # set mode
for epoch in range(0, max_epochs):
    T.manual_seed(1+epoch)  # recovery reproducibility
    epoch_loss = 0  # sum of the per-batch average losses for this epoch

    for (batch_idx, batch) in enumerate(train_ldr):
        (X, Y) = batch                 # (predictors, targets)
        optimizer.zero_grad()          # prepare gradients
        oupt = net(X)                  # predicted prices
        loss_val = loss_func(oupt, Y)  # avg per item in batch
        epoch_loss += loss_val.item()  # accumulate avgs
        loss_val.backward()            # compute gradients
        optimizer.step()               # update wts

    if epoch % ep_log_interval == 0:
        print("epoch = %4d   loss = %0.4f" % (epoch, epoch_loss))

        # Save a checkpoint named <timestamp>-<epoch>_checkpoint.pt.
        # Forward slashes work on Windows as well as Linux/macOS; the Log
        # directory is expected to exist already.
        dt = time.strftime("%Y_%m_%d-%H_%M_%S")
        fn = "./Log/" + str(dt) + "-" + str(epoch) + "_checkpoint.pt"

        # everything needed to resume: epoch number, weights, optimizer state
        info_dict = {
            'epoch' : epoch,
            'net_state' : net.state_dict(),
            'optimizer_state' : optimizer.state_dict()
        }
        T.save(info_dict, fn)

print("Done ")


bat_size =  10 
loss = MSELoss()
optimizer = Adam
max_epochs = 500 
lrn_rate = 0.005 

Starting training with saved checkpoints
epoch =    0   loss = 5.4416
epoch =   50   loss = 0.0227
epoch =  100   loss = 0.0179
epoch =  150   loss = 0.0190
epoch =  200   loss = 0.0145
epoch =  250   loss = 0.0163
epoch =  300   loss = 0.0185
epoch =  350   loss = 0.0142
epoch =  400   loss = 0.0135
epoch =  450   loss = 0.0155
Done 


### 4. evaluate model accuracy

In [11]:
def accuracy(model, ds, pct):
    """Fraction of items predicted to within `pct` of the true price.

    Items are scored one at a time: a prediction counts as correct when
    |predicted - actual| is strictly less than |pct * actual|.
    Assumes the model has already been put in eval mode.
    """
    hits = 0
    misses = 0

    with T.no_grad():
        for idx in range(len(ds)):
            predictors, target = ds[idx]
            predicted = model(predictors).item()   # computed price
            actual = target.item()                 # true price

            if np.abs(predicted - actual) < np.abs(pct * actual):
                hits += 1
            else:
                misses += 1

    return (hits * 1.0) / (hits + misses)

In [12]:
def accuracy_quick(model, dataset, pct):
    """Vectorised accuracy: fraction of items within `pct` of the target.

    The whole dataset is fetched with one slice and scored in a single
    forward pass. Assumes the model has already been put in eval mode.
    """
    n = len(dataset)
    everything = dataset[0:n]
    predictors = everything[0]   # all predictor values
    targets = everything[1]      # all target prices

    with T.no_grad():
        computed = model(predictors)   # all computed prices

    # True wherever the actual difference is below the allowed difference
    within = (computed - targets).abs() < (pct * targets).abs()
    return within.sum(dim=0).item() / n

In [13]:
def baseline_acc(ds, pct, slope=1.9559, intercept=0.0987):
    """Accuracy of a one-variable linear baseline that uses only sq. feet.

    The baseline predicts  price = slope * sq_feet + intercept.  The
    default coefficients (y = 1.9559x + 0.0987) come from a separate
    linear regression program; pass other values to score a different fit.

    Args:
      ds:        indexable dataset yielding (predictors, target) pairs,
                 where predictors[1] is the square-feet value
      pct:       tolerance as a fraction of the true price
      slope:     coefficient applied to the square-feet value
      intercept: constant term of the linear model

    Returns:
      Fraction of items whose predicted price differs from the true price
      by strictly less than |pct * true price|.
    """
    n_correct = 0; n_wrong = 0
    for i in range(len(ds)):
        (X, Y) = ds[i]                 # (predictors, target)
        x = X[1].item()                # sq feet predictor
        y = slope * x + intercept      # computed price

        abs_delta = np.abs(y - Y.item())
        max_allow = np.abs(pct * Y.item())
        if abs_delta < max_allow:
            n_correct += 1
        else:
            n_wrong += 1

    acc = (n_correct * 1.0) / (n_correct + n_wrong)
    return acc

#### 4.1. model accuracy

In [14]:
print("\nComputing model accuracy")
net.eval()  # switch to evaluation mode before scoring

# score both splits with a 10% tolerance, then report them
acc_train = accuracy(net, train_ds, 0.10)
acc_test = accuracy(net, test_ds, 0.10)

print("Accuracy (within 0.10) on train data = %0.4f" % acc_train)
print("Accuracy (within 0.10) on test data  = %0.4f" % acc_test)


Computing model accuracy
Accuracy (within 0.10) on train data = 0.9450
Accuracy (within 0.10) on test data  = 0.8750


#### 4.2. baseline accuracy

In [15]:
# Linear sq-feet baseline scored with the same 10% tolerance as the network
base_acc_train = baseline_acc(train_ds, 0.10)
base_acc_test = baseline_acc(test_ds, 0.10)

print("%0.4f" % base_acc_train)  # 0.7000
print("%0.4f" % base_acc_test)   # 0.7000

0.7000
0.7000


### 5. make a prediction

In [16]:

print("\nPredicting price for AC=no, sqft=2300, ")
print(" style=colonial, school=kennedy: ")

# one row of predictors, laid out as AC, sq ft, style (3), school (3)
unk = np.array([[-1, 0.2300,  0,0,1,  0,1,0]], dtype=np.float32)
unk = T.tensor(unk, dtype=T.float32).to(device)

with T.no_grad():
    pred_price = net(unk).item()  # scalar

# the network output is multiplied by 1,000,000 for display as dollars
str_price = "${:,.2f}".format(pred_price * 1000000)
print(str_price)


Predicting price for AC=no, sqft=2300, 
 style=colonial, school=kennedy: 
$498,734.65


### 6. save final model (state_dict approach)

In [17]:

print("\nSaving trained model state")
# Forward slashes work on Windows as well as Linux/macOS; the Models
# directory is expected to exist already.
fn = "./Models/houses_model.pth"
T.save(net.state_dict(), fn)  # weights and biases only (state_dict approach)

# To reuse the saved model later:
# saved_model = Net()
# saved_model.load_state_dict(T.load(fn))
# use saved_model to make prediction(s)

print("\nEnd House price demo")


Saving trained model state

End House price demo
