In [262]:
import numpy as np
import random
import os, errno
import sys
from tqdm import trange
from copy import deepcopy

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import matplotlib.pyplot as plt

import pandas as pd
import math

In [263]:
class inp_layer_norm(nn.Module):
    '''Layer normalization applied over the feature (last) dimension of the input.'''

    def __init__(self,input_size):
        '''
        : param input_size:     the number of features in the input X
        '''
        super().__init__()
        # Attribute name is part of the state-dict keys ("layerNorm.weight"/"layerNorm.bias").
        self.layerNorm = nn.LayerNorm(input_size)

    def forward(self,x_input):
        '''
        : param x_input:     tensor whose last dimension holds input_size features
        : return:            normalized tensor with the same shape as x_input
        '''
        return self.layerNorm(x_input)

In [264]:
class lstm_final_output(nn.Module):
    '''Maps a decoder output vector onto the target feature(s) with one linear layer.'''

    def __init__(self, input_size, output_size):
        '''
        : param input_size:     the number of features in the input X
        : param output_size:    the number of features in the output Y
        '''
        super().__init__()
        self.input_size, self.output_size = input_size, output_size
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x_input):
        '''
        : param x_input:     tensor whose last dimension has input_size entries; a leading
        :                    dimension of size 1 is squeezed away before the projection
        : return output:     linear projection with output_size entries in the last dimension
        '''
        squeezed = x_input.squeeze(0)
        return self.linear(squeezed)

    
class lstm_encoder(nn.Module):
    ''' Encodes time-series sequence '''

    def __init__(self, input_size, hidden_size, dropout = 0.0, num_layers = 1):

        '''
        : param input_size:     the number of features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param dropout:        dropout probability handed to nn.LSTM
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''

        super(lstm_encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # define LSTM layer
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, dropout=dropout,
                            num_layers = num_layers)


    def forward(self, x_input):

        '''
        : param x_input:               input of shape (seq_len, # in batch, input_size)
        : return lstm_out, hidden:     lstm_out gives all the hidden states in the sequence;
        :                              hidden gives the hidden state and cell state for the last
        :                              element in the sequence
        '''

        # The view keeps the first two dimensions and pins the last one to input_size,
        # so an input with the wrong feature count fails here rather than inside the LSTM.
        lstm_out, self.hidden = self.lstm(x_input.view(x_input.shape[0], x_input.shape[1], self.input_size))

        return lstm_out, self.hidden

    def init_hidden(self, batch_size):

        '''
        initialize hidden state
        : param batch_size:    x_input.shape[1]
        : return:              zeroed hidden state and cell state, created on the same
        :                      device and with the same dtype as the LSTM weights so they
        :                      can be fed to the LSTM after .cuda() / .double() etc.
        '''

        reference = self.lstm.weight_hh_l0
        state_shape = (self.num_layers, batch_size, self.hidden_size)

        return (reference.new_zeros(state_shape),
                reference.new_zeros(state_shape))


class lstm_decoder(nn.Module):
    ''' One-step LSTM decoder that continues from a given (hidden, cell) state '''

    def __init__(self, input_size, hidden_size, dropout=0.0, num_layers = 1):
        '''
        : param input_size:     the number of features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param dropout:        dropout probability handed to nn.LSTM
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''
        super().__init__()
        self.input_size, self.hidden_size, self.num_layers = input_size, hidden_size, num_layers

        # LSTM first, projection second (same construction order as the parameters are stored).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, dropout=dropout)
        # Projects the hidden state back to input_size so the output can be fed in again.
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x_input, encoder_hidden_states):
        '''
        : param x_input:                    should be 2D (batch_size, input_size)
        : param encoder_hidden_states:      (hidden state, cell state) to start from
        : return output, hidden:            output is the projected LSTM output for this single
        :                                   step, shape (batch_size, input_size); hidden is the
        :                                   updated (hidden state, cell state)
        '''
        single_step = x_input.unsqueeze(0)      # add a sequence dimension of length 1
        step_out, new_state = self.lstm(single_step, encoder_hidden_states)
        self.hidden = new_state
        projected = self.linear(step_out.squeeze(0))

        return projected, self.hidden

class lstm_seq2seq(nn.Module):
    ''' train LSTM encoder-decoder and make predictions '''

    def __init__(self, input_size, hidden_size, output_size, dropout = 0.0, num_layers=1):

        '''
        : param input_size:     the number of expected features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param output_size:    the number of features in the output Y
        : param dropout:        dropout probability used by the encoder and decoder LSTMs
        : param num_layers:     number of stacked LSTM layers in encoder and decoder
        '''

        super(lstm_seq2seq, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.layer_norm = inp_layer_norm(input_size = input_size)
        self.encoder = lstm_encoder(input_size = input_size, hidden_size = hidden_size, dropout=dropout, num_layers=num_layers)
        self.decoder = lstm_decoder(input_size = input_size, hidden_size = hidden_size, dropout=dropout, num_layers=num_layers)
        self.outputter = lstm_final_output(input_size = input_size, output_size = output_size)

    def _decode_into(self, model_input, outputs):

        '''
        Encode an (already layer-normalized) input and roll the decoder forward.

        : param model_input:   normalized input of shape (seq_len, # in batch, input_size)
        : param outputs:       pre-allocated tensor; row t receives the prediction of decoder step t
        : return outputs:      the same tensor, filled in place
        '''

        # The encoder starts from nn.LSTM's default zero state.
        _, decoder_hidden = self.encoder(model_input)

        # The last observed time step seeds the decoder; afterwards it consumes its own output.
        decoder_input = model_input[-1, :, :]   # shape: (batch_size, input_size)

        for t in range(outputs.shape[0]):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[t] = self.outputter(decoder_output)
            decoder_input = decoder_output

        return outputs

    def train_model(self, input_tensor, target_tensor, n_epochs, target_len, batch_size, learning_rate = 0.01):

        '''
        train lstm encoder-decoder

        : param input_tensor:              input data with shape (seq_len, # in batch, number features); PyTorch tensor
        : param target_tensor:             target data with shape (seq_len, # in batch, number features); PyTorch tensor
        : param n_epochs:                  number of epochs
        : param target_len:                number of values to predict
        : param batch_size:                number of samples per gradient update
        : param learning_rate:             float >= 0; learning rate
        : return losses:                   array with the mean batch loss of each epoch
        : return losses_per_horizon:       array (n_epochs, target_len) with the mean batch loss of
        :                                  each forecast step, averaged over the epoch's batches
        : raises ValueError:               if batch_size is not positive or exceeds the number of samples
        '''

        n_samples = input_tensor.shape[1]
        if batch_size <= 0 or n_samples < batch_size:
            raise ValueError(
                "batch_size must be between 1 and the number of samples "
                "({0}); got {1}".format(n_samples, batch_size))

        # initialize array of losses
        losses = np.full(n_epochs, np.nan)
        losses_per_horizon = np.full((n_epochs, target_len), np.nan)

        optimizer = optim.Adam(self.parameters(), lr = learning_rate)
        criterion = nn.MSELoss()

        # number of full batches; a trailing partial batch is dropped
        n_batches = n_samples // batch_size

        with trange(n_epochs) as tr:
            for it in tr:

                epoch_loss = 0.
                epoch_loss_per_horizon = np.zeros(target_len)

                for b in range(n_batches):
                    # select data: consecutive, non-overlapping windows of batch_size samples
                    start = b * batch_size
                    input_batch = input_tensor[:, start: start + batch_size, :]
                    target_batch = target_tensor[:, start: start + batch_size, :]

                    # zero the gradient
                    optimizer.zero_grad()

                    # layer normalization
                    input_batch = self.layer_norm(input_batch)

                    # outputs tensor, allocated on the same device / dtype as the data
                    outputs = input_batch.new_zeros(target_len, batch_size, target_batch.shape[2])

                    # encode, then predict recursively
                    self._decode_into(input_batch, outputs)

                    # compute the loss
                    loss = criterion(outputs, target_batch)
                    with torch.no_grad():
                        # bookkeeping only; .item() moves each scalar off the graph/device
                        for t in range(target_len):
                            epoch_loss_per_horizon[t] += criterion(outputs[t], target_batch[t]).item()
                    epoch_loss += loss.item()

                    # backpropagation
                    loss.backward()
                    optimizer.step()

                # loss for epoch
                losses[it] = epoch_loss / n_batches
                losses_per_horizon[it] = epoch_loss_per_horizon / n_batches

                # progress bar
                tr.set_postfix(loss="{0:.3f}".format(losses[it]))

        return losses, losses_per_horizon

    def predict(self, input_tensor, target_len):

        '''
        : param input_tensor:      input data (seq_len, input_size); PyTorch tensor
        : param target_len:        number of target values to predict
        : return np_outputs:       np.array (target_len, output_size) containing predicted values;
        :                          prediction done recursively
        '''

        # Inference only: switch off dropout for the call and restore the previous mode afterwards.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                # add in batch size of 1 and apply the same normalization used during training
                model_input = self.layer_norm(input_tensor.unsqueeze(1))

                # initialize tensor for predictions on the input's device
                outputs = model_input.new_zeros(target_len, self.output_size)

                # encode + decode input_tensor
                self._decode_into(model_input, outputs)
        finally:
            self.train(was_training)

        np_outputs = outputs.cpu().detach().numpy()

        return np_outputs

In [265]:
# Folder (with trailing separator) that holds every CSV read or written by this notebook.
# This is a machine-specific absolute Windows path. Every backslash is escaped explicitly:
# an unescaped "\T" is an invalid escape sequence and triggers a warning on recent Python versions.
data_folder = "C:\\Users\\spars\\Documents\\Master\\JHU\\TML\\HomePriceBeastNew\\"

In [290]:
def get_counterfactual_sale(X_post_covid, post_covid_label, lstm_model, ow = 1):
    '''
    Roll the sale-price model forward per county, feeding each prediction back as a lag.

    For every (county, state) group the samples are visited in row order. Each sample is
    predicted with lstm_model.predict; the first predicted value is stored and also written
    into feature 0 of the group's next sample (previous values shifted by one step), so later
    predictions build on earlier predictions instead of on the observed values.

    : param X_post_covid:       float np.array (seq_len, n_samples, n_features); feature 0 is the
    :                           series that is overwritten. NOTE: modified in place - pass a copy
    :                           if the caller still needs the original values.
    : param post_covid_label:   np.array indexed [k, sample, 0] with k = 0 county, 1 period, 2 state
    : param lstm_model:         object with predict(tensor, target_len=...) returning a 2D np.array
    : param ow:                 target_len handed to predict; only the first predicted value is used
    : return:                   DataFrame with columns predicted_median_sale_price, period_begin,
    :                           state_code, county_name (one row per sample)
    '''
    labels_county = post_covid_label[0,:,0]
    labels_period = post_covid_label[1,:,0]
    labels_state = post_covid_label[2,:,0]
    labels_fips = [c + ',' + s for c,s in zip(list(labels_county), list(labels_state))]

    # Group sample indices by county key in a single pass.
    indices_by_fips = {}
    for sample_index, fips in enumerate(labels_fips):
        indices_by_fips.setdefault(fips, []).append(sample_index)

    # Run on whatever device the model lives on; objects without parameters fall back to CPU.
    try:
        device = next(lstm_model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cpu")

    frames = []

    # np.unique yields the county keys in sorted order.
    for fips in np.unique(labels_fips):
        indices = indices_by_fips[fips]
        last_position = len(indices) - 1

        pred_res = []
        for position, sample_index in enumerate(indices):
            window = torch.from_numpy(X_post_covid[:, sample_index, :]).type(torch.Tensor).to(device)
            y_res = lstm_model.predict(window, target_len = ow)
            prediction = y_res[0][0]
            pred_res.append(prediction)

            if position != last_position:
                # Next sample's feature-0 history = current history shifted by one + new prediction.
                next_index = indices[position + 1]
                X_post_covid[:, next_index, 0] = np.append(X_post_covid[1:, sample_index, 0], prediction)

        frames.append(pd.DataFrame({
            'predicted_median_sale_price': pred_res,
            'period_begin': labels_period[indices],
            'state_code': labels_state[indices[0]],
            'county_name': labels_county[indices[0]],
        }))

    if not frames:
        return pd.DataFrame(columns=['predicted_median_sale_price', 'period_begin',
                                     'state_code', 'county_name'])

    # One concat at the end instead of growing a frame row-group by row-group.
    return pd.concat(frames, ignore_index = True)

In [267]:
def convert_frame_to_numpy(df, remove_cols, target_prefix, related_prefix, J,H):
    '''
    Build per-row model inputs, targets and labels from a wide lag/lead frame.

    : param df:               frame containing `{target_prefix}_lag_j`, `{target_prefix}`,
    :                         `{target_prefix}_lead_h`, the same for `related_prefix`, the
    :                         columns in remove_cols and any number of other columns
    : param remove_cols:      identifying columns; returned as the label of each row
    : param target_prefix:    column-name prefix of the series to predict
    : param related_prefix:   column-name prefix of the related series fed as an input
    : param J:                number of lag steps
    : param H:                number of lead steps
    : return X, y, label:     np.array wrappers around the results of df.apply (one entry per row).
    :                         Per row, x is (2 + number of other columns, J), y is (1, H + 1)
    :                         and label is (1, len(remove_cols)).
    :                         NOTE(review): presumably these are 1-D object arrays holding one
    :                         array per row - confirm against the pandas version in use.

    The related row is [lag_1, current, lead_1..lead_H], i.e. H + 2 values; it is appended
    below the J target lags with np.append(axis=0), which only works when H + 2 == J.
    '''
    
    # Column groups. Lags run from the oldest (lag J) to the most recent (lag 1).
    y_lag_cols = [f'{target_prefix}_lag_{j}' for j in range(J,0,-1)]
    y_lead_cols = [target_prefix] + [f'{target_prefix}_lead_{h}' for h in range(1,H+1,1)]    
    # x_rel_cols is only used below to keep these columns out of other_cols.
    x_rel_cols = [f'{related_prefix}_lag_{j}' for j in range(J,0,-1)]
    x_rel_lead_cols = [f'{related_prefix}_lag_1'] + [related_prefix] + [f'{related_prefix}_lead_{h}' for h in range(1,H+1,1)] 
    # Everything that is neither a lag/lead column nor a label column is treated as a per-row constant.
    other_cols = [x for x in df.columns if x not in y_lag_cols + y_lead_cols + x_rel_cols + remove_cols + x_rel_lead_cols]
    print(f"Length of other columns = {len(other_cols)}")
    print(other_cols)
    
    
    def get_label_row(row):
        # (1, len(remove_cols)) array with the identifying values of this row.
        label = np.array([row[remove_cols].values])
        
        return label
    
    def get_xvec_row(row):
        # Row 0: the J target lags.
        x = np.array([row[y_lag_cols].values])
        # Row 1: related series around the current period (lag 1, current, leads), which makes
        # this a nowcast that uses future values of the related series to predict the target.
        x = np.append(x,[row[x_rel_lead_cols].values],axis=0)
        # Remaining rows: every other column, its single value repeated J times.
        stat_val = row[other_cols].values
        stat_val = np.tile(stat_val,[J,1])
        stat_val = np.transpose(stat_val)
        x = np.append(x,stat_val,axis=0)
        
        return x
    
    def get_yvec_row(row):
        # (1, H + 1) array: current target value followed by its H leads.
        y = np.array([row[y_lead_cols].values])
        
        return y
            
    X = np.array(df.apply(get_xvec_row, axis = 1))
    y = np.array(df.apply(get_yvec_row, axis = 1))
    label = np.array(df.apply(get_label_row, axis = 1))
    
    return X,y, label
    
# Arguments for convert_frame_to_numpy: label columns, series prefixes and window sizes.
remove_cols = ["county_name", "period_begin", "state_code"]
target_prefix, related_prefix = 'median_sale_price', 'inventory'
J, H = 5, 3   # J lag steps, H lead steps

In [268]:
# Predicted inventory for the post-COVID window: parse the period column and drop the
# "Unnamed: 0" column (presumably the index written out by an earlier to_csv call).
post_covid_predicted_df_inv = (
    pd.read_csv(f"{data_folder}post_covid_predicted_df_inv.csv")
    .assign(period_begin=lambda frame: pd.to_datetime(frame['period_begin']))
    .drop(columns=["Unnamed: 0"])
)

# Predicted inventory for the test window.
# NOTE(review): the file name starts with "ptest_" - confirm this is the intended file.
test_predicted_df_inv = (
    pd.read_csv(f"{data_folder}ptest_predicted_df_inv.csv")
    .assign(period_begin=lambda frame: pd.to_datetime(frame['period_begin']))
)

In [269]:
# Strip the surrounding "[" / "]" from the inventory values, map "nan" to "0" and convert to numbers.
# astype(str) makes the cell safe to re-run: once the column is numeric the string methods
# would otherwise fail, whereas a number rendered as text converts straight back.
post_covid_predicted_df_inv['inventory'] = pd.to_numeric(
    post_covid_predicted_df_inv['inventory']
    .astype(str)
    .str.rstrip("]")
    .str.lstrip("[")
    .str.replace("nan", "0", regex=False)
)

In [270]:
def get_lead_lag_features(feature_df,L,colname,time_feature,lag=True):
    '''
    Append L shifted copies of one column, shifted within groups.

    Every column other than colname and time_feature is used as a grouping key. The frame is
    sorted by the grouping keys and then by time_feature, re-indexed, and for each step
    1..L a column `{colname}_lag_{step}` (lag=True) or `{colname}_lead_{step}` (lag=False)
    is added. Positions that fall outside a group are filled with 0.

    : param feature_df:     input frame (not modified)
    : param L:              number of shifted columns to create
    : param colname:        column to shift
    : param time_feature:   column that defines the order inside each group
    : param lag:            True for past values, False for future values
    : return:               sorted copy of feature_df with the new columns appended
    '''
    direction, sign = ("lag", 1) if lag else ("lead", -1)

    group_cols = [c for c in feature_df.columns if c not in (colname, time_feature)]
    print(group_cols)

    ordered = feature_df.sort_values(group_cols + [time_feature]).reset_index(drop=True)

    shifted = pd.DataFrame()
    for step in range(1, L + 1):
        shifted[f"{colname}_{direction}_{step}"] = ordered.groupby(group_cols)[colname].shift(step * sign)
    shifted = shifted.fillna(0)

    return pd.concat([ordered, shifted], axis=1)

In [271]:
def get_lag_features(feature_df, J, colname, time_feature):
    '''Add J lag columns of colname by delegating to get_lead_lag_features with lag=True.'''
    return get_lead_lag_features(feature_df, J, colname, time_feature, lag=True)

def get_lead_features(feature_df, H, colname, time_feature):
    '''Add H lead columns of colname by delegating to get_lead_lag_features with lag=False.'''
    want_lag = False
    return get_lead_lag_features(feature_df, H, colname, time_feature, lag=want_lag)

In [272]:
# Series and window sizes for the inventory lag/lead features.
dt_col, y = "period_begin", "inventory"
H, J = 3, 5

# Both helpers sort and re-index the frame the same way, so their outputs line up row by row.
y_feature_df_lag = get_lag_features(post_covid_predicted_df_inv, J, y, dt_col)
y_feature_df_lead = get_lead_features(post_covid_predicted_df_inv, H, y, dt_col)

# Keep only the columns the lead frame adds on top of the lag frame.
lead_only_cols = [c for c in y_feature_df_lead.columns if c not in y_feature_df_lag.columns]
y_feature_df_lead = y_feature_df_lead[lead_only_cols]

y_feature_df = pd.concat([y_feature_df_lag, y_feature_df_lead], axis=1)

# NOTE: this rebinds the input name, so the cell is not idempotent - re-running it without
# reloading the CSV would treat the new lag/lead columns as grouping keys.
post_covid_predicted_df_inv = y_feature_df

['state_code', 'county_name']
['state_code', 'county_name']


In [273]:
def chunks(a, size):
    '''
    Yield consecutive lists of up to `size` items taken from the iterable `a`.

    The last list may be shorter. A size of 1 or less yields one-item lists.
    '''
    source = iter(a)
    for head in source:
        group = [head]
        for _ in range(size - 1):
            try:
                group.append(next(source))
            except StopIteration:
                # Input ran out in the middle of a group: emit what has been collected.
                break
        yield group

In [274]:
# Show the inventory frame with its lag/lead columns (zeros mark shifts outside a county's series).
post_covid_predicted_df_inv

Unnamed: 0,inventory,period_begin,state_code,county_name,inventory_lag_1,inventory_lag_2,inventory_lag_3,inventory_lag_4,inventory_lag_5,inventory_lead_1,inventory_lead_2,inventory_lead_3
0,2325.72600,2020-03-02,AK,Kenai Peninsula Borough,0.00000,0.00000,0.00000,0.00000,0.00000,1414.33390,807.65160,722.06620
1,1414.33390,2020-03-09,AK,Kenai Peninsula Borough,2325.72600,0.00000,0.00000,0.00000,0.00000,807.65160,722.06620,754.86550
2,807.65160,2020-03-16,AK,Kenai Peninsula Borough,1414.33390,2325.72600,0.00000,0.00000,0.00000,722.06620,754.86550,880.01920
3,722.06620,2020-03-23,AK,Kenai Peninsula Borough,807.65160,1414.33390,2325.72600,0.00000,0.00000,754.86550,880.01920,909.75635
4,754.86550,2020-03-30,AK,Kenai Peninsula Borough,722.06620,807.65160,1414.33390,2325.72600,0.00000,880.01920,909.75635,890.38330
...,...,...,...,...,...,...,...,...,...,...,...,...
118258,590.97925,2021-05-03,WV,Morgan County,590.97050,590.96190,590.95325,590.94470,590.93604,590.98785,590.99650,591.00500
118259,590.98785,2021-05-10,WV,Morgan County,590.97925,590.97050,590.96190,590.95325,590.94470,590.99650,591.00500,591.01360
118260,590.99650,2021-05-17,WV,Morgan County,590.98785,590.97925,590.97050,590.96190,590.95325,591.00500,591.01360,0.00000
118261,591.00500,2021-05-24,WV,Morgan County,590.99650,590.98785,590.97925,590.97050,590.96190,591.01360,0.00000,0.00000


In [275]:
#Get best median sale price model. 
#Get results for the best post covid prediction and test prediction. 
#Get the static features for test and post covid time. 


In [276]:
# Feature frames of the sale-price model for the post-COVID and test windows, read from data_folder.
post_covid_frame = pd.read_csv(f"{data_folder}post_covid_frame_df_sale_model.csv")
test_frame = pd.read_csv(f"{data_folder}test_frame_df_sale_model.csv")

In [277]:
# Every column whose name contains "inventory"; these are replaced by the predicted values later.
del_cols = list(filter(lambda name: "inventory" in name, post_covid_frame.columns))
del_cols

['inventory',
 'inventory_lag_1',
 'inventory_lag_2',
 'inventory_lag_3',
 'inventory_lag_4',
 'inventory_lag_5',
 'inventory_lead_1',
 'inventory_lead_2',
 'inventory_lead_3']

In [278]:
# Start the counterfactual frame from the observed one: remove the inventory columns,
# parse the period column and drop the "Unnamed: 0" column. post_covid_frame is left untouched.
post_covid_counterfactual_frame = (
    post_covid_frame
    .drop(columns=del_cols)
    .assign(period_begin=lambda frame: pd.to_datetime(frame['period_begin']))
    .drop(columns=["Unnamed: 0"])
)

In [279]:
# Show the counterfactual frame before the predicted inventory columns are merged in.
post_covid_counterfactual_frame

Unnamed: 0,state_code,county_name,period_begin,median_sale_price,median_sale_price_lag_1,median_sale_price_lag_2,median_sale_price_lag_3,median_sale_price_lag_4,median_sale_price_lag_5,median_sale_price_lead_1,...,state_code_dummy_RI,state_code_dummy_SC,state_code_dummy_TN,state_code_dummy_TX,state_code_dummy_UT,state_code_dummy_VA,state_code_dummy_VT,state_code_dummy_WA,state_code_dummy_WI,state_code_dummy_WV
0,AK,Kenai Peninsula Borough,2020-03-02,242687.5,208312.5,232656.2,231456.2,235868.7,242243.7,250562.5,...,0,0,0,0,0,0,0,0,0,0
1,AK,Kenai Peninsula Borough,2020-03-09,250562.5,242687.5,208312.5,232656.2,231456.2,235868.7,247125.0,...,0,0,0,0,0,0,0,0,0,0
2,AK,Kenai Peninsula Borough,2020-03-16,247125.0,250562.5,242687.5,208312.5,232656.2,231456.2,251750.0,...,0,0,0,0,0,0,0,0,0,0
3,AK,Kenai Peninsula Borough,2020-03-23,251750.0,247125.0,250562.5,242687.5,208312.5,232656.2,239875.0,...,0,0,0,0,0,0,0,0,0,0
4,AK,Kenai Peninsula Borough,2020-03-30,239875.0,251750.0,247125.0,250562.5,242687.5,208312.5,235356.2,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118258,WV,Morgan County,2021-05-03,249987.5,244987.5,233737.5,215626.3,256613.8,247113.8,256237.5,...,0,0,0,0,0,0,0,0,0,1
118259,WV,Morgan County,2021-05-10,256237.5,249987.5,244987.5,233737.5,215626.3,256613.8,244012.5,...,0,0,0,0,0,0,0,0,0,1
118260,WV,Morgan County,2021-05-17,244012.5,256237.5,249987.5,244987.5,233737.5,215626.3,235887.5,...,0,0,0,0,0,0,0,0,0,1
118261,WV,Morgan County,2021-05-24,235887.5,244012.5,256237.5,249987.5,244987.5,233737.5,276612.5,...,0,0,0,0,0,0,0,0,0,1


In [280]:
# Attach the predicted inventory columns. No key is given, so pandas joins on every column
# name the two frames share.
# NOTE(review): in the rendered outputs the last row index grows from 118261 before the merge
# to 134753 after it, so the inner join multiplies some rows - presumably duplicate join keys
# in one of the frames; confirm and deduplicate if that is not intended.
post_covid_counterfactual_frame = post_covid_counterfactual_frame.merge(
    post_covid_predicted_df_inv,
    how="inner",
)

In [281]:
# Show the counterfactual frame after the merge (predicted inventory columns are now at the end).
post_covid_counterfactual_frame

Unnamed: 0,state_code,county_name,period_begin,median_sale_price,median_sale_price_lag_1,median_sale_price_lag_2,median_sale_price_lag_3,median_sale_price_lag_4,median_sale_price_lag_5,median_sale_price_lead_1,...,state_code_dummy_WV,inventory,inventory_lag_1,inventory_lag_2,inventory_lag_3,inventory_lag_4,inventory_lag_5,inventory_lead_1,inventory_lead_2,inventory_lead_3
0,AK,Kenai Peninsula Borough,2020-03-02,242687.5,208312.5,232656.2,231456.2,235868.7,242243.7,250562.5,...,0,2325.72600,0.00000,0.00000,0.00000,0.00000,0.00000,1414.33390,807.65160,722.06620
1,AK,Kenai Peninsula Borough,2020-03-09,250562.5,242687.5,208312.5,232656.2,231456.2,235868.7,247125.0,...,0,1414.33390,2325.72600,0.00000,0.00000,0.00000,0.00000,807.65160,722.06620,754.86550
2,AK,Kenai Peninsula Borough,2020-03-16,247125.0,250562.5,242687.5,208312.5,232656.2,231456.2,251750.0,...,0,807.65160,1414.33390,2325.72600,0.00000,0.00000,0.00000,722.06620,754.86550,880.01920
3,AK,Kenai Peninsula Borough,2020-03-23,251750.0,247125.0,250562.5,242687.5,208312.5,232656.2,239875.0,...,0,722.06620,807.65160,1414.33390,2325.72600,0.00000,0.00000,754.86550,880.01920,909.75635
4,AK,Kenai Peninsula Borough,2020-03-30,239875.0,251750.0,247125.0,250562.5,242687.5,208312.5,235356.2,...,0,754.86550,722.06620,807.65160,1414.33390,2325.72600,0.00000,880.01920,909.75635,890.38330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134750,WV,Morgan County,2021-05-03,249987.5,244987.5,233737.5,215626.3,256613.8,247113.8,256237.5,...,1,590.97925,590.97050,590.96190,590.95325,590.94470,590.93604,590.98785,590.99650,591.00500
134751,WV,Morgan County,2021-05-10,256237.5,249987.5,244987.5,233737.5,215626.3,256613.8,244012.5,...,1,590.98785,590.97925,590.97050,590.96190,590.95325,590.94470,590.99650,591.00500,591.01360
134752,WV,Morgan County,2021-05-17,244012.5,256237.5,249987.5,244987.5,233737.5,215626.3,235887.5,...,1,590.99650,590.98785,590.97925,590.97050,590.96190,590.95325,591.00500,591.01360,0.00000
134753,WV,Morgan County,2021-05-24,235887.5,244012.5,256237.5,249987.5,244987.5,233737.5,276612.5,...,1,591.00500,590.99650,590.98785,590.97925,590.97050,590.96190,591.01360,0.00000,0.00000


In [282]:
# Arguments for convert_frame_to_numpy (same values as assigned earlier in the notebook).
remove_cols = ["county_name", "period_begin", "state_code"]
target_prefix, related_prefix = 'median_sale_price', 'inventory'
J, H = 5, 3   # J lag steps, H lead steps

In [283]:
# Convert the merged counterfactual frame into per-row inputs, targets and labels.
(X_post_covid_counterfactual,
 y_post_covid_expected,
 post_covid_label_counterfactual) = convert_frame_to_numpy(
    post_covid_counterfactual_frame,
    remove_cols,
    target_prefix,
    related_prefix,
    J,
    H,
)

Length of other columns = 82
['R_INTERNATIONAL_MIG_2019', 'Unemployment_rate_2020', 'PCT_COLL_4_2015_19', 'PCT_COLL_1TO3_2000', 'PCT_HSD_Only_2000', 'R_NET_MIG_2019', 'Med_HH_Income_Percent_of_State_Total_2019', 'GQ_ESTIMATES_2019', 'N_POP_CHG_2019', 'INTERNATIONAL_MIG_2019', 'NET_MIG_2019', 'HSD_Only_2000', 'DOMESTIC_MIG_2019', 'RESIDUAL_2019', 'Deaths_2019', 'COLL_4_2000', 'POP_ESTIMATE_2019', 'LT_HSD_2015_19', 'COLL_1TO3_2000', 'Unemployed_2020', 'NATURAL_INC_2019', 'GQ_ESTIMATES_BASE_2010', 'Employed_2020', 'LT_HSD_2000', 'COLL_4_2015_19', 'HSD_Only_2015_19', 'COLL_1TO3_2015_19', 'Civilian_labor_force_2020', 'CENSUS_2010_POP', 'PCT_LT_HSD_2000', 'R_birth_2019', 'PCT_COLL_1TO3_2015_19', 'PCT_COLL_4_2000', 'Economic_typology_2015', 'R_death_2019', 'state_code_dummy_AK', 'state_code_dummy_AL', 'state_code_dummy_AR', 'state_code_dummy_AZ', 'state_code_dummy_CA', 'state_code_dummy_CO', 'state_code_dummy_CT', 'state_code_dummy_DC', 'state_code_dummy_DE', 'state_code_dummy_FL', 'state_cod

In [284]:
# Number of rows in one sample's x array (one row per feature, as built by convert_frame_to_numpy).
stack_range = np.array(X_post_covid_counterfactual[0]).shape[0]
# Stack all samples vertically, then cut the stack back into groups of stack_range rows,
# which gives one list of feature rows per sample.
X_post_covid_counterfactual_stack = list(chunks(np.vstack(X_post_covid_counterfactual), stack_range))
# Labels and targets get a trailing axis of length 1.
post_covid_label_counterfactual_stack = np.expand_dims(np.vstack(post_covid_label_counterfactual),axis=2)
y_post_covid_expected_stack = np.expand_dims(np.vstack(y_post_covid_expected),axis=2)

# Reorder the axes for the model. Assuming the stacked X is (samples, features, steps), the two
# swaps produce (steps, samples, features) - TODO confirm with a .shape check.
X_post_covid_counterfactual_swap = np.array(X_post_covid_counterfactual_stack).swapaxes(0,1).swapaxes(0,2)
# Labels / targets: swap the first two axes so the sample index becomes the second axis.
post_covid_label_counterfactual_swap = np.array(post_covid_label_counterfactual_stack).swapaxes(0,1)
y_post_covid_expected_swap = np.array(y_post_covid_expected_stack).swapaxes(0,1)

In [287]:
# Rebuild the sale-price network (input size taken from the last axis of the input array,
# hidden_size=15, two LSTM layers, one output) and move it to the GPU. load_state_dict needs
# these settings to match the architecture stored in the checkpoint.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
best_median_model = lstm_seq2seq(input_size = X_post_covid_counterfactual_swap.shape[2], hidden_size = 15, dropout = 0.0,
                     output_size = 1, num_layers=2).cuda()
best_median_model.load_state_dict(torch.load('./sale_model.pth'))

<All keys matched successfully>

In [None]:
# Roll the model forward per county. astype(float) hands the function a fresh copy, so the
# in-place updates it performs do not touch X_post_covid_counterfactual_swap.
counterfactual_df = get_counterfactual_sale(X_post_covid_counterfactual_swap.astype(float), post_covid_label_counterfactual_swap, best_median_model)

In [291]:
# Observed sale prices with their identifying columns, for both windows.
sale_columns = ['state_code', 'county_name', 'period_begin', 'median_sale_price']
post_covid_expected_df_sale = post_covid_frame[sale_columns]
test_expected_df_sale = test_frame[sale_columns]

In [293]:
# Write the post-COVID predictions and the observed prices next to the input data.
# to_csv is called without index=False, so the row index is written as the first column.
counterfactual_df.to_csv(f"{data_folder}post_covid_predicted_df_sale.csv")
post_covid_expected_df_sale.to_csv(f"{data_folder}post_covid_expected_df_sale.csv")

In [None]:
# NOTE(review): despite the "_test" name this call receives the same post-COVID inputs and
# labels as the counterfactual_df call above, so it looks like it reproduces that result.
# No test-window arrays are built in the visible cells - confirm which inputs were intended.
counterfactual_df_test = get_counterfactual_sale(X_post_covid_counterfactual_swap.astype(float), post_covid_label_counterfactual_swap, best_median_model)

In [None]:
# Write the test-window predictions and the observed test prices.
# The expected file must come from test_expected_df_sale (built from test_frame), not from
# the post-COVID frame, otherwise test_expected_df_sale.csv holds post-COVID prices.
counterfactual_df_test.to_csv(f"{data_folder}test_predicted_df_sale.csv")
test_expected_df_sale.to_csv(f"{data_folder}test_expected_df_sale.csv")