In [135]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import pandas as pd
import csv
import cleaned_econ_data
#sess = tf.InteractiveSession()

In [136]:
STEPS = 5000
DATA_PATH = '../data/QuarterlyResults.csv'
FEATURE_NAMES = ['funds-rate', 'oil-cpi', 'high-tax-rate', 'low-tax-rate', 'unemployment', 'snp']
# LABEL is GDP in this model

tf.logging.set_verbosity(tf.logging.INFO)

In [137]:
## Data Functions
def import_data(path):
    with open(path, 'r') as f:
        dat = list(csv.reader(f))
        ar = np.array(dat)

    ## assumes columns are:
    ## index, DATE, funds_rate, oil_cpi, high_tax_rate, low_tax_rate, unemployment, real_gdp, snp
    
    ## remove index column
    ar = np.delete(ar,0,1)
    
    ## save DATE column and then remove
    dates = np.split(ar,8,1)[0].flatten()
    ar = np.delete(ar,0,1)
    
    ## convert data to floats
    data = np.array(ar.tolist(), dtype=float)
    
    ## split label (gdp) from features
    features = np.delete(data,5,1)
    label    = np.delete(data,[0,1,2,3,4,6],1).flatten()
    
    return (dates, features, label)

def split_data(dates, features, label, split_index):
    train_dates     = dates[:split_index]
    train_features  = features[:split_index]
    train_label     = label[:split_index]
    
    test_dates      = dates[split_index:]
    test_features   = features[split_index:]
    test_label      = label[split_index:]
    
    return (train_dates, train_features, train_label, test_dates, test_features, test_label)

def create_dataframe(numpy_data, feature_names):
    return pd.DataFrame(numpy_data, columns=feature_names)

def create_dataset(features, labels):
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    return dataset.batch(30)

## Evalation Functions
def percentage_error(predicted, actual):
    if len(predicted)!=len(actual):
        print("Cannot compare datasets of different length")
    
    sum_percentage_error = 0
    for i in range(len(predicted)):
        sum_percentage_error = abs(predicted[i]-actual[i])/float(actual[i])
    
    return float(sum_percentage_error)/float(len(predicted)) 


In [138]:
(dates, features, label) = import_data(DATA_PATH)

(train_dates, train_features, train_label, test_dates, test_features, test_label) = split_data(dates, features, label, 168)

train_features_df = create_dataframe(train_features, FEATURE_NAMES)
test_features_df  = create_dataframe(test_features, FEATURE_NAMES)

In [139]:
#(train, test) = cleaned_econ_data.dataset()

# # Build the training input_fn.
# def input_train():
#     return (
#             # Shuffling with a buffer larger than the data set ensures
#             # that the examples are well mixed.
#             train.batch(128)
#             # Repeat forever
#             .repeat().make_one_shot_iterator().get_next())

# # Build the validation input_fn.
# def input_test():
#     return (test.batch(128)
#                     .make_one_shot_iterator().get_next())

feature_columns = [
        tf.feature_column.numeric_column(key="funds-rate"),
        tf.feature_column.numeric_column(key="oil-cpi"),
        tf.feature_column.numeric_column(key="high-tax-rate"),
        tf.feature_column.numeric_column(key="low-tax-rate"),
        tf.feature_column.numeric_column(key="unemployment"),
        tf.feature_column.numeric_column(key="snp")
]


In [140]:
# Build a DNNRegressor, with 2x20-unit hidden layers, with the feature columns
# defined above as input.
model = tf.estimator.DNNRegressor(
        hidden_units=[20, 20], feature_columns=feature_columns)

# Train the model with STEPS iterations
model.train(input_fn=lambda : create_dataset(train_features_df, train_label), steps=STEPS)

# Evaluate how the model performs on data it has not yet seen.
predictions = list(model.predict(input_fn=lambda : create_dataset(test_features_df, test_label)))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmph8__iqa5', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f627c1ba7b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmph8__iqa5/model.ckpt.
INFO:tensorflow:loss = 844410600.0, step = 1
INFO:tenso

In [141]:
predicted_gpd = [p['predictions'][0] for p in predictions]
err = percentage_error(predicted_gpd, test_label.tolist())

In [142]:
err

0.021822760309935005