<a href="https://colab.research.google.com/github/mmistroni/jupyter/blob/master/TFTimeSeries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas_datareader
!pip install tensorflow==1.15

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from datetime import date
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.contrib.learn import ModeKeys
import tensorflow.contrib.rnn as rnn
from datetime import datetime, date, timedelta





In [None]:
# Display the TensorFlow version that was actually imported (the install cell pins 1.15).
tf.__version__

In [None]:

from pandas.tseries.offsets import BDay
# Key under which the input window is stored in the feature dict handed to the model.
TIMESERIES_COL = 'rawdata'
# Each SEQ_LEN-long sequence is split into N_INPUTS leading feature values and
# N_OUTPUTS trailing label values (see the slicing in create_train_and_test).
N_OUTPUTS = 5
SEQ_LEN = 20
# Not referenced by any of the visible cells.
DEFAULTS = 0.0
# First argument given to rnn.BasicLSTMCell in simple_rnn (the cell size); it is not a layer count.
LSTM_SIZE = 5
N_INPUTS = SEQ_LEN - N_OUTPUTS
# Forwarded to _train_fn / _test_fn by experiment_fn.
BATCH_SIZE = 20
# Template for the estimator model_dir; run_model fills the placeholder with a UTC timestamp.
ROOT_DIR = '/home/mmistroni/tf_logs/rnn-run-{}'


def get_prices(startdate, enddate, symbol):
  """Download closing prices for ``symbol`` and return them as percentage changes.

  Args:
    startdate: start of the requested window, forwarded to pandas_datareader.
    enddate: end of the requested window, forwarded to pandas_datareader.
    symbol: ticker to request from Yahoo.

  Returns:
    numpy array with one percentage change per returned price row. The first
    entry is 0 because the NaN produced by ``pct_change`` is filled with 0.
  """
  import pandas_datareader as pdr
  # The template and its values used to be passed to print as separate
  # arguments, so the placeholders were never filled in.
  print('--Start:{}, end:{}'.format(startdate, enddate))

  closes = pdr.get_data_yahoo(symbol, startdate, enddate)[['Close']]
  pct_changes = closes.pct_change().fillna(0)
  return np.stack(pct_changes['Close'])
  
def create_training_data2(inputData):
  """Cut ``inputData`` into consecutive, non-overlapping windows of SEQ_LEN values.

  Trailing values that do not fill a complete window are dropped.

  Args:
    inputData: sliceable sequence that supports ``len``.

  Returns:
    list of numpy arrays, one per complete window, in input order.
  """
  # Format the message instead of printing the raw template next to the value.
  print('Len of input data is {}'.format(len(inputData)))
  n_windows = len(inputData) // SEQ_LEN
  return [np.array(inputData[start:start + SEQ_LEN])
          for start in range(0, n_windows * SEQ_LEN, SEQ_LEN)]

def create_training_data(inputData):
  """Reshape ``inputData`` into rows of SEQ_LEN values.

  Args:
    inputData: numpy array; its element count must be a multiple of SEQ_LEN,
      otherwise ``reshape`` raises ValueError.

  Returns:
    numpy array with SEQ_LEN columns.
  """
  # Format the message instead of printing the raw template next to the value.
  print('AdjClose is of shape {}'.format(inputData.shape))
  return inputData.T.reshape(-1, SEQ_LEN)
  
  
def create_time_series(end_date, numDays=360, symbol='XOM'):
  """Build SEQ_LEN-long windows of percentage changes for ``symbol``.

  Args:
    end_date: last day of the price history to download.
    numDays: number of business days to go back from ``end_date``.
    symbol: ticker to download.

  Returns:
    list of numpy arrays as produced by create_training_data2.
  """
  # Report the real look-back window; the message used to hard-code 280 days
  # regardless of numDays.
  print("==== CREATING TIMESERIES GOING BACK {} BUSINESS DAYS FROM  {}".format(numDays, end_date))
  start_date = end_date - BDay(numDays)
  prices = get_prices(start_date, end_date, symbol=symbol)
  return create_training_data2(prices)
  


In [None]:
from datetime import date
import pandas_datareader as pdr
# Ad-hoc check: print the Close column of each ticker from 2020-08-03 up to today.
for ticker in ['KR','WST','AMGN','FAST','DXCM','DOCU','GMAB']: 
  print(pdr.get_data_yahoo(ticker, date(2020,8,3), date.today())[['Close']])

In [None]:

def create_train_and_test(end_date, symbol='XOM',extra_prices=None):
  """Download the history for ``symbol`` and split it into train and test sets.

  Args:
    end_date: last day of the history to download.
    symbol: ticker to download.
    extra_prices: optional collection of additional SEQ_LEN-long sequences
      appended to the downloaded windows before splitting.

  Returns:
    (X_train, y_train, X_test, y_test). X holds the first N_INPUTS values of
    every window and y the last N_OUTPUTS values.
  """
  all_timeseries = create_time_series(end_date, symbol=symbol)
  # Test explicitly for "something to add": the truth value of a
  # multi-element numpy array is ambiguous and `if extra_prices:` would raise.
  if extra_prices is not None and len(extra_prices) > 0:
    # Report the number of extra sequences rather than dumping their contents.
    print('ADDING extra prices with size:{}'.format(len(extra_prices)))
    all_timeseries = all_timeseries + list(extra_prices)
  print("Timeseries is of type:{} and has length:{}".format(type(all_timeseries), len(all_timeseries)))
  all_data = np.stack(all_timeseries)
  print('All data shape is{0} and type {1}'.format(all_data.shape,type(all_data)))
  # Leading columns are the features, the trailing N_OUTPUTS columns the labels.
  X, y = all_data[...,0:-N_OUTPUTS], all_data[...,-N_OUTPUTS:]
  print ('X is fo type {0}, y  of type {1}'.format(type(X[0][0]), type(y)))
  print ('X.shape is {0}, y shap is {1}'.format(X.shape, y.shape))
  # Fixed random_state keeps the 90/10 split reproducible between runs.
  X_train, X_test, y_train, y_test = train_test_split(X,
                                                      y,
                                                      test_size=0.1,
                                                      random_state=1)
  return X_train, y_train, X_test, y_test



In [None]:
# Run the data pipeline once with today's date and the default symbol; the
# cell output is the returned (X_train, y_train, X_test, y_test) tuple.
create_train_and_test(date.today())

<h3> Creating RNN Model </h3>


In [None]:

# create the inference model
def simple_rnn(features, labels, mode, params):
  """Estimator model_fn: one LSTM cell unrolled over the input window plus a linear layer.

  Args:
    features: dict of tensors; the entry under TIMESERIES_COL is split into
      N_INPUTS pieces along axis 1 (assumes shape (batch, N_INPUTS) — TODO
      confirm against the input functions).
    labels: target tensor compared with the predictions; only read in
      TRAIN/EVAL mode.
    mode: mode key; loss, train_op and the rmse metric are only built when it
      equals ModeKeys.TRAIN or ModeKeys.EVAL.
    params: not used by this function.

  Returns:
    tf.estimator.EstimatorSpec whose predictions dict exposes the model output
    under the key "predicted".
  """
  # 0. Reformat input shape to become a sequence (a list of N_INPUTS tensors)
  print ('IN Features are:{0}'.format(features))
  x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
  #print 'x={}'.format(x)
    
  # 1. configure the RNN
  lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
  outputs, _ = tf.nn.static_rnn(lstm_cell, x, dtype=tf.float32)

  # keep only the output produced at the last step of the unrolled RNN
  outputs = outputs[-1]
  #print 'last outputs={}'.format(outputs)
  
  # predictions are a linear projection of the last RNN output to N_OUTPUTS values;
  # weight and bias start from random normal values
  weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
  bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
  predictions = tf.matmul(outputs, weight) + bias
    
  # 2. loss function, training/eval ops
  # NOTE: the train_op is built in EVAL mode as well as in TRAIN mode.
  if mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL:
     loss = tf.losses.mean_squared_error(labels, predictions)
     train_op = tf.contrib.layers.optimize_loss(
         loss=loss,
         global_step=tf.train.get_global_step(),
         learning_rate=0.01,
         optimizer="SGD") # SGD
     eval_metric_ops = {
      "rmse": tf.metrics.root_mean_squared_error(labels, predictions)
     }
  else:
     # any other mode (e.g. prediction): there are no labels, so no loss/training ops
     loss = None
     train_op = None
     eval_metric_ops = None
  
  # 3. Create predictions
  predictions_dict = {"predicted": predictions}

  # 4. Create export outputs  
  export_outputs = {"predicted": tf.estimator.export.PredictOutput(predictions)}

  # 5. return the EstimatorSpec
  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions_dict,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=eval_metric_ops,
      export_outputs=export_outputs)



<h3> Creating Serving Function, Train Function and Test Function </h3>

In [None]:
def serving_input_receiver_fn():
  """Describe the tensors an exported model accepts at serving time.

  A float32 placeholder of shape [None, N_INPUTS] is the receiver tensor. The
  feature handed to the model is that placeholder expanded on a trailing axis
  and squeezed back on axis 2, in an op named 'timeseries'.

  Returns:
    tf.estimator.export.ServingInputReceiver built from the feature dict and
    the placeholder dict, both keyed by TIMESERIES_COL.
  """
  raw_input = tf.placeholder(tf.float32, [None, N_INPUTS])
  receiver_tensors = {TIMESERIES_COL: raw_input}

  # Same two ops as before (expand, then squeeze) so the exported graph is unchanged.
  expanded = tf.expand_dims(raw_input, -1)
  served = tf.squeeze(expanded, axis=[2], name='timeseries')
  features = {TIMESERIES_COL: served}

  print('serving: features={}'.format(served))

  return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)



# Creating a TrainFn and a TestFn
def _train_fn(X, y, batch_size):
    """Build the input function used for training.

    Args:
      X: feature windows, cast to float32 before use.
      y: labels matching ``X``, cast to float32 before use.
      batch_size: kept for interface compatibility; currently unused because
        the whole data set is returned as a single batch (see TODO below).

    Returns:
      Zero-argument callable returning ``(features, labels)``, where features
      is a dict keyed by TIMESERIES_COL.
    """
    def _train():
        """Return the complete training set as one (features, labels) pair."""
        # TODO need to be refactored according to https://medium.com/google-cloud/how-to-do-time-series-prediction-using-rnns-and-tensorflow-and-cloud-ml-engine-2ad2eeb189e8
        # so that batch_size is honoured. The tf.data pipeline that used to
        # follow the return statement was unreachable and has been removed.
        X_32 = tf.cast(X, tf.float32)
        y_32 = tf.cast(y, tf.float32)
        inputs = tf.concat(X_32, axis=1)
        label = tf.concat(y_32, axis=1)
        return {TIMESERIES_COL: inputs}, label
    return _train

def _test_fn(X, y, batch_size):
    """Build the input function used for evaluation.

    Args:
      X: feature windows, cast to float32 before use.
      y: labels matching ``X``, cast to float32 before use.
      batch_size: kept for interface compatibility; currently unused because
        the whole data set is returned as a single batch.

    Returns:
      Zero-argument callable returning ``(features, labels)``, where features
      is a dict keyed by TIMESERIES_COL.
    """
    def _test():
        """Return the complete evaluation set as one (features, labels) pair."""
        # The tf.data pipeline that used to follow the return statement was
        # unreachable and has been removed.
        X_32 = tf.cast(X, tf.float32)
        y_32 = tf.cast(y, tf.float32)
        inputs = tf.concat(X_32, axis=1)
        label = tf.concat(y_32, axis=1)
        return {TIMESERIES_COL: inputs}, label
    return _test
  

 


<h3> Creating Experiment Function and running model </h3>

In [None]:
def _predict_fn(X) :
    """Build the input function used for prediction.

    Args:
      X: input windows, cast to float32 before use.

    Returns:
      Zero-argument callable returning the feature dict (no labels), keyed by
      TIMESERIES_COL.
    """
    def _predict():
        """Return the prediction features for the captured ``X``."""
        as_float = tf.cast(X, tf.float32)
        return {TIMESERIES_COL: tf.concat(as_float, axis=1)}
    return _predict




def experiment_fn(output_dir, X, y, x_tst, y_tst):
    """Train and evaluate a simple_rnn estimator, exporting it at the end.

    Args:
      output_dir: model directory for checkpoints and the export.
      X: training features.
      y: training labels.
      x_tst: evaluation features.
      y_tst: evaluation labels.

    Returns:
      The tf.estimator.Estimator after train_and_evaluate has run.
    """
    # Training stops after 1500 steps.
    training = tf.estimator.TrainSpec(
        input_fn=_train_fn(X, y, BATCH_SIZE),
        max_steps=1500)

    # The final model is exported under the name 'timeseries'.
    final_export = tf.estimator.FinalExporter(
        'timeseries', serving_input_receiver_fn)
    evaluation = tf.estimator.EvalSpec(
        input_fn=_test_fn(x_tst, y_tst, BATCH_SIZE),
        exporters=[final_export])

    rnn_estimator = tf.estimator.Estimator(
        model_fn=simple_rnn, model_dir=output_dir)
    tf.estimator.train_and_evaluate(rnn_estimator, training, evaluation)
    return rnn_estimator


def run_model(end_date, symbol='XOM', extra_prices=None):
  """Build the data set for ``symbol`` and train an estimator on it.

  Args:
    end_date: last day of the history used for training.
    symbol: ticker to train on.
    extra_prices: optional extra sequences forwarded to create_train_and_test.

  Returns:
    The trained estimator returned by experiment_fn.
  """
  print('Attempting to generate learning model for {}, using end_date={}'.format(symbol, end_date))
  tf.logging.set_verbosity(tf.logging.ERROR)
  # A fresh, timestamped model directory for every run.
  output_dir = ROOT_DIR.format(datetime.utcnow().strftime('%Y%m%d%H%M%S'))
  X_train, y_train, X_test, y_test = create_train_and_test(end_date, symbol=symbol, extra_prices=extra_prices)
  estimator = experiment_fn(output_dir, X_train, y_train, X_test, y_test)
  # Format the message; the template and values used to be printed as separate arguments.
  print('Xtrain is:{} and of shape:{}'.format(type(X_train), X_train.shape))
  return estimator


<h3> SETTING STARTDATE, ENDDATE AND TICKER </h3>

In [None]:
# END_DATE is today; START_DATE lies N_INPUTS business days before it.
# START_DATE is what train_estimator passes to run_model as the end of the
# training history, and it anchors the price window fetched by
# get_last_20_days and the dates generated in generate_results.
END_DATE= date.today()
START_DATE = END_DATE - BDay(N_INPUTS)


<h3> Now running model </h3>

In [None]:
def train_estimator(symbol, extra_prices=None):
  """Print the run configuration and train a model for ``symbol``.

  The training history ends at the module-level START_DATE.

  Args:
    symbol: ticker to train on.
    extra_prices: optional extra sequences forwarded to run_model.

  Returns:
    The trained estimator returned by run_model.
  """
  print('===================== CONFIGURATION FOR {}======================'.format(symbol))
  print('STARTDATE={}'.format(START_DATE))
  print('ENDDATE={}'.format(END_DATE))
  print('SYMBOL={}'.format(symbol))
  # START_DATE is the END of the training history, and the window length is
  # chosen by create_time_series, so the message no longer claims "280 days from".
  print('=== TRAINING MODEL ON HISTORY ENDING AT {}'.format(START_DATE))
  return run_model(START_DATE, symbol=symbol, extra_prices=extra_prices)


<h3> Now, after training, let's make some predictions for the future </h3>

In [None]:
from datetime import date
from pprint import pprint
# Only show TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)    

def get_last_20_days(symbol):
  """Fetch the most recent closes for ``symbol`` as percentage changes.

  The requested window runs from one business day before START_DATE to
  END_DATE.

  Args:
    symbol: ticker to download.

  Returns:
    (changes, first_price): ``changes`` is a numpy array of percentage changes
    (first entry 0 after ``fillna``); ``first_price`` is the first row of the
    single-column Close frame, i.e. a length-1 array rather than a scalar.
  """

  def get_prices2(startdate=None, enddate=None, symbol=''):
    """Return the Close column for ``symbol`` between the two dates."""
    import pandas_datareader as pdr
    from datetime import date
    # Format the message; the template and values used to be printed as
    # separate arguments.
    print('--Start:{}, end:{}'.format(startdate, enddate))

    if not startdate and not enddate:
      # No window supplied: end 2 * SEQ_LEN business days ago and go back a
      # further 122 business days from there.
      last_check = date.today() - BDay(SEQ_LEN * 2)
      enddate = last_check
      startdate = last_check - BDay(122)
    return pdr.get_data_yahoo(symbol, startdate, enddate)[['Close']]

  print('==== GETTING MOST RECENT PRICES for {}======'.format(symbol))
  stock_data = get_prices2(START_DATE - BDay(1), END_DATE, symbol=symbol)
  percentage_changes = stock_data.pct_change().fillna(0)
  # Base price from which the percentage changes are compounded later on.
  first_price = stock_data.values[0]
  print('==============First ever price of the series of shape {} is {}'.format(stock_data.shape, first_price ))
  prices = np.stack(percentage_changes['Close'])
  print('===== PRICES ARE===')
  pprint(prices)
  return prices, first_price




<h3> Now recalculating price percentages to predict </h3>

In [None]:
def make_predictions(prices, estimator, iterations=8):
  """Roll the model forward, feeding its own predictions back in as input.

  The most recent N_INPUTS values of ``prices`` seed an accumulator. Each
  round sends the last N_INPUTS accumulated values to the estimator and
  appends the values it predicts.

  Args:
    prices: numpy array of percentage changes, oldest first.
    estimator: object whose ``predict(input_fn=...)`` yields dicts holding the
      predicted values under the key 'predicted'.
    iterations: number of prediction rounds to run.

  Returns:
    list holding the N_INPUTS seed values followed by every predicted value.

  Raises:
    ValueError: if ``prices`` holds fewer than N_INPUTS values.
  """
  print('Prices is of shape:{}'.format(prices.shape))
  flat_prices = np.asarray(prices).reshape(-1)
  if flat_prices.shape[0] < N_INPUTS:
    raise ValueError('Need at least {} prices to predict, got {}'.format(
        N_INPUTS, flat_prices.shape[0]))
  # Always seed with the latest window. Previously, when the length was an
  # exact multiple of 15, the array was reshaped to (-1, N_INPUTS) and row 0
  # (the oldest window) was used instead.
  seed = flat_prices[-N_INPUTS:].reshape(-1, N_INPUTS)
  print('Reshaped Prices is of shape:{}'.format(seed.shape))
  print('-------- PREDICTING -------')
  print(type(seed))
  acc = seed[0].tolist()
  for i in range(iterations):
    print('Iteration:{}.Accumulator length:{}'.format(i, len(acc)))
    # Given the last N_INPUTS values, predict the next ones.
    candidate = np.array(acc[-N_INPUTS:]).reshape(-1, N_INPUTS)

    pred = estimator.predict(input_fn=_predict_fn(candidate))
    vals = next(pred)['predicted']

    # NOTE: retraining the model on the predicted values between rounds was
    # sketched here at one point but never implemented.
    print('Round {} Prediction: on {}={}'.format(i, candidate, vals))
    print('{}={}'.format(type(vals), vals.tolist()))
    acc += vals.tolist()
  return acc



<h3> Now visualizing price predictions against date time </h3>

In [None]:
def calculate_list_of_prices(idx, first_price, zipped, accumulator):
  """Turn (change, timestamp) pairs into compounded prices.

  Implemented as a loop instead of one recursive call per entry, so long
  series cannot hit the interpreter's recursion limit, and an empty
  ``zipped`` simply returns the accumulator.

  Args:
    idx: position in ``zipped`` to start from (callers pass 0).
    first_price: price the change at position 0 is applied to.
    zipped: sequence of ``(change, timestamp)`` pairs.
    accumulator: list extended in place with ``(timestamp, change, price)``
      tuples; when ``idx`` is greater than 0 its last tuple supplies the
      previous price.

  Returns:
    The same ``accumulator`` list, after extension.
  """
  for position in range(idx, len(zipped)):
    change = zipped[position][0]
    ts = zipped[position][1]
    if position == 0:
      # The very first change compounds on the supplied starting price.
      previous_price = first_price
    else:
      _, _, previous_price = accumulator[-1]
    accumulator.append((ts, change, previous_price * (1 + change)))
  return accumulator
  

def generate_results(acc, first_price):
  """Compound the accumulated percentage changes into prices and report them.

  Args:
    acc: list of percentage changes as returned by make_predictions (the
      N_INPUTS seed values followed by the predicted ones).
    first_price: price the first change is applied to.

  Returns:
    (res, last_prices): ``res`` is ``first_price`` compounded through every
    change in ``acc``; ``last_prices`` holds the final three
    ``(timestamp, change, price)`` tuples of the computed series.
  """
  from functools import reduce
  from pprint import pprint
  print('-------------- END OF STORY.Computing increase from {}'.format(first_price))
  # The first N_INPUTS entries are the seed window; the rest are predictions.
  # (Uses the shared constant instead of a hard-coded 15.)
  future_days = len(acc) - N_INPUTS
  pprint('Accumlator has length:{}'.format(len(acc)))
  pprint('We have prediction for the next:{} days'.format(future_days))
  print("calculating business days to zip ")
  # One business day per accumulated change, counting forward from START_DATE.
  dts = [START_DATE + BDay(offset) for offset in range(len(acc))]
  print('Start Date:{}={}'.format(dts[0], START_DATE))
  print('End date:{}'.format(dts[-1]))
  zipped = list(zip(acc, dts))

  print('===== FULL PREDICTIONS======')
  # Final price: fold every change into the starting price. The lambda
  # parameters are named so they no longer shadow ``acc``.
  res = reduce(lambda price, change: price * (1 + change), acc, first_price)
  print('Final Price for series{} = {}'.format(zipped[-1], res))

  final_prices = calculate_list_of_prices(0, first_price, zipped, [])
  print('Final list of prices...')
  pprint(final_prices)
  return res, final_prices[-3:]

# Tickers to train on and predict, one model per ticker.
ALL_SYMBOLS = ['DXCM', 'DOCU', 'GMAB']

# Collects one (symbol, final predicted price) pair per ticker.
results = []
for symbol in ALL_SYMBOLS:
  print('Running predictions for:{}'.format(symbol))
  
  estimator = train_estimator(symbol)
  latest_prices, first_price = get_last_20_days(symbol)
  # Roll the freshly trained model forward for 10 prediction rounds.
  acc = make_predictions(latest_prices, estimator, iterations=10)
  res, final_prices = generate_results(acc, first_price)
  #pprint(final_prices)

  # first_price comes from stock_data.values[0] in get_last_20_days, so the
  # compounded result is indexed to pull out the single value.
  results.append((symbol, res[0]))
from pprint import pprint
pprint(results)



In [None]:
import pandas as pd
# Tabulate the (symbol, final predicted price) pairs collected in `results`.
pd.DataFrame(results, columns=['Symbol', 'MidSeptember Price'])

In [None]:
import pandas as pd
# NOTE(review): this renders the same `results` list as the previous table and
# only the column label differs — confirm the prediction cell was re-run for
# the intended horizon before relying on this label.
pd.DataFrame(results, columns=['Symbol', 'MidNovember Price'])