<a href="https://colab.research.google.com/github/molchai/ML_PUB/blob/master/model_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os

import numpy as np
import pandas as pd
import pymongo
import tensorflow as tf
# MongoClient() with no arguments connects to the driver's default host/port.
client = pymongo.MongoClient()
# NOTE: despite the name, `db` is the `stock` collection of the `finance`
# database; the data-loading helpers below call db.find(...) on it.
db = client.finance.stock
# distinct() documents no ordering for its result, while recursive_train
# slices this list by position as a chronological calendar, so sort it.
t = sorted(db.distinct("trade_date"))

In [0]:
# Import PyDrive and associated libraries.
# This only needs to be done once in a notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
# NOTE(review): the training/testing code in this notebook writes to
# '/content/drive/My Drive/...' paths; whether that location exists depends on
# Drive being mounted separately -- `drive` itself is not referenced by the
# functions visible in this notebook. Confirm the mount step exists.
auth.authenticate_user()  # Colab-side user authentication
gauth = GoogleAuth()
# Reuse the application-default credentials for PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


In [0]:
def getdata(start, end, batch_size=128):
    """Load training rows with start <= trade_date < end as a batched dataset.

    Queries the module-level ``db`` collection, dropping the ``_id``,
    ``trade_date``, ``stock_code`` and ``bin`` fields. The ``ret`` column is
    split off as the target; every remaining column is used as a feature.

    Args:
        start: Inclusive lower bound for ``trade_date``.
        end: Exclusive upper bound for ``trade_date``.
        batch_size: Number of rows per batch (defaults to 128, the value that
            was previously hard-coded).

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, target)`` batches, shuffled
        with a buffer as large as the number of rows.

    Raises:
        KeyError: If the query result has no ``ret`` column (for example when
            no documents match the date range).
    """
    query = {"trade_date": {"$gte": start, "$lt": end}}
    projection = {'_id': False, 'trade_date': False, 'stock_code': False, 'bin': False}
    frame = pd.DataFrame(db.find(query, projection))
    target = frame.pop('ret')
    dataset = tf.data.Dataset.from_tensor_slices((frame.values, target.values))
    # A buffer equal to the row count gives a full shuffle of the window.
    return dataset.shuffle(len(target)).batch(batch_size)

In [0]:
def get_val_data(start, end):
    """Return ``(features, target)`` arrays for start <= trade_date < end.

    Rows come from the module-level ``db`` collection with the ``_id``,
    ``trade_date``, ``stock_code`` and ``bin`` fields excluded. The ``ret``
    column is removed from the frame and returned separately as the target.
    """
    date_filter = {"trade_date": {"$gte": start, "$lt": end}}
    excluded_fields = {'_id': False, 'trade_date': False, 'stock_code': False, 'bin': False}
    frame = pd.DataFrame(db.find(date_filter, excluded_fields))
    labels = frame.pop('ret')
    return frame.values, labels.values

In [0]:
def get_test_data(start, end):
    """Return stock codes, features and targets for start <= trade_date < end.

    Unlike ``get_val_data``, the ``stock_code`` field is kept in the query so
    each row can be traced back to its stock.

    Args:
        start: Inclusive lower bound for ``trade_date``.
        end: Exclusive upper bound for ``trade_date``.

    Returns:
        A ``(stock_codes, features, target)`` tuple of NumPy arrays with one
        entry/row per matching document.

    Raises:
        KeyError: If the result lacks a ``stock_code`` or ``ret`` column (for
            example when no documents match the date range).
    """
    query = {"trade_date": {"$gte": start, "$lt": end}}
    projection = {'_id': False, 'trade_date': False, 'bin': False}
    frame = pd.DataFrame(db.find(query, projection))
    # 'trade_date' is excluded by the projection, so the identifier column to
    # split off is 'stock_code' (popping 'trade_date' raised KeyError).
    stock_code = frame.pop('stock_code')
    target = frame.pop('ret')
    return stock_code.values, frame.values, target.values

In [0]:
class MyModel(tf.keras.Model):
    """Feed-forward regressor: SELU dense stack with dropout and a skip connection.

    All layers are created once in ``__init__`` so their weights are tracked by
    the model, persist between calls, and appear in ``trainable_weights``.
    Creating layers inside ``call`` would build brand-new, untrained weights on
    every forward pass.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.layer_1 = tf.keras.layers.Dense(128, activation='selu')
        self.layer_2 = tf.keras.layers.Dense(64, activation='selu')
        self.layer_3 = tf.keras.layers.Dropout(rate=0.2)
        # Three further 64-unit SELU layers applied after dropout.
        self.hidden_layers = [
            tf.keras.layers.Dense(64, activation='selu') for _ in range(3)
        ]
        # Single linear unit producing the regression output.
        self.output_layer = tf.keras.layers.Dense(1, activation='linear')

    def call(self, input_tensor, training=False):
        """Run the forward pass.

        Args:
            input_tensor: Batch of feature rows.
            training: When true, dropout is active.

        Returns:
            Tensor with one linear output per input row.
        """
        x = self.layer_1(input_tensor)
        x = self.layer_2(x)
        x = self.layer_3(x, training=training)
        for layer in self.hidden_layers:
            x = layer(x)
        # NOTE(review): the skip connection adds the raw input to a 64-wide
        # activation, so the feature width must broadcast against 64 --
        # confirm against the actual number of feature columns.
        return self.output_layer(x + input_tensor)

In [0]:
def construct_porto(model, start_date, end_date, n):
    """Pick the ``n`` stocks with the highest predicted value and sum their targets.

    Args:
        model: Object exposing ``predict(X)``.
        start_date: Inclusive lower bound passed to ``get_test_data``.
        end_date: Exclusive upper bound passed to ``get_test_data``.
        n: Number of stocks to select; clamped to the number of available rows.

    Returns:
        ``(porto_ret, porto_stock)``: the NaN-ignoring sum of the selected
        rows' targets, and the array of their stock codes. The selected stocks
        are in no particular order.
    """
    code, X, Y = get_test_data(start_date, end_date)
    # predict() yields one column per output unit; flatten so the partition
    # runs across stocks rather than along a length-1 last axis.
    scores = np.ravel(model.predict(X))
    top_n = min(n, scores.shape[0])
    if top_n <= 0:
        # Nothing to select: empty portfolio with zero return.
        return 0.0, code[:0]
    # argpartition puts the top_n largest scores in the last top_n slots
    # without fully sorting.
    stock_pitch = np.argpartition(scores, -top_n)[-top_n:]
    porto_ret = np.nansum(Y[stock_pitch])
    porto_stock = code[stock_pitch]
    return porto_ret, porto_stock

In [0]:
def R_squared(target_y, predicted_y):
    """Return the uncentered R-squared of predictions against targets.

    Computed as ``1 - sum((target - predicted)^2) / sum(target^2)``; the
    denominator is not demeaned. Both inputs are flattened first so that a
    ``(N,)`` target and a ``(N, 1)`` prediction are compared element-wise
    instead of broadcasting to an ``(N, N)`` matrix.

    Args:
        target_y: Realized values.
        predicted_y: Predicted values with the same number of elements.

    Returns:
        Scalar float64 tensor. The result is not finite when every target
        is zero.
    """
    target = tf.reshape(tf.cast(target_y, tf.float64), [-1])
    predicted = tf.reshape(tf.cast(predicted_y, tf.float64), [-1])
    residual_ss = tf.reduce_sum(tf.square(target - predicted))
    total_ss = tf.reduce_sum(tf.square(target))
    # The bare ratio is the unexplained share; R-squared is its complement.
    return 1.0 - residual_ss / total_ss

In [0]:
def train_window(start, end, val_start, val_end, epoch, optimizer):
    """Train the module-level ``model`` on one window and checkpoint it.

    This is deliberately not wrapped in ``tf.function``: the body runs database
    queries, ``strftime`` calls, file writers and ``model.predict``, none of
    which can be traced into a graph.

    Args:
        start: Inclusive start date of the training window; must support
            ``strftime``.
        end: Exclusive end date of the training window; must support
            ``strftime``.
        val_start: Inclusive start date of the validation window.
        val_end: Exclusive end date of the validation window.
        epoch: Number of passes over the training window.
        optimizer: Keras optimizer used to apply gradients; also saved in the
            checkpoint.

    Side effects:
        Updates the weights of the global ``model``, writes ``loss_value`` and
        ``val_value`` scalars once per epoch, and saves a checkpoint after the
        last epoch.
    """
    train_dataset = getdata(start, end)
    x_val, y_val = get_val_data(val_start, val_end)
    # NOTE(review): the dates are appended straight onto 'training_logs' with
    # no path separator, giving sibling directories such as
    # 'training_logs20200101_...'. Kept as-is so existing log locations stay
    # valid.
    summary_writer = tf.summary.create_file_writer(
        '/content/drive/My Drive/ml/training_logs'
        + start.strftime("%Y%m%d") + '_' + end.strftime("%Y%m%d")
        + val_start.strftime("%Y%m%d") + '_' + val_end.strftime("%Y%m%d"))
    checkpoint_dir = '/content/drive/My Drive/ml/training_checkpoints'
    checkpoint_prefix = os.path.join(
        checkpoint_dir,
        start.strftime("%Y%m%d-%H%M%S") + end.strftime("%Y%m%d-%H%M%S"))
    # Instantiate the loss once; the instance (not the class) is what maps
    # (y_true, y_pred) to a scalar tensor.
    mse = tf.keras.losses.MeanSquaredError()

    for epoch_index in range(epoch):
        loss_value = None
        for x_batch_train, y_batch_train in train_dataset:
            with tf.GradientTape() as tape:
                y_batch_predicted = model(x_batch_train, training=True)
                loss_value = mse(y_batch_train, y_batch_predicted)
            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
        if loss_value is None:
            # The dataset produced no batches, so there is nothing to log.
            continue
        # Validation loss is evaluated once per epoch, which is the
        # granularity at which it is logged.
        val_value = mse(y_val, model.predict(x_val))
        with summary_writer.as_default():
            # loss_value is the loss of the final batch of this epoch.
            tf.summary.scalar('loss_value', loss_value, step=epoch_index)
            tf.summary.scalar('val_value', val_value, step=epoch_index)

    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    checkpoint.save(file_prefix=checkpoint_prefix)

In [0]:
def test_window(model, test_start, test_end, summary_writer, step=None):
    """Evaluate ``model`` on one test window and log MSE and R-squared.

    Args:
        model: Object exposing ``predict(X)``.
        test_start: Inclusive start date of the test window.
        test_end: Exclusive end date of the test window.
        summary_writer: ``tf.summary`` writer that receives the scalars.
        step: Summary step. When omitted and no default summary step is set,
            the step is derived from ``test_start`` as the integer
            ``YYYYMMDD``, which requires ``test_start`` to support
            ``strftime``.
    """
    X, Y = get_val_data(test_start, test_end)
    # Flatten the (N, 1) predictions so they line up with the (N,) targets.
    Y_predicted = np.ravel(model.predict(X))
    # Call a loss *instance*; arguments are (y_true, y_pred).
    test_loss = tf.keras.losses.MeanSquaredError()(Y, Y_predicted)
    # R_squared takes (target, predicted) in that order.
    R_Squared = R_squared(Y, Y_predicted)
    # tf.summary.scalar needs an explicit step unless a default step is set.
    if step is None and tf.summary.experimental.get_step() is None:
        step = int(test_start.strftime("%Y%m%d"))
    with summary_writer.as_default():
        tf.summary.scalar('test_loss', test_loss, step=step)
        tf.summary.scalar('R_Squared', R_Squared, step=step)

In [0]:
def recursive_train(model, datelist, epoch, optimizer, trainws, valws, n,
                    gap=5, stride=5):
    """Roll train/validate/test windows forward through ``datelist``.

    Each iteration lays out, by position in ``datelist``: a training window of
    ``trainws`` dates, a gap, a validation window of ``valws`` dates, another
    gap, and a test window spanning one date. It then trains, evaluates, builds
    the top-``n`` portfolio for the test date, and advances every window by
    ``stride`` positions. The loop stops once the test window's end index
    reaches the end of ``datelist``.

    Args:
        model: Model passed to ``test_window`` and ``construct_porto``.
        datelist: Chronologically ordered trade dates.
        epoch: Number of epochs per training window.
        optimizer: Optimizer forwarded to ``train_window``.
        trainws: Training window length, in dates.
        valws: Validation window length, in dates.
        n: Number of stocks held in each portfolio.
        gap: Dates skipped between consecutive windows (default 5).
        stride: Dates the whole layout advances per iteration (default 5).

    Returns:
        ``(trade_log, trade_ret)``: dicts keyed by test start date, holding the
        selected stock codes and the portfolio return respectively.

    Raises:
        ValueError: If ``stride`` is not positive (the loop would never end).
    """
    if stride <= 0:
        raise ValueError("stride must be a positive number of dates")
    summary_writer_test = tf.summary.create_file_writer(
        '/content/drive/My Drive/ml/testing_logs')
    trade_log = {}
    trade_ret = {}
    train_start = 0
    while True:
        train_end = train_start + trainws
        val_start = train_end + gap
        val_end = val_start + valws
        test_start = val_end + gap
        test_end = test_start + 1
        if test_end >= len(datelist):
            break
        test_start_date = datelist[test_start]
        test_end_date = datelist[test_end]
        train_window(datelist[train_start], datelist[train_end],
                     datelist[val_start], datelist[val_end], epoch, optimizer)
        test_window(model, test_start_date, test_end_date, summary_writer_test)
        # Pass dates, not list positions: construct_porto filters on trade_date.
        ret, porto_stock = construct_porto(model, test_start_date, test_end_date, n)
        trade_log[test_start_date] = porto_stock
        trade_ret[test_start_date] = ret
        train_start += stride
    return trade_log, trade_ret

In [0]:
class portofolio_analysis():
    """Container for a trade log with placeholder analytics methods.

    The analytics methods are not implemented yet and return ``None``.
    """

    def __init__(self, log=None):
        """Store the trade log.

        Args:
            log: Trade log to analyse. Optional, so that constructing the
                class with no arguments keeps working.
        """
        self._log = log

    # The initializer was previously misspelled ``_int_`` and therefore never
    # ran on construction; the old name is kept as an alias for any caller
    # that invoked it explicitly.
    _int_ = __init__

    def turnover(self):
        """Placeholder for portfolio turnover; not implemented."""
        pass

    def sharpe_ratio(self):
        """Placeholder for the Sharpe ratio; not implemented."""
        pass

    def maxdrawdown(self):
        """Placeholder for maximum drawdown; not implemented."""
        pass
    


In [0]:
# Module-level model instance; train_window reads this name as a global
# rather than receiving the model as an argument.
model=MyModel()

In [0]:
# Adam optimizer constructed with no arguments, i.e. library defaults.
optimizer=tf.keras.optimizers.Adam()

In [0]:
# Run the rolling train/validate/test loop over the trade dates in `t`:
# epoch=10, trainws=150, valws=10, n=100 (positional order of recursive_train).
# The returned (trade_log, trade_ret) pair is not bound to a name here.
recursive_train(model,t,10,optimizer,150,10,100)