# Training Policy

##### This module deals with training any policy and serialize and store the model

In [68]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [69]:
import pandas as pd
import numpy as np
import pickle
import requests
import datetime
import pytz
from my_packages.policy_train import PolicyTrain
from my_packages.environment import StockEnv

In [70]:
from dotenv import dotenv_values
env_vars = dotenv_values('.env')  # Load environment variables from .env file

In [None]:
#data to train the model for a range of dates
def train_model(policy, start_date, end_date, company, headers, get_log=False): 
    while(start_date < end_date) :
        #conversion to UTC
        utc_start_time = pytz.utc.localize(start_date)
        #conversion to rc3339 format as accepted by ALPACA API
        rc3339_start = utc_start_time.isoformat()

        #random limit for the number of minutes to train E [50,100]
        limit = np.random.randint(50, 100)

        utc_end_time = utc_start_time + datetime.timedelta(minutes=limit)
        rc3339_end = utc_end_time.isoformat()
        print(f"Training from {rc3339_start} to {rc3339_end}")

        #ALPACA API call to get the data
        url = f"https://data.alpaca.markets/v2/stocks/bars?symbols={company}&timeframe=1Min&start={rc3339_start[:19]+'Z'}&end={rc3339_end[:19]+'Z'}&limit={limit}&adjustment=raw&feed=sip&sort=asc"
        response = requests.get(url, headers=headers)
        json_data = response.json()

        #dataframe creation
        df = pd.DataFrame(json_data['bars'][f'{company}'])
        #data preprocessing
        training_df = pd.DataFrame()
        training_df.insert(0,'co',((df['c'] - df['o'] )*10/ df['o']), allow_duplicates=True)
        training_df.insert(1,'hl',((df['h'] - df['l'] )*10/ df['o']), allow_duplicates=True)
        training_df.insert(2,'ho',((df['h'] - df['o'] )*10/ df['o']), allow_duplicates=True)
        training_df.insert(3,'ol',((df['o'] - df['l'] )*10/ df['o']), allow_duplicates=True)
        training_df.insert(4,'c',(df['c']/1000),allow_duplicates=True)
        training_df.insert(5,'vol',(df['v']/10000000), allow_duplicates=True)

        X = training_df.to_numpy()# numpy array prepared of corresponding dataframe

        #environment setup
        env = StockEnv()
        env.set_env_data(X)#set the data to the environment
        
        #training tool setup
        training_tool = PolicyTrain(policy,env,epsilon=0.9,gamma=0.9)
        #training tool training
        training_tool.episode_train(batch_size=400, get_log=get_log)
        #updating start_date for the next iteration
        start_date += datetime.timedelta(minutes=limit) 


In [72]:
#this contains the path where the policy thaT is to be trained is stored
policy_path = "../model/model6inputs"

In [73]:
file = open(policy_path , 'rb')
policy = pickle.load(file)
file.close()

In [79]:
start_date = datetime.datetime(2024, 1, 3, 9, 30, 0)
end_date = datetime.datetime(2024, 1, 3, 11, 0, 0)
headers = {
    "accept": "application/json",
    "APCA-API-KEY-ID": env_vars["ALPACA_API_KEY"],
    "APCA-API-SECRET-KEY": env_vars["ALPACA_API_SECRET"],
}

In [80]:
train_model(policy, start_date, end_date, 'TSLA', headers, get_log=False)

Training from 2024-01-03T09:30:00+00:00 to 2024-01-03T11:07:00+00:00


In [81]:
policy.layers[0].W[0,0:20]

array([ 0.86787786,  0.89229731,  0.87624142,  1.46541578,  1.47182501,
       -0.00813568,  0.87091785,  0.8954603 ,  0.0061263 ,  0.88306299,
        0.00731507,  0.89757116,  1.50003431,  1.49236362,  0.86874734,
        0.88679481,  1.46824356,  1.46584178,  0.87759466,  0.91752057])

In [77]:
file = open(policy_path,'wb')
pickle.dump(policy,file)
file.close()