# Training Policy

##### This module deals with training a policy, then serializing and storing the trained model

In [86]:
# Load IPython's autoreload extension so edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
import pandas as pd
import numpy as np
import pickle
from my_packages.policy_train import PolicyTrain
from my_packages.environment import StockEnv

In [88]:
# Path to the pickled policy that is to be trained (it is loaded from here and,
# at the end of the notebook, written back to the same location).
policy_path = "../model/model6inputs"
# Path to the CSV data on which the policy will be trained.
data_path = "./data/AAPL-jul-24.csv"

In [89]:
# Load the pickled policy object from disk. The context manager closes the file
# handle even if unpickling raises, which the manual open()/close() pair did not.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load model files that come from a trusted source.
with open(policy_path, 'rb') as file:
    policy = pickle.load(file)

In [90]:
# The features that are required by this model are:
policy.data_features

['c-o/o', 'h-l/o', 'h-o/o', 'o-l/o', 'c', 'vol']

In [91]:
# Peek at the first 20 entries in row 0 of the first layer's W (presumably the
# weight matrix) before training; the same slice is displayed again after
# training so the two can be compared.
policy.layers[0].W[0,0:20]

array([ 0.01644218, -0.0089881 , -0.00302969, -0.00428415,  0.01272927,
        0.00295105,  0.00127162,  0.02243782,  0.01062767,  0.00028178,
        0.00118358,  0.00126384,  0.01411984,  0.02218069,  0.00496575,
       -0.00668175, -0.0098788 , -0.00528056, -0.01223937,  0.02107784])

In [92]:
# Read the training data from the CSV file and preview its first rows.
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,c,h,l,n,o,t,v,vw
0,211.84,211.89,211.7,168,211.89,2024-07-01T08:00:00Z,2488,211.801805
1,211.68,211.68,211.68,44,211.68,2024-07-01T08:01:00Z,800,211.68
2,211.83,211.83,211.78,37,211.78,2024-07-01T08:02:00Z,1587,211.793333
3,211.79,211.88,211.79,51,211.83,2024-07-01T08:03:00Z,4499,211.80494
4,211.71,211.71,211.71,68,211.71,2024-07-01T08:04:00Z,1677,211.71


For scaling purposes, the price ratios are multiplied by 10, the close price is scaled down by a factor of 1e3, and the volume is scaled down by a factor of 1e7.

In [93]:
# Scale factors that bring every feature into a comparable numeric range.
# They are named here so the scaling applied to the training data is explicit
# and can be kept identical across training runs.
RATIO_SCALE = 10            # price ratios are multiplied by this factor
CLOSE_DIVISOR = 1000        # close price is divided by this factor
VOLUME_DIVISOR = 10000000   # traded volume is divided by this factor

# Build the feature table in one step. Column order matters because the frame
# is later converted to a plain array: co, hl, ho, ol, c, vol.
# Each ratio is (difference * RATIO_SCALE) / open, evaluated in that order.
open_price = df['o']
training_df = pd.DataFrame({
    'co': (df['c'] - df['o']) * RATIO_SCALE / open_price,   # (close - open) / open
    'hl': (df['h'] - df['l']) * RATIO_SCALE / open_price,   # (high - low) / open
    'ho': (df['h'] - df['o']) * RATIO_SCALE / open_price,   # (high - open) / open
    'ol': (df['o'] - df['l']) * RATIO_SCALE / open_price,   # (open - low) / open
    'c': df['c'] / CLOSE_DIVISOR,
    'vol': df['v'] / VOLUME_DIVISOR,
})
training_df.head()

Unnamed: 0,co,hl,ho,ol,c,vol
0,-0.00236,0.008967,0.0,0.008967,0.21184,0.000249
1,0.0,0.0,0.0,0.0,0.21168,8e-05
2,0.002361,0.002361,0.002361,0.0,0.21183,0.000159
3,-0.001888,0.004249,0.00236,0.001888,0.21179,0.00045
4,0.0,0.0,0.0,0.0,0.21171,0.000168


In [94]:
# Print the per-column maximum, minimum and mean of the scaled features,
# each preceded by its label.
column_stats = (
    ("max : ", training_df.max(axis=0)),
    ("min : ", training_df.min(axis=0)),
    ("avg : ", training_df.mean(axis=0)),
)
for label, values in column_stats:
    print(label, values)



max :  co     0.119515
hl     0.196283
ho     0.195413
ol     0.153042
c      0.236927
vol    0.917720
dtype: float64
min :  co    -0.150898
hl     0.000000
ho     0.000000
ol     0.000000
c      0.210500
vol    0.000011
dtype: float64
avg :  co     0.000019
hl     0.006092
ho     0.003025
ol     0.003067
c      0.227053
vol    0.007326
dtype: float64


In [95]:
# Convert the feature table to a NumPy array and display its shape.
X = training_df.to_numpy()
X.shape

(10000, 6)

In [96]:
# Create the stock environment and hand it the feature array.
env = StockEnv()
env.set_env_data(X)

In [97]:
# Build the trainer for this policy/environment pair.
# NOTE(review): epsilon and gamma are presumably the exploration rate and the
# discount factor -- confirm against PolicyTrain.
training_tool = PolicyTrain(policy,env,epsilon=0.5,gamma=0.9)

In [98]:
# Run training, then record the data file used in the policy's training log.
# NOTE: re-running this cell calls episode_train again and appends the same
# path to the log a second time.
training_tool.episode_train(batch_size=100)
policy.training_log.append(data_path)

In [99]:
# Count how often each output index has the highest predicted value across
# all rows of X. The result is a (unique indices, counts) pair.
# np.argmax yields integer indices, so NaN handling is irrelevant here; the
# former `equal_nan=False` argument had no effect on the result and only made
# the cell fail on NumPy versions older than 1.24, where it does not exist.
prediction = policy.predict(X)
uniq = np.unique(np.argmax(prediction, axis=1), return_counts=True)
uniq

(array([1, 2]), array([1498, 8502]))

In [100]:
# Show the data paths recorded in the policy's training log so far.
policy.training_log

['./data/AAPL-may-24.csv', './data/AAPL-jun-24.csv', './data/AAPL-jul-24.csv']

In [101]:
# Display the same slice of the first layer's W that was shown before training.
# NOTE(review): the recorded output of this cell is identical to the
# pre-training output -- verify that episode_train actually updates the
# weights of this `policy` object (and not, e.g., a copy).
policy.layers[0].W[0,0:20]

array([ 0.01644218, -0.0089881 , -0.00302969, -0.00428415,  0.01272927,
        0.00295105,  0.00127162,  0.02243782,  0.01062767,  0.00028178,
        0.00118358,  0.00126384,  0.01411984,  0.02218069,  0.00496575,
       -0.00668175, -0.0098788 , -0.00528056, -0.01223937,  0.02107784])

In [102]:
# Serialise the policy back to the path it was loaded from, overwriting the
# previous model file. The context manager flushes and closes the handle even
# if pickling raises, which the manual open()/close() pair did not guarantee.
with open(policy_path, 'wb') as file:
    pickle.dump(policy, file)