<a href="https://colab.research.google.com/github/saurabh289/FAC_Stock-price-prediction/blob/main/FAC_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%matplotlib inline

import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import tensorflow as tf

In [2]:
is_ipython = 'inline'  in matplotlib.get_backend()
if is_ipython: from IPython import display

In [3]:
class DQN(nn.Module):
  def __init__(self,input_size):
    super().__init__()
    self.fc1 = nn.Linear(in_features=input_size,out_features=64)
    self.fc2 = nn.Linear(in_features=64,out_features=8)
    self.out = nn.Linear(in_features=8,out_features=3)
  def forward(self,t):
    t = t.flatten(start_dim=1)
    t = F.relu(self.fc1(t))
    t = F.relu(self.fc2(t))
    t = self.out(t)
    return t  


In [4]:
Experience = namedtuple('Experience',('state','action','next_state','reward'))

In [5]:
class ReplayMemory():
  def __init__(self,capacity):
    self.capacity = capacity
    self.memory = []
    self.push_count =0
  def push(self,experience):   
    if len(self.memory)<self.capacity:   #check if it's less than the memory's capacity
      self.memory.append(experience)
    else:
      self.memory[self.push_count%self.capacity] = experience   #store in front of memory
      self.push_count+=1
  def sample(self,batch_size):
    return random.sample(self.memory,batch_size)
  def can_provide_sample(self,batch_size):
   # print(len(self.memory))
    return len(self.memory)>=batch_size  

In [6]:
class EpsilonGreedyStrategy():
  def __init__(self,start,end,decay):
    self.start = start
    self.end = end
    self.decay = decay
  def get_exploration_rate(self,current_step):
    return self.end + (self.start-self.end)*math.exp(-1.*current_step*self.decay)

In [7]:
class Agent():
  def __init__(self,strategy,num_actions,device):
    self.current_step = 0 
    self.strategy = strategy
    self.num_actions = num_actions
    self.device = device 
  def select_action(self,state,policy_net):
    rate = strategy.get_exploration_rate(self.current_step)
    self.current_step +=1
    if rate> random.random():
      action =  random.randrange(self.num_actions)  # explore
      return torch.tensor([action]).to(device)
    else:
      with torch.no_grad():
        #print(state,"f1")
        return policy_net(state).argmax(dim=1).to(device) # exploit

In [8]:

def formatPrice(n):
    return("-Rs." if n<0 else "Rs.")+"{0:.2f}".format(abs(n))
def getStockDataVec():
    vec = []
    lines = open("/content/NFLX.csv","r").read().splitlines()
    for line in lines[1:2267]:
        #print(line)
        #print(float(line.split(",")[4]))
        vec.append(float(line.split(",")[4]))
        #print(vec)
    return vec 
def sigmoid(x):
    return 1/(1+math.exp(-x))

def getState(data, t, n):
    d = t - n + 1
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block[i + 1] - block[i]))
    return np.array([res])


In [9]:
def plot(values, moving_avg_period):
    plt.figure(2)
    plt.clf()        
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(values)
    plt.plot(get_moving_average(moving_avg_period, values))
    plt.pause(0.001)
    if is_ipython: display.clear_output(wait=True)

In [10]:
def get_moving_average(period, values):
    values = torch.tensor(values, dtype=torch.float)
    if len(values) >= period:
        moving_avg = values.unfold(dimension=0, size=period, step=1).mean(dim=1).flatten(start_dim=0)
        moving_avg = torch.cat((torch.zeros(period-1), moving_avg))
        return moving_avg.numpy()
    else:
        moving_avg = torch.zeros(len(values))
        return moving_avg.numpy()

In [11]:
def extract_tensors(experiences):
    # Convert batch of Experiences to Experience of batches
    batch = Experience(*zip(*experiences))

    t1 = torch.cat(batch.state)
   # print(t1)
    t2 = torch.cat(batch.action)
    t3 = torch.cat(batch.reward)
    t4 = torch.cat(batch.next_state)

    return (t1,t2,t3,t4)

In [12]:
class QValues():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def get_current(policy_net, states, actions):
      return policy_net(states).gather(dim=1, index=actions.unsqueeze(-1))
    def get_next(target_net, next_states):
      final_state_locations = next_states.flatten(start_dim=1).max(dim=1)[0].eq(0).type(torch.bool)
      non_final_state_locations = (final_state_locations == False)
      non_final_states = next_states[non_final_state_locations]
      batch_size = next_states.shape[0]
      values = torch.zeros(batch_size)#.to(QValues.device)
      values[non_final_state_locations] = target_net(non_final_states).max(dim=1)[0].detach()
      return values  

In [13]:
batch_size = 256
gamma = 0.999
eps_start = 1
eps_end = 0.01
eps_decay = 0.001
target_update  = 10
memory_size = 100000
lr = 0.01
num_episodes =30


In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#em = CartPoleEnvManager(device)
strategy = EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)
agent = Agent(strategy, 3, device)
memory = ReplayMemory(memory_size)
vec = getStockDataVec()
k=len(vec)
#print(k)

In [20]:

policy_net = DQN(64).to(device)
target_net = DQN(64).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
optimizer = optim.Adam(params=policy_net.parameters(), lr=lr)
window_size = 64

In [21]:
episode_durations = []
for episode in range(num_episodes):
    print("Episode " + str(episode) + "/" + str(num_episodes))
    state = torch.tensor(getState(vec, 0, window_size + 1))
    total_profit = 0
    invent = []
    #state = getState(vec,)
    max_transaction = 20 
    total_money = 10000
    c_s_h = 0
    c_t_c =0
    for t in range(k-1):
      action = agent.select_action(state.float(), policy_net.float())
      #print(1,action)
      next_state =  torch.tensor(getState(vec, t+1, window_size + 1))
      reward =0
      if action == 0 and c_t_c < max_transaction and total_money>0:
       # print("f2")
        x=total_money/(max_transaction-c_t_c) 
        total_money = total_money - x
        c_t_c +=1
        
        x= x/vec[t]
        c_s_h += x
        a=[]
        a.append(x)
        a.append(vec[t])
        invent.append(a)
         #print("Buy: " + formatPrice(data[t]))
      elif action ==1 and len(invent)>0:
        #print("f1")
        b_p = invent.pop(0)
        reward = vec[t]*b_p[0]-b_p[0]*b_p[1]
        total_money += vec[t]*b_p[0]
        total_profit += reward 
        #print(reward)
        c_s_h = c_s_h - b_p[0]
        c_t_c =0
      elif action==2 and len(invent)>0:
        b_p = invent[0]
        #print(len(invent))
        reward = -vec[t]*b_p[0]+b_p[0]*b_p[1]  
      #action = torch.tensor(action)  
     
      #print(action.shape)
      reward = torch.tensor([reward])
      memory.push(Experience(state, action, next_state, reward))
      state = next_state
      if memory.can_provide_sample(batch_size):
        experiences = memory.sample(batch_size)
        states, actions, rewards, next_states = extract_tensors(experiences)
        current_q_values = QValues.get_current(policy_net, states.float(), actions)
        next_q_values = QValues.get_next(target_net, next_states.float())
        target_q_values = (next_q_values * gamma) + rewards
        loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      done = True if t == k-2 else False  
      if done:
        # episode_durations.append(t)
        # plot(episode_durations, 100)
         break
    print(total_profit)    
    if episode % target_update == 0:
      target_net.load_state_dict(policy_net.state_dict())

Episode 0/30
1877.3155676000397
Episode 1/30
1392.3287290791218
Episode 2/30
3163.975707737608
Episode 3/30
14.678499317016247
Episode 4/30
64.24857130071604
Episode 5/30
226.6588992914958
Episode 6/30
-211.6311585845266
Episode 7/30
-77.70356553463529
Episode 8/30
50.11834192462993
Episode 9/30
623.1368133885258
Episode 10/30
-80.19480076780104
Episode 11/30
250.26866117377074
Episode 12/30
477.80663825126896
Episode 13/30
172.10420658087287
Episode 14/30
247.81576687548
Episode 15/30
254.41735722176276
Episode 16/30
95.98140454079476
Episode 17/30
26.04102722339678
Episode 18/30
-3.6480487050531565
Episode 19/30
47.5909590457249
Episode 20/30
227.6119867079829
Episode 21/30
224.196354323116
Episode 22/30
291.4910621000385
Episode 23/30
182.37426702450642
Episode 24/30
79.79279927095541
Episode 25/30
183.4150735644502
Episode 26/30
198.63567247024042
Episode 27/30
82.02794510924195
Episode 28/30
116.22244035074294
Episode 29/30
117.95392055076707


In [22]:
print(total_profit)

117.95392055076707


In [23]:
max_transaction = 50
total_money = 10000
c_s_h = 0
c_t_c =0
print(k)
for t in range(k-1):
      action = agent.select_action(state.float(), policy_net.float())
      #print(action)
      next_state =  torch.tensor(getState(vec, t+1, window_size + 1))
      reward =0
      if action == 0 and c_t_c < max_transaction and total_money>0:
       # print("f2")
        x=total_money/(max_transaction-c_t_c) 
        total_money = total_money - x
        c_t_c +=1
        
        x= x/vec[t]
        c_s_h += x
        a=[]
        a.append(x)
        a.append(vec[t])
        invent.append(a)
        print("Buy: " + formatPrice(vec[t]))
      elif action ==1 and len(invent)>0:
        #print("f1")
        b_p = invent.pop(0)
        reward = vec[t]*b_p[0]-b_p[0]*b_p[1]
        total_money += vec[t]*b_p[0]
        total_profit += reward 
        print('profit :', total_profit)
        #print(reward)
        c_s_h = c_s_h - b_p[0]
        c_t_c =0
      elif action ==2 and len(invent)>0:
        b_p = invent[0]
        #print(len(invent))
       # print('f1')
        reward = vec[t]*b_p[0]-b_p[0]*b_p[1]  
      #action = torch.tensor(action)  
     
      #print(action.shape)
      reward = torch.tensor([reward])
      memory.push(Experience(state, action, next_state, reward))
      state = next_state
      
      done = True if t == k-2 else False  
      if done:
        #episode_durations.append(timestep)
        #plot(episode_durations, 100)
        break
print(total_profit)
print(total_money)

105
Buy: Rs.357.12
profit : 120.55249529593081
Buy: Rs.488.28
profit : 115.41475971767332
Buy: Rs.532.39
profit : 122.32521650130104
Buy: Rs.516.39
Buy: Rs.518.02
Buy: Rs.508.05
profit : 131.24871789029905
profit : 145.65212977116906
profit : 160.81080156459987
Buy: Rs.502.81
profit : 170.50192939770662
170.50192939770662
10052.548008846934
