In [None]:
import datetime
from collections import OrderedDict, deque

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
def analyze_signals(transaction_history):
    """Print the percentage of hold / buy / sell actions in a transaction log.

    Args:
        transaction_history: iterable of 3-item records whose last item is the
            action code (0 = hold, 1 = buy, 2 = sell). The first two items are
            ignored here.

    Records with any other action code are not counted. An empty log (or one
    without any recognised code) prints 0.0% for every action instead of
    raising ZeroDivisionError.
    """
    hold = buy = sell = 0
    for _, _, action in transaction_history:
        if action == 0:
            hold += 1
        elif action == 1:
            buy += 1
        elif action == 2:
            sell += 1
    total = hold + buy + sell

    def _pct(count):
        # Scale to 0-100 so the number matches the '%' sign in the message.
        return 100.0 * count / total if total else 0.0

    print('hold {}%, buy {}%, sell {}%'.format(_pct(hold), _pct(buy), _pct(sell)))

In [None]:
# Date range handed to get_stock_data for the 'AAPL' ticker.
start = datetime.datetime(2014, 1, 1)
end = datetime.datetime(2018, 1, 1)
# NOTE(review): get_stock_data is defined outside this section; 0.8 is presumably
# the train fraction of the train/test split it returns — confirm.
train_df, test_df = get_stock_data('AAPL', start, end, 0.8)

# NOTE(review): create_df is defined outside this section; presumably it adds the
# norm_* feature columns selected below, and the meaning of 3 is not visible here — confirm.
train_df = create_df(train_df, 3)

# Feature matrix consumed by the training and evaluation loops. Column order
# matters: those loops read column 0 (norm_adj_close) as the current price via state[0].
train_data = np.array(train_df[['norm_adj_close', 'norm_bb_width', 'norm_close_sma_ratio']])

In [None]:
class Trader1:
    """Epsilon-greedy deep Q-learning agent over a small fully connected network.

    The network maps a state vector of length ``state_size`` to one Q-value per
    action (``act_size`` outputs). Transitions are stored in ``history`` and
    sampled at random by :meth:`replay` to fit the network.

    Args:
        act_size: number of discrete actions.
        state_size: length of the state vector fed to the network.
        is_trained: when True, load weights from ``model_path`` and always act
            greedily; when False, start from a fresh network and explore.
        model_path: checkpoint to load when ``is_trained`` is True. Either a
            ``state_dict`` (what the training cell saves with ``torch.save``)
            or a whole pickled ``nn.Module`` is accepted.

    Raises:
        ValueError: if ``is_trained`` is True and ``model_path`` is None.
    """

    def __init__(self, act_size, state_size, is_trained, model_path=None):
        self.state_size = state_size
        self.act_size = act_size
        # Replay memory of [state, action, reward, next_state] records; the
        # oldest entries are dropped once 1000 are stored.
        self.history = deque(maxlen=1000)
        self.is_trained = is_trained
        self.bought_history = []
        self.transactions = []

        # `alpha` is the exploration probability used by act() (probability of
        # a random action); it is decayed by replay() down to `alpha_min`.
        self.alpha = 0.9
        self.alpha_min = 0.1
        self.alpha_decay = 0.99
        # Discount factor applied to the best next-state Q-value.
        self.gamma = 0.95

        # Always build the network, loss and optimizer so that replay() works
        # for loaded models as well.
        self.deep_net = self.model()
        if self.is_trained:
            if model_path is None:
                raise ValueError('model_path is required when is_trained=True')
            # SECURITY: torch.load unpickles the file; only load checkpoints
            # from a trusted source.
            loaded = torch.load(model_path)
            if isinstance(loaded, nn.Module):
                # Whole-model checkpoint: use it directly and point the
                # optimizer at its parameters.
                self.deep_net = loaded
                self.optimizer = optim.Adam(self.deep_net.parameters(), lr=0.002)
            else:
                # state_dict checkpoint: copy the weights into the fresh network.
                self.deep_net.load_state_dict(loaded)

    def model(self):
        """Build a fresh Q-network together with its loss and optimizer.

        Sets ``self.deep_net``, ``self.criterion`` and ``self.optimizer`` and
        returns the network.
        """
        self.deep_net = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(self.state_size, 64)),
            ('relu1', nn.ReLU()),
            ('fc2', nn.Linear(64, 32)),
            ('relu2', nn.ReLU()),
            ('fc3', nn.Linear(32, 8)),
            ('relu3', nn.ReLU()),
            ('output', nn.Linear(8, self.act_size))
        ]))
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.deep_net.parameters(), lr=0.002)
        return self.deep_net

    def act(self, state):
        """Choose an action index for ``state``.

        While the agent is untrained, a uniformly random action is returned
        with probability ``self.alpha``; otherwise the action with the highest
        predicted Q-value is returned.

        Args:
            state: sequence/array of ``state_size`` numbers.

        Returns:
            int: action index in ``[0, act_size)``.
        """
        if not self.is_trained and np.random.rand() < self.alpha:
            return int(np.random.randint(0, self.act_size))

        state = torch.as_tensor(state, dtype=torch.float32)
        # Inference only: no autograd graph is needed to pick the arg-max.
        with torch.no_grad():
            qs = self.deep_net(state)
        return int(torch.argmax(qs).item())

    def replay(self, batch_size):
        """Fit the network on a random sample of stored transitions.

        For each sampled ``[state, action, reward, next_state]`` record, the
        target for the taken action is ``reward + gamma * max(Q(next_state))``;
        the targets of the other actions are the current predictions, so only
        the taken action contributes to the MSE loss. One optimizer step is
        taken per record. Afterwards the exploration rate is decayed, never
        dropping below ``alpha_min``.

        Args:
            batch_size: number of records to sample without replacement;
                clipped to the number of records currently stored.
        """
        stored = len(self.history)
        if stored == 0:
            return
        sample_size = min(batch_size, stored)
        sampled = np.random.choice(stored, size=sample_size, replace=False)

        for idx in sampled:
            state, action, reward, next_state = self.history[idx]
            state_t = torch.as_tensor(state, dtype=torch.float32)
            next_state_t = torch.as_tensor(next_state, dtype=torch.float32)

            # The bootstrap target is treated as a constant, so compute it
            # without building an autograd graph.
            with torch.no_grad():
                max_future = self.deep_net(next_state_t).max().item()
            q_target_action = float(reward) + self.gamma * max_future

            q_pred = self.deep_net(state_t)
            q_target = q_pred.detach().clone()
            q_target[int(action)] = q_target_action

            self.optimizer.zero_grad()
            self.loss = self.criterion(q_pred, q_target)
            self.loss.backward()
            self.optimizer.step()

        # Decay exploration but clamp so it never falls below the floor.
        self.alpha = max(self.alpha_min, self.alpha * self.alpha_decay)
        

In [None]:
# Train a fresh agent on `train_data`, one pass over the series per epoch.
# state_size=4: the three feature columns of train_data plus the share count
# appended to every state below.
trader = Trader1(act_size=3, state_size=4, is_trained=False)

cntr = 0
# NOTE(review): `epochs` is never read; the loop below has always run 500 passes.
epochs = 2000
batch_size = 10
num_shares = 0
returns=[0]

for e in range(500):
    # Reset the per-epoch bookkeeping, including the agent's replay memory.
    num_shares = 0
    returns=[0]
    trader.transactions = []
    trader.history.clear()
    trader.bought_history = []
    # Pair every row with its successor; the last row has no next state, so it
    # is skipped (previously detected via a bare `except:` around the index).
    for i, (state, next_state) in enumerate(zip(train_data[:-1], train_data[1:])):
        current_price = state[0]

        # NOTE(review): get_return_since_entry is defined outside this section;
        # presumably the return of the open lots at current_price — confirm.
        if num_shares > 0:
            returns.append(get_return_since_entry(trader.bought_history, current_price))
        else:
            returns.append(returns[-1])
        state = np.append(state, num_shares)
        action = trader.act(state)
        trader.transactions.append((i, current_price, action))

        if action == 0:  # hold
            if num_shares > 0:
                # Holding a position is rewarded with the one-step price change.
                previous_price = train_data[i-1][0]
                reward = current_price-previous_price
            else:
                reward = 0
        elif action == 1:  # buy
            reward = 0
            trader.bought_history.append(current_price)
            num_shares += 1
        elif action == 2:  # sell
            if num_shares > 0:
                # Lots are closed first-in-first-out; reward is the realised gain.
                reward = current_price - trader.bought_history[0]
                trader.bought_history.pop(0)
                num_shares -= 1
            else:
                # Penalise selling without a position.
                reward = -100
        else:
            # Never silently reuse the previous step's reward.
            raise ValueError('unexpected action {!r}'.format(action))

        # The next state carries the share count after this step's trade.
        next_state = np.append(next_state, num_shares)
        trader.history.append([state, action, reward, next_state])
        if len(trader.history) > batch_size:
            trader.replay(batch_size)

    # Checkpoint and report after every epoch.
    model_name = 'model_epoch_trial'+str(e+1)  + '.pth'
    torch.save(trader.deep_net.state_dict(), model_name)
    print('Trained {} epochs '.format(e+1))
    # NOTE(review): get_invested_capital is defined outside this section.
    get_invested_capital(trader.transactions, returns)
    analyze_signals(trader.transactions)



In [None]:
def _build_q_network(state_size=4, act_size=3):
    """Return a freshly initialised network with the layer names and sizes the checkpoints expect."""
    return nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(state_size, 64)),
        ('relu1', nn.ReLU()),
        ('fc2', nn.Linear(64, 32)),
        ('relu2', nn.ReLU()),
        ('fc3', nn.Linear(32, 8)),
        ('relu3', nn.ReLU()),
        ('output', nn.Linear(8, act_size))
    ]))


def _load_q_network(checkpoint_path):
    """Build the network and load the state_dict stored at ``checkpoint_path`` into it."""
    net = _build_q_network()
    net.load_state_dict(torch.load(checkpoint_path))
    return net


# Load two training checkpoints and keep their first-layer weights so the
# next cell can compare them.
model = _load_q_network('model_epoch_trial40.pth')
b = model.fc1.weight

model_1 = _load_q_network('model_epoch_trial44.pth')
a = model_1.fc1.weight

In [None]:
# Element-wise comparison of the two checkpoints' fc1 weights (True where equal).
torch.eq(a, b)

In [None]:
# Greedy roll-out of the loaded `model` over the price series, recording every
# action and the running return.
# NOTE(review): this iterates `train_data` although the results are named
# `test_*`; `test_df` is never turned into a feature matrix in this section — confirm intent.
cntr = 0

num_shares = 0
test_transactions = []
returns = [0]
bought_history = []
model.eval()
with torch.no_grad():
    # The last row is skipped, as before (it has no successor row).
    for i, state in enumerate(train_data[:-1]):
        current_price = state[0]

        # NOTE(review): get_return_since_entry is defined outside this section.
        if len(bought_history) > 0:
            returns.append(get_return_since_entry(bought_history, current_price))
        else:
            returns.append(returns[-1])

        # Same state layout as in training: features followed by the share count.
        state = np.append(state, num_shares)
        state = torch.tensor(state).float()
        qs = model(state)
        action = np.argmax(qs.numpy())
        print('action', action, 'num shares', num_shares, state,qs)

        if action == 1:
            num_shares += 1
            bought_history.append(current_price)
        elif action == 2 and num_shares > 0:
            # Mirror the training loop: close the oldest lot, and ignore a sell
            # signal when nothing is held so the position never goes negative
            # and returns are not computed on lots that were already sold.
            bought_history.pop(0)
            num_shares -= 1
        test_transactions.append((i, current_price, action))

    if len(train_data) > 0:
        print('End of data!')
        #print(num_shares, trader.deep_net.forward(torch.tensor(state).float()))


In [None]:
# Print the hold/buy/sell breakdown of the actions recorded in test_transactions.
analyze_signals(test_transactions)

In [None]:
# NOTE(review): visualize_resualts is defined outside this section; "resualts"
# looks like a typo of "results" — if renamed, change the definition and this call together.
# Presumably plots the recorded actions together with `returns` — confirm.
visualize_resualts(test_transactions, returns)