In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas
from collections import namedtuple
import torch.multiprocessing as mp
from torch.autograd import Variable

In [2]:
data = pandas.DataFrame.from_csv('final.csv')
data = data.values
prices = pandas.DataFrame.from_csv('xmrbtc.csv')

In [3]:
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
Tensor = FloatTensor

In [4]:
import math

import torch
import torch.optim as optim


class SharedAdam(optim.Adam):
    """Implements Adam algorithm with shared states.
    """

    def __init__(self,
                 params,
                 lr=1e-3,
                 betas=(0.9, 0.999),
                 eps=1e-8,
                 weight_decay=0):
        super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay)

        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = p.data.new().resize_as_(p.data).zero_()
                state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_()

    def share_memory(self):
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)

                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1**state['step'][0]
                bias_correction2 = 1 - beta2**state['step'][0]
                step_size = group['lr'] * math.sqrt(
                    bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

        return loss

In [5]:
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))
class ReplayMemory():
    
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0
        
    def push(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity
        
    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)
    
    def __len__(self):
        return len(self.memory)

In [6]:
class DataIter:
    def __init__(self, data):
        self.data = data
        self.index = 0
        
    def __iter__(self):
        return self
    
    def next(self):
        if self.index >= len(self.data):
            self.index = 0
        d = self.data[self.index]
        self.index += 1
        return d
    
    def current(self):
        return self.data[self.index]
    
    def index(self):
        return self.index
    
    def has_next(self):
        return self.index < len(self.data)
    
    def reset(self):
        self.index = 0

it = DataIter(data)
def get_next_data():
    return torch.from_numpy(it.next()).unsqueeze(0).type(Tensor)

class MoneySimulator:
    def __init__(self):
        self.reset()
        it.reset()
        
    def reset(self):
        self.monero = 0.1
        self.btcMax = self.monero * prices['close'].iloc[0]
        self.btc = 0
        
    def step(self,action):
        if action == 1:
            return (0, it.has_next())
        
        if action == 0 and self.monero == 0:
            return  (0, it.has_next())
        
        if action == 2 and self.btc == 0:
            return  (0, it.has_next())
        
        if action == 0:
            self.monero = self.monero / 2
            xmr_p = self.monero
            self.btc = xmr_p * prices['close'].iloc[it.index + 1]
            
            current = self.monero / prices['close'].iloc[it.index + 1] + self.btc
            
            self.btcMax = self.btcMax if current < self.btcMax else current
            return self.calculate_res()
        
        if action == 2:
            self.btc = self.btc / 2
            btc = self.btc
            self.monero = btc / prices['close'].iloc[it.index + 1]
            
            current = self.monero / prices['close'].iloc[it.index + 1] + self.btc
            
            self.btcMax = self.btcMax if current < self.btcMax else current
            return self.calculate_res()
        
    def calculate_res(self):
        def calculate_at_moment(price):
            btc = self.monero / price
            return (btc + self.btc) - self.btcMax
        
        return (calculate_at_moment(prices['close'].iloc[it.index + 2]), it.has_next())
        
        
sim = MoneySimulator()

In [7]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]
        fan_out = weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)
        
def normalized_columns_initializer(weights, std=1.0):
    out = torch.randn(weights.size())
    out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True))
    return out

class ACNetwork(nn.Module):
    def __init__(self, input_size, hidden_size = 256, output_size = 3):
        super(ACNetwork, self).__init__()
        self.full1 =  nn.Linear(input_size, hidden_size)
        self.full2 =  nn.Linear(hidden_size, hidden_size)
        
        self.lstm = nn.LSTMCell(hidden_size, hidden_size)
        
        self.critic_linear = nn.Linear(256, 1)
        self.actor_linear = nn.Linear(256, 3)
        
        self.actor_linear.weight.data = normalized_columns_initializer(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)
        
        self.apply(weights_init)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)        
    
    def forward(self, inputs):
        inputs, (hx, cx) = inputs
        x = F.elu(self.full1(inputs))
        x = F.elu(self.full2(x))

#         x = x.view(-1, 32 * 3 * 3)
        hx, cx = self.lstm(x, (hx, cx))
        x = hx

        return self.critic_linear(x), self.actor_linear(x), (hx, cx)

In [8]:
def train(shared_model, optimizer, num_steps = 20):
    model = ACNetwork(len(data[0]))
    
    model.train()
    
    sim.reset()
    
    state = torch.from_numpy(it.next())
    
    done = True
    
    values = []
    log_probs = []
    rewards = []
    entropies = []

    while True:
        model.load_state_dict(shared_model.state_dict())
        
        if done:
            cx = Variable(torch.zeros(1, 256))
            hx = Variable(torch.zeros(1, 256))
        else:
            cx = Variable(cx.data)
            hx = Variable(hx.data)
            
        for step in range(num_steps):
            print(state)
            value, logit, (hx, cx) = model((Variable(state.unsqueeze(0)).type(FloatTensor), (hx,cx)))
            
            prob = F.softmax(logit)
            log_prob = F.log_softmax(logit)
            entropy = -(log_prob * prob).sum(1, keepdim=True)
            entropies.append(entropy)

            action = prob.multinomial().data
            log_prob = log_prob.gather(1, Variable(action))
            
            reward, done = sim.step(action.numpy())
            state = it.next()
            values.append(value)
            log_probs.append(log_prob)
            rewards.append(reward)
            
            reward = max(min(reward, 1), -1)

            with lock:
                counter.value += 1

            if done:
                episode_length = 0
                state = sim.reset()
                
            if done:
                break
                
            R = torch.zeros(1, 1)
            if not done:
                value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
                R = value.data
                
            values.append(Variable(R))

            policy_loss = 0
            value_loss = 0
            R = Variable(R)
            gae = torch.zeros(1, 1)
            
            for i in reversed(range(len(rewards))):
                R = args.gamma * R + rewards[i]
                advantage = R - values[i]
                value_loss = value_loss + 0.5 * advantage.pow(2)

                # Generalized Advantage Estimataion
                delta_t = rewards[i] + args.gamma * \
                    values[i + 1].data - values[i].data
                gae = gae * args.gamma * args.tau + delta_t

                policy_loss = policy_loss - \
                    log_probs[i] * Variable(gae) - args.entropy_coef * entropies[i]

                optimizer.zero_grad()

                (policy_loss + args.value_loss_coef * value_loss).backward()
                torch.nn.utils.clip_grad_norm(model.parameters(), args.max_grad_norm)

                ensure_shared_grads(model, shared_model)
                optimizer.step()

    

In [9]:
if __name__ == '__main__':

    
    shared_model = ACNetwork(
        len(data[0]))
    shared_model.share_memory()

    optimizer = SharedAdam(shared_model.parameters())
    optimizer.share_memory()

    processes = []

    counter = mp.Value('i', 0)
    lock = mp.Lock()
    
    num_processes = 4

#     p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter))
#     p.start()
#     processes.append(p)

    for rank in range(0, num_processes):
        p = mp.Process(target=train, args=(shared_model, optimizer))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()


-1.8000e-01
 6.8000e-01
-4.0000e-05
 1.1600e+00
-1.6100e+00
 6.7000e-01
-2.0000e-05
 8.7600e+00
-4.3000e-01
 3.9000e-01
-3.0000e-05
 5.0700e+00
-9.5000e-01
 7.0000e-02
-1.0000e-05
-4.4600e+00
-7.3368e+02
 1.4607e+02
 1.2567e+02
-6.8852e+04
-5.6149e+05
 5.7890e+04
 3.3200e+00
 3.8700e+00
-1.8000e-01
 6.8000e-01
-4.0000e-05
 1.0030e+01
 1.0500e+00
 6.8000e-01
 8.0000e-05
 1.3250e+03
 2.2000e+01
 3.0000e+00
[torch.DoubleTensor of size 34]

-1.8000e-01
 6.8000e-01
-4.0000e-05
 1.1600e+00
-1.6100e+00
 6.7000e-01
-2.0000e-05
 8.7600e+00
-4.3000e-01
 3.9000e-01
-3.0000e-05
 5.0700e+00
-9.5000e-01
 7.0000e-02
-1.0000e-05
-4.4600e+00
-7.3368e+02
 1.4607e+02
 1.2567e+02
-6.8852e+04
-5.6149e+05
 5.7890e+04
 3.3200e+00
 3.8700e+00
-1.8000e-01
 6.8000e-01
-4.0000e-05
 1.0030e+01
 1.0500e+00
 6.8000e-01
 8.0000e-05
 1.3250e+03
 2.2000e+01
 3.0000e+00
[torch.DoubleTensor of size 34]


-1.8000e-01
 6.8000e-01
-4.0000e-05
 1.1600e+00
-1.6100e+00
 6.7000e-01
-2.0000e-05
 8.7600e+00
-4.3000e-01
 3.9000e



None



Process Process-4:


None


Traceback (most recent call last):
Process Process-1:
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


None


  File "<ipython-input-8-37d3bb92b7dd>", line 29, in train
    value, logit, (hx, cx) = model((Variable(state.unsqueeze(0)).type(FloatTensor), (hx,cx)))
AttributeError: 'NoneType' object has no attribute 'unsqueeze'
Traceback (most recent call last):
Process Process-3:
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-8-37d3bb92b7dd>", line 29, in train
    value, logit, (hx, cx) = model((Variable(state.unsqueeze(0)).type(FloatTensor), (hx,cx)))
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py"

None


AttributeError: 'NoneType' object has no attribute 'unsqueeze'
Process Process-2:
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'unsqueeze'
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-8-37d3bb92b7dd>", line 29, in train
    value, logit, (hx, cx) = model((Variable(state.unsqueeze(0)).type(FloatTensor), (hx,cx)))
AttributeError: 'NoneType' object has no attribute 'unsqueeze'
