In [1]:
from torch import nn, optim
import torch
from collections import namedtuple
import random
import pandas
from itertools import count
import math
from torch.autograd import Variable


In [2]:
data = pandas.DataFrame.from_csv('final.csv')
data = data.values
prices = pandas.DataFrame.from_csv('xmrbtc.csv')


In [3]:
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
Tensor = FloatTensor

In [4]:
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))
class ReplayMemory():
    
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0
        
    def push(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity
        
    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)
    
    def __len__(self):
        return len(self.memory)
            

In [5]:
class TNetwork(nn.Module):
    def __init__(self, input_size, hidden_size=256, output_size=3):
        super(TNetwork, self).__init__()
        self.full1 =  nn.Linear(input_size + hidden_size, hidden_size)
        self.full2 =  nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.Linear(output_size, 3)
        self.hidden_size = hidden_size
        
    def forward(self, input, hidden):
        print(input)
        print(hidden)
        combined = torch.cat((input, hidden),1)
        hidden = self.full1(combined)
        output = self.full2(combined)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return Variable(torch.zeros(1, self.hidden_size)).type(FloatTensor)


In [6]:
class DataIter:
    def __init__(self, data):
        self.data = data
        self.index = 0
        
    def __iter__(self):
        return self
    
    def next(self):
        if self.index >= len(self.data):
            self.index = 0
        d = self.data[self.index]
        self.index += 1
        return d
    
    def current(self):
        return self.data[self.index]
    
    def index(self):
        return self.index
    
    def has_next(self):
        return self.index < len(self.data)

it = DataIter(data)
def get_next_data():
    return torch.from_numpy(it.next()).unsqueeze(0).type(Tensor)

class MoneySimulator:
    def __init__(self):
        self.monero = 0.1
        self.btcMax = self.monero * prices['close'].iloc[0]
        self.btc = 0
        
    def step(self,action):
        if action == 1:
            return (0, False)
        
        if action == 0 and self.monero == 0:
            return  (0, False)
        
        if action == 2 and self.btc == 0:
            return  (0, False)
        
        if action == 0:
            self.monero = self.monero / 2
            xmr_p = self.monero
            self.btc = xmr_p * prices['close'].iloc[it.index + 1]
            
            current = self.monero / prices['close'].iloc[it.index + 1] + self.btc
            
            self.btcMax = self.btcMax if current < self.btcMax else current
            return self.calculate_res()
        
        if action == 2:
            self.btc = self.btc / 2
            btc = self.btc
            self.monero = btc / prices['close'].iloc[it.index + 1]
            
            current = self.monero / prices['close'].iloc[it.index + 1] + self.btc
            
            self.btcMax = self.btcMax if current < self.btcMax else current
            return self.calculate_res()
        
    def calculate_res(self):
        def calculate_at_moment(price):
            btc = self.monero / price
            return (btc + self.btc) - self.btcMax
        
        return (calculate_at_moment(prices['close'].iloc[it.index + 2]), False)
        
        
sim = MoneySimulator()

In [7]:
BATCH_SIZE = 128
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200

model = TNetwork(len(data[0]))

if use_cuda:
    model.cuda()
    
optimizer = optim.RMSprop(model.parameters())
memory = ReplayMemory(10000)

steps_done = 0
    
def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        output, hidden = model(
            Variable(state, volatile=True).type(FloatTensor),model.initHidden())
        return data.data.max(1)[1].view(1, 1)
    else:
        return LongTensor([[random.randrange(3)]])



In [8]:
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = ByteTensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)))

    # We don't want to backprop through the expected action values and volatile
    # will save us on temporarily changing the model parameters'
    # requires_grad to False!
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state
                                                if s is not None]),
                                     volatile=True)
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    next_state_values = Variable(torch.zeros(BATCH_SIZE).type(Tensor))
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # Now, we don't want to mess up the loss with a volatile flag, so let's
    # clear it. After this, we'll just end up with a Variable that has
    # requires_grad=False
    next_state_values.volatile = False
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

In [9]:
num_episodes = 10
for i_episode in range(num_episodes):
    # Initialize the environment and state
    last_point = get_next_data()
    current_point = get_next_data()
    state = current_point - last_point
    for t in count():
        # Select and perform an action
        action = select_action(state)
        print(action)
        reward, done = sim.step(action[0, 0])
        reward = Tensor([reward])

        # Observe new state
        last_point = current_point
        current_point = get_next_data()
        if not done:
            next_state = current_point - last_point
        else:
            next_state = None

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state
        state = next_state

        # Perform one step of the optimization (on the target network)
        optimize_model()
        if done:
            episode_durations.append(t + 1)
            plot_durations()
            break

print('Complete')



 2
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 2
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 1
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 0
[torch.cuda.LongTensor of size 1x1 (GPU 0)]


 2
[torch.cuda.Long

TypeError: 'NoneType' object is not iterable