In [10]:
import time
import numpy as np
import pandas as pd
from glob import glob
from data_read import check_time_range
from talib import RSI, ATR

In [2]:
def norm_rsi(rsi_array):
    """Clamp RSI readings and shift them so they can be used as array indices.

    Values below 20 are raised to 20, values of 80 or more are lowered to 79,
    and 20 is then subtracted, so integer input ends up in the range 0..59.

    Parameters
    ----------
    rsi_array : array-like
        RSI values. The caller's array is left untouched.

    Returns
    -------
    numpy.ndarray
        A new array holding the clamped, shifted values.
    """
    # Work on a private copy: the masked assignments below would otherwise
    # overwrite the caller's data as a side effect.
    clamped = np.array(rsi_array, copy=True)
    clamped[clamped < 20] = 20
    clamped[clamped >= 80] = 79
    return clamped - 20

In [26]:

# Interval sub-folders that can be used in the paths below:
# oneMin, fiveMin, thirtyMin, hour
data_btc = 'G:/Code/Projects/Quant Mod/Robots/Bittrex/Data/USDT-BTC/fiveMin/'
data_path = 'G:/Code/Projects/Quant Mod/Robots/Bittrex/Data/BTC-LTC/fiveMin/'
# Multipliers applied to the close price when buying / selling in the backtest.
buy_correction = 1.004
sell_correction = 0.996
# Share of each profitable trade's profit that the backtest sets aside.
deposit_percent = 0

# Read the first CSV found in each data folder.
fn = glob(data_btc+'*csv')[0]
df_btc = pd.read_csv(fn)

fn = glob(data_path+'*csv')[0]
df_alt = pd.read_csv(fn)

# Inner join on the 'T' column; columns present in both frames receive the
# pandas default suffixes (_x for df_alt, _y for df_btc).
df = pd.merge(df_alt, df_btc, on='T', how = 'inner')
# drop_duplicates returns a new frame, so the result has to be assigned;
# the index is rebuilt so positions stay contiguous after rows are removed.
df = df.drop_duplicates(subset = 'T').reset_index(drop=True)

print("Rows: ", len(df))
print("Start time:", df.iloc[0]['T'][:10])
tr = check_time_range(df)
# Presumably check_time_range returns seconds; converted to days here -- TODO confirm.
tr = round(tr/3600/24)
print("Дней:", tr)


# Q-table sizing: four RSI features with 60 levels each (see norm_rsi) times
# two market positions.
n_states = 60 * 60 * 60  * 60 * 2
n_actions = 3
# Actions: 0 - nothing, 1 - buy, 2 - sell

# Initialize the Q-table with all zeros.
Q = np.zeros([n_states, n_actions])
# Maps a (rsi1, rsi2, rsi3, rsi4, position) tuple to its Q-table row;
# -1 marks a combination that has not been assigned a row yet.
state_space = np.full([60, 60, 60, 60, 2], -1.0)

# List to contain total rewards per episode.
rList = []


# DataFrame.as_matrix() was removed from pandas; .values yields the same array.
close_price = df["C_x"].values
# Bar-to-bar relative change of the close price; the first bar has no
# predecessor and is set to 0.
daily_returns = close_price.copy()
daily_returns[0] = 0
daily_returns[1:] = (close_price[1:]/close_price[:-1]) - 1

# Four RSI features with different periods, each rounded to an integer and
# normalised by norm_rsi. The leading warm-up values of RSI are NaN before the
# integer cast; training starts at index rsi4_win.
rsi4_win = 14
rsi1, rsi2, rsi3, rsi4 = (
    norm_rsi(np.round(RSI(close_price * 100, period)).astype(int))
    for period in (5, 7, 10, rsi4_win)
)


Rows:  30502
Start time: 2017-09-30
Дней: 84


In [8]:

# Module-level state shared by cur_state() and env_step().
state_index = 0      # next free Q-table row handed out by cur_state()
market_position = 0  # 0 until a buy is taken, 1 until the following sell

def cur_state(t):
    """Return the Q-table row for bar ``t`` under the current market position.

    The four RSI features at ``t`` together with the global
    ``market_position`` form the lookup key into ``state_space``. A key seen
    for the first time is given the next free row number (``state_index``),
    which is then advanced by one.
    """
    global state_index
    key = (rsi1[t], rsi2[t], rsi3[t], rsi4[t], market_position)
    row = state_space[key]
    if row != -1:
        return int(row)
    # First visit: allocate a fresh row and remember it.
    row = state_index
    state_space[key] = row
    state_index += 1
    return int(row)

def env_step(j, a):
    """Apply action ``a`` and return ``(state, reward)`` for bar ``j``.

    Action 1 switches the global ``market_position`` from 0 to 1, action 2
    switches it from 1 to 0; any other combination leaves it unchanged.
    The reward is ``daily_returns[j]`` while the position is 1 and its
    negation while the position is 0. The state is looked up after the
    position change.
    """
    global market_position

    if a == 1 and market_position == 0:
        market_position = 1
    elif a == 2 and market_position == 1:
        market_position = 0

    reward = daily_returns[j]
    if market_position == 0:
        reward = -reward
    next_state = cur_state(j)
    return (next_state, reward)

In [11]:
# Set learning parameters
lr = .7             # step size of the Q-value update
y = .96             # discount applied to the best next-state value
num_episodes = 300
train = 0.7         # fraction of the rows used for training

# Skip the first rsi4_win bars (the period of the slowest RSI feature).
start = rsi4_win
train_end = df.shape[0] * train

start_time = time.time()
for i in range(num_episodes):
    # Every episode replays the training window from its first bar, so it has
    # to begin without a position; otherwise the position left over from the
    # previous episode (or from another cell) leaks into this one.
    market_position = 0
    rAll = 0
    j = start
    # The Q-Table learning algorithm
    while j < train_end:
        s = cur_state(j)
        # Mask the action that is impossible in the current position so that
        # argmax can never select it.
        if market_position == 0:
            Q[s, 2] = -np.inf  # can't sell at this state
        elif market_position == 1:
            Q[s, 1] = -np.inf  # can't buy at this state

        # Choose an action greedily from the Q table, with Gaussian noise that
        # shrinks as 1/(episode + 1).
        a = np.argmax(Q[s,:] + np.random.randn(1, n_actions)*(1./(i+1)))
        # Get new state and reward from the environment (next bar).
        j += 1
        s1, r = env_step(j, a)
        # Update Q-Table with new knowledge
        Q[s,a] = Q[s,a] + lr*(r + y*np.max(Q[s1,:]) - Q[s,a])
        rAll += r
    print(i, rAll)

print("Time taken = {0:.1f} sec".format(time.time() - start_time))

0 0.187417571173
1 -0.128944352962
2 0.761951217351
3 0.649637564181
4 0.697645764063
5 0.555480051656
6 -0.44706379687
7 0.894857853255
8 1.03269876496
9 1.35244044521
10 0.878620579315
11 0.730690872398
12 1.85828089601
13 0.0525157605633
14 1.47055979449
15 2.62593030979
16 1.53348901845
17 2.28523618728
18 1.51383733908
19 1.51857925305
20 1.91810955088
21 1.61147891608
22 2.51339123705
23 2.57359872656
24 3.20068450365
25 2.29203953871
26 2.94096480491
27 2.9917647366
28 3.31633155215
29 2.9959972047
30 4.1622560749
31 3.74716728336
32 3.44613895073
33 3.37294640434
34 3.71185948096
35 3.91637446794
36 2.62173589055
37 2.86890139037
38 3.85127556979
39 4.05207932761
40 3.57960160831
41 3.53635621036
42 3.68826227157
43 4.15725368455
44 4.49375801914
45 3.94876272264
46 5.09560224959
47 4.37649258986
48 4.57301662252
49 5.51169794676
50 5.18716980335
51 5.49074270283
52 4.84843907804
53 4.67555072838
54 4.78160869122
55 5.91458197904
56 5.09053239937
57 4.70094009648
58 5.179451694

In [None]:
# Backtest: replay the rows after the training window, always taking the
# highest-valued action from the learned Q-table.
market_position = 0
start_capital = 0.1
# Fee fraction charged on every trade. The original cell used this name
# without defining it; 0.0025 is an assumed value -- TODO confirm.
commision = 0.0025

# Running totals. They must exist before the loop updates them with `+=`.
cur_capital = start_capital
num_shares = 0.0
buy_sum = 0.0
entry_price = 0.0
cum_profit = 0.0
deposit = 0.0
num_trades = 0

test_start = int(train_end) + 1
test_end = df.shape[0]
for i in range(test_start, test_end):
    # cur_state reads the global market_position, so the position has to be
    # tracked under exactly that name.
    s = cur_state(i)
    a = np.argmax(Q[s,:])
    if a == 1 and market_position == 0:
        # Buy with all available capital at the corrected close price.
        price = close_price[i] * buy_correction
        num_shares = cur_capital / price * (1-commision)
        buy_sum = cur_capital
        cur_capital = 0

        market_position = 1
        entry_price = price
        num_trades += 1
    elif a == 2 and market_position == 1:
        # Sell the whole holding at the corrected close price.
        price = close_price[i] * sell_correction
        profit = price * num_shares * (1-commision)  # sale proceeds after fee

        pure_profit = profit - buy_sum
        cum_profit += pure_profit
        # Set aside a share of positive profit; the rest stays as capital.
        deposit_sum = max(0, pure_profit) * deposit_percent
        deposit += deposit_sum
        cur_capital = profit - deposit_sum
        market_position = 0
        num_shares = 0.0

# cur_capital is 0 while a position is still open (market_position == 1).
print("Trades:", num_trades, "Cum profit:", cum_profit,
      "Capital:", cur_capital, "Deposit:", deposit,
      "Open position:", market_position)

