In [6]:
source("functions/data.R")
source("functions/process.R")
source("functions/simulations.R")

In [7]:
# Define dataframes
# Fetch one price table per currency. get_data() is not defined in this cell
# (presumably it comes from the sourced functions/data.R); the meaning of the
# trailing `52` and `FALSE` arguments is not visible here -- TODO confirm.
ETH <- get_data("ETH",params$start,params$end,params$period,52,FALSE)
XRP <- get_data("XRP",params$start,params$end,params$period,52,FALSE)
XMR <- get_data("XMR",params$start,params$end,params$period,52,FALSE)
LTC <- get_data("LTC",params$start,params$end,params$period,52,FALSE)

# Bind the per-currency series column-wise: one table per price type, with
# columns in the order ETH, XRP, XMR, LTC.
# NOTE(review): `open` and `close` share their names with the base R functions
# open() and close(); the simulation functions below read `close` as a global.
high <- cbind(ETH$high,XRP$high,XMR$high,LTC$high)
low <- cbind(ETH$low,XRP$low,XMR$low,LTC$low)
open <- cbind(ETH$open,XRP$open,XMR$open,LTC$open)
close <- cbind(ETH$close,XRP$close,XMR$close,LTC$close)

# Rename all the columns
# currency_vec is not defined in this cell; it must already exist and hold one
# name per column (four names, in the column order used above).
colnames(high)  <- currency_vec
colnames(low) <- currency_vec
colnames(open) <- currency_vec
colnames(close) <- currency_vec
# Bundle the four tables into a list, in the order high, low, open, close.
prices_vec <- list(high,low,open,close)

No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.


In [4]:
# Simulation function: the agent draws a fresh random weight vector at each
# step, and the resulting price-relative vector and log return are reported.
#
# params: n_hist, number of timesteps in the past for price history; the first
#         observed price vector is row n_hist of `close`
# params: n_episodes, index of the last episode; the loop runs over
#         0..n_episodes, i.e. n_episodes + 1 steps
# params: verbose, if TRUE (default) print the per-episode diagnostics
#
# Reads the global `close` price table and calls getPriceRelativeVec(),
# getLogReturns() and update_returns(), none of which are defined here.
# Prints (and invisibly returns) the final summary message.
simulate_random <- function(n_hist,n_episodes,verbose=TRUE){

    # generates random action (portfolio vector) with n_assets + 1 weights that
    # sum to 1; defined once here instead of once per loop iteration
    random_action <- function(n_assets){
        x <- runif(n_assets+1)
        x/sum(x)
    }

    prev_v <- tail(head(close,n_hist-1),1) ## initialize first previous price vector
    returns <- 0

    # simulation
    for(i in 0:n_episodes){

        # price vector at the current time step (last row of the first
        # i + n_hist rows of `close`)
        curr_v <- tail(head(close,i+n_hist),1)

        wt <- random_action(4) # 4 assets, matching the four price columns
        yt <- getPriceRelativeVec(prev_v,curr_v)
        rt <- getLogReturns(yt,wt)

        if(verbose){
            print(paste0("episode ",i))
            # cat() already writes its output and returns NULL, so wrapping it
            # in print() appended a literal "NULL" to every line
            cat("wt: ",wt,"\n")
            cat("yt: ",yt,"\n")
            cat("rt: ",rt,"\n")
            print("=================================")
        }

        prev_v <- curr_v # set previous price as current price
        returns <- update_returns(i+1,returns,yt,wt)
    }

    print(paste("average returns for this simulation: ", returns))
}

# Run the random-agent simulation with n_hist = 10 and n_episodes = 100.
simulate_random(10,100)

[1] "episode 0"
wt:  0.06604879 0.3147359 0.09687232 0.3117138 0.2106292NULL
yt:  1 0.9725534 0.9732774 0.9997783 0.9930833NULL
rt:  -0.01283511NULL
[1] "episode 1"
wt:  0.008491789 0.2699929 0.159331 0.4963353 0.0658489NULL
yt:  1 1.000992 0.9868102 0.9871119 0.9976934NULL
rt:  -0.008417682NULL
[1] "episode 2"
wt:  0.2088918 0.1473178 0.2365855 0.1004824 0.3067225NULL
yt:  1 1.001162 1.01773 1.025921 1.133885NULL
rt:  0.04691796NULL
[1] "episode 3"
wt:  0.1105131 0.2240811 0.1400677 0.3071685 0.2181695NULL
yt:  1 1.003188 1.007237 0.9957968 0.9659237NULL
rt:  -0.007022002NULL
[1] "episode 4"
wt:  0.3742001 0.07593277 0.1920006 0.235147 0.1227196NULL
yt:  1 1.028732 1.032464 1.022991 1.040978NULL
rt:  0.01867454NULL
[1] "episode 5"
wt:  0.2670693 0.1707453 0.2447113 0.2192205 0.0982536NULL
yt:  1 0.9636063 0.9863402 1.017593 1.136862NULL
rt:  0.007717218NULL
[1] "episode 6"
wt:  0.1869135 0.2576365 0.28171 0.1543063 0.1194337NULL
yt:  1 1.014833 1.07029 1.013069 0.9902893NULL
rt:  0.02

In [75]:
# Function that gives an actions table: n_samples randomly sampled actions
# (portfolio weight vectors) for n_curren currencies.
#
# params: n_samples, number of actions to sample, one per row (may be 0)
# params: n_curren, number of currencies; each action has n_curren + 1 weights
# params: state_vec, unused; kept as an optional argument so existing calls
#         with or without it keep working
# returns: numeric matrix with n_samples rows and n_curren + 1 columns
#
# Calls random_action(), which is not defined inside this function and must be
# visible from the environment where this function is defined.
get_actions_table <- function(n_samples,n_curren,state_vec=NULL){
    action_mat <- matrix(NA_real_,nrow=n_samples,ncol=n_curren+1)
    # seq_len() yields an empty loop when n_samples is 0, whereas 1:0 iterates
    # over c(1, 0) and indexes a row that does not exist
    for(i in seq_len(n_samples)){
        action_mat[i,] <- random_action(n_curren)
    }
    action_mat
}

# Simulation function where the agent randomly samples actions at each step
# and records the reward each sampled action earns on the next price move.
#
# params: n_samples, number of random actions sampled per episode
# params: n_episodes, index of the last episode; the loop runs over
#         0..n_episodes, i.e. n_episodes + 1 episodes
# params: verbose, if TRUE (default) print each episode's table, state vector
#         and price-relative vector
# returns: matrix with n_samples rows per episode; the columns are the state
#          vector, the sampled action weights, then reward_vec
#
# Reads the global `close` price table and calls getPriceRelativeVec(),
# getLogReturns() and get_actions_table(), none of which are defined here.
simulate_samples <- function(n_samples,n_episodes,verbose=TRUE){
    price <- head(close,2) # the first 2 price vectors
    st <- getPriceRelativeVec(price[1,],price[2,]) # first state: move from row 1 to row 2
    prev_v <- tail(price,1) # last price vector seen so far (row 2)

    # one table per episode, bound together once at the end; this replaces
    # growing a matrix with rbind() from a dummy row of hard-coded width 11
    episode_tables <- vector("list",n_episodes+1)

    for(i in 0:n_episodes){
        # Rows 1 and 2 of `close` already produced the initial state, so
        # episode i observes row i + 3. Indexing with i + 1 made episode 0 step
        # backwards (row 2 -> row 1) and made episode 1 score its actions on
        # the very move that defined its state.
        price <- head(close,i+3)

        # get the current v
        curr_v <- tail(price,1)

        # get price change
        yt <- getPriceRelativeVec(prev_v,curr_v)

        # sample actions (4 assets)
        action_mat <- get_actions_table(n_samples,4)

        # get rewards for each sampled action; the variable name is kept
        # because cbind() uses it as the column name
        reward_vec <- numeric(n_samples)
        for(j in seq_len(n_samples))
            reward_vec[j] <- getLogReturns(yt,action_mat[j,])

        # append rewards to matrix
        action_mat <- cbind(action_mat,reward_vec)

        # Add state columns to the left: every row of this episode carries the
        # same state vector, in its original element order
        state_cols <- matrix(rep(as.numeric(st),each=n_samples),
                             nrow=n_samples,ncol=length(st))
        action_mat <- cbind(state_cols,action_mat)

        episode_tables[[i+1]] <- action_mat

        if(verbose){
            print(paste0("episode ",i))
            print(action_mat)
            # cat() writes its own output and returns NULL; print(cat(...))
            # appended a literal "NULL" to each line
            cat("st: ",st,"\n")
            cat("yt: ",yt,"\n")
            print("=================================")
        }

        # update price vector
        prev_v <- curr_v

        # update state vector
        st <- yt
    }
    do.call(rbind,episode_tables)
}

# Run the sampling simulation with n_samples = 10 and n_episodes = 10.
simulate_samples(10,10)

[1] "episode 0"
                                                                         
 [1,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.12397069430
 [2,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.14729356183
 [3,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.08796701703
 [4,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.23977889529
 [5,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.07455263447
 [6,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.29683613198
 [7,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.11047558351
 [8,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.04067815412
 [9,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.16167619162
[10,] 1 0.9349466675 0.9347245814 0.9631573099 0.9576902527 0.09572393142
                                                                  reward_vec
 [1,] 0.04830658100 0.43525905437 0.04773321372 0.344730456609 0.04956385973
 [2,] 0.08033048

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,reward_vec
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.12397069430,0.04830658100,0.435259054368,0.04773321372,0.344730456609,0.04956385973
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.14729356183,0.08033048749,0.188638534284,0.27664665497,0.307090761423,0.04201676433
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.08796701703,0.28035418842,0.275591141962,0.12114078670,0.234946865886,0.05237054771
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.23977889529,0.23270373636,0.211965685674,0.03270058432,0.282851098350,0.04376878666
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.07455263447,0.22435935760,0.042554202062,0.19597556657,0.462558239311,0.04546500307
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.29683613198,0.07817281233,0.209100421614,0.11206904537,0.303821588710,0.03705578826
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.11047558351,0.35520742963,0.313500644635,0.21739347456,0.003422867663,0.05361199316
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.04067815412,0.24516116701,0.313391283666,0.19968260654,0.201086788666,0.05398202351
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.16167619162,0.15913939086,0.120689898555,0.22773748359,0.330757035369,0.04193340240
1,0.9349466675,0.9347245814,0.9631573099,0.9576902527,0.09572393142,0.29285901139,0.236273562542,0.09262378660,0.282519708050,0.05154960182


In [47]:
# simulates contextual bandits problem
# params
# n_curren: number of currencies in the problem (including cash)
# n_steps: last timestep index; the loop runs over window_size..n_steps
# alpha: learning rate of the action preference functions (passed to get_update)
# window_size: size of rolling window of asset prices to consider in the action preferences
# discount: discount factor of previous price changes
# returns: c(mean reward of the softmax policy, mean reward of a random policy)
#
# Reads the global `close` price table and calls getPriceRelativeVec(),
# getLogReturns(), get_softmax(), get_update() and random_action(), none of
# which are defined inside this function.
# NOTE(review): head(close, i + 2) returns the whole table once i + 2 exceeds
# the number of rows, so for large n_steps the later steps would see an
# unchanged price vector -- confirm n_steps against the data length.
simulate_contextual <- function(n_curren,n_steps,alpha,window_size,discount){

    # preference vector, one entry per currency (was hard-coded to length 5,
    # which only matched n_curren = 5)
    Ht <- rep(0,n_curren)

    # Weight vector for the moving average window:
    # discount^(window_size-1), ..., discount^1, discount^0.
    # The last (most recently appended) row of `history` gets weight 1.
    weight_vec <- discount^((window_size-1):0)

    # the first window_size + 1 price vectors, giving window_size price changes
    price <- head(close,window_size+1)

    # history matrix of price changes, one row per step, oldest first.
    # rbind() keeps it a matrix even for a single row, and seq_len(...)[-1] is
    # empty for window_size = 1, where 2:window_size would iterate over c(2, 1)
    # and index past the end of `price`.
    history <- rbind(getPriceRelativeVec(price[1,],price[2,]))
    for(h in seq_len(window_size)[-1]){
        history <- rbind(history,getPriceRelativeVec(price[h,],price[h+1,]))
    }

    # Define state st as vector of discounted previous relative changes
    st <- weight_vec %*% history

    prev_v <- tail(price,1) # initializes first price vector v

    # Reward vectors for the learned policy and for random actions.
    # Rvec is passed to get_update() every step as the rewards so far, so it
    # is deliberately grown step by step rather than preallocated.
    Rvec <- c()
    Rvec_random <- c()

    for(i in window_size:n_steps){
        # get price dataframe of current time step
        price <- head(close,i+2)
        # get the current v
        curr_v <- tail(price,1)
        # get price change
        yt <- getPriceRelativeVec(prev_v,curr_v)

        # update history matrix: drop the oldest row, append the newest change
        history <- rbind(tail(history,window_size-1),yt)

        # preference vector for state s (element-wise multiplication of Ht and st)
        Ht_s <- st * Ht

        # Compute piVec (softmaxes for each currency)
        piVec <- unlist(lapply(seq_len(n_curren),function(a) get_softmax(a,Ht_s)))

        # get the preferred action
        action <- which.max(piVec)

        # reward for our action (the softmax vector is used as the weights):
        # exp() of the value returned by getLogReturns()
        rt <- exp(getLogReturns(yt,piVec))
        Rvec <- c(Rvec,rt)

        # Reward for random action; random_action(k) is called with the number
        # of non-cash assets, n_curren - 1 (was hard-coded to 4)
        Rvec_random <- c(Rvec_random,exp(getLogReturns(yt,random_action(n_curren-1))))

        # Update preference vector
        Ht <- get_update(rt,Rvec,Ht,action,alpha)

        # debugging output, disabled:
        # print(paste0("episode",i))
        # cat("Ht: ", Ht, "\n")
        # cat("Ht_s: ", Ht_s, "\n")
        # cat("piVec: ", piVec, "\n")
        # cat("rt: ", rt, "\n")
        # print("==============================")

        prev_v <- curr_v
        st <- weight_vec %*% history
    }
    c(mean(Rvec),mean(Rvec_random))
}

In [49]:
# Run the contextual simulation: n_curren = 5, n_steps = 20000, alpha = 0.3,
# window_size = 10, discount = 0.8.
simulate_contextual(5,20000,0.3,10,0.8)

In [64]:
library(nnet)

# Experiment: refit an nnet model ten times on fresh random data, starting
# every fit after the first from the previous fit's weights.
for(i in 1:10){
    # 50 new uniform draws from [0, 100] on every iteration
    Var1 <- runif(50,0,100)
    # NOTE: despite the column name, the target here is Var1 shifted by the
    # constant sqrt(i), not sqrt(Var1)
    sqrt.data <- data.frame(Var1,Sqrt=sqrt(i) + Var1)
    
    if(i == 1){
        # first fit: no starting weights supplied, at most 1 iteration
        net.sqrt <- nnet(Sqrt~Var1, sqrt.data, size=10, maxit=1)
    } else {
        # later fits: start from the previous weights, at most 10 iterations
        net.sqrt <- nnet(Sqrt~Var1,  sqrt.data, size=10, maxit=10, Wts=net.sqrt$wts)
    }
        
    
    # show the fitted weights so successive iterations can be compared
    print(net.sqrt$wts)
    print("==========================================")
}

# # Make Some Training Data
# Var1 <- runif(50, 0, 100) 
# # create a vector of 50 random values, min 0, max 100, uniformly distributed
# sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 
# # create a dataframe with two columns, with Var1 as the first column
# # and square root of Var1 as the second column

# nnet.sqrt <- nnet(Sqrt~Var1, sqrt.data, size=10, maxit=1)

# weights:  31
initial  value 188833.730608 
final  value 188233.080057 
stopped after 2 iterations
 [1]    2.88789116154  148.79646982064    0.45214161866   -0.11089756633
 [5]   -0.56274653382   -0.63095494494    8.47060269185  579.74885503230
 [9]   -2.46766349706 -140.32764511513    0.25362085619    0.62571131094
[13]   -0.62319842786    0.79215028720    0.13087912925    0.70790294812
[17]    0.07373739906    0.20756312574    0.44637704047   -2.06535431783
[21]  107.57813095754   95.33067830604    0.73723544218    0.02995010330
[25]   37.80351465087   41.19112383881  107.46186139834  107.48819140182
[29]  107.28689762848  106.75602321898   -0.29050323268
# weights:  31
initial  value 155365.654136 
final  value 155365.654136 
converged
 [1]    2.88789116154  148.79646982064    0.45214161866   -0.11089756633
 [5]   -0.56274653382   -0.63095494494    8.47060269185  579.74885503230
 [9]   -2.46766349706 -140.32764511513    0.25362085619    0.62571131094
[13]   -0.62319842786    0.7921

In [55]:
# Fit nnet (10 hidden units, at most 1 iteration) on whatever sqrt.data
# currently holds; sqrt.data is not defined in this cell.
nnet.sqrt <- nnet(Sqrt~Var1, sqrt.data, size=10, maxit=1)

# weights:  31
initial  value 13801.935881 
final  value 13247.836022 
stopped after 2 iterations


In [14]:
# Try a new method based on sampling actions and approximating action value function

# Samples a random action: n_assets + 1 uniform draws rescaled so that the
# weights sum to 1.
sample_action <- function(n_assets){
    raw_weights <- runif(n_assets+1)
    prop.table(raw_weights)
}

# Toy regression check: can a small neural net learn sqrt(x) on [0, 100]?
# NOTE(review): neuralnet() and compute() are not base R and no
# library(neuralnet) call is visible in this file -- presumably the neuralnet
# package is attached elsewhere; confirm.

# Make Some Training Data
# Var1: a vector of 50 random values, min 0, max 100, uniformly distributed
Var1 <- runif(50, 0, 100) 
# sqrt.data: a dataframe with two columns, Var1 as the first column and the
# square root of Var1 (named Sqrt) as the second column
sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 

# Train the neural net
# Predict the Sqrt values from the Var1 values using 10 hidden nodes;
# threshold = 0.01 is passed through as given (see the neuralnet documentation
# for its exact meaning)
net.sqrt <- neuralnet(Sqrt~Var1,  sqrt.data, hidden=10, threshold=0.01)

# Compute or predict for test data, (1:10)^2
# The inputs are 1^2, 2^2, 3^2 ... 10^2 (i.e. 1, 4, 9, 16, 25 ... 100), so a
# well-trained net.sqrt should produce values close to 1, 2, ..., 10.
compute(net.sqrt, (1:10)^2)$net.result
# What the above is doing is using the neural net trained (net.sqrt), 
# if we have a vector of 1^2, 2^2, 3^2 ... 10 ^2 (i.e. 1, 4, 9, 16, 25 ... 100), 
# what would net.sqrt produce?

0
1.254774474
1.997149954
2.994169568
4.001415595
4.997892565
6.002643974
6.999658074
7.998341659
9.003371897
9.972014956


In [43]:
# Collect (state, action, reward) rows for training: at each step the current
# price-relative vector is the state, a random action is sampled, and the
# reward is the value getLogReturns() gives for that action on the NEXT
# price-relative vector.
# Reads the global `close` and calls getPriceRelativeVec(), getLogReturns()
# and sample_action(), none of which are defined in this cell.

# get price relative vector for starting state
price <- head(close,2)
yt <- getPriceRelativeVec(head(price,1),tail(price,1))
prev_v <- tail(price,1)
returns <- 0

n_train_steps <- 100 # number of rows to collect

# one row per step, bound together once after the loop instead of growing the
# matrix with rbind() on every iteration
sample_rows <- vector("list",n_train_steps)

for(i in seq_len(n_train_steps)){

    price <- head(close,i+2) # update price matrix
    curr_v <- tail(price,1)
    yt_next <- getPriceRelativeVec(prev_v,curr_v) # get price relative vector of next state
    wt <- sample_action(4) # sample an action
    rt <- getLogReturns(yt_next,wt)

    sample_rows[[i]] <- c(yt,wt,rt) # state action pair and reward

    prev_v <- curr_v
    yt <- yt_next
}

# matrix with one row per step: state values, action weights, reward
data <- do.call(rbind,sample_rows)

In [44]:
# create a testing dataframe
# Each row of `data` was built as c(yt, wt, rt): state, action weights, reward.
data <- data.frame(data)
# Drop the first state column. Presumably this is the cash component, which is
# always 1 in the price-relative vectors printed above -- confirm.
data$X1 <- NULL
# Remaining 10 columns: y2..y5 (state), w1..w5 (action weights), r (reward)
colnames(data) <- c("y2","y3","y4","y5","w1","w2","w3","w4","w5","r")

In [45]:
# fit neural net and predict
# Fit on the first 50 rows: predict the reward r from the state columns
# (y2..y5) and the action columns (w1..w5), with 10 hidden nodes.
network <- neuralnet(r~y2+y3+y4+y5+w1+w2+w3+w4+w5,head(data,50),hidden=10,threshold=0.01)
# Evaluate on the last 50 rows with the r column removed from the inputs.
# Result columns: predicted reward (left) next to the observed reward (right).
cbind(compute(network,subset(tail(data,50),select=-r))$net.result,tail(data,50)$r)

0,1,2
51,-0.0079558480769,0.0708140701148
52,0.0245109623899,0.0672985527889
53,-0.0352382795099,-0.0049296967756
54,0.0147700499662,-0.0391479558467
55,0.0047419403723,0.0128816615937
56,0.0257578968284,-0.0069060369898
57,0.0173938037127,-0.0362066941949
58,0.0185186165525,-0.0327041260752
59,-0.0297483537749,0.0225316904661
60,0.0115562279471,-0.0168263977611
