An up-down model.

In [None]:
library(stringr)

In [None]:
file_list <- list.files('./data')

In [None]:
name_list <- str_replace_all(file_list, '\\.rds','')

In [None]:
Sys.time()

In [None]:
# Append one timestamped line to the run log.
#
# text: message to record; it is prefixed with the current Sys.time().
# The line is appended to "mylog.txt" (relative path, so the file lands in
# the current working directory).
write_log <- function(text) {
  stamped <- paste(Sys.time(), text)
  write(stamped, file = "mylog.txt", append = TRUE)
}

# Randomly draw k distinct samples.
# Each sample holds n elements of w, drawn without replacement using the
# selection weights in prob. Two draws count as the same sample only when
# they contain the same elements in the same order.
#
# w:    vector of candidates (e.g. stock names).
# n:    number of elements per sample.
# k:    number of distinct samples to return.
# prob: selection weights for the elements of w, or NULL for equal weights.
#
# Returns a list of k vectors, each of length n.
# Stops with an error when the request can never be satisfied, instead of
# failing inside sample() or retrying forever.
make_samples <- function(w, n, k, prob) {
  # Only elements with a positive weight can ever be drawn.
  drawable <- if (is.null(prob)) length(w) else sum(prob > 0, na.rm = TRUE)
  if (n > drawable) {
    stop("make_samples: n (", n, ") exceeds the number of drawable elements (",
         drawable, ")", call. = FALSE)
  }
  # Upper bound on the number of distinct ordered samples; asking for more
  # would make the rejection loop below run forever.
  max_distinct <- choose(drawable, n) * factorial(n)
  if (k > max_distinct) {
    stop("make_samples: k (", k, ") exceeds the ", max_distinct,
         " distinct samples that can be drawn", call. = FALSE)
  }

  results <- vector("list", k)
  seen <- character(0)
  found <- 0
  while (found < k) {
    candidate <- sample(w, size = n, replace = FALSE, prob = prob)
    candidate_str <- paste0(candidate, collapse = "-")
    # Reject a draw that repeats an earlier sample and try again.
    if (!candidate_str %in% seen) {
      found <- found + 1
      seen[found] <- candidate_str
      results[[found]] <- candidate
    }
  }
  results
}

# Sampling weight for a single stock: average trade profit per active row.
#
# name:                 stock identifier handed to data_prep().
# HHt_val, GGt_val:     forwarded to data_prep().
# stop_loss, sell_dev,
# buy_dev, courtage:    forwarded positionally to process_one().
#
# Returns sum(trade_profit) divided by the number of rows where
# trade_profit != 0 or hold != 0.
# data_prep() and process_one() are not defined in this file
# (presumably they come from util.r).
get_prob_weight <- function(name, HHt_val, GGt_val,
                            stop_loss, sell_dev, buy_dev, courtage) {
  prepared <- data_prep(name, HHt_val = HHt_val, GGt_val = GGt_val)
  traded <- process_one(prepared, stop_loss, sell_dev, buy_dev, courtage)
  # Keep only rows with a realised profit/loss or an open position.
  active_rows <- traded %>%
    filter(trade_profit != 0 | hold != 0)
  stats <- active_rows %>%
    summarise(sum = sum(trade_profit), n = n(), daily = sum / n)
  stats$daily
}

# Risk-adjusted profit of one portfolio (a set of stocks traded together).
#
# name_list:            stock identifiers handed to data_prep_multiple().
# HHt_val, GGt_val:     forwarded to data_prep_multiple().
# stop_loss, sell_dev,
# buy_dev, courtage:    forwarded positionally to process_multiple().
#
# Side effect: the processed data frame is written to "<names>.csv", where
# <names> is name_list joined with "-".
# Returns mean(S) - 0.5 * sd(S), where S holds the summed trade_profit of
# each consecutive 252-row window (presumably one trading year per window).
get_sample_profit <- function(name_list, HHt_val, GGt_val,
                              stop_loss, sell_dev,
                              buy_dev, courtage) {
  prepared <- data_prep_multiple(name_list, HHt_val = HHt_val, GGt_val = GGt_val)
  traded <- process_multiple(prepared, stop_loss, sell_dev, buy_dev, courtage)

  name_list_str <- paste0(name_list, sep = '', collapse = '-')
  write.csv(traded, str_glue('{name_list_str}.csv'))

  # Number the rows, bucket them into 252-row windows, total each window.
  indexed <- traded %>%
    mutate(id = row_number())
  windowed <- indexed %>%
    mutate(range = cut(id, seq(0, max(id) + 252, 252)))
  per_window <- windowed %>%
    group_by(range) %>%
    summarise(sum = sum(trade_profit))

  window_sums <- per_window$sum
  mean(window_sums) - 0.5 * sd(window_sums)
}



In [None]:
write_log('test')

In [None]:
# for each par, run k samples, each sample contains n stocks.
# the return value is the annual profit
source('./util.r')
highest <- 0.17
# Objective for optim(): mean risk-adjusted profit over k random portfolios.
#
# par:       numeric vector c(HHt_val, GGt_val, stop_loss, sell_dev, buy_dev).
# k:         number of portfolios to draw.
# n:         number of stocks per portfolio.
# name_list: candidate stock names.
# courtage:  brokerage fee rate, used for both the weights and the profits.
#
# Steps: compute a weight per stock with get_prob_weight(), keep stocks with a
# positive weight, draw k portfolios with make_samples(), score each with
# get_sample_profit(), and return the mean score (NA scores are ignored).
# Side effects: writes to the log, updates the global `highest` when a
# portfolio beats it, and deletes the CSV that get_sample_profit() wrote for
# every portfolio that does not set a new high.
my_optim <- function(par, k, n, name_list, courtage) {
  write_log(str_glue('Starting calculating weights: par:{par[1]}, {par[2]}, {par[3]}, {par[4]}, {par[5]}'))
  # buy_dev and courtage are passed exactly as in the profit step below, so
  # the weights and the portfolio profits are computed with the same settings.
  weight_list <- lapply(name_list, get_prob_weight,
    HHt_val = par[1], GGt_val = par[2],
    stop_loss = par[3],
    sell_dev = par[4],
    buy_dev = par[5],
    courtage = courtage)
  df_weight <- data.frame(name = unlist(name_list), weight = as.numeric(weight_list)) %>%
    filter(weight > 0)
  if (nrow(df_weight) < n) {
    stop("my_optim: only ", nrow(df_weight),
         " stocks have a positive weight, cannot draw portfolios of size ", n,
         call. = FALSE)
  }
  perm_samples <- make_samples(df_weight$name, n, k, df_weight$weight)
  write_log('Calculating weights completed')

  profits <- numeric(length(perm_samples))
  for (i in seq_along(perm_samples)) {
    members <- perm_samples[[i]]
    sample_str <- paste0(members, collapse = '-')
    profit <- get_sample_profit(members, par[1], par[2], par[3], par[4],
      par[5], courtage)
    # An NA profit can never be a new high; it must not abort the run.
    if (!is.na(profit) && profit > highest) {
      highest <<- profit
      write_log(str_glue('Find new high: {profit} sample: {sample_str} par:{par[1]}, {par[2]}, {par[3]}, {par[4]}, {par[5]}'))
    } else {
      # Not a new high: drop the CSV written for this portfolio.
      csv_file <- str_glue('{sample_str}.csv')
      if (file.exists(csv_file)) {
        file.remove(csv_file)
      }
    }
    profits[i] <- profit
  }

  mean(profits, na.rm = TRUE)
}

# Search for the parameter vector c(HHt_val, GGt_val, stop_loss, sell_dev,
# buy_dev) that maximises my_optim(); the objective is negated because it is
# passed to optim() as the function to minimise.
fit <- optim(
  par = c(0.08, 0.08, 0.80, 0.03, -0.03),
  fn = function(par) {
    -my_optim(par, k = 100, n = 32, name_list = name_list, courtage = 0.00089)
  },
  control = list(maxit = 500, parscale = c(1, 1, 10, 1, 1))
)
fit

In [None]:


perm_samples <- make_samples(df_weight$name, 32, 120, df_weight$weight)
perm_samples

In [None]:
source('./util.r')
highest <- -99999
# Sampling weight for a single stock: average trade profit per active row.
#
# name:                 stock identifier handed to data_prep().
# HHt_val, GGt_val:     forwarded to data_prep().
# stop_loss, sell_dev,
# buy_dev, courtage:    forwarded positionally to process_one().
#
# Returns sum(trade_profit) divided by the number of rows where
# trade_profit != 0 or hold != 0.
# data_prep() and process_one() are not defined in this file
# (presumably they come from util.r).
get_prob_weight <- function(name, HHt_val, GGt_val,
                            stop_loss, sell_dev, buy_dev, courtage) {
  prepared <- data_prep(name, HHt_val = HHt_val, GGt_val = GGt_val)
  traded <- process_one(prepared, stop_loss, sell_dev, buy_dev, courtage)
  # Keep only rows with a realised profit/loss or an open position.
  active_rows <- traded %>%
    filter(trade_profit != 0 | hold != 0)
  stats <- active_rows %>%
    summarise(sum = sum(trade_profit), n = n(), daily = sum / n)
  stats$daily
}


# Score one parameter vector across all stocks.
#
# par:       numeric vector c(HHt_val, GGt_val, stop_loss, sell_dev, buy_dev).
# name_list: stock names; get_prob_weight() is evaluated for each.
# courtage:  brokerage fee rate forwarded to get_prob_weight().
#
# Returns mean(w) - 0.2 * sd(w) + 0.0001 * log(stop_loss), where w holds the
# per-stock weights with NA values dropped. The result is NA/NaN when no
# weight is usable or when stop_loss is negative.
# Side effect: when the score beats the global `highest`, `highest` is
# updated and the new high is printed and appended to "mylog.txt".
get_par_score <- function(par, name_list, courtage) {
  HHt_val <- par[1]
  GGt_val <- par[2]
  stop_loss <- par[3]
  sell_dev <- par[4]
  buy_dev <- par[5]
  # Convert once; both statistics below need a numeric vector.
  weights <- as.numeric(lapply(name_list, get_prob_weight, HHt_val, GGt_val,
    stop_loss,
    sell_dev,
    buy_dev,
    courtage))
  weight_mean <- mean(weights, na.rm = TRUE)
  weight_std <- sd(weights, na.rm = TRUE)
  score <- weight_mean - 0.2 * weight_std + 0.0001 * log(stop_loss)
  # A missing score must not abort the optimiser inside if(); it simply
  # cannot be a new high.
  if (!is.na(score) && score > highest) {
    highest <<- score
    log_text <- str_glue('new high found: score {score}, mean: {weight_mean} , std: {weight_std} par:{par[1]}, {par[2]}, {par[3]}, {par[4]}, {par[5]}')
    print(log_text)
    write(log_text, file = "mylog.txt", append = TRUE)
  }
  score
}

# Search for the parameter vector c(HHt_val, GGt_val, stop_loss, sell_dev,
# buy_dev) that maximises get_par_score(); the objective is negated because
# it is passed to optim() as the function to minimise.
fit <- optim(
  par = c(0.08, 0.08, 0.80, 0.03, -0.03),
  fn = function(par) {
    -get_par_score(par, name_list = name_list, courtage = 0.00089)
  },
  control = list(maxit = 500, parscale = c(1, 1, 10, 1, 1))
)
fit

In [None]:
get_par_score(fit$par, name_list=name_list, courtage=0.00089)

In [None]:


# Per-stock weights for one fixed parameter set.
weight_list <- lapply(name_list, get_prob_weight,
  HHt_val = 0.08919831, GGt_val = 0.10930187,
  stop_loss = 0.90204186,
  sell_dev = 0.03497496,
  buy_dev = -0.09173406,
  courtage = 0.00089)

# lapply() returns a list; mean() and sd() need a numeric vector, so convert
# first. NA weights are dropped, as in get_par_score().
weight_values <- as.numeric(weight_list)
weight_mean <- mean(weight_values, na.rm = TRUE)
weight_std <- sd(weight_values, na.rm = TRUE)



In [None]:

# Pair every stock name with its numeric weight and keep only the stocks
# whose weight is strictly positive.
df_weight <- filter(
  data.frame(name = unlist(name_list), weight = as.numeric(weight_list)),
  weight > 0
)



In [None]:



mean(as.numeric(weight_list), na.rm=TRUE)

In [None]:
df_weight

In [None]:
source('./util.r')
# Risk-adjusted profit of one portfolio (a set of stocks traded together).
#
# name_list:            stock identifiers handed to data_prep_multiple().
# HHt_val, GGt_val:     forwarded to data_prep_multiple().
# stop_loss, sell_dev,
# buy_dev, courtage:    forwarded positionally to process_multiple().
#
# Side effect: the processed data frame is written to "<names>.csv", where
# <names> is name_list joined with "-".
# Returns mean(S) - 0.5 * sd(S), where S holds the summed trade_profit of
# each consecutive 252-row window (presumably one trading year per window).
get_sample_profit <- function(name_list, HHt_val, GGt_val,
                              stop_loss, sell_dev,
                              buy_dev, courtage) {
  prepared <- data_prep_multiple(name_list, HHt_val = HHt_val, GGt_val = GGt_val)
  traded <- process_multiple(prepared, stop_loss, sell_dev, buy_dev, courtage)

  name_list_str <- paste0(name_list, sep = '', collapse = '-')
  write.csv(traded, str_glue('{name_list_str}.csv'))

  # Number the rows, bucket them into 252-row windows, total each window.
  indexed <- traded %>%
    mutate(id = row_number())
  windowed <- indexed %>%
    mutate(range = cut(id, seq(0, max(id) + 252, 252)))
  per_window <- windowed %>%
    group_by(range) %>%
    summarise(sum = sum(trade_profit))

  window_sums <- per_window$sum
  mean(window_sums) - 0.5 * sd(window_sums)
}

df <- get_sample_profit(perm_samples_list[[1]][[1]], HHt_val=0.08919831, GGt_val=0.10930187,
      stop_loss=0.90204186, 
      sell_dev=0.03497496, 
      buy_dev=-0.09173406, 
      courtage=0.00089)


In [None]:

perm_samples_list <- split(perm_samples, ceiling(seq_along(perm_samples)/3))
#perm_samples_list
perm_samples_list[[1]][[1]]


In [None]:
source('./util.r')
df <- data_prep_multiple(perm_samples_list[[1]][[1]], 
  HHt_val=0.08919831, GGt_val=0.10930187)

write.csv(df, 'tmp.csv')

In [None]:
library(doParallel)
# parallel test
my.cluster <- parallel::makeCluster(
  3,
  type = "FORK"
)
sek_df <- get_sek_usd()
doParallel::registerDoParallel(my.cluster)

# Stand-in with the same leading parameters as get_sample_profit(), plus j.
# Every argument except j is ignored.
# Returns one uniform random number drawn between j and 7.5.
get_sample_profit_dummy <- function(name_list, HHt_val, GGt_val,
                                    stop_loss, sell_dev,
                                    buy_dev, courtage, j) {
  upper_bound <- 7.5
  runif(1, min = j, max = upper_bound)
}

# Evaluate every portfolio batch by batch, logging each new best profit.
# all_resuls collects one numeric profit per portfolio (-1 marks a failed
# evaluation); highest tracks the best profit seen so far.
all_resuls <- c()
highest <- 0
for (i in seq_along(perm_samples_list)) {
  samples_sub_list <- perm_samples_list[[i]]
  # NOTE(review): %do% evaluates sequentially; a registered cluster is only
  # used by %dopar%. Kept as %do% to preserve the current execution model.
  results <- foreach(j = seq_along(samples_sub_list), .combine = 'c') %do% {
    # Call the scorer directly so each iteration yields a plain number,
    # which keeps `results` a numeric vector rather than a list.
    tryCatch(
      get_sample_profit(samples_sub_list[[j]],
        HHt_val = 0.08919831, GGt_val = 0.10930187,
        stop_loss = 0.90204186,
        sell_dev = 0.03497496,
        buy_dev = -0.09173406,
        courtage = 0.00089),
      error = function(e) {
        # Report the failure instead of hiding it, then use the -1 sentinel.
        message('get_sample_profit failed for batch ', i, ', sample ', j, ': ',
                conditionMessage(e))
        -1
      }
    )
  }

  # Plain character and numeric columns so that arrange(desc()) can sort them.
  df_results <- data.frame(
    perm = vapply(samples_sub_list, function(x) paste0(x, collapse = '-'),
                  character(1)),
    profits = as.numeric(results),
    stringsAsFactors = FALSE
  )
  df_results <- df_results %>% arrange(desc(profits))
  perm <- df_results[1, 'perm']
  profits <- df_results[1, 'profits']
  # An NA best profit can never be a new high and must not break if().
  if (!is.na(profits) && profits > highest) {
    highest <- profits
    log_text <- str_glue('new high found: perm {perm}, profit:{profits}')
    print(log_text)
    write(log_text, file = "mylog.txt", append = TRUE)
  }

  all_resuls <- c(all_resuls, results)

  #saveRDS(results, file=str_glue("results_samples_list_{i}.rds"))
}


In [None]:
all_resuls

In [None]:


stopCluster(my.cluster)

In [None]:
perm_samples

In [17]:
#-0.09173406, 0.03497496, 0.08919831, 0.10930187, 0.90204186  NXPI
source('util.r')

df <- data_prep_multiple(c('DHR', 'CEG', 'V'), HHt_val=0.0876646450332911, GGt_val=0.096818683582795, isNYSE=TRUE)
df <- process_multiple(df, stop_loss=0.754701556390758, 
  sell_dev=0.0211575532288305, 
  buy_dev=-0.0190314759778294, 
  courtage=0.00079)



my_summary <- df %>% filter(trade_profit != 0 | hold != 0) %>% summarise(sum=sum(trade_profit))
my_summary$sum


In [16]:
# Tag each row with the first seven characters of its row name
# (presumably a "YYYY-MM" prefix of a date -- confirm against data_prep output).
month <- substr(rownames(df), 1, 7)
df$month <- month

# Within each month keep rows only while the running count of trades is at
# most 10, then total the trade profit per month.
my_summary2 <- df %>%
  group_by(month) %>%
  mutate(total_trade_times = cumsum(trade_times)) %>%
  filter(total_trade_times <= 10) %>%
  summarise(sum = sum(trade_profit))

# Overall profit under the 10-trades-per-month cap.
sum(my_summary2$sum)


In [None]:
write.csv(df, 'aapl.csv')

In [None]:
source('util.r')
df <- data_prep(c('AAPL'), HHt_val=0.08919831, GGt_val=0.10930187)
df <- process_one(df,
  stop_loss=0.90204186, 
  sell_dev=0.03497496, 
  buy_dev=-0.09173406, 
  courtage=0.00089)
write.csv(df, 'aapl.csv')


In [None]:
my_summary <- df %>% filter(trade_profit != 0 | hold != 0) %>% summarise(sum=sum(trade_profit), n=n(), daily=sum/n)
my_summary$daily

In [None]:
# Bucket rows into consecutive 252-row windows, keep rows with a realised
# profit/loss or hold == 1, and compute per window the total profit, the
# number of such rows and the profit per row.
my_summary <- df %>%
  mutate(id = row_number()) %>%
  mutate(range = cut(id, seq(0, max(id) + 252, 252))) %>%
  filter(trade_profit != 0 | hold == 1) %>%
  group_by(range) %>%
  summarise(sum = sum(trade_profit), n = n(), daily = sum / n)

# Mean and spread of the per-window profit per row.
daily_mean <- mean(my_summary$daily)
daily_sd <- sd(my_summary$daily)
daily_mean

In [None]:
my_summary

In [None]:
a=NULL
is.null(a)

In [None]:
source('util.r')
df <- data_prep_multiple(c('T', 'AAPL'), HHt_val=0.08919831, GGt_val=0.10930187)
df <- process_multiple(df, HHt_val=0.08919831, GGt_val=0.10930187,
  stop_loss=0.90204186, 
  sell_dev=0.03497496, 
  buy_dev=-0.09173406, 
  courtage=0.00089)
write.csv(df, 't-aapl.csv')

In [None]:
source('util.r')
df <- data_prep_multiple(c('AAPL', 'T'), HHt_val=0.08919831, GGt_val=0.10930187)
df <- process_multiple(df, HHt_val=0.08919831, GGt_val=0.10930187,
  stop_loss=0.90204186, 
  sell_dev=0.03497496, 
  buy_dev=-0.09173406, 
  courtage=0.00089)
write.csv(df, 'aapl-t.csv')

In [None]:
df %>% mutate(id=row_number()) %>% mutate(range=cut(id, seq(0, max(id)+252, 252))) %>% 
  filter(trade_profit != 0 | hold == 1) %>% 
  group_by(range) %>% summarise(sum=sum(trade_profit), n=n())

In [None]:
source('util.r')
df <- data_prep_multiple(c('AAPL', 'T'), HHt_val=0.08919831, GGt_val=0.10930187)

In [None]:
head(data_prep('MSFT'))

In [None]:
head(data.frame(readRDS(file=str_glue('data/AAPL.rds'))))

In [None]:
head(df)

In [None]:
source('util.r')
df <- data_prep_multiple(c('AAPL', 'T'), HHt_val=0.08919831, GGt_val=0.10930187)
df <- process_multiple(df, HHt_val=0.08919831, GGt_val=0.10930187,
  stop_loss=0.90204186, 
  sell_dev=0.03497496, 
  buy_dev=-0.09173406, 
  courtage=0.00089)
write.csv(df, 'aapl-t.csv')

In [None]:
a <- c('aa.b','cc.b','dd.bb','dd.cc')
unique(str_replace(a, '\\..*', ''))

In [None]:
c(0,0,0,'')

In [None]:
a <- list(open=123)

In [None]:
a[['open']]

In [None]:
a <- 1 
b <- 1
c <- 1
a <- b <- c <- 0


In [None]:
c

In [None]:
bar <- 2

# Scoping experiment: assigns 1 to a variable named bar inside the function.
# `<-` creates a local bar here, so a bar defined outside the function is
# left unchanged by calling foo().
foo <- function() {
    bar <- 1
}
foo()

In [None]:
bar

In [None]:
stocks <- 'TECH-BALL-ACN-RSG-ORLY-ABT-CTVA-GOOGL-AJG-ABC-VRSK-ED-JNJ-PGR-SJM-DVA-AON-WM-JBHT-NDSN-ITW-COST-DHR-CMS-TMO-FRC-STE-LLY-ADP-FDS-KEYS-AVGO'
stocks <- str_split(stocks, '-', simplify=TRUE)
pars <- '0.08, 0.088, 0.8, 0.03, -0.03'
par <- as.numeric(str_split(pars, ',', simplify=TRUE))
HHt <- par[1]
GGt <- par[2]

In [None]:
par

In [65]:
source('./util.r')
data_prep('TECH', start_date='2007-01-01', HHt=HHt, GGt=GGt, predict=TRUE)

# validate if it is the latest.
# find out last trade day, last working days but holidays.
# holidays:
# 22-11-24, 22-12-26, 23-1-2, 23-1-16, 23-2-20, 23-4-7, 23-5-29, 23-6-19, 23-7-4,23-9-4,23-11-23,23-12-25
# 24-1-1, 24-1-15, 24-2-19, 24-3-29


“SEK=X contains missing values. Some functions will not work if objects contain missing values in the middle of the series. Consider using na.omit(), na.approx(), na.fill(), etc to remove or replace them.”


dim(at): 1, nrow(df): 3970


In [80]:
# get last trading day
wday(Sys.Date()) - 1