###############################################################################
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###############################################################################
# Collection of routines to work with data
# Copyright (C) 2011 Michael Kapler
#
# For more information please visit my blog at www.SystematicInvestor.wordpress.com
# or drop me a line at TheSystematicInvestor at gmail
###############################################################################
#' @export
find.tokens <- function
(
txt, # source text
marker, # key-phrase(s) to find
pos = 1, # position to start searching at
pos.start = T
)
{
# find location of data
marker = spl(marker)
for(i in 1:len(marker)) {
if( pos < 2 )
pos1 = regexpr(marker[i], txt)
else
pos1 = regexpr(marker[i], substr(txt, pos, nchar(txt)))
if( pos1 < 0 )
return(pos1)
else {
if( pos < 2 ) pos = pos1
else pos = pos1 + pos - 1
}
pos = pos + attr(pos1, 'match.length')
}
if( pos.start ) pos = pos - attr(pos1, 'match.length')
return(pos)
}
#' @export
extract.token <- function
(
txt, # source text
smarker, # start key-phrase(s) to find
emarker, # end key-phrase(s) to find
pos = 1, # position to start searching at
keep.marker = F
)
{
pos1 = 1
if (nchar(smarker) > 0)
pos1 = find.tokens(txt, smarker, pos, pos.start = keep.marker)
if( pos1 < 0 ) return("")
pos1.marker = iif(keep.marker, pos1 + nchar(last(spl(smarker))), pos1)
pos2 = nchar(txt)
if (nchar(emarker) > 0)
pos2 = find.tokens(txt, emarker, pos1.marker, pos.start = !keep.marker) - 1
if( pos2 < 0 ) return("")
return(substr(txt,pos1,pos2))
}
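# Illustrative usage (sketch, not part of the original file; note that markers are
# treated as regular expressions by regexpr):
# extract.token('<b>IBM</b> 100.50', '<b>', '</b>')                   # "IBM"
# extract.token('<b>IBM</b> 100.50', '<b>', '</b>', keep.marker = T)  # "<b>IBM</b>"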
#' @export
remove.tags <- function
(
temp # source text
)
{
# remove all formatting
temp = gsub(pattern = '<.*?>', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\r', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\n', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\t', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&nbsp;', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&amp;', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&raquo;', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&#37;', replacement = '%', temp, perl = TRUE)
return(temp)
}
#' @export
replace.token <- function
(
txt, # source text
smarker, # start key-phrase(s) to find
emarker, # end key-phrase(s) to find
replacement,# replacement token
pos = 1 # position to start searching at
)
{
token = extract.token(txt, smarker, emarker, pos, keep.marker = T)
if(nchar(token) == 0)
txt
else
replace.token(gsub(pattern = token, replacement = replacement, txt), smarker, emarker, replacement)
}
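# Illustrative usage (sketch, not part of the original file): every token between the
# markers, markers included, is replaced; note that gsub treats the token as a regex
# replace.token('<i>a</i> and <i>b</i>', '<i>', '</i>', 'X')  # "X and X"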
#' @export
clean.table <- function
(
temp # extracted table
)
{
temp = trim(temp)
temp[nchar(temp)==0] = NA
temp = temp[ncol(temp) > rowSums(is.na(temp)),,drop=F]
temp[,nrow(temp) > colSums(is.na(temp)),drop=F]
}
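# Illustrative usage (sketch, not part of the original file): empty cells become NA
# and rows/columns that are entirely NA are dropped
# m = matrix(c('a','','b', '','','', 'c','','d'), nr=3, byrow=T)
# clean.table(m)  # 2 x 2 matrix: a,b / c,d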
###############################################################################
# extract.table.from.webpage
#' @export
###############################################################################
extract.table.from.webpage <- function
(
txt, # source text of webpage
marker=NA, # key-phrase(s) located in the table to extract
has.header=T,# flag if table has a header
end.marker=NA # additional end of token marker(s)
)
{
tryCatch({
# find location of data
pos1=1
if(!is.na(marker)) {
marker = spl(marker)
if(len(marker) > 0 && nchar(marker[1]) > 0)
for(i in 1:len(marker))
pos1 = regexpr(marker[i], substr(txt, pos1, nchar(txt))) + pos1
}
# find start/end of table
pos0 = tail(gregexpr('<table', substr(txt, 1, pos1))[[1]], 1)
if(pos0 == -1) pos0 = tail(gregexpr('<tbody', substr(txt, 1, pos1))[[1]], 1)
if(pos0 == -1) pos0 = pos1
pos2 = head(gregexpr('</table', substr(txt, pos1, nchar(txt)))[[1]], 1)
if(pos2 == -1) pos2 = head(gregexpr('</tbody', substr(txt, pos1, nchar(txt)))[[1]], 1)
if(pos2 == -1) pos2 = nchar(txt)+1
temp = substr(txt, pos0, pos1 + pos2 - 2)
# remove all formatting
temp = gsub(pattern = '<br>', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '</tr>', replacement = ';row;', temp, perl = TRUE)
temp = gsub(pattern = '</td>', replacement = ';col;', temp, perl = TRUE)
temp = gsub(pattern = '</th>', replacement = ';col;', temp, perl = TRUE)
if(!is.na(end.marker)) {
marker = spl(end.marker)
if(len(marker) > 0 && nchar(marker[1]) > 0)
for(i in 1:len(marker))
temp = gsub(pattern = marker[i], replacement = ';row;', temp, perl = TRUE)
}
temp = gsub(pattern = '<.*?>', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\r', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\n', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '\t', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&nbsp;', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&amp;', replacement = '', temp, perl = TRUE)
temp = gsub(pattern = '&raquo;', replacement = '', temp, perl = TRUE)
# parse into matrix
temp = lapply( strsplit(temp, ';row;'), strsplit, ';col;')
n = max( sapply(temp[[1]], function(x) len(x)) )
temp = t( sapply(temp[[1]], function(x) x[1:n]) )
if(has.header) {
colnames(temp) = trim(temp[(has.header + 0), ])
temp = temp[-c(1:(has.header + 0)), ,drop=F]
}
}, error = function(ex) {
temp <<- txt
}, finally = {
return(temp)
})
}
###############################################################################
# Test for extract.table.from.webpage function
###############################################################################
extract.table.from.webpage.test <- function()
{
load.packages('quantmod')
Symbol = 'IBM'
# download Key Statistics from yahoo
url = paste('http://finance.yahoo.com/q/ks?s=', Symbol, sep = '')
txt = join(readLines(url))
# extract Valuation Measures table from this page
temp = extract.table.from.webpage(txt, 'Market Cap', has.header = F)
temp = rbind(c('', Symbol), temp) # add header row
# download IBM price history from Yahoo
data = getSymbols(Symbol, from = '1980-01-01', auto.assign = FALSE)
# prepare IBM data for 2010:2011 and compute 50-day moving average
y = data['2010::2011']
sma50 = SMA(Cl(y), 50)
png(filename = 'plot1.png', width = 500, height = 500, units = 'px', pointsize = 12, bg = 'white')
# plot candles, volume and table
layout(c(1,1,2,3,3))
plota(y, type = 'candle', main = Symbol, plotX = F)
plota.lines(sma50, col='blue')
plota.legend(c(Symbol,'SMA 50'), 'green,blue', list(y,sma50))
y = plota.scale.volume(y)
plota(y, type = 'volume')
plot.table(temp)
dev.off()
}
###############################################################################
# Pricing Zero Coupon Bond (i.e. yield to price)
# http://thinkanddone.com/finance/valuation-of-zero-coupon-bonds.html
#' @export
###############################################################################
PricingZeroCouponBond <- function
(
yield,
timetomaturity,
parvalue = 100
)
{
parvalue / ( 1 + yield ) ^ timetomaturity
}
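# Illustrative examples (not part of the original file):
# PricingZeroCouponBond(0.05, 10)       # 100 / 1.05^10 = 61.39
# PricingZeroCouponBond(0.03, 5, 1000)  # 1000 / 1.03^5 = 862.61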
###############################################################################
# Convert Historical TBills rates to Total Returns
# http://timelyportfolio.blogspot.com/2011/04/historical-sources-of-bond-returns_17.html
# http://timelyportfolio.blogspot.ca/2012/11/cashopportunity-lost-or-opportunity.html
#' @export
###############################################################################
processTBill <- function
(
yields,
timetomaturity = 1/4,
frequency = 365
)
{
yield = coredata(yields) / 100
# price return
pr = sapply( yield, function(x) PricingZeroCouponBond(x, timetomaturity) )
pr = ROC(pr, type='discrete')
pr[1] = 0
# interest return
ir = (1+mlag(yield, nlag=1))^(1 / frequency)-1
#ir = mlag(yield, nlag=1) / frequency
ir[1] = 0
# total return
tr = pr + ir
#out = as.xts( cbind(pr, ir, tr), index(yields) )
# colnames(out) = spl('PR,IR,TR')
close.price = cumprod(1 + pr)
adjusted.price = cumprod(1 + tr)
out = as.xts( cbind(close.price, adjusted.price), index(yields) )
colnames(out) = spl('Close,Adjusted')
return(out)
}
processTBill.test <- function()
{
#*****************************************************************
# Get 1 year t-bill
#******************************************************************
quantmod::getSymbols("GS1", src = "FRED")
ir = (1 + mlag(GS1) / 100) ^ (1/12) - 1
ir[1] = 0
out = processTBill(GS1, timetomaturity = 1, frequency = 12)
plota(cumprod(1 + ir), type='l', log = 'y')
plota.lines(Ad(out), type='l', col='red')
#*****************************************************************
# Get 3 years t-bill
#******************************************************************
SHY = getSymbols('SHY', src='yahoo', auto.assign = FALSE)
tbill.m = quantmod::getSymbols('GS3', src='FRED', auto.assign = FALSE)
tbill.d = quantmod::getSymbols('DGS3', src='FRED', auto.assign = FALSE)
timetomaturity = 3
compute.raw.annual.factor(tbill.d)
compute.raw.annual.factor(tbill.m)
# compute returns
tbill.m = processTBill(tbill.m, timetomaturity = timetomaturity, 12)
#index(tbill.m) = as.Date(paste('1/', format(index(tbill.m), '%m/%Y'), sep=''), '%d/%m/%Y')
tbill.d[] = ifna.prev(tbill.d)
tbill.d = processTBill(tbill.d, timetomaturity = timetomaturity, frequency = 261)
# scale to start at 1
dates = '2003::'
tbill.m = tbill.m[dates,2]
tbill.m = tbill.m / as.double(tbill.m[1])
tbill.d = tbill.d[dates,2]
tbill.d = tbill.d / as.double(tbill.d[1])
SHY = Ad(SHY[dates,])
SHY = SHY / as.double(SHY[1])
# plot
plota(tbill.d, type='l')
plota.lines(tbill.m, type='s', col='blue')
plota.lines(SHY, type='l', col='red')
plota.legend('Daily 3YR T-Bills,Monthly 3YR T-Bills,SHY','black,blue,red')
}
###############################################################################
# Load CRB Commodities Index
# http://www.jefferies.com/cositemgr.pl/html/ProductsServices/SalesTrading/Commodities/ReutersJefferiesCRB/IndexData/index.shtml
###############################################################################
# ... parameters for read.xls function
# i.e. CRB = get.CRB(perl = 'c:/perl/bin/perl.exe')
#
# This url is not working anymore, for updated example please see
# bt.extend.DBC.update.test in bt.test.r
#' @export
###############################################################################
get.CRB <- function(...)
{
load.packages('gtools,gdata')
#http://www.jefferies.com/html/ProductsServices/SalesTrading/Commodities/scripts/genExcel.pl?Index=RJCRB_Excess&StartDate=19940103&EndDate=20111202
url = paste('http://www.jefferies.com/html/ProductsServices/SalesTrading/Commodities/scripts/genExcel.pl?Index=RJCRB_Total&StartDate=19940101&EndDate=', format(Sys.Date(), '%Y%m%d'), sep='')
temp = read.xls(url, ...)
temp = as.matrix(temp[-c(1:7),])
out = repmat(as.double(temp[,2]), 1, 6)
colnames(out) = spl('Open,High,Low,Close,Volume,Adjusted')
out[, 'Volume'] = 0
#out = make.xts( out, as.Date(temp[,1], '%m/%d/%y'))
out = make.xts( out, as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%m/%d/%y'))
indexClass(out) = 'Date'
return(out)
}
get.CRB.test <- function()
{
#*****************************************************************
# Load historical data
#******************************************************************
CRB = get.CRB()
load.packages('quantmod')
# http://etfdb.com/
tickers = spl('GSG,DBC')
getSymbols(tickers, src = 'yahoo', from = '1970-01-01')
#*****************************************************************
# Compare different indexes
#******************************************************************
out = na.omit(merge(Cl(CRB), Cl(GSG), Cl(DBC)))
colnames(out) = spl('CRB,GSG,DBC')
temp = out / t(repmat(as.vector(out[1,]),1,nrow(out)))
layout(1:2)
plota(temp, ylim=range(temp))
plota.lines(temp[,1],col=1)
plota.lines(temp[,2],col=2)
plota.lines(temp[,3],col=3)
plota.legend(colnames(temp),1:3)
temp = cor(temp / mlag(temp)- 1, use='complete.obs', method='pearson')
temp[] = plota.format(100 * temp, 0, '', '%')
plot.table(temp)
layout(1:3)
plota.matplot(CRB[,c('Close','Adjusted')])
plota.matplot(DBC[,c('DBC.Close','DBC.Adjusted')])
plota.matplot(GSG[,c('GSG.Close','GSG.Adjusted')])
layout(1)
comm = extend.data(DBC, CRB, scale=T)
plota(comm, type='l', col=1)
plota.lines(CRB*0.078, type='l', lwd=5, col=col.add.alpha(2,150))
plota.lines(DBC, type='l', lwd=5, col=col.add.alpha(3,150))
plota.lines(comm, type='l', col=1)
plota.legend('comm,CRB,DBC', 1:3, list(comm,CRB,DBC))
}
###############################################################################
# Get Dow Jones Components
# http://finance.yahoo.com/q/cp?s=^DJI+Components
#' @export
###############################################################################
dow.jones.components <- function()
{
url = 'http://money.cnn.com/data/dow30/'
txt = join(readLines(url))
# extract links
temp = gsub(pattern = '">', replacement = '<td>', txt, perl = TRUE)
temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)
# extract table from this page
temp = extract.table.from.webpage(temp, 'Volume', has.header = T)
trim(temp[,'Company'])
}
dow.jones.components.0 <- function()
{
url = 'http://finance.yahoo.com/q/cp?s=^DJI+Components'
txt = join(readLines(url))
# extract table from this page
temp = extract.table.from.webpage(txt, 'Volume', has.header = T)
temp[, 'Symbol']
}
dow.jones.components.1 <- function()
{
load.packages('readxl,httr')
dir.create(paste(getwd(), 'temp', sep='/'), F)
GET('http://www.djaverages.com/?go=export-components&symbol=DJI', write_disk('temp/DJI.xls', overwrite=TRUE))
temp = read_excel('temp/DJI.xls')
temp$Ticker
}
###############################################################################
# Get NASDAQ 100 Components
# http://www.nasdaq.com/markets/indices/nasdaq-100.aspx
#' @export
###############################################################################
nasdaq.100.components <- function()
{
url = 'http://www.nasdaq.com/quotes/nasdaq-100-stocks.aspx?render=download'
temp = read.csv(url, header=TRUE, stringsAsFactors=F)
colnames(temp) = trim(colnames(temp))
tickers = temp[, 'Symbol']
return(tickers)
}
###############################################################################
# Get Sector SPDR Components
# http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportCsv?symbol=XLE
# tickers = spl('XLY,XLP,XLE,XLF,XLV,XLI,XLB,XLK,XLU')
# tickers.desc = spl('ConsumerCyclicals,ConsumerStaples,Energy,Financials,HealthCare,Industrials,Materials,Technology,Utilities')
#' @export
###############################################################################
sector.spdr.components <- function(sector.etf = 'XLE')
{
url = paste('http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportCsv?symbol=', sector.etf, sep='')
# extract table from this page
temp = read.csv(url, skip=1, header=TRUE, stringsAsFactors=F)
tickers = temp[, 'Symbol']
return(tickers)
}
###############################################################################
# S&P 500 Components
# http://en.wikipedia.org/wiki/List_of_S%26P_500_companies
#' @export
###############################################################################
sp500.components <- function()
{
url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
txt = join(readLines(url))
# extract table from this page
temp = extract.table.from.webpage(txt, 'Ticker', has.header = T)
tickers = temp[, 'Ticker symbol']
sector = temp[, 'GICS Sector']
return(list(tickers=tickers, sector=sector))
}
# List of sites that keep SP500 Components
# http://www.s-p-500.com/stocks-a-b/
#http://www.forexpros.com/indices/us-spx-500-components
#http://marketvolume.com/indexes_exchanges/sp500_components.asp
#http://en.wikipedia.org/wiki/List_of_S%26P_500_companies
#http://en.wikipedia.org/wiki/Dow_Jones_Index
###############################################################################
# S&P 100 Components
# http://www.barchart.com/stocks/sp100.php
#' @export
###############################################################################
sp100.components <- function()
{
url = 'http://www.barchart.com/stocks/sp100.php'
txt = join(readLines(url))
# extract table from this page
temp = extract.table.from.webpage(txt, 'Components', has.header = T)
i.start = grep('Name', temp[,2])
tickers = trim(temp[-c(1:i.start), 1])
return(tickers)
}
###############################################################################
# iShares FTSE 100 (ISF)
# http://uk.ishares.com/en/rc/products/ISF/all-holdings/
# http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/constituents-indices.html?index=UKX
# Yahoo ticker for UK stocks ABF.L
#' @export
###############################################################################
ftse100.components <- function()
{
# get holdings from uk.ishares.com
url = 'http://uk.ishares.com/en/rc/products/ISF/all-holdings/'
txt = join(readLines(url))
# extract table from this page
txt = gsub('&#37;','%',txt)
temp = extract.table.from.webpage(txt, 'Security', has.header = T)
temp = trim(temp)
colnames(temp) = temp[1,]
temp = temp[-1,]
holdings = temp
# get ISIN to ticker map from www.londonstockexchange.com
page.label = ''
ticker2ISIN = c()
for(i in 1:100) {
cat(i,'\n')
# download
url = paste('http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/constituents-indices.html?index=UKX&page=', i, sep='')
txt = join(readLines(url))
# get page label
pos = regexpr('Page [0-9]+ of [0-9]+', txt, ignore.case = T)
page.label.new = substr(txt, pos, pos + attr(pos, 'match.length')-1)
if(page.label == page.label.new) break
page.label = page.label.new
# extract table
temp.table = extract.table.from.webpage(txt, 'Price', has.header = T)
colnames(temp.table)[1] = 'ticker'
# extract links
temp = gsub(pattern = '<a', replacement = '<td>', txt, perl = TRUE)
temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)
temp = extract.table.from.webpage(temp, 'Price', has.header = T)
pos = regexpr('fourWayKey=', temp[,2])
ISIN = as.vector(sapply(1:nrow(temp), function(j)
substr(temp[j,2], pos[j] + attr(pos, 'match.length')[j], pos[j] + attr(pos, 'match.length')[j] + 12 - 1)
))
ticker2ISIN = rbind(ticker2ISIN, cbind(temp.table[,spl('ticker,Name,Price'), drop=F], ISIN))
}
ISIN = intersect(holdings[,'ISIN'],ticker2ISIN[,'ISIN'])
holdings = cbind(holdings[match(ISIN, holdings[,'ISIN']), ],
ticker2ISIN[match(ISIN, ticker2ISIN[,'ISIN']), spl('ticker,Name,Price')])
return(apply(holdings, 2, list))
}
###############################################################################
# Get Dow Jones Components
# http://finance.yahoo.com/q/cp?s=^DJI+Components
# us.ishares.components(date='2008-02-01')
#' @export
###############################################################################
us.ishares.components <- function(Symbol = 'DVY', date = NULL, debug = F)
{
url = paste('http://us.ishares.com/product_info/fund/holdings/', Symbol, '.htm?periodCd=d', sep='')
if( !is.null(date) )
url = paste('http://us.ishares.com/product_info/fund/holdings/', Symbol, '.htm?asofDt=', date.end(date), '&periodCd=m', sep='')
txt = join(readLines(url))
# extract date from this page
temp = remove.tags(extract.token(txt, 'Holdings Detail', 'Holdings subject to change'))
date = as.Date(spl(trim(temp),' ')[3], '%m/%d/%Y')
# extract table from this page
temp = extract.table.from.webpage(txt, 'Symbol', has.header = T)
colnames(temp) = trim(colnames(temp))
temp = trim(temp)
tickers = temp[, 'Symbol']
keep.index = nchar(tickers)>1
weights = as.double(temp[keep.index, '% Net Assets']) / 100
tickers = tickers[keep.index]
out = list(tickers = tickers, weights = weights, date = date)
if(debug) out$txt = txt
out
}
###############################################################################
# Get Google search results:
# https://gist.github.com/Daapii/7281439
# --- explanation of the parameters in the query ---
#
# ie = input encoding
# oe = output encoding
# q = query (our search term)
# num = number of search results displayed at a time
# gws_rd=cr = redirects you to your country's version of google (required if you're not in the US)
# url encode our query
# query = "https://encrypted.google.com/search?ie=utf-8&oe=utf-8&q={0}&num=100&gws_rd=cr".format(query)
# google.search("r project")
#' @export
###############################################################################
google.search <- function
(
query
)
{
url = paste("http://google.com/search?ie=utf-8&oe=utf-8&q=", URLencode(query), "&num=10&gws_rd=cr", sep='')
txt = join(readLines(url))
tokens = spl(txt, '<li class="g">')
if(len(tokens) < 2) return(NULL)
records = matrix('', nrow=len(tokens)-1,nc=2)
colnames(records) = c('label','url')
for(i in 2:len(tokens)) {
token = tokens[i]
token = extract.token(token, '<a href=', '</a>', keep.marker = T)
url = extract.token(token, 'url\\?q=', '&amp;sa=U&amp;')
label = remove.tags(token)
records[i-1,] = c(label,url)
}
return(records)
}
###############################################################################
# Get the latest prices from the Google finance:
# http://digitalpbk.com/stock/google-finance-get-stock-quote-realtime
# http://finance.google.com/finance/info?client=ig&q=MSFT,AAPL,NYSE:RY
#' @export
###############################################################################
#getQuote.google(spl('MSFT,AAPL,IBM'))
getQuote.google <- function(tickers) {
url = paste('http://finance.google.com/finance/info?client=ig&q=', join(tickers,','), sep='')
txt = join(readLines(url))
temp = gsub(':', ',', txt)
temp = scan(text = temp, what='', sep=',', quiet=T)
temp = matrix(trim(temp), nr=len(temp)/len(tickers), byrow=F)
index = match(spl('t,l,lt'), tolower(temp[,1]))+1
names(index) = spl('ticker,last,date')
last = as.double(temp[index['last'],])
date = strptime(temp[index['date'],],format=' %b %d, %H,%M')
out = data.frame(last,date)
rownames(out) = temp[index['ticker'],]
out
}
# an xml alternative
# http://www.jarloo.com/google-stock-api/
# http://www.google.com/ig/api?stock=AAPL&stock=GOOG
#getQuote.google.xml(spl('MSFT,AAPL,NYSE:RY'))
#' @export
getQuote.google.xml <- function(tickers) {
url = paste('http://www.google.com/ig/api?', paste('stock=',tickers, '&', sep='', collapse=''), sep='')
txt = join(readLines(url))
temp = txt
temp = gsub('<finance.*?>', '', temp, perl = TRUE)
temp = gsub('</finance>', '', temp, perl = TRUE)
temp = gsub('<xml.*?>', '', temp, perl = TRUE)
temp = gsub('</xml.*?>', '', temp, perl = TRUE)
temp = gsub('<\\?xml.*?>', '', temp, perl = TRUE)
temp = gsub('data=', '', temp, perl = TRUE)
temp = gsub('/><', ' ', temp)
temp = gsub('>', '', temp)
temp = gsub('<', '', temp)
temp = scan(text = temp, what='', sep=' ', quiet=T)
temp = matrix(trim(temp), nr=len(temp)/len(tickers), byrow=F)
cnames = spl('trade_date_utc,trade_time_utc,symbol,last,high,low,volume,open,avg_volume,market_cap,y_close')
index = match(cnames, tolower(temp[,1]))+1
names(index) = cnames
date = strptime(paste(temp[index['trade_date_utc'],], temp[index['trade_time_utc'],]), format='%Y%m%d %H%M%S',tz='UTC')
date = as.POSIXct(date, tz = Sys.getenv('TZ'))
out = data.frame(t(temp[index[-c(1:3)],]))
colnames(out) = cnames[-c(1:3)]
rownames(out) = temp[index['symbol'],]
out
}
###############################################################################
# Download historical intraday prices from Google Finance
# http://www.mathworks.com/matlabcentral/fileexchange/32745-get-intraday-stock-price
# http://www.mathworks.com/matlabcentral/fileexchange/36115-volume-weighted-average-price-from-intra-daily-data
# http://www.codeproject.com/KB/IP/google_finance_downloader.aspx
# http://www.marketcalls.in/database/google-realtime-intraday-backfill-data.h
# getSymbol.intraday.google('GOOG','NASDAQ')
# getSymbol.intraday.google('.DJI','INDEXDJX')
#' @export
###############################################################################
getSymbol.intraday.google <- function
(
Symbol,
Exchange,
interval = 60, # 60 seconds
period = '1d'
)
{
# construct the Google Finance intraday download url
url = paste('http://www.google.com/finance/getprices?q=', Symbol,
'&x=', Exchange,
'&i=', interval,
'&p=', period,
'&f=', 'd,o,h,l,c,v', sep='')
load.packages('data.table')
out = fread(url, stringsAsFactors=F)
if(ncol(out) < 5) {
cat('Error getting data from', url, '\n')
return(NULL)
}
setnames(out, spl('Date,Open,High,Low,Close,Volume'))
# date logic
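# rows whose Date starts with 'a' carry an absolute unix timestamp (e.g. 'a1393862400')
# that anchors a new trading day; the remaining rows hold offsets, in 'interval' units,
# from the most recent anchor (a sketch of the observed csv format, not an official spec)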
date = out$Date
date.index = substr(out$Date,1,1) == 'a'
date = as.double(gsub('a','',date))
temp = NA * date
temp[date.index] = date[date.index]
temp = ifna.prev(temp)
date = temp + date * interval
date[date.index] = temp[date.index]
class(date) = c("POSIXt", "POSIXct")
date = date - (as.double(format(date[1],'%H')) - 9)*60*60
make.xts(out[,-1,with=F], date)
}
###############################################################################
# getSymbols interface to Yahoo today's delayed quotes
# based on getQuote.yahoo from quantmod package
#
# http://www.financialwisdomforum.org/gummy-stuff/Yahoo-data.htm
# https://github.com/joshuaulrich/quantmod/blob/master/R/getQuote.R
#
# getQuote.yahoo.info('DJP,IEF,KIE,RHS')
#
#' @export
###############################################################################
getQuote.yahoo.info <- function(Symbols, fields = c(
Name='Name',
Symbol='Symbol',
Time='Last Trade Time',
Date='Last Trade Date',
Close='Last Trade (Price Only)',
Volume='Volume',
AvgVolume='Average Daily Volume',
Yesterday='Previous Close'
),
load.hist = T
) {
Symbols = spl(Symbols)
out = getQuote.yahoo.today(Symbols, fields)
out = as.data.frame.matrix(out)
rownames(out) = out$Symbol
if(!load.hist) return(out)
data = env()
getSymbols(Symbols, src = 'yahoo', from = '1980-01-01', env = data, auto.assign = T)
out$Start = bt.start.dates(data)[out$Symbol,]
out
}
#' @export
getQuote.yahoo.today <- function(Symbols, fields = c(
Name='Name',
Symbol='Symbol',
Time='Last Trade Time',
Date='Last Trade Date',
Open='Open',
High='Days High',
Low='Days Low',
Close='Last Trade (Price Only)',
Volume='Volume',
Yesterday='Previous Close'
)
) {
require('data.table')
what = yahooQF(names = fields)
names = names(fields)
Symbols = spl(Symbols)
all.symbols = lapply(seq(1, len(Symbols), 100), function(x) na.omit(Symbols[x:(x + 99)]))
out = c()
for(i in 1:len(all.symbols)) {
# download
url = paste('http://download.finance.yahoo.com/d/quotes.csv?s=',
join( trim(all.symbols[[i]]), ','),
'&f=', what[[1]], sep = '')
txt = join(readLines(url),'\n')
data = fread(paste0(txt,'\n'), stringsAsFactors=F, sep=',')
setnames(data,names)
setkey(data,'Symbol')
out = rbind(out, data)
}
out
}
###############################################################################
# extend GLD and SLV historical prices
#' @export
###############################################################################
extend.GLD <- function(GLD) {
extend.data(GLD, bundes.bank.data.gold(), scale=T)
}
# gold = extend.GLD(data$GLD)
# comm = extend.data(data$DBC, get.CRB(), scale=T)
#' @export
extend.data <- function
(
current,
hist,
scale = F
)
{
colnames(current) = sapply(colnames(current), function(x) last(spl(x,'\\.')))
colnames(hist) = sapply(colnames(hist), function(x) last(spl(x,'\\.')))
# find Close in hist
close.index = find.names('Close', hist)
if(len(close.index)==0) close.index = 1
adjusted.index = find.names('Adjusted', hist)
if(len(adjusted.index)==0) adjusted.index = close.index
if(scale) {
cur.close.index = find.names('Close', current)
if(len(cur.close.index)==0) cur.close.index = 1
cur.adjusted.index = find.names('Adjusted', current)
if(len(cur.adjusted.index)==0) cur.adjusted.index = cur.close.index
# find first common observation in current and hist series
common = merge(current[,cur.close.index], hist[,close.index], join='inner')
scale = as.numeric(common[1,1]) / as.numeric(common[1,2])
if( close.index == adjusted.index )
hist = hist * scale
else {
hist[,-adjusted.index] = hist[,-adjusted.index] * scale
common = merge(current[,cur.adjusted.index], hist[,adjusted.index], join='inner')
scale = as.numeric(common[1,1]) / as.numeric(common[1,2])
hist[,adjusted.index] = hist[,adjusted.index] * scale
}
}
# subset history before current
hist = hist[format(index(current[1])-1,'::%Y:%m:%d'),,drop=F]
#hist = make.xts( rep.col(hist[,adjusted.index], ncol(current)), index(hist))
if( ncol(hist) != ncol(current) )
hist = rep.col(hist[,adjusted.index], ncol(current))
else
hist = hist[, colnames(current)]
colnames(hist) = colnames(current)
rbind( hist, current )
}
# extend data from the previously saved proxies
#' @export
extend.data.proxy <- function(data, data.proxy = NULL, proxy.filename = 'data.proxy.Rdata') {
if(is.null(data.proxy) && file.exists(proxy.filename))
load(file=proxy.filename)
if(!is.null(data.proxy))
for(n in ls(data.proxy))
if( !is.null(data[[n]]) )
data[[n]] = extend.data(data[[n]], data.proxy[[n]], scale=T)
}
###############################################################################
# Leveraged series
###############################################################################
# Create a leveraged series from the unleveraged data.
#
# Please use only with Adjusted time series. For example, create.leveraged(data$QQQ, leverage=2)
# will produce erroneous values because QQQ had a 2:1 stock split on Mar 20, 2000;
# hence at 2x leverage the value goes to zero.
#
# @example create.leveraged(tlt, 2)
# @example extend.data(data$UBT, create.leveraged(data$TLT, leverage=2), scale=T)
# @example extend.data(data$TMF, create.leveraged(data$TLT, leverage=3), scale=T)
#' @export
create.leveraged = function(hist, leverage=2) {
rets = 1 + leverage * (hist / mlag(hist) - 1)
rets[1,] = 1
bt.apply.matrix(rets, cumprod)
}
create.leveraged.test = function() {
tickers = spl('TMF,UBT,TLT')
data = new.env()
getSymbols(tickers, src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T)
test2 = extend.data(data$UBT, create.leveraged(data$TLT, leverage=2), scale=T)
test3 = extend.data(data$TMF, create.leveraged(data$TLT, leverage=3), scale=T)
proxy.test(list(TLT=data$TLT, UBT=test2, TMF=test3),price.fn=Ad)
test0 = create.leveraged(data$TLT, leverage=2)
proxy.test(list(UBT=data$UBT, EXTEND=test0),price.fn=Ad)
test0 = create.leveraged(data$TLT, leverage=3)
proxy.test(list(TMF=data$TMF, EXTEND=test0),price.fn=Ad)
# please note the difference in the above extensions is due to differences in the
# underlying benchmarks, i.e.
#
# http://www.proshares.com/funds/ubt.html
# ProShares Ultra 20+ Year Treasury (UBT) seeks daily investment results
# that correspond to two times (2x) the daily performance of the Barclays U.S. 20+ Year Treasury Bond Index.
#
# http://www.direxioninvestments.com/products/direxion-daily-20-year-treasury-bull-3x-etf
# Direxion Daily 20+ Yr Trsy Bull 3X ETF (TMF) seeks daily investment results
# that correspond to three times (3x) the daily performance of the NYSE 20 Year Plus Treasury Bond Index (AXTWEN).
}
###############################################################################
# Bundes Bank - long history of gold prices
# http://www.bundesbank.de/Navigation/EN/Statistics/Time_series_databases/Macro_economic_time_series/its_list_node.html?listId=www_s331_b01015_3
#' @export
###############################################################################
bundes.bank.data <- function(symbol) {
url = paste('http://www.bundesbank.de/cae/servlet/CsvDownload?tsId=', symbol, '&its_csvFormat=en&mode=its', sep='')
temp = read.csv(url, skip=5, header=F, stringsAsFactors=F)
#hist = make.xts(as.double(temp[,2]), as.Date(temp[,1], '%Y-%m-%d'))
hist = make.xts(as.double(temp[,2]), as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%Y-%m-%d'))
indexClass(hist) = 'Date'
colnames(hist)='Close'
return( hist[!is.na(hist)] )
}
#' @export
bundes.bank.data.gold <- function() {
bundes.bank.data('BBEX3.D.XAU.USD.EA.AC.C05')
}
###############################################################################
# Pacific Exchange Rate Service - FX rates
# Daily data is maximum for 4 years
# http://fx.sauder.ubc.ca/data.html
# http://fx.sauder.ubc.ca/cgi/fxdata?b=USD&c=AUD&c=GBP&c=CAD&c=NOK&c=EUR&c=JPY&c=NZD&c=SEK&c=CHF&rd=&fd=1&fm=1&fy=2011&ld=31&lm=12&ly=2012&y=daily&q=volume&f=csv&o=
#
# Example
# base.cur = 'USD'
# target.curs = 'AUD,CAD,EUR'
# fx.data = rbind(fx.sauder.data(2000, 2003, base.cur, target.curs),
# fx.sauder.data(2004, 2007, base.cur, target.curs),
# fx.sauder.data(2008, 2011, base.cur, target.curs),
# fx.sauder.data(2012, 2012, base.cur, target.curs))
#' @export
###############################################################################
fx.sauder.data <- function(start.year, end.year, base.cur, target.curs) {
url = paste('http://fx.sauder.ubc.ca/cgi/fxdata?b=', base.cur, join(paste('&c=', spl(target.curs), sep='')), '&rd=&fd=1&fm=1&fy=', start.year, '&ld=31&lm=12&ly=', end.year, '&y=daily&q=volume&f=csv&o=', sep='')
temp = read.csv(url, skip=1, header=T, stringsAsFactors=F)
#hist = make.xts(as.matrix(temp[,-c(1:3)]), as.Date(temp[,2], '%Y/%m/%d'))
hist = make.xts(as.matrix(temp[,-c(1:3)]), as.POSIXct(temp[,2], tz = Sys.getenv('TZ'), format='%Y/%m/%d'))
indexClass(hist) = 'Date'
colnames(hist) = gsub(paste('.', base.cur, sep=''), '', colnames(hist))
return( hist[!is.na(hist[,1]),] )
}
###############################################################################
# Download historical prices from Pi Trading - Free Market Data
# http://pitrading.com/free_market_data.htm
#' @export
###############################################################################
getSymbols.PI <- function
(
Symbols,
env = .GlobalEnv,
auto.assign = TRUE,
download = TRUE
)
{
# setup temp folder
temp.folder = paste(getwd(), 'temp', sep='/')
dir.create(temp.folder, F)
# read all Symbols
for (i in 1:len(Symbols)) {
if(download) {
# http://pitrading.com/free_eod_data/SPX.zip
url = paste('http://pitrading.com/free_eod_data/', Symbols[i], '.zip', sep='')
filename = paste(temp.folder, '/', Symbols[i], '.zip', sep='')
download.file(url, filename, mode = 'wb')
# unpack
unzip(filename, exdir=temp.folder)
}
filename = paste(temp.folder, '/', Symbols[i], '.txt', sep='')
temp = read.delim(filename, header=TRUE, sep=',')
#out = make.xts(temp[,-1], as.Date(temp[,1],'%m/%d/%Y'))
out = make.xts(temp[,-1], as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%m/%d/%Y'))
indexClass(out) = 'Date'
out$Adjusted = out$Close
cat(i, 'out of', len(Symbols), 'Reading', Symbols[i], '\n', sep='\t')
if (auto.assign) {
assign(paste(gsub('\\^', '', Symbols[i]), sep='_'), out, env)
}
}
if (!auto.assign) {
return(out)
} else {
return(env)
}
}
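# Illustrative usage (sketch, not part of the original file; assumes the requested
# tickers are available as zip files on pitrading.com, e.g. SPX.zip referenced above):
# data = new.env()
# getSymbols.PI('SPX', env = data, auto.assign = T, download = T)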
###############################################################################
# Download FX quotes: end of day and hourly
# http://www.fxhistoricaldata.com/EURUSD/
#' @export
###############################################################################
getSymbols.fxhistoricaldata <- function
(
Symbols,
type = spl('hour,day'),
env = .GlobalEnv,
auto.assign = TRUE,
download = FALSE,
name.has.type = TRUE
)
{
type = type[1]
type0 = paste0(type,'_')
# setup temp folder
temp.folder = paste(getwd(), 'temp', sep='/')
dir.create(temp.folder, F)
# read all Symbols
for (i in 1:len(Symbols)) {
if(download) {
# http://www.fxhistoricaldata.com/download/EURUSD_hour.zip
url = paste('http://www.fxhistoricaldata.com/download/', Symbols[i], '_', type, '.zip', sep='')
filename = paste(temp.folder, '/', Symbols[i], '_', type, '.zip', sep='')
download.file(url, filename, mode = 'wb')
# unpack
unzip(filename, exdir=temp.folder)
}
filename = paste(temp.folder, '/', Symbols[i], '_', type, '.csv', sep='')
temp = read.delim(filename, header=TRUE, sep=',')
colnames(temp) = gsub('[X\\.|\\.]', '', colnames(temp))
out = make.xts(temp[,spl('OPEN,LOW,HIGH,CLOSE')],
strptime(paste(temp$DATE, temp$TIME), format='%Y%m%d %H:%M:%S'))
cat(i, 'out of', len(Symbols), 'Reading', Symbols[i], '\n', sep='\t')
if (auto.assign) {
assign(paste0(gsub('\\^', '', Symbols[i]), iif(name.has.type,type0,'')), out, env)
}
}
if (!auto.assign) {
return(out)
} else {
return(env)
}
}
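# Illustrative usage (sketch, not part of the original file):
# data = new.env()
# getSymbols.fxhistoricaldata('EURUSD', 'hour', data, download = T)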
###############################################################################
# Download historical data for G10
# The PowerShares DB G10 Currency Harvest Fund
# http://www.invescopowershares.com/products/overview.aspx?ticker=DBV
#
# The G10 currency universe from which the Index selects currently includes
# U.S. dollars,
# euros,
# Japanese yen,
# Canadian dollars,
# Swiss francs,
# British pounds,
# Australian dollars,
# New Zealand dollars,
# Norwegian krone and
# Swedish krona
#' @export
###############################################################################
get.G10 <- function
(
type = spl('currency')
)
{
if( type[1] != 'currency') {
cat('Warning:', type[1], 'is not yet implemented in get.G10 function\n')
return()
}
# FRED acronyms for daily FX rates
map = '
FX FX.NAME
DEXUSAL U.S./Australia
DEXUSUK U.S./U.K.
DEXCAUS Canada/U.S.
DEXNOUS Norway/U.S.
DEXUSEU U.S./Euro
DEXJPUS Japan/U.S.
DEXUSNZ U.S./NewZealand
DEXSDUS Sweden/U.S.
DEXSZUS Switzerland/U.S.
'
map = matrix(scan(text = map, what='', quiet=T), nc=2, byrow=T)
colnames(map) = map[1,]
map = data.frame(map[-1,], stringsAsFactors=F)
# convert all quotes to be vs U.S.
convert.index = grep('DEXUS',map$FX, value=T)
#*****************************************************************
# Load historical data
#******************************************************************
load.packages('quantmod')
# load fx from fred
data.fx <- new.env()
quantmod::getSymbols(map$FX, src = 'FRED', from = '1970-01-01', env = data.fx, auto.assign = T)
for(i in ls(data.fx)) data.fx[[i]] = na.omit(data.fx[[i]])
for(i in convert.index) data.fx[[i]] = 1 / data.fx[[i]]
# extract fx where all currencies are available
bt.prep(data.fx, align='remove.na')
fx = bt.apply(data.fx, '[')
return(fx)
}
###############################################################################
# Download Strategic Portfolios from wealthsimple.com
#
#http://faq.wealthsimple.com/article/121-how-has-the-risk-level-1-portfolio-performed
#http://faq.wealthsimple.com/article/130-how-has-the-risk-level-10-portfolio-performed
#http://faq.wealthsimple.com/article/127-how-has-the-risk-level-7-portfolio-performed
#' @export
###############################################################################
wealthsimple.portfolio = function(portfolio.number = 10) {
# download
url = paste0('http://faq.wealthsimple.com/article/', 120+portfolio.number, '-how-has-the-risk-level-',portfolio.number,'-portfolio-performed')
txt = join(readLines(url))
# extract
temp = extract.table.from.webpage(txt, 'Breakdown', has.header = F)
# parse
temp = gsub(pattern = '%', replacement = '', temp)
temp = trim(temp[,c(2,4)])
temp = temp[!is.na(temp[,1]),]
# create output
value = as.numeric(temp[,2])
names(value) = temp[,1]
value
}
wealthsimple.portfolio.test = function() {
# create list of all portfolios
portfolios = list()
for(i in 1:10)
portfolios[[i]] = wealthsimple.portfolio(i)
portfolios = t(sapply(portfolios, identity))
# look at evolution of mixes
plota.stacked(1:10, portfolios/100, flip.legend = T, type='s', xaxp=c(1,10,9), las=1,
main='Wealthsimple Transition Matrix', xlab='Risk Portfolio')
}
###############################################################################
# getSymbols interface to tradingblox free futures and forex data
# http://www.tradingblox.com/tradingblox/free-historical-data.htm
# http://www.tradingblox.com/Data/DataOnly.zip
# Date, Open, High, Low, Close, Volume (zero for forex cash markets),
# Open Interest (futures only), Delivery Month (YYYYMM, futures only),
# Unadjusted Close (zero for forex cash markets)
#' @export
###############################################################################
getSymbols.TB <- function(
env = .GlobalEnv,
auto.assign = TRUE,
download = FALSE,
type = c('Both', 'Futures', 'Forex'),
rm.index = 'PB', # remove Pork Bellies because not traded
clean = FALSE,
custom.adjustments = TRUE
)
{
# download zip archive
if(download) {
download.file('http://www.tradingblox.com/Data/DataOnly.zip', 'DataOnly.zip')
}
# setup temp folder
temp.folder = paste(getwd(), 'temp', sep='/')
dir.create(temp.folder, F)
##*****************************************************************
## Unzip
##******************************************************************
temp.folder = paste(getwd(), '/', 'temp', sep='')
# clean temp
if(clean) shell('del /F /S /Q temp\\*.*', wait = TRUE)
# unpack
files = unzip('DataOnly.zip', exdir=temp.folder)
# read definitions, based on Financial Instrument Model Infrastructure for R package from http://r-forge.r-project.org/R/?group_id=316
def1 = try(read.csv('http://www.tradingblox.com/tradingblox/CSIUA/FuturesInfo.txt',skip=1,header=FALSE, stringsAsFactors=F),TRUE)
if(inherits(def1, 'try-error')) def1 = read.csv('FuturesInfo.txt',skip=1,header=FALSE, stringsAsFactors=F)
def1 = def1[-match(rm.index, def1[,1]),]
def1[,3] = 'Futures'
def2 = try(read.csv('http://www.tradingblox.com/tradingblox/CSIUA/ForexInfo.txt',skip=1,header=FALSE, stringsAsFactors=F),TRUE)
if(inherits(def2, 'try-error')) def2 = read.csv('ForexInfo.txt',skip=1,header=FALSE, stringsAsFactors=F)
def2[,3] = 'Forex'
def = rbind(def1[,1:4], def2[,1:4])
if(type[1] == 'Futures') def = def1[,1:4]
if(type[1] == 'Forex') def = def2[,1:4]
# read all files
for( i in 1:nrow(def) ) {
symbol = def[i,1]
filename = paste(temp.folder, '/', def[i,3], '/', def[i,4], sep='')
if(file.exists(filename)) {
fr <- read.csv(filename, header = FALSE)
fr <- make.xts(fr[,-1], as.Date(as.character(fr[,1]),'%Y%m%d'))
colnames(fr) <- spl('Open,High,Low,Close,Volume,OpenInterest,DeliveryMonth,Unadjusted')[1:ncol(fr)]
fr$Adjusted = fr$Close
if (auto.assign) assign(symbol, fr, env)
cat(i, 'out of', nrow(def), 'Reading', symbol, format(index.xts(fr)[1],'%Y%m%d'), format(index.xts(fr)[nrow(fr)],'%Y%m%d'), '\n', sep='\t')
} else {
cat('\t\t\t Missing data for ', symbol, '\n');
}
}
#*****************************************************************
# Add symbolnames, symbol.descriptions, and symbol.groups
#******************************************************************
index = match(ls(env)[ na.omit(match(def[,1], ls(env))) ], def[,1])
temp = def[index,1]
names(temp) = def[index,1]
env$symbolnames = temp
temp = def[index,2]
names(temp) = def[index,1]
env$symbol.descriptions = temp
temp = def[index,3]
names(temp) = def[index,1]
env$symbol.groups = temp
#*****************************************************************
# Process symbol descriptions to be more readable
#******************************************************************
names = trim(gsub(pattern = '\\(.*?\\)', replacement = '', env$symbol.descriptions, perl = TRUE))
names = trim(gsub('-NYMEX','',names,ignore.case =T))
names = trim(gsub('-COMEX','',names,ignore.case =T))
names = trim(gsub('-CBT','',names,ignore.case =T))
names = trim(gsub('-CME-','',names,ignore.case =T))
names = trim(gsub('-CME','',names,ignore.case =T))
names = trim(gsub('-NYCE','',names,ignore.case =T))
names = trim(gsub('-Globex','',names,ignore.case =T))
names = trim(gsub('-FINEX','',names,ignore.case =T))
names = trim(gsub('-CSCE','',names,ignore.case =T))
names = trim(gsub(' w/Prj A','',names,ignore.case =T))
env$symbol.descriptions.print = names
#*****************************************************************
# Custom adjustments
#******************************************************************
data = env
# fix DX time series - fixed by the Trading Blox
# if(!is.null(data$DX)) data$DX['::2007:04:04', 'Unadjusted'] = coredata(data$DX['::2007:04:04']$Unadjusted * 10)
if( custom.adjustments ) {
#*****************************************************************
# filename = 'temp\\Futures\\LH_0_I0B.TXT'
# fr <- read.csv(filename, header = FALSE)
# fr <- make.xts(fr[,-1], as.Date(as.character(fr[,1]),'%Y%m%d'))
# colnames(fr) <- spl('Open,High,Low,Close,Volume,OpenInterest,DeliveryMonth,Unadjusted')
# LH = fr
# LH$DiffClose = LH$Close - mlag(LH$Close)
# LH$DiffUnadjusted = LH$Unadjusted - mlag(LH$Unadjusted)
# LH = LH['2000:03:08::2000:03:15',spl('Close,DeliveryMonth,Unadjusted,DiffClose,DiffUnadjusted')]
# LH
#
# Close DeliveryMonth Unadjusted DiffClose DiffUnadjusted
# 2000-03-08 187.150 200004 59.450 -0.150 -0.150
# 2000-03-09 188.450 200004 60.750 1.300 1.300
# 2000-03-10 189.750 200004 62.050 1.300 1.300
# 2000-03-13 189.600 200006 71.175 -0.150 9.125
# 2000-03-14 189.575 200006 71.150 -0.025 -0.025
# 2000-03-15 189.325 200006 70.900 -0.250 -0.250
#
# There is a roll 2000-03-10, we switch from contract expiring on 200004 to contract expiring on 200006
# The returns on
# 2000-03-09: 1.300 / 59.450
# 2000-03-10: 1.300 / 60.750
# # for the first day after the roll, use the price of the new contract as the denominator,
# # because the numerator is the change in the new contract's price
# # (71.175 - -0.150 is the new contract's price on 2000-03-10)
# 2000-03-13: -0.150 / (71.175 - -0.150)
# 2000-03-14: -0.025 / 71.175
# 2000-03-15: -0.250 / 71.150
#*****************************************************************
#*****************************************************************
# To compute returns and backtest, recreate each futures series:
#
# futures-return[t] = (unadjusted-futures[t-1] + (back-adjusted-futures[t] - back-adjusted-futures[t-1])) / unadjusted-futures[t-1] - 1
#******************************************************************
for(i in data$symbolnames[data$symbol.groups != 'Forex']) {
# find rolls; alternatively can use DeliveryMonth field
spot = as.vector(data[[i]]$Unadjusted)
dspot = spot - mlag(spot)
futures = as.vector(data[[i]]$Adjusted)
dfutures = futures - mlag(futures)
index = which(round(dspot - dfutures,4) != 0 )
# for return calculations set spot on the roll to the new contract price
spot.adjust.roll = spot
spot.adjust.roll[(index-1)] = spot.adjust.roll[index] - dfutures[index]
# compute returns
reta = dfutures / mlag(spot.adjust.roll)
reta[1] = 0
n = len(spot)
new.series = cumprod(1 + reta)
# make new series match the last spot price
data[[i]]$Adjusted = data[[i]]$Close = spot[n] * new.series / new.series[n]
}
}
#*****************************************************************
# Done
#******************************************************************
if (!auto.assign) {
return(fr)
} else {
return(env)
}
}
###############################################################################
# Data contains historical time series for both
# * Spot ( Unadjusted - unadjusted-futures ) and
# * Future ( Adjusted - back-adjusted-futures)
#
# First step, I updated Spot to include roll yield (i.e. spot.adjust.roll)
# Next, I computed returns as change in futures relative to the spot.adjust.roll level
# I.e. the return is
#
# (unadjusted-futures[t-1] + (back-adjusted-futures[t] - back-adjusted-futures[t-1])) / unadjusted-futures[t-1] - 1
#
# = (change in back-adjusted-futures) / (prior unadjusted-futures level, adjusted for roll yield)
#
# http://www.automated-trading-system.com/crude-oil-contango-and-roll-yield-for-commodity-trading/
###############################################################################
getSymbols.TB.test = function() {
filename = 'temp/Futures/CL20_I0B.TXT'
i = 'CL'
data = env()
fr <- read.csv(filename, header = FALSE)
fr <- make.xts(fr[,-1], as.Date(as.character(fr[,1]),'%Y%m%d'))
colnames(fr) <- spl('Open,High,Low,Close,Volume,OpenInterest,DeliveryMonth,Unadjusted')[1:ncol(fr)]
fr$Adjusted = fr$Close
data[[i]] = fr
# Unadjusted is Spot (unadjusted-futures)
# Adjusted is Close is Future (back-adjusted-futures)
# adjust spot for roll overs
spot = as.vector(data[[i]]$Unadjusted)
dspot = spot - mlag(spot)
futures = as.vector(data[[i]]$Adjusted)
dfutures = futures - mlag(futures)
index = which(round(dspot - dfutures,4) != 0 )
spot.adjust.roll = spot
spot.adjust.roll[(index-1)] = spot.adjust.roll[index] - dfutures[index]
# compute returns
reta = (mlag(spot.adjust.roll) + futures - mlag(futures)) / mlag(spot.adjust.roll)
reta[1] = 1
n = len(spot)
new.series = cumprod(reta)
Close = spot[n] * new.series / new.series[n]
plot.data = as.xts(list(
Unadjusted = data[[i]]$Unadjusted,
Adjusted = data[[i]]$Adjusted,
Implied.Close = make.xts(Close, index(data[[i]]))
))
png(filename = 'plot_CL_2009.png', width = 600, height = 500, units = 'px', pointsize = 12, bg = 'white')
plota.matplot( scale.one(plot.data['2009']), main='Crude oil, CL - 2009')
dev.off()
}
###############################################################################
# Kenneth R. French - Data Library
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
###############################################################################
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors.zip
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_weekly.zip
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily.zip
#
# data2 = get.fama.french.data('F-F_Research_Data_Factors', periodicity = 'weeks',download = F, clean = F)
# data3 = get.fama.french.data('6_Portfolios_2x3', periodicity = 'days',download = F, clean = F)
#' @export
###############################################################################
get.fama.french.data <- function(
name = c('F-F_Research_Data_Factors', 'F-F_Research_Data_Factors'),
periodicity = c('days','weeks', 'months'),
force.download = FALSE,
clean = FALSE,
file.suffix = '_TXT'
)
{
warning('get.fama.french.data is deprecated as of Apr 25, 2016; please use the data.ff function instead')
data.ff(name, periodicity, force.download, clean, file.suffix)
}
#' @export
data.ff <- function(
name = c('F-F_Research_Data_Factors', 'F-F_Research_Data_Factors'),
periodicity = c('days','weeks', 'months'),
force.download = FALSE,
clean = FALSE,
file.suffix = '_TXT'
)
{
# map periodicity
map = c(days = '_daily', weeks = '_weekly', months = '')
# url
period = ifna(map[periodicity[1]], periodicity[1])
filename.zip = paste(name[1], period, file.suffix, '.zip', sep='')
filename.txt = paste(name[1], period, '.txt', sep='')
url = paste('http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/', filename.zip, sep='')
# download zip archive
if( !file.exists(filename.zip) || force.download)
download.file(url, filename.zip)
## download using curl !!!!
# setup temp folder
temp.folder = paste(getwd(), 'temp', sep='/')
dir.create(temp.folder, F)
##*****************************************************************
## Unzip
##******************************************************************
temp.folder = paste(getwd(), '/', 'temp', sep='')
# clean temp
if(clean) shell('del /F /S /Q temp\\*.*', wait = TRUE)
# unpack
files = unzip(filename.zip, exdir=temp.folder)
if(len(files) == 1) {
filename = paste(temp.folder, '/', filename.txt, sep='')
return( data.ff.internal.one.file(filename) )
}
data = env()
library(stringr)
names = str_trim(str_match(files,'.*/(.*)\\..*')[,2])
for(i in 1:len(files))
data[[ names[i] ]] = data.ff.internal.one.file(files[i])
data
}
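# Illustrative usage of the renamed interface (sketch, not part of the original file):
# ff3 = data.ff('F-F_Research_Data_Factors', periodicity = 'months')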
# internal helper function
data.ff.internal.one.file = function(filename) {
out = readLines(filename)
index = which(nchar(out) == 0)
data.index = grep('^[ 0-9\\.\\+-]+$', out)
temp.index = which(diff(data.index) > 1)
data.index = matrix(data.index[sort(c(1, temp.index, temp.index+1, len(data.index)))], nc=2, byrow=T)
# extract sections
data = list()
for(i in 1:nrow(data.index)) {
start.index = index[which( index > data.index[i,1] ) - 1][1] + 1
if(is.na(start.index)) start.index = index[len(index)] + 1
end.index = data.index[i,1] - 1
n.index = end.index - start.index + 1
# column names
name = 'data'
colnames = scan(text = out[start.index], what='', quiet=T)
if(n.index == 2) {
name = trim(out[start.index])
colnames = scan(text = out[end.index], what='', quiet=T)
colnames1 = scan(text = out[end.index+1], what='', quiet=T)
if(len(colnames) > len(colnames1)) {
cindex = which(diff(gregexpr(' ',out[end.index+1])[[1]]) > 1)
cindex = c(1, gregexpr(' ',out[end.index+1])[[1]][(cindex+1)], nchar(out[end.index])+1)
colnames = rep('', len(cindex)-1)
for(j in 2:len(cindex))
colnames[j-1] = substr(out[end.index], cindex[j-1], cindex[j]-1)
colnames = trim(colnames)
colnames = colnames[nchar(colnames) > 0]
}
} else if(n.index > 2) {
name = trim(out[start.index])
colnames0 = scan(text = out[(end.index-1)], what='', quiet=T)
colnames1 = scan(text = out[end.index], what='', quiet=T)
colnames = paste(rep(colnames0, each = len(colnames1) / len(colnames0)), colnames1, sep='.')
}
colnames = gsub('-', '.', colnames)
#out[start.index:end.index]
# re-read data
temp = matrix(scan(filename, what = double(), quiet=T,
skip = (data.index[i,1]-1),
nlines = (data.index[i,2] - data.index[i,1]+1))
, nc=len(colnames)+1, byrow=T)
date.format = '%Y%m%d'
date.format.add = ''
date.format.n = nchar(paste(temp[1,1]))
if( date.format.n == 6 ) {
date.format.add = '01'
} else if( date.format.n == 4 ) {
date.format.add = '0101'
}
find.name = function(name,data, i=0) if( is.null(data[[name]]) ) name else find.name(paste(name,i+1), data, i+1)
name = find.name(name, data)
data[[name]] = make.xts(temp[,-1], as.Date(paste(temp[,1], date.format.add, sep=''),date.format))
colnames(data[[name]]) = colnames
}
return( data )
}
###############################################################################
# CBOE Futures
#' @export
###############################################################################
download.helper <- function(url,download) {
# setup temp folder
temp.folder = paste(getwd(), 'temp', sep='/')
dir.create(temp.folder, F)
filename = paste0(temp.folder, '/', basename(url))
if(download || !file.exists(filename))
try(download.file(url, filename, mode='wb'), TRUE)
filename
}
# Update files, download = T
#for(y in date.year(Sys.Date()):(1+date.year(Sys.Date())))
# for(m in 1:12) {
# temp = getSymbol.CBOE('VX', m, y, T)
#' @export
getSymbol.CBOE <- function
(
Symbol,
Month,
Year,
download = FALSE
)
{
# month codes of the futures
m.codes = spl('F,G,H,J,K,M,N,Q,U,V,X,Z')
url = paste0("http://cfe.cboe.com/Publish/ScheduledTask/MktData/datahouse/CFE_",
m.codes[Month], substring(Year,3,4), '_', Symbol, '.csv')
filename = download.helper(url, download)
if(file.exists(filename) && file.info(filename)$size > 1)
read.xts(filename, format='%m/%d/%Y')
else
NULL
}
# SPX Volatility Term Structure
#' @export
cboe.volatility.term.structure.SPX <- function(make.plot = T) {
url = 'http://www.cboe.com/data/volatilityindexes/volatilityindexes.aspx'
txt = join(readLines(url))
temp.table = extract.table.from.webpage(txt, 'Trade Date', has.header = T)
colnames(temp.table) = gsub(' ','.',trim(tolower(colnames(temp.table))))
temp.table = data.frame(temp.table)
temp.table$trade.date = as.POSIXct(temp.table$trade.date, format="%m/%d/%Y %I:%M:%S %p")
temp.table$expiration.date = as.Date(temp.table$expiration.date, "%d-%b-%y")
temp.table[,3] = as.numeric(as.character(temp.table[,3]))
temp.table[,4] = as.numeric(as.character(temp.table[,4]))
temp.table
if(make.plot) {
plot(temp.table$expiration.date, temp.table$vix, type = 'b',
main=paste('VIX Term Structure, generated ', max(temp.table$trade.date)),
xlab = 'Expiration Month', ylab='VIX Index Level')
grid()
}
temp.table
}
#' @export
load.VXX.CBOE <- function() {
# https://r-forge.r-project.org/scm/viewvc.php/pkg/qmao/R/getSymbols.cfe.R?view=markup&root=twsinstrument
index = "::2007-03-25"
fields = spl('Open,High,Low,Close,Settle')
# days remaining before settlement, on a given date
dr <- function(index, date) sum(index>date)
data <- new.env()
futures = list()
i = 1
for(y in 2004:(1+date.year(Sys.Date())))
for(m in 1:12) {
temp = getSymbol.CBOE('VX', m, y)
if(is.null(temp)) next
temp = temp[temp$Settle > 0]
if(nrow(temp)==0) next
if(len(temp[index,1])> 0)
temp[index,fields] = temp[index,fields]/10
label = paste0(y*100+m)
dates = index(temp)
futures[[ i ]] = list()
futures[[ i ]]$data = temp
futures[[ i ]]$label = label
futures[[ i ]]$index = dates
futures[[ i ]]$settle.date = last(dates)
# set roll period of each future
if(i==1)
futures[[ i ]]$dt = len(dates)
else
futures[[ i ]]$dt = dr(dates, futures[[ i-1 ]]$settle.date)
temp$i = i
# number of business days in roll period
temp$dt = futures[[ i ]]$dt
# number of remaining dates in the first futures contract
temp$dr = (len(dates)-1):0
data[[ label ]] = temp
i = i + 1
}
bt.prep(data, align='keep.all')
# debug
# bt.apply(data, function(x) x[,'dr'])[1150:1160,48:55]
# count(t(coredata(bt.apply(data, function(x) x[,'Settle']))))
data
}
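# usage sketch (not run): downloads every listed VX contract since 2004, so the first run is slow
# data = load.VXX.CBOE()
# last(data$prices)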
#' @export
extract.VXX.CBOE <- function(data, field, index, exact.match=T) {
map = 1:ncol(data$prices)
temp = bt.apply(data, function(x) x[,field])
temp = coredata(temp)
t(apply(temp, 1, function(x) {
if(exact.match) {
pos = map[!is.na(x)][1] - 1
x[(index + pos)]
} else {
pos = map[!is.na(x)][index]
x[pos]
}
}))
}
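# usage sketch (not run): front- and second-month settlement prices, one row per date
# data = load.VXX.CBOE()
# settle = extract.VXX.CBOE(data, 'Settle', 1:2)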
# Reconstructing VXX from CBOE futures data
# http://tradingwithpython.blogspot.ca/2012/01/reconstructing-vxx-from-cboe-futures.html
# for VXX calculation see http://www.ipathetn.com/static/pdf/vix-prospectus.pdf
# page PS-20
#' @export
reconstruct.VXX.CBOE <- function(exact.match=T) {
data = load.VXX.CBOE()
dt = extract.VXX.CBOE(data, 'dt', 1, exact.match)[1,]
dr = extract.VXX.CBOE(data, 'dr', 1, exact.match)[1,]
x = extract.VXX.CBOE(data, 'Settle', 1:2, exact.match)
# for VXX calculation see http://www.ipathetn.com/static/pdf/vix-prospectus.pdf
# page PS-20: 1/2 months
# VXX Short-Term
w = cbind(dr / dt, (dt - dr) / dt)
val.cur = rowSums(x * mlag(w))
val.yest = rowSums(mlag(x) * mlag(w))
ret = val.cur / val.yest - 1
# on roll it is simply future2's return
index = ifna(mlag(dr) == 0, F)
ret[index] = (x[,1] / mlag(x[,2]) - 1)[index]
Close = cumprod(1+ifna(ret,0))
VXX = make.xts(cbind(Close,x,dt,dr,ret), data$dates)
# VXZ Mid-Term: 4,5,6,7 months
x = extract.VXX.CBOE(data, 'Settle', 4:7, exact.match)
w = cbind(dr / dt, 1, 1, (dt - dr) / dt)
val.cur = rowSums(x * mlag(w))
val.yest = rowSums(mlag(x) * mlag(w))
ret = val.cur / val.yest - 1
index = ifna(mlag(dr) == 0, F)
ret[index] = (rowSums(x[,-4]) / mlag(rowSums(x[,-1])) - 1)[index]
Close = cumprod(1+ifna(ret,0))
VXZ = make.xts(cbind(Close,x,dt,dr,ret), data$dates)
# debug
# plota(VXZ,type='l',lwd=2)
# write.xts(VXZ, file='vix-mid.txt')
list(VXX = VXX, VXZ = VXZ)
}
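# usage sketch (not run):
# proxy = reconstruct.VXX.CBOE()
# plota(proxy$VXX$Close, type='l', lwd=2)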
###############################################################################
# Load Country Codes from
# http://www.nationsonline.org/oneworld/country_code_list.htm
#' @export
###############################################################################
country.code = function
(
force.download = FALSE,
data.filename = 'country.code.Rdata'
)
{
if(!force.download && file.exists(data.filename)) {
load(file=data.filename)
return(temp)
}
url = 'http://www.nationsonline.org/oneworld/country_code_list.htm'
library(curl)
#curl_download(url, file,mode = 'wb',quiet=T)
#txt = join(readLines(url))
txt = rawToChar(curl_fetch_memory(url)$content)
temp = extract.table.from.webpage(txt, 'Country or Area Name')
temp = trim(temp[,c(2:5)])
colnames(temp)=spl('name,code2,code3,code')
save(temp,file=data.filename)
temp
}
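# usage sketch (not run): cached lookup of country codes
# codes = country.code()
# codes[codes[,'code2'] == 'CA', ]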
###############################################################################
# Search/Lookup tickers at http://markets.ft.com
#'
#' @examples
#' \dontrun{
#' data.ft.search.ticker('s&p 500')
#' }
#' @export
#' @rdname DataFTFunctions
###############################################################################
data.ft.search.ticker = function
(
search.field = 'tsx',
sec.type = 'IN'
)
{
#[search](http://markets.ft.com/Research/Markets/Company-Search?searchField=tsx&country=&secType=IN)
library(curl)
url = paste0('http://markets.ft.com/Research/Markets/Company-Search?searchField=', curl_escape(search.field), '&country=&secType=', sec.type)
h = new_handle()
req = curl_fetch_memory(url, h)
if(req$status_code != 200)
warning('error getting data, status_code:', req$status_code, 'for url:', url, 'content:', rawToChar(req$content))
txt = rawToChar(req$content)
# cat(txt, file='dump.txt') # save to analyze
# gregexpr('TSEA:TOR',txt) # find location
# substr(txt, 20400,20800)
# extract links
temp = gsub(pattern = '<a', replacement = '<td', txt, perl = TRUE)
temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)
temp = extract.table.from.webpage(temp, 'Symbol,Exchange,Country')
colnames(temp)[1:4] = spl('Name,Symbol,Exchange,Country')
temp[,1:4]
}
###############################################################################
# List Index Members at http://markets.ft.com
#' [sp500](http://markets.ft.com/research/Markets/Tearsheets/Constituents?s=INX:IOM)
#' [tsx](http://markets.ft.com/research/Markets/Tearsheets/Constituents?s=TSEA:TOR)
#' [ftse](http://markets.ft.com/research/Markets/Tearsheets/Constituents?s=FTSE:FSI)
#'
#' [get industry/sector information from yahoo finance](https://ca.finance.yahoo.com/q/pr?s=RY.TO)
#'
#' @examples
#' \dontrun{
#' data.ft.index.members('TSEA:TOR')
#' }
#' @export
#' @rdname DataFTFunctions
###############################################################################
data.ft.index.members = function
(
ft.symbol = 'INX:IOM',
force.download = FALSE,
data.filename = paste0(gsub(':','_',ft.symbol),'.Rdata'),
data.keep.days = 30
)
{
# if NOT forced to download and file exists and file is less than 30 days old
if( !force.download &&
file.exists(data.filename) &&
as.numeric(Sys.Date() - as.Date(file.mtime(data.filename))) <= data.keep.days
) {
load(file=data.filename)
return(data)
}
#h = handle_setopt(h, useragent = "moo=moomooo", referer)
#
# Main Page
#
#[sp500](http://markets.ft.com/research/Markets/Tearsheets/Constituents?s=INX:IOM)
url = paste0('http://markets.ft.com/research/Markets/Tearsheets/Constituents?s=', ft.symbol)
#[The curl package: a modern R interface to libcurl](https://cran.r-project.org/web/packages/curl/vignettes/intro.html)
library(curl)
h = new_handle()
txt = rawToChar(curl_fetch_memory(url, h)$content)
# extract links
temp = gsub(pattern = '<a', replacement = '<td', txt, perl = TRUE)
temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)
temp = extract.table.from.webpage(temp, 'Equities')
#
# Paging
#
library(stringr)
token = extract.token(txt,'<div class="wsod-paging-key">','</div>')
nstep = str_match(token,' data-ajax-paging-end-row="([0-9]+)">')[2]
nstep = as.numeric(nstep)
nfound = str_match(token,' data-ajax-paging-total-rows="([0-9]+)">')[2]
nfound = as.numeric(nfound)
data = matrix('',nr=nfound,nc=2)
colnames(data) = spl('Name,Symbol') #spl('Name,Symbol,LastPrice,TodayChange,YearChange')
data[1:nstep,] = temp[,1:2]
#[PhantomJS](http://stackoverflow.com/questions/15739263/phantomjs-click-an-element)
#[Short R tutorial: Scraping Javascript Generated Data with R](https://www.datacamp.com/community/tutorials/scraping-javascript-generated-data-with-r)
#[webshot::install_phantomjs()](https://cran.r-project.org/web/packages/webshot/index.html)
#
# URL options
#
#[Firefox - Web Developer Tools]
# Log request and Response Bodies
token = extract.token(txt,'<div class="wsodHidden">','</div>')
token = spl(token,'<input')
names = str_match(token,'data-ajax-param="([^"]*)"')[-1,2]
values = str_match(token,'value="([^"]*)"')[-1,2]
settings = as.list(sapply(1:len(names), function(i) { t=c(values[i]); names(t)=names[i]; t}))
settings$ResetPaging = 'false'
h = handle_setopt(h, referer=url)
for(istart in seq(nstep+1,nfound,by=nstep)) {
cat(istart, 'out of', nfound, '\n')
settings$startRow = paste(istart)
handle_setform(h,.list=settings)
url = 'http://markets.ft.com/Research/Remote/UK/Tearsheets/IndexConstituentsPaging'
txt = rawToChar(curl_fetch_memory(url, h)$content)
#cat(txt, file='dump1.htm') # save to analyze
#txt0 = txt
#[iconv('pretty\u003D\u003Ebig', "UTF-8", "ASCII")](http://stackoverflow.com/questions/17761858/converting-a-u-escaped-unicode-string-to-ascii)
#[Getting started with JSON and jsonlite](https://cran.r-project.org/web/packages/jsonlite/vignettes/json-aaquickstart.html)
library(jsonlite)
txt=fromJSON(txt)$html
# extract links
temp = gsub(pattern = '<a', replacement = '<td', txt, perl = TRUE)
temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)
temp = extract.table.from.webpage(temp, has.header=F)
data[istart:min(istart+nstep-1,nfound),] = temp[,1:2]
}
if( file.exists(data.filename) && requireNamespace('ftouch', quietly = T) ) {
data.copy = data
load(file=data.filename)
if( isTRUE(all.equal(data.copy, data)) ) {
ftouch::touch(data.filename)
return(data)
}
}
save(data,file=data.filename)
data
}
#
# data function template:
#
# introduce following parameters:
# force.download = FALSE - flag to indicate that data need to be updated
# data.filename = 'data.Rdata' - location to save data
# data.keep.days = 30 - number of days that data does not need to be refreshed
#
# # if NOT forced to download and file exists and file is less than 30 days old
# if( !force.download &&
# file.exists(data.filename) &&
# as.numeric(Sys.Date() - as.Date(file.mtime(data.filename))) <= data.keep.days
# ) {
# load(file=data.filename)
# return(data)
# }
#
# Once data is downloaded check if needs to be saved
#
# if( file.exists(data.filename) && requireNamespace('ftouch', quietly = T) ) {
# data.copy = data
# load(file=data.filename)
#
# if( isTRUE(all.equal(data.copy,data)) ) {
# ftouch::touch(data.filename)
# return(data)
# }
# }
#
# save(data,file=data.filename)
# data
#}
#
###############################################################################
# Data from https://www.ishares.com/us/products/etf-product-list
#'
#' [Download File in R with POST while sending data](https://stackoverflow.com/questions/34864162/download-file-in-r-with-post-while-sending-data)
#'
#' @examples
#' \dontrun{
#' data.ishares.universe()
#' }
#' @export
#' @rdname DataFTFunctions
###############################################################################
data.ishares.universe = function
(
portfolios="239726-239623-239458-239566-239706-239763-239708-239637-239710-239467-239774-239565-239665-239826-239707-239500-239725-239695-239718-239644-244049-239451-244050-239452-239728-239456-239454-239465-239561-239719-239626-239766-239699-239572-239463-239717-239714-239855-239712-239709-239455-239600-239627-239563-239762-239650-239764-239723-239536-239520-239482-239724-239466-259622-239775-239681-239659-239641-239612-239534-239773-239605-239615-239499-239628-239736-256101-239686-239522-239622-239601-239854-239464-239468-239674-268708-244048-239690-239619-239594-239511-239657-239607-239737-239744-239670-239512-239507-251614-239508-239685-239741-239524-239768-239772-239506-264617-239516-239423-239683-239513-239505-239746-258100-239713-239757-239460-239769-239453-239580-239664-239543-239514-239761-239715-239750-239510-239830-239756-251616-239758-239716-239540-272532-239502-239771-239450-264619-239740-239523-239457-259623-239519-239579-239720-259624-239678-244051-239501-239509-239731-239582-239503-239661-239667-264615-239765-239545-239680-239649-264623-239517-239751-239521-239729-239689-239705-239648-239588-239618-239528-239462-239688-239692-239669-239684-239730-239677-239581-270319-239675-239668-239739-239733-239696-239459-239518-239645-271054-239585-239767-239742-239671-239606-251465-239583-268704-239676-239586-239584-239753-239748-239430-251474-239745-239752-279626-239666-239550-239614-239424-239662-239722-239655-239734-268752-239526-258098-239610-271056-239831-239429-272342-239654-272341-239663-239629-254263-264507-239759-239587-239829-239461-239621-239551-258510-254551-239504-239672-239721-239642-239515-254553-272343-269394-272824-239609-239738-239529-239539-239544-239431-239527-254555-272340-260973-239660-264503-239537-271544-251476-239770-272822-239693-239443-272344-239735-272346-264273-264613-239656-273753-251477-272345-275382-239445-239653-276546-264542-264275-272112-264544-239613-264611-273746-276544-239570-239652-239651-239530-239525-239647-239620-260652-260975-239673-272819-254562-271540-258806-239552-283378-239691-271538-264127-239444-273743-273775-239638-275389-280052-273771-272825-275397-264606-253433-280049-272823-275384-271542-272821-273750-272820-273763-279286-270316-280048-280051-280050-275399-280769-273748-280771-273766-280774-273759-273768",
# above list must be updated manually :(
force.download = FALSE,
data.filename = 'ishares.universe.Rdata',
data.folder = paste(getwd(), 'data.ishares', sep='/'),
data.keep.days = 30
)
{
data.filename = file.path(data.folder, data.filename)
# if NOT forced to download and file exists and file is less than 30 days old
if( !force.download &&
file.exists(data.filename) &&
as.numeric(Sys.Date() - as.Date(file.mtime(data.filename))) <= data.keep.days
) {
load(file=data.filename)
return(data)
}
# make sure folder exists
dir.create(data.folder, F)
# get data
url = 'https://www.ishares.com/us/product-screener-download.dl'
library(curl)
h = new_handle()
handle_setopt(h, useragent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0', referer='https://www.ishares.com/us/products/etf-product-list')
handle_setopt(h, customrequest = 'POST')
handle_setopt(h, postfields=paste0('productView=ishares&portfolios=', portfolios))
req = curl_fetch_memory(url, h)
if(req$status_code != 200)
warning('error getting data, status_code:', req$status_code, 'for url:', url, 'content:', rawToChar(req$content))
txt = rawToChar(req$content)
# nchar(txt)
# write.file(txt,file='text.txt')
# export data: the response is Excel-style XML; normalize tag case and map Row/Cell tags to HTML table markup
temp = gsub('<table>', '<table>', txt, perl = T, ignore.case = T)
temp = gsub('</table>', '</table>', temp, perl = T, ignore.case = T)
temp = gsub('<row>', '<tr>', temp, perl = T, ignore.case = T)
temp = gsub('</row>', '</tr>', temp, perl = T, ignore.case = T)
temp = gsub('<cell', '<td', temp, perl = T, ignore.case = T)
temp = gsub('</cell', '</td', temp, perl = T, ignore.case = T)
temp = gsub('ss:MergeAcross="([0-9]+)"','>REP_\\1_<a', temp, perl=T)
temp = gsub('ss:Index="([0-9]+)"','>IDX_\\1_<a', temp, perl=T)
data = extract.table.from.webpage(temp, 'Ticker', has.header=F)
# process headers, ugly
library(stringr)
header = data[1,]
index = str_match(header,'REP_([0-9]+)')[,2]
index = as.numeric(index)
header = gsub('REP_([0-9]+)_','', header, perl=T)
temp = header
j = 1
for(i in 1:len(header))
if( !is.na(index[i]) ) {
temp[j:(j+index[i])] = header[i]
j = j + index[i] + 1
} else j = j + 1
data[1,] = temp
header = data[2,]
index = str_match(header,'IDX_([0-9]+)')[,2]
index = as.numeric(index)
header = gsub('IDX_([0-9]+)_','', header, perl=T)
temp = rep('', len(header))
for(i in 1:len(header))
if( !is.na(index[i]) ) {
temp[index[i]] = header[i]
}
data[2,] = temp
# save data
colnames(data) = trim(apply(data[1:2,],2,join, ' '))
data = data[-c(1:2),]
if( file.exists(data.filename) && requireNamespace('ftouch', quietly = T) ) {
data.copy = data
load(file=data.filename)
if( isTRUE(all.equal(data.copy, data)) ) {
ftouch::touch(data.filename)
return(data)
}
}
save(data,file=data.filename)
data
}
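# usage sketch (not run): the first call downloads and caches the full iShares product list;
# column names are built by joining the two header rows, so locate the ticker column with grep
# universe = data.ishares.universe()
# head(universe[, grep('Ticker', colnames(universe))])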
###############################################################################
# download Profile page from Yahoo Finance
#
# [get industry/sector information from yahoo finance](https://ca.finance.yahoo.com/q/pr?s=RY.TO)
# data.yahoo.profile('RY.TO')
#
#' @export
###############################################################################
data.yahoo.profile = function(symbol) {
url = paste0('http://finance.yahoo.com/q/pr?s=', symbol)
txt = get.url(url)
temp = extract.table.from.webpage(txt, 'Sector', has.header = F)
temp = gsub(':','',temp )
out = temp[,2]
names(out) = tolower(temp[,1])
out
}
###############################################################################
# download URL with curl
#
# get.url('http://money.cnn.com/data/dow30/')
#
#' @export
###############################################################################
get.url = function
(
url,
useragent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0',
referer = NULL
)
{
library(curl)
h = new_handle()
if( !is.null(useragent) ) handle_setopt(h, useragent = useragent)
if( !is.null(referer) ) handle_setopt(h, referer=referer)
req = curl_fetch_memory(url, h)
if(req$status_code != 200)
warning('error getting data, status_code:', req$status_code, 'for url:', url, 'content:', rawToChar(req$content))
txt = rawToChar(req$content)
txt
}
###############################################################################
# Load FOMC dates
# http://www.federalreserve.gov/monetarypolicy/fomccalendars.htm
# http://www.federalreserve.gov/monetarypolicy/fomchistorical2008.htm
# http://quant.stackexchange.com/questions/141/what-data-sources-are-available-online
# http://www.returnandrisk.com/
#' @export
###############################################################################
get.FOMC.dates <- function
(
force.download = FALSE,
data.filename = 'fomc.Rdata'
)
{
warning('get.FOMC.dates is deprecated as of Apr 25, 2016; please use the data.fomc function instead')
data.fomc(force.download, data.filename)
}
#' @export
data.fomc <- function
(
force.download = FALSE,
data.filename = 'fomc.Rdata'
)
{
# download data
url = 'http://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
txt = join(readLines(url))
library(stringr)
# extract data from page
sb = string.buffer()
for(year in 2009:(1 + date.year(Sys.Date()))) {
temp = extract.table.from.webpage(txt, paste(year,'FOMC Meetings'))
if(nrow(temp) == 0) next
temp = tolower(trim(temp[,1:2]))
temp = temp[nchar(temp[,1]) > 0,]
month = temp[,1]
day = temp[,2]
status = paste0(
ifna( str_match(day, '\\(.*\\)'), ''),
ifna( str_match(day, '\\*'), '')
)
status = gsub('\\(','',gsub('\\)','',status))
day = str_replace(day, '(\\(.*?\\))','')
day = str_replace(day, '\\*','')
day = paste(year,
sapply( iif(grepl('/',month), month, paste0(month,'/',month)), spl, '/'),
sapply( iif(grepl('-',day), day, paste0(day,'-',day)), spl, '-')
)
day = matrix(day, nc=2, byrow=T)
for(i in 1:len(status)) add(sb, day[i,1], day[i,2], status[i])
}
data = matrix(scan(what='',text= string(sb),sep=',', quiet=T),nc=3,byrow=T)
close(sb)
sb=NULL
first.year = min(as.numeric(substr(data[,1],1,4)))
recent.data = data
# check if update is needed
if(!force.download && file.exists(data.filename)) {
load(file=data.filename)
# check if data needs to be updated
if( last(FOMC$day) == as.Date(last(recent.data[,2]),'%Y %B %d') )
return(FOMC)
}
# extract data from page
sb = string.buffer()
for(year in 1936:(first.year-1)) {
cat(year,'\n')
url = paste0('http://www.federalreserve.gov/monetarypolicy/fomchistorical', year, '.htm')
txt = join(readLines(url))
tokens = spl(txt,'<div id="historical">')
for(token in tokens[-1])
add(sb, colnames(extract.table.from.webpage(token, 'year'))[1])
}
data = scan(what='',text= string(sb),sep='\n', quiet=T)
close(sb)
sb=NULL
# remove year
year = substring(data,nchar(data)-3)
day = tolower(substring(data,1,nchar(data)-4))
# remove Conference Call, Conference Calls, Meeting, Meetings
status = paste0(
iif(grepl('conference call',day), 'conference call', ''),
iif(grepl('meeting',day), 'meeting', '')
)
day = gsub('conference call', '', gsub('conference calls','',day))
day = gsub('meeting', '', gsub('meetings','',day))
day = gsub(',', '-', gsub('and', '',day))
#[870] "october 15 "
#[871] "november 2-3 "
#[872] "december 14 "
#[615] "october 21- 22- 23- 26- 27- 28- 29- 30 "
#[680] "june 30-july 1 "
# helper fn
parse.token = function(year, token) {
parts = trim(spl(token,'-'))
n = len(parts)
if( n > 1 ) {
month = ifna.prev(iif(nchar(parts) > 3,
sapply(parts, function(x) spl(x, ' ')[1]), # first token
NA))
parts = iif(nchar(parts) > 3, parts, paste(month, parts))
}
paste(year, parts[c(1,n)])
}
day = sapply(1:len(day), function(i) parse.token(year[i], day[i]))
all.data = rbind(cbind(t(day), status), recent.data)
FOMC = list(day = as.Date(all.data[,2],'%Y %B %d'), start.day = as.Date(all.data[,1],'%Y %B %d'), status=all.data[,3])
save(FOMC,file=data.filename)
FOMC
}
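# usage sketch (not run): cached FOMC meeting dates back to 1936
# fomc = data.fomc()
# last(fomc$day)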
# todo: compute a$start.day - mlag(a$day) in business days
# parse Minutes: See end of minutes of October 29-30 meeting / (Released November 20, 2013)
# Minutes: See end of minutes of December 11 meeting / Minutes (Released Jan 2, 2008)
###############################################################################
# Get EDGAR info
# www.sec.gov/cgi-bin/browse-edgar?CIK=AAPL&Find=Search&owner=exclude&action=getcompany
# mktstk.wordpress.com/2015/03/03/sic-lookup-by-stock-symbol/
#' @export
###############################################################################
edgar.info <- function(ticker)
{
# please note readLines only works with http, for more detail please read
# http://stackoverflow.com/questions/26540485/readlines-does-not-read-from-https-url-when-called-from-systemrscript
url = paste0('http://www.sec.gov/cgi-bin/browse-edgar?CIK=', ticker, '&Find=Search&owner=exclude&action=getcompany')
txt = join(readLines(url))
out = list()
# extract table from this page
temp = extract.table.from.webpage(txt, 'seriesDiv,Filings', has.header = T)
out$fillings= clean.table(temp)
temp = extract.token(txt, 'contentDiv,mailer,Mailing Address','</div>')
out$mailing = t(clean.table(extract.table.from.webpage(temp, has.header=F, end.marker='</span>')))
colnames(out$mailing) = 'Mailing Address'
temp = extract.token(txt, 'contentDiv,mailer,Business Address','</div>')
out$business = t(clean.table(extract.table.from.webpage(temp, has.header=F, end.marker='</span>')))
colnames(out$business) = 'Business Address'
temp = extract.token(txt, 'contentDiv,companyInfo,>','</div>')
temp = gsub('\\|', '</span>', replace.token(temp, '<br','>','</span>'))
temp = clean.table(extract.table.from.webpage(temp, has.header=F, end.marker='</span>'))
out$company = t(temp)
colnames(out$company) = 'Company Info'
out$sic = trim(spl(spl(temp[grep('SIC', temp)],':')[2],'-'))
return(out)
}
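# usage sketch (not run); note the filings table is stored under out$fillings
# info = edgar.info('AAPL')
# info$sic
# head(info$fillings)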
###############################################################################
# Get Zacks info
# http://www.zacks.com/stock/research/IBM/earnings-announcements
#' @export
###############################################################################
zacks.info <- function(ticker = 'IBM')
{
url = paste0('http://www.zacks.com/stock/research/', ticker, '/earnings-announcements')
txt = join(readLines(url))
out = list()
require(jsonlite)
for(i in spl('earnings,webcasts,revisions,splits,dividends,guidance')) {
data = extract.token(txt,paste0('<script>,window.app_data_', i, ',=,"data"'),'</script>')
data = fromJSON(paste('{"data"', data))
out[[i]] = data$data
}
out
}
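# usage sketch (not run):
# info = zacks.info('IBM')
# head(info$earnings)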
###############################################################################
# Get quantumonline info
# www.quantumonline.com/search.cfm?tickersymbol=458140100&sopt=cusip&1.0.1=Search
# quantumonline.info(id = '458140100', type='cusip')
#' @export
###############################################################################
quantumonline.info <- function
(
id,
type=c(
'cusip', # by CUSIP Number
'symbol', # by Ticker Symbol
'sname' # Symbol Lookup
)
)
{
# http://www.quantumonline.com/search.cfm?tickersymbol=458140100&sopt=cusip&1.0.1=Search
url = paste0('http://www.quantumonline.com/search.cfm?tickersymbol=', id, '&sopt=', type[1], '&1.0.1=Search')
txt = join(readLines(url))
out = list()
# extract table from this page
out$main = extract.table.from.webpage(gsub('&nbsp;', ',', txt), "Company's Online Profile", has.header = F)
out$address = extract.table.from.webpage( txt, 'Address:', has.header = F)
return(out)
}
###############################################################################
#' URL for various data providers
#'
#' [lookup ticker](http://www.quotemedia.com/portal/quote?qm_symbol=pot:ca)
#' [check sector / industry info for any company](http://www.quotemedia.com/portal/profile?qm_symbol=m)
#'
#' hist = read.xts(hist.quotes.url('IBM', '1992-11-01', '2016-05-05', 'quotemedia'))
#' hist = read.xts(hist.quotes.url('HOU:CA', '1992-11-01', '2016-05-05', 'quotemedia'))
#' hist = read.xts(get.url(hist.quotes.url('HOD:CA', '1992-11-01', '2016-05-05', 'quotemedia')))
#'
#' library(readr)
#' hist = read.xts(read_csv(get.url(hist.quotes.url('HOU:CA', '1992-11-01', '2016-05-05', 'quotemedia')),,na=c('','NA','N/A')))
#'
#' http://web.tmxmoney.com/pricehistory.php?qm_page=90043&qm_symbol=HOD
#' http://www.quotemedia.com/portal/history?qm_symbol=HOD:CA
#'
#' @export
###############################################################################
hist.quotes.url <- function
(
ticker = 'IBM',
from = '1900-01-01',
to = Sys.Date(),
src = spl('yahoo,google,quotemedia')
)
{
if(class(from) != 'Date') from = as.Date(from, '%Y-%m-%d')
if(class(to) != 'Date') to = as.Date(to, '%Y-%m-%d')
switch(src[1],
yahoo = paste('http://ichart.finance.yahoo.com/table.csv?',
's=', ticker,
'&a=', sprintf('%.2d', date.month(from) - 1),
format(from, '&b=%d&c=%Y'),
'&d=', sprintf('%.2d', date.month(to) - 1),
format(to, '&e=%d&f=%Y'),
'&g=d&q=q&y=0&z=file&x=.csv',
sep=''),
google = paste('http://finance.google.com/finance/historical?',
'q=', ticker,
'&startdate=', format(from, '%b+%d+%Y'),
'&enddate=', format(to, '%b+%d+%Y'),
'&output=csv',
sep=''),
quotemedia = paste('http://app.quotemedia.com/quotetools/getHistoryDownload.csv?webmasterId=501&',
'symbol=', ticker,
'&startMonth=', sprintf('%.2d', date.month(from) - 1),
format(from, '&startDay=%d&startYear=%Y'),
'&endMonth=', sprintf('%.2d', date.month(to) - 1),
format(to, '&endDay=%d&endYear=%Y'),
'&isRanged=true',
sep=''),
# default
''
)
}
###############################################################################
# Remove extreme data points
#' @export
###############################################################################
data.clean <- function
(
data,
min.ratio = 2.5,
min.obs = 3*252,
iqr.mult = 20
)
{
data$symbolnames = iif(is.null(data$symbolnames), ls(data), data$symbolnames)
# remove all series that has less than minimum number of observations
if(min.obs > 0) {
index = names(which(sapply(data$symbolnames, function(x) as.numeric(count(Cl(data[[x]])))) < min.obs))
if (len(index) > 0) {
cat('Removing', index, '- fewer than', min.obs, 'observations', '\n')
rm(list=index, envir=data)
data$symbolnames = setdiff(data$symbolnames, index)
}
}
for(ticker in data$symbolnames)
data[[ticker]] = data.clean.helper(data[[ticker]], ticker, min.ratio, iqr.mult)
}
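# usage sketch (not run), assuming quantmod is loaded and data is an environment of OHLC series
# data = env()
# quantmod::getSymbols('IBM', src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T)
# data.clean(data, min.ratio = 2.5, min.obs = 3*252)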
data.clean.helper <- function
(
data,
ticker,
min.ratio = 2.5,
iqr.mult = 20
)
{
data = data[Cl(data) > 0 & Ad(data) > 0]
nperiods = nrow(data)
price = Ad(data)
# forward ratio
ratio = as.vector((price)/mlag(price))
index = which(ratio > min.ratio)
if(len(index) > 0)
for(i in index) {
cat('Abnormal price found for', ticker, format(index(data)[i],'%d-%b-%Y'),'Ratio :', round(ratio[i],1),'\n')
for(name in find.names('Open,Close,High,Low,Adjusted', data))
data[i:nperiods,name] = data[i:nperiods,name] / ratio[i]
}
price = Ad(data)
ret = as.vector((price)/mlag(price)) - 1
threshold = iqr.mult * IQR(ret, na.rm=T)
index = which(ret > threshold | ret < -threshold)
if(len(index) > 0)
for(i in index) {
cat('Abnormal price found for', ticker, format(index(data)[i],'%d-%b-%Y'),'based on IQR, Ratio :', round(ratio[i],1),'\n')
for(name in find.names('Open,Close,High,Low,Adjusted', data))
data[i:nperiods,name] = data[i:nperiods,name] / ratio[i]
}
# backward ratio
price = Ad(data)
ratio = as.vector(mlag(price)/(price))
index = which(ratio > min.ratio)
if(len(index) > 0)
for(i in index) {
cat('Abnormal price found for', ticker, format(index(data)[i],'%d-%b-%Y'),'Inverse Ratio :', round(ratio[i],1),'\n')
for(name in find.names('Open,Close,High,Low,Adjusted', data))
data[i:nperiods,name] = data[i:nperiods,name] * ratio[i]
}
data
}
###############################################################################
# Create data proxy, more details at
# http://systematicinvestor.github.io/Data-Proxy/
#' @export
###############################################################################
make.data.proxy <- function() {
#*****************************************************************
# Load external data
#******************************************************************
load.packages('quantmod')
raw.data = env()
#--------------------------------
# TRJ_CRB file was downloaded from the
# http://www.corecommodityllc.com/CoreIndexes.aspx
# select TR/CC-CRB Index-Total Return and click "See Chart"
# on Chart page click "Download to Spreadsheet" link
# copy TR_CC-CRB, downloaded file, to data folder
filename = 'data/TR_CC-CRB'
if(file.exists(filename)) {
temp = extract.table.from.webpage( join(readLines(filename)), 'EODValue' )
temp = join( apply(temp, 1, join, ','), '\n' )
raw.data$CRB = make.stock.xts( read.xts(temp, format='%m/%d/%y' ) )
}
#--------------------------------
# load 3-Month Treasury Bill rate from FRED (DTB3)
filename = 'data/TB3M.Rdata'
if(!file.exists(filename)) {
TB3M = quantmod::getSymbols('DTB3', src='FRED', auto.assign = FALSE)
save(TB3M, file=filename)
}
load(file=filename)
TB3M[] = ifna.prev(TB3M)
#compute.raw.annual.factor(TB3M)
raw.data$TB3M = make.stock.xts(processTBill(TB3M, timetomaturity = 1/4, 261))
#--------------------------------
# load 3-Year Treasury rate from FRED (DGS3)
filename = 'data/TB3Y.Rdata'
if(!file.exists(filename)) {
TB3Y = quantmod::getSymbols('DGS3', src='FRED', auto.assign = FALSE)
save(TB3Y, file=filename)
}
load(file=filename)
TB3Y[] = ifna.prev(TB3Y)
#compute.raw.annual.factor(TB3Y)
raw.data$TB3Y = make.stock.xts(processTBill(TB3Y, timetomaturity = 3, 261))
#--------------------------------
# load 10-Year Treasury rate from FRED (DGS10)
filename = 'data/TB10Y.Rdata'
if(!file.exists(filename)) {
TB10Y = quantmod::getSymbols('DGS10', src='FRED', auto.assign = FALSE)
save(TB10Y, file=filename)
}
load(file=filename)
TB10Y[] = ifna.prev(TB10Y)
#compute.raw.annual.factor(TB10Y)
raw.data$TB10Y = make.stock.xts(processTBill(TB10Y, timetomaturity = 10, 261))
#--------------------------------
# load 20-Year Treasury rate from FRED (GS20)
filename = 'data/TB20Y.Rdata'
if(!file.exists(filename)) {
TB20Y = quantmod::getSymbols('GS20', src='FRED', auto.assign = FALSE)
save(TB20Y, file=filename)
}
load(file=filename)
TB20Y[] = ifna.prev(TB20Y)
#compute.raw.annual.factor(TB20Y)
raw.data$TB20Y = make.stock.xts(processTBill(TB20Y, timetomaturity = 20, 12))
#--------------------------------
filename = 'data/GOLD.Rdata'
if(!file.exists(filename)) {
GOLD = bundes.bank.data.gold()
save(GOLD, file=filename)
}
load(file=filename)
raw.data$GOLD = make.stock.xts(GOLD)
#--------------------------------
# FTSE NAREIT U.S. Real Estate Index monthly total return series
# http://returns.reit.com/returns/MonthlyHistoricalReturns.xls
# https://r-forge.r-project.org/scm/viewvc.php/pkg/FinancialInstrument/inst/parser/download.NAREIT.R?view=markup&root=blotter
filename = 'data/NAREIT.xls'
if(!file.exists(filename)) {
url = 'http://returns.reit.com/returns/MonthlyHistoricalReturns.xls'
download.file(url, filename, mode = 'wb')
}
load.packages('readxl')
temp = read_excel(filename, sheet='Index Data', skip=7)
NAREIT = make.xts(temp$Index, as.Date(temp$Date))
raw.data$NAREIT = make.stock.xts(NAREIT)
#--------------------------------
tickers = '
COM = DBC;GSG + CRB
RExUS = [RWX] + VNQ + VGSIX
RE = [RWX] + VNQ + VGSIX
RE.US = [ICF] + VGSIX
EMER.EQ = [EEM] + VEIEX
EMER.FI = [EMB] + PREMX
GOLD = [GLD] + GOLD,
US.CASH = [BIL] + TB3M,
SHY + TB3Y,
US.HY = [HYG] + VWEHX
# Bonds
US.BOND = [AGG] + VBMFX
INTL.BOND = [BWX] + BEGBX
JAPAN.EQ = [EWJ] + FJPNX
EUROPE.EQ = [IEV] + FIEUX
US.SMCAP = IWM;VB + NAESX
TECH.EQ = [QQQ] + ^NDX
US.EQ = [VTI] + VTSMX + VFINX
US.MID = [VO] + VIMSX
EAFE = [EFA] + VDMIX + VGTSX
MID.TR = [IEF] + VFITX
CORP.FI = [LQD] + VWESX
TIPS = [TIP] + VIPSX + LSGSX
LONG.TR = [TLT] + VUSTX
'
data.proxy = env()
getSymbols.extra(tickers, src = 'yahoo', from = '1970-01-01', env = data.proxy, raw.data = raw.data, auto.assign = T)
data.proxy.raw = raw.data
save(data.proxy.raw, file='data/data.proxy.raw.Rdata',compress='gzip')
save(data.proxy, file='data/data.proxy.Rdata',compress='gzip')
}
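# usage sketch (not run): assumes a 'data' sub-folder exists; results are saved to data/data.proxy.Rdata
# make.data.proxy()
# load('data/data.proxy.Rdata')
# ls(data.proxy)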
#*****************************************************************
# Load/download data from Excel file from AQR data set
# [Betting Against Beta: Equity Factors, Monthly](https://www.aqr.com/library/data-sets/betting-against-beta-equity-factors-monthly)
# http://www.aqr.com/library/data-sets/betting-against-beta-equity-factors-monthly/data
#
# [Time Series Momentum: Factors, Monthly](https://www.aqr.com/library/data-sets/time-series-momentum-factors-monthly)
# http://www.aqr.com/library/data-sets/time-series-momentum-factors-monthly/data
#
# [Andrea Frazzini - AQR Capital Management, LLC](http://www.econ.yale.edu/~af227/data_library.htm)
# http://www.aqr.com/library/data-sets/quality-minus-junk-factors-daily/data
# http://www.aqr.com/library/data-sets/quality-minus-junk-factors-monthly/data
#
#' @export
###############################################################################
load.aqr.data = function
(
data.set = 'betting-against-beta-equity-factors', #'time-series-momentum-factors'
frequency = c('monthly','daily'),
sheet = 1,
force.download = F,
last.col2extract = 'Global'
)
{
warning('load.aqr.data is deprecated as of Apr 25, 2016; please use the data.aqr function instead')
data.aqr(data.set, frequency, sheet, force.download, last.col2extract)
}
#' @export
data.aqr = function
(
data.set = 'betting-against-beta-equity-factors', #'time-series-momentum-factors'
frequency = c('monthly','daily'),
sheet = 1,
force.download = F,
last.col2extract = 'Global'
)
{
data.folder = paste(getwd(), 'aqr.data', sep='/')
url = paste0('http://www.aqr.com/library/data-sets/', data.set, '-', frequency[1], '/data')
filename = file.path(data.folder, paste0(data.set, '-', frequency[1],'.xlsx'))
if( !file.exists(filename) || force.download) {
dir.create(data.folder, F)
download.file(url, filename, mode = 'wb')
}
require(readxl)
data = read_excel(filename, sheet=sheet)
skip = which(data[,1]=='DATE')
data = read_excel(filename, sheet=sheet,skip=skip)
if( is.character(last.col2extract) ) last.col2extract = which(colnames(data)==last.col2extract)-1
data = data[!is.na(data[,1]), 1:last.col2extract]
data = data[rowSums(!is.na(data[,-1,drop=F])) > 0,]
make.xts(data[,-1], as.Date(data[,1]))
}
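# usage sketch (not run): monthly Betting Against Beta factors, cached under ./aqr.data
# bab = data.aqr('betting-against-beta-equity-factors', 'monthly')
# last(bab)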
###############################################################################
# Load/download CSI security master
# http://www.csidata.com/factsheets.php?type=commodity&format=csv
# [Stock Factsheet - TSX - Toronto Stock Exchange](http://www.csidata.com/factsheets.php?type=stock&format=htmltable&exchangeid=82)
#' @export
###############################################################################
data.csi.security.master = function
(
type=c('commodity', 'stock'),
exchangeid=c(NA, 82),
force.download = FALSE,
data.filename = paste0(type[1],'.csv'),
data.keep.days = 30,
data.folder = 'data.csi'
)
{
data.folder = paste(getwd(), data.folder, sep='/')
data.filename = file.path(data.folder, data.filename)
# if NOT forced to download and file exists and file is less than 30 days old
if( !force.download &&
file.exists(data.filename) &&
as.numeric(Sys.Date() - as.Date(file.mtime(data.filename))) <= data.keep.days
) {
return(read.csv(data.filename))
}
type = type[1]
exchangeid = exchangeid[1]
if( is.na(exchangeid[1]) )
url = paste0('http://www.csidata.com/factsheets.php?type=', type[1], '&format=csv')
else
url = paste0('http://www.csidata.com/factsheets.php?type=', type[1], '&format=csv&exchangeid=', exchangeid[1])
dir.create(data.folder, F)
txt = get.url(url)
write(txt, file=data.filename)
read.csv(data.filename)
}
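# usage sketch (not run): commodity factsheet by default; exchangeid = 82 is the TSX stock factsheet
# commodities = data.csi.security.master('commodity')
# tsx = data.csi.security.master('stock', 82)
# head(tsx)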
###############################################################################
#' Get list of FX symbols from FRED
#' [FRED H.10 Foreign Exchange Rates](https://research.stlouisfed.org/fred2/release?rid=17)
#'
#' @examples
#' \dontrun{
#' info = fred.fx.symbol()
#' info$fx$symbol
#' }
#' @export
###############################################################################
fred.fx.symbol = function() {
url = 'https://research.stlouisfed.org/fred2/release/tables?rid=17&eid=23340'
txt = join(readLines(url))
# extract links: <a href="/fred2/series/DEXUSAL" target="_blank">AUSTRALIA</a>
temp = gsub(pattern = 'series', replacement = '<td>', txt, perl = TRUE)
temp = gsub(pattern = 'target', replacement = '</td><', temp, perl = TRUE)
# extract Symbols table from this page
temp = extract.table.from.webpage(temp, 'Country', has.header = F)
# format
data = gsub('/','',gsub('"','',trim(temp[,c(2,3,7)])))
colnames(data) = spl('symbol,name,description')
# remove empty
keep.index = !is.na(data[,'description']) & nchar(data[,'description']) > 0
data = data.frame(data[keep.index,])
# split FX and index
index = grep('index', data[,'description'], ignore.case = TRUE)
list(fx = data[-index,], index = data[index,])
}
###############################################################################
#' Get list of FX symbols from FXHISTORICALDATA.COM
#' [FXHISTORICALDATA.COM](http://www.fxhistoricaldata.com/)
#'
#' @examples
#' \dontrun{
#' info = fxhistoricaldata.fx.symbol()
#' info
#' }
#' @export
###############################################################################
fxhistoricaldata.fx.symbol = function() {
url = 'http://www.fxhistoricaldata.com/'
txt = join(readLines(url))
# extract list options
temp = gsub(pattern = '<ul>', replacement = '<table>', txt, perl = TRUE)
temp = gsub(pattern = '</ul>', replacement = '</table>', temp, perl = TRUE)
temp = gsub(pattern = '<li>', replacement = '<td>', temp, perl = TRUE)
temp = gsub(pattern = '</li>', replacement = '</td>', temp, perl = TRUE)
# remove comments
temp = gsub(pattern = '<!--.*?-->', replacement = '', temp, perl = TRUE)
# extract info
temp = extract.table.from.webpage(temp, 'EURUSD', has.header = F)
as.character(temp)
}