In [1]:
# Use Julia v0.4.5
# Pkg.clone("https://github.com/tanmaykm/AxisArrays.jl.git")
# Pkg.checkout("AxisArrays", "tan")
using Base.Dates
using BusinessDays
using AxisArrays
using Vega

In [2]:
# simulate some data
# we are generating ohlc data for 500 stocks across 10 years
function gen_days(nyears)
    cal = BusinessDays.UnitedStates()
    BusinessDays.initcache(cal)
    listbdays(cal, today()-Year(10), today())
    # we need a businessday calendar to represent only business days, until then just use all days
    #[Date(now()-Year(nyears)):Date(now());]
end

function gen_volatility(dayavgs, volatility)
    ndays = size(dayavgs, 2)
    out = copy(dayavgs)
    for d in 1:ndays
        out[:,d] += out[:,d] .* volatility .* randn(500)
    end
    abs(out)
end

function gen_prices(nstocks, ndays)
    t1prices = repmat([50.:4.3:650.;], 4)[randperm(nstocks)]
    t2prices = repmat([-50.:8.7:800.;], 6)[randperm(nstocks)]
    incrprices = (t2prices .- t1prices) / ndays

    dayavgs = Array(Float64, nstocks, ndays)
    dayavgs[:,1] = t1prices
    for d in 2:ndays
        prevday = dayavgs[:,d-1]
        dayavgs[:,d] = prevday .+ incrprices
    end
    dayavgs
end

function generate_data()
    bussdays = gen_days(10)
    dateaxis = Axis{:date}(bussdays)
    scriptaxis = Axis{:script}([symbol("stock$i") for i in 1:500])
    priceaxis = Axis{:price}([:open, :high, :low, :close])

    ndays = length(dateaxis)
    nstocks = length(scriptaxis)
    dayavgs = gen_prices(nstocks, ndays)
    volatility = repmat([0.0:0.5:20.0;], 30)[1:nstocks] / 100

    opens = gen_volatility(dayavgs, volatility)
    closes = gen_volatility(dayavgs, volatility)
    highs = gen_volatility(dayavgs, volatility)
    lows = gen_volatility(dayavgs, volatility)
    highs = max(opens, closes, highs, lows)
    lows = min(opens, closes, highs, lows)
    
    A = AxisArray(zeros(length(scriptaxis), length(dateaxis), length(priceaxis)), scriptaxis, dateaxis, priceaxis);
    for script in 1:500
        A[script, :, :open] = opens[script, :]
        A[script, :, :close] = closes[script, :]
        A[script, :, :low] = lows[script, :]
        A[script, :, :high] = highs[script, :]
    end
    A
end

generate_data (generic function with 1 method)

In [3]:
# `data` has our market data (simulated for now)
data = generate_data();

# Explore data (display, slice, merge, toss and turn)

TODO: plotting, nicer show output

In [4]:
# display the data
data

3-dimensional AxisArray{Float64,3,...} with axes:
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :date, [2006-05-30,2006-05-31,2006-06-01,2006-06-02,2006-06-05,2006-06-06,2006-06-07,2006-06-08,2006-06-09,2006-06-12  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
    :price, [:open,:high,:low,:close]
And data, a 500x2510x4 Array{Float64,3}:
[:, :, 1] =
 411.2     411.352   411.505   411.657   …  793.443    793.595    793.748 
 227.748   226.501   224.883   226.581      437.182    436.697    435.189 
 175.872   177.096   178.424   178.519      147.979    152.633    148.918 
  66.4987   68.4522   67.7661   68.7268     697.159    663.366    679.311 
 121.495   127.604   124.365   117.294      598.315    591.53     589.436 
 568.747   532.102   527.204   531.291   …  151

In [5]:
# display the axes only
axes(data)

:script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
:date, [2006-05-30,2006-05-31,2006-06-01,2006-06-02,2006-06-05,2006-06-06,2006-06-07,2006-06-08,2006-06-09,2006-06-12  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
:price, [:open,:high,:low,:close]


In [6]:
# display slices by indexing into it with axis names and values/ranges
data[Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:script}(:stock20), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock20]
    :date, [2006-06-15,2006-06-16]
And data, a 1x2 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 274.011  271.172

In [7]:
# can also index with positions/ranges
# indexing with position is better in iterations, whereas using names is convenient for exploration
data[20, 1:3, 4]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock20]
    :date, [2006-05-30,2006-05-31,2006-06-01]
And data, a 1x3 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 246.671  242.954  268.599

In [8]:
# rotate axes with permutedims (useful for exploration and efficient access)
# refer to axes either by name or index
# permutedims(data, (Axis{:date}, Axis{:script}, Axis{:price}))
permutedims(data, (2,1,3))

3-dimensional AxisArray{Float64,3,...} with axes:
    :date, [2006-05-30,2006-05-31,2006-06-01,2006-06-02,2006-06-05,2006-06-06,2006-06-07,2006-06-08,2006-06-09,2006-06-12  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :price, [:open,:high,:low,:close]
And data, a 2510x500x4 Array{Float64,3}:
[:, :, 1] =
 411.2    227.748  175.872   66.4987  …  309.935  323.068   547.77   329.931
 411.352  226.501  177.096   68.4522     310.69   311.511   554.522  342.563
 411.505  224.883  178.424   67.7661     305.334  310.295   545.904  330.237
 411.657  226.581  178.519   68.7268     319.079  308.699   549.592  343.255
 411.81   227.952  178.834   67.7603     305.82   320.967   528.289  346.44 
 411.962  225.848  179.701   67.296  

In [9]:
# use cat to append data to specified axis
d19 = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
d20 = data[Axis{:script}(:stock20), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
@show d19
@show d20
cat(Axis{:script}, d19, d20)

d19 = [368.2137119756261 430.845824239013]
d20 = [274.01137833219735 271.17242739460625]


2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19,:stock20]
    :date, [2006-06-15,2006-06-16]
And data, a 2x2 Array{Float64,2}:
 368.214  430.846
 274.011  271.172

In [10]:
# use merge to create a new axis representing multiple series of data
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
d19low = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:low)]
@show d19high
@show d19low
merge(Axis{:price}([:high, :low]), d19high, d19low)

d19high = [368.2137119756261 430.845824239013]
d19low = [309.15114774175834 365.6927600049974]


3-dimensional AxisArray{Float64,3,...} with axes:
    :script, [:stock19]
    :date, [2006-06-15,2006-06-16]
    :price, [:high,:low]
And data, a 1x2x2 Array{Float64,3}:
[:, :, 1] =
 368.214  430.846

[:, :, 2] =
 309.151  365.693

# shifting (lead, lag)

In [11]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-10")..Date("2006-06-17")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2006-06-12,2006-06-13,2006-06-14,2006-06-15,2006-06-16]
And data, a 1x5 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 415.593  402.707  393.286  368.214  430.846

In [12]:
lead(d19high, Axis{:date}(Day(1)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2006-06-12,2006-06-13,2006-06-14,2006-06-15]
And data, a 1x4 Array{Float64,2}:
 402.707  393.286  368.214  430.846

In [13]:
lag(d19high, Axis{:date}(Day(1)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2006-06-13,2006-06-14,2006-06-15,2006-06-16]
And data, a 1x4 Array{Float64,2}:
 415.593  402.707  393.286  368.214

In [14]:
# in general, any axis can be shifted by any amount (though it may not make sense always)
shiftaxis(d19high, Axis{:date}(Day(5)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, Date[]
And data, a 1x0 Array{Float64,2}

# windowing
Windows can either be sliding or tumbling.

E.g.:
- `moving` uses a sliding window
- `collapse` uses a tumbling window

In [15]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2008-06-01")..Date("2016-06-01")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2008-06-02,2008-06-03,2008-06-04,2008-06-05,2008-06-06,2008-06-09,2008-06-10,2008-06-11,2008-06-12,2008-06-13  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
And data, a 1x2006 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 483.752  471.957  529.755  497.246  …  813.706  750.141  735.117  730.421

In [16]:
moving(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-02:1 week:2016-05-23
And data, a 1x417 Array{Float64,2}:
 483.752  544.558  437.19  466.161  …  793.941  794.036  786.5  737.987

In [17]:
collapse(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-02:1 week:2016-05-23
And data, a 1x417 Array{Float64,2}:
 483.752  544.558  437.19  466.161  …  793.941  794.036  786.5  737.987

In [18]:
# windowing can be applied to any axis
# the above is equivalent to
window(d19high, TumblingWindow(Axis{:date}(Week(1))), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-02:1 week:2016-05-23
And data, a 1x417 Array{Float64,2}:
 483.752  544.558  437.19  466.161  …  793.941  794.036  786.5  737.987

# resampling

`resample` is the generic API to resample any axis

`per` allows easy resampling of data axes

In [19]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2008-06-01")..Date("2016-06-01")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2008-06-02,2008-06-03,2008-06-04,2008-06-05,2008-06-06,2008-06-09,2008-06-10,2008-06-11,2008-06-12,2008-06-13  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
And data, a 1x2006 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 483.752  471.957  529.755  497.246  …  813.706  750.141  735.117  730.421

In [20]:
# downsample to per week mean (generated data has same mean across weeks)
per(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-02:1 week:2016-05-23
And data, a 1x417 Array{Float64,2}:
 625.689  625.689  625.689  625.689  …  625.689  625.689  625.689  625.689

In [22]:
# upsample to per hour (using repmat just as an example).
# our time series has holes (non business days), and interpolation function (repmat) doesn't fill them.
# we can provide custom interpolated axis points to match the behavior of our function.
hours(d::Date) = [DateTime(d):Hour(1):(DateTime(d)+Day(1)-Hour(1));]
function hours(v::Vector{Date})
    h = sizehint!(DateTime[], length(v)*24)
    for d in v
        append!(h, hours(d))
    end
    h
end
per(d19high, Axis{:date}(hours(axes(d19high, Axis{:date}).val)), x->repmat(x,24))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2008-06-02T00:00:00,2008-06-02T01:00:00,2008-06-02T02:00:00,2008-06-02T03:00:00,2008-06-02T04:00:00,2008-06-02T05:00:00,2008-06-02T06:00:00,2008-06-02T07:00:00,2008-06-02T08:00:00,2008-06-02T09:00:00  …  2016-05-27T14:00:00,2016-05-27T15:00:00,2016-05-27T16:00:00,2016-05-27T17:00:00,2016-05-27T18:00:00,2016-05-27T19:00:00,2016-05-27T20:00:00,2016-05-27T21:00:00,2016-05-27T22:00:00,2016-05-27T23:00:00]
And data, a 1x48144 Array{Float64,2}:
 483.752  471.957  529.755  497.246  …  813.706  750.141  735.117  730.421

# mapaxis
map data along any axis.

This is more like `mappart`

Regular Julia map can be used to map over individual data elements

In [23]:
mapaxis(d19high, Axis{:script}, x->x*2)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, [2008-06-02,2008-06-03,2008-06-04,2008-06-05,2008-06-06,2008-06-09,2008-06-10,2008-06-11,2008-06-12,2008-06-13  …  2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27]
And data, a 1x2006 Array{Float64,2}:
 967.503  943.914  1059.51  994.492  …  1627.41  1500.28  1470.23  1460.84

# A mock analysis over the stock price data

In [24]:
function plotprices(data::AxisArray, stock, pricecol)
    prices = data[stock, :, pricecol]
    L = length(prices)
    lineplot(x=1:L, y=reshape(prices, L))
end

function plotprices(data::AxisArray, stock)
    prices = data[stock, :]
    L = length(prices)
    lineplot(x=1:L, y=reshape(prices, L))
end

plotprices (generic function with 2 methods)

In [25]:
plotprices(data, 22, :low)

In [26]:
function dailyreturns(data::AxisArray)
    d0 = data[:,1:(end-1),:close]
    d1 = lead(data[:,:,:close], Axis{:date}(Day(1)))
    AxisArray(100 * (d0 .- d1) ./ d1, d1.axes...)
end

dailyreturns (generic function with 1 method)

In [27]:
drets = dailyreturns(data)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :date, [2006-05-30,2006-05-31,2006-06-01,2006-06-04,2006-06-05,2006-06-06,2006-06-07,2006-06-08,2006-06-11,2006-06-12  …  2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-22,2016-05-23,2016-05-24,2016-05-25,2016-05-26]
And data, a 500x2509 Array{Float64,2}:
  -0.0370656   -0.0370518   -0.0370381  …   -0.0192126   -0.0192089
   0.419917    -0.336458     0.516562        0.330056    -0.0908802
  -2.53007      1.3789       0.752811        0.900207     0.979221 
  -0.298621    -0.536106     0.732117        0.643504    -2.88866  
  -2.98913      2.90259     -4.08828         3.2896      -1.31686  
   4.78545     -7.68799      3.72933    …    0.678234    -1.46757  
   9.77943     -4.17514     -3.95707        -0.558984     6.32295 

In [28]:
plotprices(drets, 4)

In [29]:
movavg = moving(drets, Axis{:date}(Day(5)), mean);

In [30]:
plotprices(movavg, 22)

In [31]:
volat = window(movavg, TumblingWindow(Axis{:date}(Day(365*10 + 5))), std);

In [32]:
minimum(volat), median(volat), maximum(volat)

(6.500055676701421e-5,4.838388630044942,616.4798588348508)

In [49]:
mask = 15 .< volat .< 600
maskedvolat = volat[mask]
x = axisvalues(maskedvolat, Axis{:script}())
y = reshape(copy(maskedvolat.data), length(maskedvolat)) .- 10
barplot(x=x, y=y)