In [1]:
# Use Julia v0.4.5
# Pkg.clone("https://github.com/tanmaykm/AxisArrays.jl.git")
# Pkg.checkout("AxisArrays", "tan")
using Base.Dates
using AxisArrays
using Vega

In [2]:
# simulate some data
# we are generating ohlc data for 500 stocks across 10 years
function gen_days(nyears)
    # we need a businessday calendar to represent only business days, until then just use all days
    [Date(now()-Year(nyears)):Date(now());]
end

function gen_volatility(dayavgs, volatility)
    ndays = size(dayavgs, 2)
    out = copy(dayavgs)
    for d in 1:ndays
        out[:,d] += out[:,d] .* volatility .* randn(500)
    end
    abs(out)
end

function gen_prices(nstocks, ndays)
    t1prices = repmat([50.:4.3:650.;], 4)[randperm(nstocks)]
    t2prices = repmat([-50.:8.7:800.;], 6)[randperm(nstocks)]
    incrprices = (t2prices .- t1prices) / ndays

    dayavgs = Array(Float64, nstocks, ndays)
    dayavgs[:,1] = t1prices
    for d in 2:ndays
        prevday = dayavgs[:,d-1]
        dayavgs[:,d] = prevday .+ incrprices
    end
    dayavgs
end

function generate_data()
    dateaxis = Axis{:date}(Date(now()-Year(10)):Date(now()))
    scriptaxis = Axis{:script}([symbol("stock$i") for i in 1:500])
    priceaxis = Axis{:price}([:open, :high, :low, :close])

    ndays = length(dateaxis)
    nstocks = length(scriptaxis)
    dayavgs = gen_prices(nstocks, ndays)
    volatility = repmat([0.0:0.5:20.0;], 30)[1:nstocks] / 100

    opens = gen_volatility(dayavgs, volatility)
    closes = gen_volatility(dayavgs, volatility)
    highs = gen_volatility(dayavgs, volatility)
    lows = gen_volatility(dayavgs, volatility)
    highs = max(opens, closes, highs, lows)
    lows = min(opens, closes, highs, lows)
    
    A = AxisArray(zeros(length(scriptaxis), length(dateaxis), length(priceaxis)), scriptaxis, dateaxis, priceaxis);
    for script in 1:500
        A[script, :, :open] = opens[script, :]
        A[script, :, :close] = closes[script, :]
        A[script, :, :low] = lows[script, :]
        A[script, :, :high] = highs[script, :]
    end
    A
end

generate_data (generic function with 1 method)

In [3]:
# `data` has our market data (simulated for now)
data = generate_data();

# Explore data (display, slice, merge, toss and turn)

TODO: plotting, nicer show output

In [4]:
# display the data
data

3-dimensional AxisArray{Float64,3,...} with axes:
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :date, 2006-05-19:1 day:2016-05-19
    :price, [:open,:high,:low,:close]
And data, a 500x3654x4 Array{Float64,3}:
[:, :, 1] =
 587.5     587.402  587.303   587.205   …  228.695   228.597   228.498 
 608.741   611.787  608.672   614.391      640.178   637.745   638.65  
 473.767   472.64   461.926   471.765      154.845   158.629   159.555 
 261.444   266.039  263.442   264.464      164.924   170.787   168.515 
 648.076   620.506  632.395   612.297      164.555   170.938   162.823 
 104.288   102.246   98.5002  103.882   …  152.083   155.496   166.674 
 579.47    610.685  582.172   545.113      303.89    308.131   291.182 
 412.63    447.642  436.441   437.378       71.1582   72.9506   71.0141
 563.606   573.875  589.251   604.574      7

In [5]:
# display the axes only
axes(data)

(AxisArrays.Axis{:script,Array{Symbol,1}}([:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]),AxisArrays.Axis{:date,StepRange{Date,Base.Dates.Day}}(2006-05-19:1 day:2016-05-19),AxisArrays.Axis{:price,Array{Symbol,1}}([:open,:high,:low,:close]))

In [6]:
# display slices by indexing into it with axis names and values/ranges
data[Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:script}(:stock20), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock20]
    :date, 2006-06-15:1 day:2006-06-17
And data, a 1x3 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 174.961  163.56  168.902

In [7]:
# can also index with positions/ranges
# indexing with position is better in iterations, whereas using names is convenient for exploration
data[20, 1:3, 4]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock20]
    :date, 2006-05-19:1 day:2006-05-21
And data, a 1x3 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 159.386  155.473  161.96

In [8]:
# rotate axes with permutedims (useful for exploration and efficient access)
# refer to axes either by name or index
# permutedims(data, (Axis{:date}, Axis{:script}, Axis{:price}))
permutedims(data, (2,1,3))

3-dimensional AxisArray{Float64,3,...} with axes:
    :date, 2006-05-19:1 day:2016-05-19
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :price, [:open,:high,:low,:close]
And data, a 3654x500x4 Array{Float64,3}:
[:, :, 1] =
 587.5    608.741  473.767  261.444  …  196.059  605.233   608.039  599.544
 587.402  611.787  472.64   266.039     203.101  606.841   609.615  620.435
 587.303  608.672  461.926  263.442     191.236  597.014   640.549  603.716
 587.205  614.391  471.765  264.464     199.997  593.717   618.956  591.713
 587.107  606.784  472.31   264.5       205.472  591.183   603.614  624.684
 587.009  612.477  469.699  255.829  …  201.693  605.115   605.357  638.782
 586.91   610.807  465.915  261.409     202.742  620.203   624.004  578.136
 586.812  607.137  476.987  260.159     197.861  583.514   595.439  599.313
 586.714  60

In [9]:
# use cat to append data to specified axis
d19 = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
d20 = data[Axis{:script}(:stock20), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
@show d19
@show d20
cat(Axis{:script}, d19, d20)

d19 = [145.14606627126872 131.72962512614941 138.09929568549137]
d20 = [174.96145106455486 163.5600477500204 168.9016192704025]


2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19,:stock20]
    :date, 2006-06-15:1 day:2006-06-17
And data, a 2x3 Array{Float64,2}:
 145.146  131.73  138.099
 174.961  163.56  168.902

In [10]:
# use merge to create a new axis representing multiple series of data
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:high)]
d19low = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-15")..Date("2006-06-17")), Axis{:price}(:low)]
@show d19high
@show d19low
merge(Axis{:price}([:high, :low]), d19high, d19low)

d19high = [145.14606627126872 131.72962512614941 138.09929568549137]
d19low = [142.96195454423602 126.33584935991728 123.61983879644556]


3-dimensional AxisArray{Float64,3,...} with axes:
    :script, [:stock19]
    :date, 2006-06-15:1 day:2006-06-17
    :price, [:high,:low]
And data, a 1x3x2 Array{Float64,3}:
[:, :, 1] =
 145.146  131.73  138.099

[:, :, 2] =
 142.962  126.336  123.62

# shifting (lead, lag)

In [11]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2006-06-10")..Date("2006-06-17")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2006-06-10:1 day:2006-06-17
And data, a 1x8 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 145.429  140.163  137.457  161.861  168.098  145.146  131.73  138.099

In [12]:
lead(d19high, Axis{:date}(Day(1)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2006-06-10:1 day:2006-06-16
And data, a 1x7 Array{Float64,2}:
 140.163  137.457  161.861  168.098  145.146  131.73  138.099

In [13]:
lag(d19high, Axis{:date}(Day(1)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2006-06-11:1 day:2006-06-17
And data, a 1x7 Array{Float64,2}:
 145.429  140.163  137.457  161.861  168.098  145.146  131.73

In [14]:
# in general, any axis can be shifted by any amount (though it may not make sense always)
shiftaxis(d19high, Axis{:date}(Day(5)))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2006-06-15:1 day:2006-06-17
And data, a 1x3 Array{Float64,2}:
 145.429  140.163  137.457

# windowing
Windows can either be sliding or tumbling.

E.g.:
- `moving` uses a sliding window
- `collapse` uses a tumbling window

In [15]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2008-06-01")..Date("2016-06-01")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 day:2016-05-19
And data, a 1x2910 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 237.82  237.603  228.24  227.552  …  518.936  572.288  554.022  538.863

In [16]:
moving(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 week:2016-05-15
And data, a 1x416 Array{Float64,2}:
 237.82  208.32  225.992  233.294  …  572.962  613.226  561.729  502.534

In [17]:
collapse(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 week:2016-05-15
And data, a 1x416 Array{Float64,2}:
 237.82  208.32  225.992  233.294  …  572.962  613.226  561.729  502.534

In [18]:
# windowing can be applied to any axis
# the above is equivalent to
window(d19high, TumblingWindow(Axis{:date}(Week(1))), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 week:2016-05-15
And data, a 1x416 Array{Float64,2}:
 237.82  208.32  225.992  233.294  …  572.962  613.226  561.729  502.534

# resampling

`resample` is the generic API to resample any axis

`per` allows easy resampling of data axes

In [19]:
d19high = data[Axis{:script}(:stock19), Axis{:date}(Date("2008-06-01")..Date("2016-06-01")), Axis{:price}(:high)]

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 day:2016-05-19
And data, a 1x2910 SubArray{Float64,2,Array{Float64,3},Tuple{UnitRange{Int64},UnitRange{Int64},Int64},3}:
 237.82  237.603  228.24  227.552  …  518.936  572.288  554.022  538.863

In [20]:
# downsample to per week mean (generated data has same mean across weeks)
per(d19high, Axis{:date}(Week(1)), mean)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 week:2016-05-15
And data, a 1x416 Array{Float64,2}:
 390.495  390.495  390.495  390.495  …  390.495  390.495  390.495  390.495

In [21]:
# upsample to per hour (using repmat just as an example)
per(d19high, Axis{:date}(Hour(1)), x->repmat(x,24))

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01T00:00:00:1 hour:2016-05-19T23:00:00
And data, a 1x69840 Array{Float64,2}:
 237.82  237.603  228.24  227.552  …  518.936  572.288  554.022  538.863

# mapaxis
map data along any axis.

This is more like `mappart`

Regular Julia map can be used to map over individual data elements

In [22]:
mapaxis(d19high, Axis{:script}, x->x*2)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock19]
    :date, 2008-06-01:1 day:2016-05-19
And data, a 1x2910 Array{Float64,2}:
 475.639  475.207  456.48  455.105  …  1037.87  1144.58  1108.04  1077.73

# A mock analysis over the stock price data

In [23]:
function plotprices(data::AxisArray, stock, pricecol)
    prices = data[stock, :, pricecol]
    L = length(prices)
    lineplot(x=1:L, y=reshape(prices, L))
end

function plotprices(data::AxisArray, stock)
    prices = data[stock, :]
    L = length(prices)
    lineplot(x=1:L, y=reshape(prices, L))
end

plotprices (generic function with 2 methods)

In [24]:
plotprices(data, 22, :low)

In [25]:
function dailyreturns(data::AxisArray)
    d0 = data[:,1:(end-1),:close]
    d1 = lead(data[:,:,:close], Axis{:date}(Day(1)))
    AxisArray(100 * (d0 .- d1) ./ d1, d1.axes...)
end

dailyreturns (generic function with 1 method)

In [26]:
drets = dailyreturns(data)

2-dimensional AxisArray{Float64,2,...} with axes:
    :script, [:stock1,:stock2,:stock3,:stock4,:stock5,:stock6,:stock7,:stock8,:stock9,:stock10  …  :stock491,:stock492,:stock493,:stock494,:stock495,:stock496,:stock497,:stock498,:stock499,:stock500]
    :date, 2006-05-19:1 day:2016-05-18
And data, a 500x3653 Array{Float64,2}:
   0.0167306    0.0167334    0.0167362  …    0.042991     0.0430095
  -1.19135     -0.159897     0.253637        0.512091    -0.0520386
   0.268278     3.55032     -4.70471        -1.33075      0.422842 
   2.92995     -3.49634     -1.09419         0.475043    -0.0544691
   1.40752     -2.45333      3.01354         1.21306     -0.0689503
  -6.25786      8.41307     -7.93637    …    3.87321      0.20918  
  -1.62308      4.5888      -0.613658       -4.54584      2.77231  
  -5.55755      5.84669     -5.50297        -4.89078     -2.0753   
   6.57402     -1.32896      3.80496         0.545493    -1.42403  
 -12.7993       0.532626     6.17909         5.42478      1.

In [27]:
plotprices(drets, 4)

In [28]:
movavg = moving(drets, Axis{:date}(Day(5)), mean);

In [29]:
plotprices(movavg, 22)

In [30]:
volat = window(movavg, TumblingWindow(Axis{:date}(Day(365*10 + 5))), std);

In [31]:
minimum(volat), median(volat), maximum(volat)

(0.00019670853104873358,3.1598970038973446,255.39534057911553)

In [32]:
mask = 10 .< volat .< 74
maskedvolat = volat[mask]
x = axisvalues(maskedvolat, Axis{:script}())
y = reshape(copy(maskedvolat.data), length(maskedvolat)) .- 10
barplot(x=x, y=y)