# GOR.jl - examples

In [1]:
using GOR
using DataFrames

In [2]:
left = GorFile(GOR.pkgpath("test", "left.gor"))
right = GorFile(GOR.pkgpath("test", "right.gor"))



<GorFile: /Users/florian/projects/GOR.jl/src/../test/right.gor>

In [3]:
eltype(left)

NamedTuple{(:Chrom, :Pos, :Val), Tuple{InlineStrings.String7, Int64, InlineStrings.String7}}

In [4]:
left |> write_arrow("/tmp/left.arrow")

"/tmp/left.arrow"

In [5]:
ArrowFile("/tmp/left.arrow") |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,Pos,Val
Unnamed: 0_level_1,String,Int64,String
1,chr1,1,l1
2,chr1,2,l2
3,chr1,3,l3
4,chr1,4,l4
5,chr1,5,l5


In [6]:
a = ArrowFile("/tmp/left.arrow") 
r = Tables.namedtupleiterator(a)
r |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,Pos,Val
Unnamed: 0_level_1,String,Int64,String
1,chr1,1,l1
2,chr1,2,l2
3,chr1,3,l3
4,chr1,4,l4
5,chr1,5,l5


In [7]:
right |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,Pos,Val
Unnamed: 0_level_1,String7,Int64,String7
1,chr1,1,r1
2,chr1,2,r2
3,chr1,3,r3
4,chr1,4,r4
5,chr1,5,r5


In [8]:
left |> GOR.filter(r -> (r.Pos > 3) & (r.Pos < 8)) |> DataFrame

Unnamed: 0_level_0,Chrom,Pos,Val
Unnamed: 0_level_1,String7,Int64,String7
1,chr1,4,l4
2,chr1,5,l5
3,chr1,6,l6
4,chr1,7,l7


In [9]:
# Join `left` with `right`, requesting a left join via `leftjoin = true`.
# The trailing `;` suppresses the cell output. `eltype(j)` in the next cell shows
# the right-hand columns (`Chromx`, `Posx`, `Valx`) typed as `Union{Missing, ...}`.
j = GOR.join(left, right, leftjoin = true);

In [10]:
eltype(j)

NamedTuple{(:Chrom, :Pos, :Val, :Chromx, :Posx, :Valx), Tuple{InlineStrings.String7, Int64, InlineStrings.String7, Union{Missing, InlineStrings.String7}, Union{Missing, Int64}, Union{Missing, InlineStrings.String7}}}

In [11]:
j |> top(5) |> GOR.mutate(x -> (Value = x.Pos * 2,)) |> DataFrame


Unnamed: 0_level_0,Chrom,Pos,Val,Chromx,Posx,Valx,Value
Unnamed: 0_level_1,String7,Int64,String7,String7,Int64?,String7,Int64
1,chr1,1,l1,chr1,1,r1,2
2,chr1,2,l2,chr1,2,r2,4
3,chr1,3,l3,chr1,3,r3,6
4,chr1,4,l4,chr1,4,r4,8
5,chr1,5,l5,chr1,5,r5,10


In [12]:
j2 = j |> GOR.rename(:Pos => :leftPos, :Posx => :rightPos, :Val => :leftVal, :Valx => :rightVal) |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,leftPos,leftVal,Chromx,rightPos,rightVal
Unnamed: 0_level_1,String7,Int64,String7,String7,Int64?,String7
1,chr1,1,l1,chr1,1,r1
2,chr1,2,l2,chr1,2,r2
3,chr1,3,l3,chr1,3,r3
4,chr1,4,l4,chr1,4,r4
5,chr1,5,l5,chr1,5,r5


In [13]:
left |> GOR.rename(:Pos => :Position) |> top(5)  |> DataFrame

Unnamed: 0_level_0,Chrom,Position,Val
Unnamed: 0_level_1,String7,Int64,String7
1,chr1,1,l1
2,chr1,2,l2
3,chr1,3,l3
4,chr1,4,l4
5,chr1,5,l5


In [14]:
eltype(left)

NamedTuple{(:Chrom, :Pos, :Val), Tuple{InlineStrings.String7, Int64, InlineStrings.String7}}

In [15]:
left |> GOR.mutate(row -> (x = 2*row.Pos, y = 7)) |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,Pos,Val,x,y
Unnamed: 0_level_1,String7,Int64,String7,Int64,Int64
1,chr1,1,l1,2,7
2,chr1,2,l2,4,7
3,chr1,3,l3,6,7
4,chr1,4,l4,8,7
5,chr1,5,l5,10,7


In [16]:
left |> GOR.select(:Chrom, :Val) |> top(5) |> DataFrame

Unnamed: 0_level_0,Chrom,Val
Unnamed: 0_level_1,String7,String7
1,chr1,l1
2,chr1,l2
3,chr1,l3
4,chr1,l4
5,chr1,l5


Let's try out some things that could make it easier to program, now that Julia is on version 1.8.

In [17]:
n = (a = 1, b = (c = 3,))

(a = 1, b = (c = 3,))

In [18]:
n.b

(c = 3,)

In [19]:
left |> Tables.schema

Tables.Schema:
 :Chrom  InlineStrings.String7
 :Pos    Int64
 :Val    InlineStrings.String7

In [20]:
Base.return_types(x -> (a = 2,c = 3))

1-element Vector{Any}:
 NamedTuple{(:a, :c), Tuple{Int64, Int64}}

In [21]:
Base.merge((a = 1, b = 2), (a = 3, c = 4))

(a = 3, b = 2, c = 4)

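`Base.return_types` works differently for Tuples and NamedTuples when a Union type is used: with a `Union` argument type the inferred NamedTuple comes back as a `UnionAll` instead of a concrete `DataType`, as the next cells show.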

In [22]:
rt = Base.return_types(x -> (a=2x,b=x+1), (Union{Missing, Int64},))

1-element Vector{Any}:
 NamedTuple{(:a, :b), _A} where _A<:Tuple{Union{Missing, Int64}, Union{Missing, Int64}}

In [23]:
rt[1] <: NamedTuple

true

In [24]:
typeof(rt[1])

UnionAll

To get the names, use

In [25]:
rt[1].body.parameters[1]

(:a, :b)

and to get the type, use

In [26]:
rt[1].var.ub

Tuple{Union{Missing, Int64}, Union{Missing, Int64}}

In [27]:
rt = Base.return_types(x -> (a=2x,b=x+1), (Int64,))

1-element Vector{Any}:
 NamedTuple{(:a, :b), Tuple{Int64, Int64}}

In [28]:
rt[1] <: NamedTuple

true

In [29]:
typeof(rt[1])

DataType

In [30]:
rt[1].parameters[1]

(:a, :b)

In [31]:
rt[1].parameters[2]

Tuple{Int64, Int64}

This lets us define a function

In [32]:
"""
    returntype(func, intype)

Return the `NamedTuple` type that `func` is inferred to produce when called with
arguments of the types in the tuple `intype`.

Inference reports the result in one of two shapes, both of which are normalised to
a plain `NamedTuple{names, Tuple{...}}`:

- a `DataType` such as `NamedTuple{(:a, :b), Tuple{Int64, Int64}}`, whose
  parameters are used as they are;
- a `UnionAll` such as `NamedTuple{(:a, :b), _A} where _A<:Tuple{...}` (seen when an
  argument type is a `Union`), where the upper bound of the type variable is taken
  as the field-type tuple.

Only the first entry returned by `Base.return_types` is considered.

# Throws
- `ArgumentError` if no method of `func` matches `intype`.
- `AssertionError` if the inferred return type is not a `NamedTuple`.
- `ErrorException` if the inferred type is neither a `DataType` nor a `UnionAll`.
"""
function returntype(func, intype)
    inferred = Base.return_types(func, intype)
    # An empty result means no method matched; indexing it would only raise an
    # uninformative BoundsError.
    isempty(inferred) &&
        throw(ArgumentError("no method of func matches argument types $(intype)"))
    rt = inferred[1]

    # Thrown explicitly (rather than via `@assert`, which may be disabled) so the
    # check on caller input is always performed.
    rt <: NamedTuple || throw(AssertionError("func needs to return a NamedTuple"))

    if rt isa DataType
        return NamedTuple{rt.parameters[1], rt.parameters[2]}
    elseif rt isa UnionAll
        # Names live on the body; the field types are the bound of the type variable.
        return NamedTuple{rt.body.parameters[1], rt.var.ub}
    else
        error("Unknown type")
    end
end

returntype (generic function with 1 method)

In [33]:
returntype(r -> (r*2, r+1), (Int64,))

LoadError: AssertionError: func needs to return a NamedTuple

In [34]:
rt = returntype(r -> (a = r*2, b = r+1), (Int64,))

NamedTuple{(:a, :b), Tuple{Int64, Int64}}

In [35]:
returntype(r -> (a = r*2, b = r+1), (Union{Missing,Int64},))


NamedTuple{(:a, :b), Tuple{Union{Missing, Int64}, Union{Missing, Int64}}}

In [36]:
returntype(r -> (a = r.x*2, b = r.x+1), (NamedTuple{(:x,), Tuple{Int64}},))

NamedTuple{(:a, :b), Tuple{Int64, Int64}}

In [37]:
returntype(r -> (a = r.x*2, b = r.x+1), (NamedTuple{(:x,), Tuple{Union{Missing,Int64}}},))

NamedTuple{(:a, :b), Tuple{Union{Missing, Int64}, Union{Missing, Int64}}}

In [38]:
returntype( (x,y) -> Base.merge(x,y), (typeof((a = 2, b = 3)), NamedTuple{(:x,), Tuple{Union{Missing,Int64}}}))

NamedTuple{(:a, :b, :x), Tuple{Int64, Int64, Union{Missing, Int64}}}

In [39]:
returntype( x -> Base.merge(x, (c = x.a + 3,)), (typeof((a = 2, b = 3)),))

NamedTuple{(:a, :b, :c), Tuple{Int64, Int64, Int64}}

In [40]:
Tables.schema(left)

Tables.Schema:
 :Chrom  InlineStrings.String7
 :Pos    Int64
 :Val    InlineStrings.String7

In [41]:
eltype(left)

NamedTuple{(:Chrom, :Pos, :Val), Tuple{InlineStrings.String7, Int64, InlineStrings.String7}}

In [42]:
Base.return_types(x -> (a = x.Chrom, b = x.Pos + 123), (eltype(left),))

1-element Vector{Any}:
 NamedTuple{(:a, :b), Tuple{InlineStrings.String7, Int64}}

In [43]:
# Lazily map every row of `iter` to a two-field NamedTuple `(a, b)` taken from the
# row's `Chrom` and `Pos` fields; nothing is evaluated until the result is iterated.
function myselect2(iter)
    return Base.Generator(iter) do row
        (a = row.Chrom, b = row.Pos)
    end
end

myselect2 (generic function with 1 method)

In [45]:
Base.return_types(myselect2, (typeof(left),))

1-element Vector{Any}:
 Base.Generator{GOR.GorFileIter{Tables.NamedTupleIterator{Tables.Schema{(:Chrom, :Pos, :Val), Tuple{InlineStrings.String7, Int64, InlineStrings.String7}}, CSV.Rows{Vector{UInt8}, Tuple{}, Any, WeakRefStrings.PosLenString}}}, var"#29#30"}

In [46]:
eltype(left |> GOR.select(:Chrom))

NamedTuple{(:Chrom,), Tuple{InlineStrings.String7}}

In [52]:
# Feed a plain DataFrame into `GOR.mutate` through a named-tuple row iterator.
# The materialising tail (`top(1) |> DataFrame`) is commented out, so the cell
# displays the un-materialised `GOR.Map` pipeline object rather than a table.
DataFrame(a = 1:3, b = 4:6) |> Tables.namedtupleiterator |> GOR.mutate(r -> (Value = r.a*2,)) #|> top(1) |> DataFrame

GOR.Map{Tables.NamedTupleIterator{Tables.Schema{(:a, :b), Tuple{Int64, Int64}}, Tables.RowIterator{NamedTuple{(:a, :b), Tuple{Vector{Int64}, Vector{Int64}}}}}, GOR.var"#57#58"{var"#41#42"}, NamedTuple{(:a, :b, :Value), Tuple{Int64, Int64, Int64}}}(Tables.NamedTupleIterator{Tables.Schema{(:a, :b), Tuple{Int64, Int64}}, Tables.RowIterator{NamedTuple{(:a, :b), Tuple{Vector{Int64}, Vector{Int64}}}}}(Tables.RowIterator{NamedTuple{(:a, :b), Tuple{Vector{Int64}, Vector{Int64}}}}((a = [1, 2, 3], b = [4, 5, 6]), 3)), GOR.var"#57#58"{var"#41#42"}(var"#41#42"()))