# MyMethods.jl -- Examples

This notebook illustrates the basic functionality of the MyMethods.jl package.

### Installation and Uninstallation of MyMethods.jl

In [1]:
# Install the package from GitHub (first time only) 
#using Pkg; 
#Pkg.add(url="https://github.com/thomaswiemann/MyMethods.jl");

# Uninstall 
#Pkg.rm("MyMethods")

In [1]:
# Specify module directory (depending on GitHub location...).
# `joinpath` assembles the paths with the platform's own separator, so the
# cell also works outside Windows; on Windows the resulting strings are the
# same as with hard-coded "\\" separators.
push!(LOAD_PATH, joinpath(homedir(), "Documents", "GitHub"))
push!(LOAD_PATH, joinpath(homedir(), "GitHub"))

# Load MyMethods.jl into your Julia session 
using MyMethods

┌ Info: Precompiling MyMethods [18de7e97-ce2a-4ea9-8d3b-e4b0c3b0b515]
└ @ Base loading.jl:1278
│ - If you have MyMethods checked out for development and have
│   added DataFrames as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with MyMethods


In [3]:
# Load additionally useful (but not necessary) packages
using Statistics # for a quantile function
#using Plots; plotly() # for a plotting backend

### Example Data

Generate basic data to be used in the examples.

In [22]:
# Simulation settings
N = 10000               # number of observations
beta = [1, 2, -1, 4]    # coefficients (constant first)

# Simulated sample (draw order: running variable, regressors, noise)
_r = 1 .+ 1.5 .* (rand(N, 1) .- 0.5);    # running variable, N×1
X = [ones(N) rand(N, 3)];                # constant plus three uniform regressors
y = (X * beta) .* _r + randn(N, 1);      # outcome: linear index scaled by _r plus noise

### myLS
myLS() generates a (weighted) least squares object.

In [23]:
# Estimate least squares fit of y on X.
# X already contains the column of ones, so no separate constant is added here.
fit_LS = myLS(y, X);

In [24]:
# Calculate and print standard errors.
# The printed table (below) has one row per coefficient with the columns
# coef, se, t-stat and p-val.
inference(fit_LS);

Unnamed: 0_level_0,coef,se,t-stat,p-val
Unnamed: 0_level_1,Float64,Float64,Float64,Float64
1,0.894388,0.0590209,15.1537,7.15649e-52
2,2.08412,0.0654947,31.8212,3.29263e-222
3,-0.916353,0.0650769,-14.0811,4.9648499999999995e-45
4,4.06184,0.0655904,61.9274,0.0


### myLLR
myLLR() generates a local linear regression object.

In [27]:
# Estimate local constant regression (degree K = 0) with bandwidth h = 0.4.
# The evaluation points _x are the 1%, 2%, ..., 100% sample quantiles of the
# running variable (100 points). X[:, 2:end] drops the column of ones, so
# only the three non-constant regressors are passed as additional variables.
fit_LLR = myLLR(y, _r, # response and running variable
    X[:, 2:end],# additional variables to include
    _x = quantile(_r[:], collect(1:100)./100), # values at which to fit
    K=0, h=0.4); # degree and bandwidth

### TODO: continue from here
## TODO: revert the package to being loaded via LOAD_PATH for easier debugging

In [28]:
# Calculate the local coefficients for quantiles of the running variable.
# The result shown below is a 100×4 matrix: one row per evaluation point,
# one column per coefficient.
coef_LLR = coef(fit_LLR)

100×4 Array{Float64,2}:
 0.396536  0.75313   -0.359004  1.70466
 0.401331  0.768478  -0.366341  1.73224
 0.40737   0.785831  -0.374733  1.76114
 0.413848  0.798983  -0.383284  1.7826
 0.424584  0.813592  -0.395401  1.81191
 0.434256  0.828857  -0.405252  1.84183
 0.44234   0.847177  -0.413033  1.87136
 0.449742  0.866189  -0.420413  1.89829
 0.458711  0.883111  -0.429667  1.92626
 0.468066  0.899075  -0.438433  1.95161
 0.478776  0.921538  -0.449934  1.98232
 0.487387  0.939602  -0.458959  2.01033
 0.495851  0.956534  -0.465963  2.03787
 ⋮                              
 1.4967    3.13023   -1.4636    5.96526
 1.50498   3.14572   -1.47265   5.99587
 1.51018   3.1563    -1.47803   6.02004
 1.51757   3.17031   -1.48499   6.05493
 1.52459   3.18117   -1.49179   6.08451
 1.53032   3.19218   -1.49888   6.11373
 1.5342    3.20854   -1.5064    6.15211
 1.53659   3.22403   -1.51521   6.19772
 1.54011   3.23208   -1.52158   6.22592
 1.54418   3.24256   -1.52779   6.25655
 1.54711   3.25617   -1.

In [11]:
# Plot coefficients as a function of the running variable.
# NOTE: `plot` is only defined once a plotting package is loaded. The
# `using Plots; plotly()` line in the package-loading cell is commented out,
# which is why this cell raises `UndefVarError: plot not defined`.
# NOTE(review): the title says "linear" although fit_LLR was estimated with
# K=0 (local constant) — confirm the intended wording.
plot(fit_LLR._x, coef_LLR,
title = "Local linear regression coefficients",
label = ["Const." "X1" "X2" "X3"])

LoadError: UndefVarError: plot not defined

### mybootstrap

In [29]:
# Bootstrap myLS and keep the coefficients of each replication (get = coef).
# The result below has 10 entries, matching the third positional argument.
# NOTE(review): presumably data_args = [1] marks which of the trailing
# arguments (here X) are resampled together with y, and `red` reduces the
# collected results — confirm against mybootstrap.
res = mybootstrap(myLS, y, 10, X, data_args = [1], 
    get = coef, red = hcat∘vcat)

10×1 Array{Array{Float64,2},2}:
 [0.8780012249009526; 2.0875491777139707; -0.8826597120765257; 4.153835515730999]
 [0.8797736881019884; 2.05279831988534; -0.9275294417154937; 4.114317049765206]
 [0.9068419481611971; 2.161432826556404; -0.9890486395039044; 4.0768128364810865]
 [0.9447501982298332; 2.039964233353081; -0.8900022228753837; 3.9718834553126348]
 [0.9437490330127215; 2.1478993436432825; -0.9890931270468774; 4.019207401480728]
 [0.8484627575690101; 2.2375841567649193; -0.8424872758615112; 3.9010489206971113]
 [0.9141009791686622; 2.1291338406466536; -0.9562500640694455; 4.047825490992517]
 [0.8063590003434772; 2.165924923002932; -0.8529976087473948; 4.105918493757763]
 [0.9028981135976719; 2.063166302272383; -0.9004204650759662; 4.043466379157406]
 [0.79714068302567; 2.026775660194343; -0.7200028738664938; 4.132754663254229]

In [30]:
# Bootstrap myLLR, evaluating each replication at the 10%, 20%, ..., 100%
# sample quantiles of the running variable and keeping its coefficients.
# NOTE(review): neither K nor h is passed here, so whatever defaults myLLR
# has are used — confirm this is intended given the K=0, h=0.4 fit earlier.
res = mybootstrap(myLLR, y, 10, _r, X[:, 2:end], data_args = [1, 2],
    _x = quantile(_r[:], collect(1:10)./10), 
    get = coef) # think about bootstrapping myLLR!

10-element Array{Any,1}:
 [[0.45526220375835913 0.9680388140065543 -0.4886361280019072 2.1547832314746467; 0.5451229085130421 1.136242967352638 -0.5605883007044845 2.4368785674925753; … ; 1.28136322612984 3.1021656608740744 -1.2767362512907812 6.01412723092873; 1.353363535081194 3.266740572308276 -1.3651347040553372 6.26094538027208]]
 [[0.3889853658807692 1.0802149616067778 -0.3153805316910531 2.0717354177997396; 0.4871136499205204 1.263042233586715 -0.40039464668085556 2.327130542313399; … ; 1.484823334003419 3.1018783210868213 -1.5216552238271663 5.87053482559707; 1.6069698334564146 3.178333870982547 -1.60143195718377 6.10320674564853]]
 [[0.5650080151747631 0.8736799937733455 -0.5045177318497576 2.0412323948998736; 0.6169524560839049 1.0465680263967292 -0.5412079409501728 2.3978950945852184; … ; 1.5446626912046373 3.0648901335152257 -1.4455870634573291 5.77531638818212; 1.6354749181250308 3.217909704388602 -1.5362128933043946 6.006029053196175]]
 [[0.44185296627979215 0.93351779368

### Parallel Computing

In [14]:
using Distributed

In [15]:
# Launch three additional worker processes; their ids are listed in the output.
addprocs(3)

3-element Array{Int64,1}:
 2
 3
 4

In [16]:
# Load MyMethods on every process (the master and all workers).
@everywhere using MyMethods

      From worker 4:	│ - If you have MyMethods checked out for development and have
      From worker 4:	│   added DataFrames as a dependency but haven't updated your primary
      From worker 4:	│   environment's manifest file, try `Pkg.resolve()`.
      From worker 4:	│ - Otherwise you may need to report an issue with MyMethods
      From worker 2:	│ - If you have MyMethods checked out for development and have
      From worker 2:	│   added DataFrames as a dependency but haven't updated your primary
      From worker 2:	│   environment's manifest file, try `Pkg.resolve()`.
      From worker 2:	│ - Otherwise you may need to report an issue with MyMethods
      From worker 3:	│ - If you have MyMethods checked out for development and have
      From worker 3:	│   added DataFrames as a dependency but haven't updated your primary
      From worker 3:	│   environment's manifest file, try `Pkg.resolve()`.
      From worker 3:	│ - Otherwise you may need to report an issue with MyMethods


In [17]:
# Bootstrap myLS via mybootstrapPAR, with the same arguments as the
# mybootstrap(myLS, ...) call earlier in the notebook.
# NOTE(review): presumably the replications are spread over the worker
# processes added above — confirm against mybootstrapPAR.
res = mybootstrapPAR(myLS, y, 10, X, data_args = [1], 
    get = coef, red = hcat∘vcat)

10×1 Array{Array{Float64,2},2}:
 [0.8457001054630248; 2.1825671470351113; -0.8944855934348915; 4.04688690718834]
 [0.9373090862909891; 2.0940918769346597; -0.948752636120286; 4.017162520394206]
 [0.8507148666019242; 2.0749954268338637; -0.8830533994991726; 4.12602531607754]
 [0.8014808274913243; 2.080395695759484; -0.7761127133526732; 4.0846830416763495]
 [0.9176496288512465; 2.0578995711870784; -0.8539456377419344; 3.9783865214218768]
 [1.001301130535213; 1.9205582435279795; -0.8569072105544677; 3.9739084282577966]
 [0.9167132934554411; 2.126301679851184; -0.945166007690668; 3.9219204827835905]
 [0.8548611528638019; 2.067775315143256; -0.8849871095247795; 4.074145801982866]
 [0.9704688194402704; 1.949345550706462; -0.9909546567402508; 4.119448906026626]
 [0.9081846920883201; 2.185545488657733; -0.9866043708865838; 4.013858069747636]

In [33]:
## Parallel coefficient function for myLLR object
"""
    coefPAR2(fit::myLLR, _x=fit._x; dynamic=false)

Compute the local coefficients of `fit` at every element of `_x` using the
available worker processes.

# Arguments
- `fit::myLLR`: fitted object, passed unchanged to `coef(fit, x)`.
- `_x`: evaluation points, indexed linearly; defaults to `fit._x`.

# Keywords
- `dynamic=false`: with `false` the indices are split statically by
  `@distributed`; with `true` each worker fetches the next unprocessed index
  as soon as it finishes its current one.

# Returns
A matrix with one row per element of `_x`, in the same orientation for both
scheduling modes.
NOTE(review): this assumes `coef(fit, x)` returns a 1×dim_coef row for a
single point `x` — confirm against the package.
"""
function coefPAR2(fit::myLLR, _x=fit._x;
        dynamic=false)
    # Data parameters
    N_x = length(_x)

    # Run LLR in parallel
    if !dynamic # w/o dynamic job scheduling
        # Each index contributes one transposed result; hcat glues them
        # side by side, and the final transpose puts points back in rows.
        coef_cols = @distributed (hcat) for idx in 1:N_x
            coef(fit, _x[idx])'
        end
        return coef_cols'
    end

    # w/ dynamic job scheduling
    # Number of coefficients per point: controls (if any) + degree + constant
    n_control = isnothing(fit.control) ? 0 : size(fit.control, 2)
    dim_coef = n_control + fit.K + 1
    coef_mat = Array{Float64, 2}(undef, N_x, dim_coef)

    # Shared queue of pending indices. Every index is handed out exactly
    # once, so workers share the work instead of each recomputing all points.
    jobs = Channel{Int}(Inf)
    foreach(idx -> put!(jobs, idx), 1:N_x)
    close(jobs) # iteration over the channel stops once it is drained

    @sync for p in workers()
        # One feeder task per worker: take an index, evaluate it remotely,
        # store the row, repeat until the queue is empty.
        @async for idx in jobs
            coef_mat[idx, :] = remotecall_fetch(coef, p, fit, _x[idx])'
        end
    end

    # Return local coefficients (rows = points, as in the static branch)
    return coef_mat
end #COEFPAR.MYLLR

coefPAR2 (generic function with 2 methods)

In [34]:
# Compute the local coefficients of fit_LLR in parallel (this is not a
# bootstrap). With the default dynamic=false, the @distributed branch of
# coefPAR2 is used.
# NOTE(review): the TypeError in the output is raised on a worker while it
# deserializes an incoming message; possibly the workers see a different
# definition of the myLLR struct than the master — confirm.
coef_LLR = coefPAR2(fit_LLR)

LoadError: TaskFailedException:
On worker 2:
TypeError: in new, expected Int64, got a value of type Array{Float64,2}
deserialize at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Serialization\src\Serialization.jl:1356
handle_deserialize at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Serialization\src\Serialization.jl:837
deserialize at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Serialization\src\Serialization.jl:1350
handle_deserialize at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Serialization\src\Serialization.jl:837
deserialize at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Serialization\src\Serialization.jl:773 [inlined]
deserialize_msg at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\messages.jl:99
#invokelatest#1 at .\essentials.jl:710 [inlined]
invokelatest at .\essentials.jl:709 [inlined]
message_handler_loop at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\process_messages.jl:185
process_tcp_streams at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\process_messages.jl:142
#99 at .\task.jl:356
Stacktrace:
 [1] remotecall_fetch(::Function, ::Distributed.Worker, ::Function, ::Vararg{Any,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\remotecall.jl:394
 [2] remotecall_fetch(::Function, ::Distributed.Worker, ::Function, ::Vararg{Any,N} where N) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\remotecall.jl:386
 [3] remotecall_fetch(::Function, ::Int64, ::Function, ::Vararg{Any,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\remotecall.jl:421
 [4] remotecall_fetch at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\remotecall.jl:421 [inlined]
 [5] (::Distributed.var"#157#158"{typeof(hcat),var"#37#39"{myLLR,Array{Float64,1}},UnitRange{Int64},Array{UnitRange{Int64},1},Int64,Int64})() at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\Distributed\src\macros.jl:269

### KNN matching

In [1]:
# Specify module directory (depending on GitHub location...).
# `joinpath` assembles the paths with the platform's own separator, so the
# cell also works outside Windows; on Windows the resulting strings are the
# same as with hard-coded "\\" separators.
push!(LOAD_PATH, joinpath(homedir(), "Documents", "GitHub"))
push!(LOAD_PATH, joinpath(homedir(), "GitHub"))

# Load MyMethods.jl into your Julia session 
using MyMethods

┌ Info: Precompiling MyMethods [18de7e97-ce2a-4ea9-8d3b-e4b0c3b0b515]
└ @ Base loading.jl:1278
│ - If you have MyMethods checked out for development and have
│   added DataFrames as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with MyMethods


In [2]:
# Simulation settings
N = 1000          # number of observations
beta = [1, 3]     # coefficients on the constant and on D

# Simulated sample (draw order: x, D, noise)
x = rand(N, 1)                  # variable to match on
D = rand(0:1, N);               # binary 0/1 indicator
X = [ones(N) D];                # regressors: constant and D
y = X * beta + randn(N, 1);     # outcome simulation

# Matching settings
K = 5
replacement = true

true

In [3]:
# Run myMatch with outcome y, binary indicator D and matching variable x.
# NOTE(review): K and replacement are defined in the previous cell but are
# not passed here; the printed object ends in `1, true`, which presumably
# are myMatch's defaults — confirm whether K = 5 was meant to be used.
res = myMatch(y, D, x)

myMatch(3.095475695249317, 3.0191368567902934, 3.05692458182751, [2.4528075495977095; 3.5499682573386613; … ; 1.3664945106925346; 3.7549427700731366], [1, 1, 1, 0, 1, 0, 1, 0, 1, 0  …  0, 1, 0, 1, 1, 0, 1, 0, 0, 1], [0.5768106880495436; 0.9861526319665128; … ; 0.3509891674534471; 0.889224257429414], 1, true)

### mySieve

In [4]:
# Simulated sample for the sieve example
N = 1000
z = rand(N, 1)                  # uniform draws on [0, 1)
x = 2 .* (1 .- 2 .* z)          # rescaled draws, \sim U[-2, 2]
# Outcome: smooth signal with a narrow bump at zero, plus noise with sd 3
y = sin.(2 .* x) .+ 2 .* exp.(-16 .* x.^2) .+ 3 .* randn(N, 1);

K = 3;

In [6]:
# Fit mySieve with the "LSplines" basis.
# NOTE(review): the call fails with `UndefVarError: N not defined` although
# N is assigned in the previous cell, so the undefined N is presumably
# inside the package code — confirm in mySieve. K is also not passed here.
res = mySieve(y, x, basis="LSplines")



LoadError: UndefVarError: N not defined

### mySplines

In [171]:
using Statistics

In [183]:
# Knots: the k/(K+1) sample quantiles of x for k = 1, ..., K
r = quantile(vec(x), collect(1:K) ./ (K + 1))

3-element Array{Float64,1}:
 -0.9332992761689949
 -0.022016838769304492
  0.9016281952267564

In [184]:
# Quantile levels used for the knots
[k / (K + 1) for k in 1:K]

3-element Array{Float64,1}:
 0.25
 0.5
 0.75

In [185]:
# One array per knot: (x - knot) where x exceeds the knot, zero elsewhere
[(x .- knot) .* (x .> knot) for knot in r[1:K]]

3-element Array{Array{Float64,2},1}:
 [2.025396229234434; 2.5167799773415602; … ; 2.285982527966956; 1.9490962500829008]
 [1.1141137918347432; 1.6054975399418696; … ; 1.3747000905672655; 1.0378138126832104]
 [0.19046875783868233; 0.6818525059458087; … ; 0.4510550565712046; 0.11416877868714947]