In [1]:
using DataFrames, RDatasets, FixedEffectModels
using BenchmarkTools
using CSV, HTTP

In [2]:
# This is the basic Fixed effects example. It has like he formula approch like 
# in python moduals....
df = dataset("plm", "Cigar")
@btime reg(df, @formula(log(Sales) ~ NDI + fe(State) + fe(Year)), Vcov.cluster(:State), weights = :Pop)



                             Fixed Effect Model                             
Number of obs:                   1380   Degrees of freedom:                32
R2:                             0.803   R2 Adjusted:                    0.799
F-Stat:                        11.615   p-value:                        0.001
R2 within:                      0.140   Iterations:                         5
log(Sales) |    Estimate  Std.Error  t value Pr(>|t|)   Lower 95%   Upper 95%
-----------------------------------------------------------------------------
NDI        | -4.29493e-5 1.26022e-5 -3.40808    0.001 -6.76713e-5 -1.82273e-5


In [150]:
# I have a gravity dataset setup on my github site. So first you have to grab it 
# with the HTTP package

f = HTTP.get("https://raw.githubusercontent.com/mwaugh0328/Gravity-Estimation/master/gravity_data.csv").body;

# Then this converts the bytes to a "csv" file

csv = CSV.File(f);

# Then this creates the dataframe from the csv file

df = DataFrame(csv);

# this final bit then renames the columns based on there location. Not 
# as slick as pandas 

rename!(df, [1 => :importer, 2 => :exporter, 3 => :trade_data,
        4 => :distance, 5 => :border]);

first(df, 10)

Unnamed: 0_level_0,importer,exporter,trade_data,distance,border
Unnamed: 0_level_1,Int64,Int64,Float64,Float64,Int64
1,1,2,0.000209176,7333.02,0
2,1,3,0.000901651,7353.16,0
3,1,4,0.00470273,7038.12,0
4,1,5,0.077198,1050.78,1
5,1,6,0.00122796,5573.76,0
6,1,7,0.0156956,11990.9,0
7,1,8,0.000420131,7355.06,0
8,1,9,0.000694624,8059.99,0
9,1,10,0.00693367,6879.98,0
10,1,11,0.0119252,7153.31,0


In [151]:
df.distbin = 0.0.*df.distance;

df[:, :distbin] .= ifelse.(df.distance .<= 375, 1.0, df[:, :distbin]);

df[:, :distbin] .= ifelse.((375 .< df.distance .<= 750), 2.0, df[:, :distbin]);

df[:, :distbin] .= ifelse.((750 .< df.distance .<= 1500), 3.0, df[:, :distbin]);

df[:, :distbin] .= ifelse.((1500 .< df.distance .<= 3000), 4.0, df[:, :distbin]);

df[:, :distbin] .= ifelse.((3000 .< df.distance .<= 6000), 5.0, df[:, :distbin]);

df[:, :distbin] .= ifelse.((6000 .< df.distance), 6.0, df[:, :distbin]);

In [152]:
first(df, 30)

Unnamed: 0_level_0,importer,exporter,trade_data,distance,border,distbin
Unnamed: 0_level_1,Int64,Int64,Float64,Float64,Int64,Float64
1,1,2,0.000209176,7333.02,0,6.0
2,1,3,0.000901651,7353.16,0,6.0
3,1,4,0.00470273,7038.12,0,6.0
4,1,5,0.077198,1050.78,1,3.0
5,1,6,0.00122796,5573.76,0,5.0
6,1,7,0.0156956,11990.9,0,6.0
7,1,8,0.000420131,7355.06,0,6.0
8,1,9,0.000694624,8059.99,0,6.0
9,1,10,0.00693367,6879.98,0,6.0
10,1,11,0.0119252,7153.31,0,6.0


In [153]:
# This does some filtering drop the one values
# drop the zero trade observations. 
# note the ! which automatically changes the df

filter!(row -> row.trade_data != 1.0, df);

filter!(row -> row.trade_data != 0.0, df);

In [163]:
@time outreg = reg(df, @formula(log(trade_data) ~ importer + exporter + 
         distbin + border), save = true,
        ; contrasts = Dict(:importer => DummyCoding(base = 1.0), :exporter => DummyCoding(base = 1.0),
        :distbin => DummyCoding(base = 1.0)))

# this does the more standard dummy variable, vs. fixed effects. Whats the difference?

  1.115649 seconds (1.15 M allocations: 65.101 MiB, 4.96% gc time)


                                  Linear Model                                  
Number of obs:                      866  Degrees of freedom:                  65
R2:                               0.883  R2 Adjusted:                      0.874
F-Stat:                         94.4922  p-value:                          0.000
log(trade_data) |   Estimate Std.Error   t value Pr(>|t|)  Lower 95%   Upper 95%
--------------------------------------------------------------------------------
importer: 2     |    1.00012  0.163824   6.10484    0.000   0.678543     1.32169
importer: 3     |   0.485183   0.17022   2.85033    0.004   0.151054    0.819313
importer: 4     |    2.23622  0.171532   13.0367    0.000    1.89951     2.57292
importer: 5     | -0.0454859  0.164083 -0.277212    0.782   -0.36757    0.276598
importer: 6     |   0.799009   0.16643   4.80087    0.000   0.472318      1.1257
importer: 7     |  -0.333613   0.16706  -1.99696    0.046  -0.661541 -0.00568546
importer: 8     |   0.506493

In [182]:
@time outreg = reg(df, @formula(log(trade_data) ~ fe(importer) + fe(exporter) + 
         fe(distbin) + border), save = true)

  1.752851 seconds (1.75 M allocations: 89.787 MiB, 4.11% gc time)


                           Fixed Effect Model                           
Number of obs:                  866   Degrees of freedom:              67
R2:                           0.883   R2 Adjusted:                  0.873
F-Stat:                     39.9625   p-value:                      0.000
R2 within:                    0.048   Iterations:                      12
log(trade_data) | Estimate Std.Error t value Pr(>|t|) Lower 95% Upper 95%
-------------------------------------------------------------------------
border          | 0.771749  0.122081 6.32159    0.000  0.532111   1.01139


In [200]:
-4.94697 + -2.44264

-7.38961

In [168]:
-1.44933 + -4.856 + -1.08

-7.385

In [194]:
outreg.formula_predict(log(trade_data), df)

LoadError: [91mUndefVarError: trade_data not defined[39m

In [202]:
-1.44933 + (-4.8569) + (-1.08338)

-7.38961

In [201]:
# then the outreg.fe is a dataframe with the fixed effects lined
# up with the names. This is cool. Easy now todo model stuff with it.
# must have save = true to do this

first(outreg.fe, 30)

Unnamed: 0_level_0,importer,exporter,distbin,fe_importer,fe_exporter,fe_distbin
Unnamed: 0_level_1,Int64,Int64,Float64,Float64?,Float64?,Float64?
1,1,2,6.0,-4.8569,-0.483684,-1.44933
2,1,3,6.0,-4.8569,-0.727153,-1.44933
3,1,4,6.0,-4.8569,0.949411,-1.44933
4,1,5,3.0,-4.8569,0.376908,0.697198
5,1,6,5.0,-4.8569,-0.000320605,-1.08497
6,1,7,6.0,-4.8569,2.16794,-1.44933
7,1,8,6.0,-4.8569,-1.46779,-1.44933
8,1,9,6.0,-4.8569,-0.822348,-1.44933
9,1,10,6.0,-4.8569,0.937389,-1.44933
10,1,11,6.0,-4.8569,1.86388,-1.44933


In [137]:
outreg.fe

0.0009118819655545162

In [103]:
?reg

search: [0m[1mr[22m[0m[1me[22m[0m[1mg[22m [0m[1mR[22m[0m[1me[22m[0m[1mg[22mex [0m[1mR[22m[0m[1me[22m[0m[1mg[22mexMatch [0m[1mr[22m[0m[1me[22m[0m[1mg[22mressions [0m[1mR[22m[0m[1me[22m[0m[1mg[22mressionModel is[0m[1mr[22m[0m[1me[22m[0m[1mg[22mression out[0m[1mr[22m[0m[1me[22m[0m[1mg[22m



No documentation found.

`FixedEffectModels.reg` is a `Function`.

```
# 2 methods for generic function "reg":
[1] reg(df, formula::FormulaTerm) in FixedEffectModels at C:\Users\irmew01\.julia\packages\FixedEffectModels\KIQzR\src\fit.jl:43
[2] reg(df, formula::FormulaTerm, vcov::StatsBase.CovarianceEstimator; contrasts, weights, save, method, nthreads, double_precision, tol, maxiter, drop_singletons, progress_bar, dof_add, subset, first_stage) in FixedEffectModels at C:\Users\irmew01\.julia\packages\FixedEffectModels\KIQzR\src\fit.jl:43
```
