In [2]:
using LightGraphs, Distributions, DataFrames, ProgressMeter, RCall

In [3]:
srand(20130810)

MersenneTwister(UInt32[0x01332bfa], Base.dSFMT.DSFMT_state(Int32[-1772545288, 1073534108, 1077066014, 1072915095, -2146195133, 1072843413, 301764553, 1073404181, 750472136, 1073628106  …  -1491411563, 1073194977, 716119449, 1072893711, 1632331784, 758890923, 1433693833, -13012230, 382, 0]), [1.98619, 1.94589, 1.75428, 1.61862, 1.16592, 1.5365, 1.47187, 1.92902, 1.83274, 1.72863  …  1.67703, 1.52454, 1.16355, 1.10165, 1.20014, 1.30681, 1.41491, 1.39147, 1.69956, 1.56541], 382)

*Note: This workbook replicates the results from "The chilling effects of network externalities" by Goldenberg, Libai and Muller (2007). This is a self-didactic attempt*

> Two factors influence the transition of a non-adopter to an adopter:
- External factors: Some probability $a$ exists such that in a given time period, an individual will be influenced by external influence mechanisms such as advertising, mass media, and other marketing efforts, to adopt the innovative product
- Internal factors: Some probability $b$ exists such that during a given time period, an individual will be affected by an interaction (e.g., word of mouth) with exactly one other individual who has already adopted the product

> Let the number of cumulative adopters at time $t$ in a market of size $N$ be $x(t)$ and the threshold of an individual $i$ be $h_i$. If $i$ is connected to $m_i(t)$ adopters belonging to her personal network, her probability of adoption is:
$p_i(t) = 1 - (1 - a)(1 - b)^{m_i(t)}$ if $x(t)/N > h_i$ and 0 otherwise.

> In order to compare growth processes with and without network effects, we chose to express our measure as the ratio of the NPV of the growth process with and without network effects. Thus, we compute the NPV for the non-externalities case and for the externalities case using a 10% discount rate per period, which is a reasonable yearly rate for many
markets and fixed profit margins. 

> We assume that the threshold distribution follows a truncated normal distribution with mean $\mu$ and standard deviation $\sigma$.

It follows from the above verbatim from the paper that the parameters are $(a, b, \mu, \sigma)$.

In [4]:
mutable struct Network
    G::LightGraphs.SimpleGraphs.SimpleGraph{Int}
    node_status::BitVector
    threshold::Vector{Float64}
end

In [5]:
function network_externalities_effect(N::Network, node::Int, a::Float64, b::Float64)
    if sum(N.node_status)/nv(N.G) > N.threshold[node]
        n_active_nbrs = sum(N.node_status[neighbors(N.G, node)]) 
        return 1 - (1 - a)*((1 - b)^n_active_nbrs)
    else
        return a
    end
end

network_externalities_effect (generic function with 1 method)

In [6]:
function evolve!(N::Network, a::Float64, b::Float64)
    for node in shuffle(vertices(N.G))
        if rand(Uniform()) < network_externalities_effect(N, node, a, b)
            N.node_status[node] = true
        end
    end
    
    return nothing
end

evolve! (generic function with 1 method)

In [7]:
function evolve!(N::Network, a::Float64)
    for node in shuffle(vertices(N.G))
        if rand(Uniform()) < a
            N.node_status[node] = true
        end
    end
    
    return nothing
end

evolve! (generic function with 2 methods)

In [43]:
function simulate(n::Int, z::Int; T = 30, n_realizations = 10)
    parameter_space = [(a, b, mu, sigma) for a in linspace(0.005, 0.05, 5),
                                             b in linspace(0.05, 0.25, 5),
                                             mu in linspace(0.01, 0.2, 5),
                                             sigma in linspace(0.005, 0.025, 5)]
    output = DataFrame(r = Int[], t = Int[], 
                       a = Float64[], b = Float64[], mu = Float64[], sigma = Float64[], 
                       n_engaged_externalities = Int[], n_engaged_no_externalities = Int[])
    
    @showprogress 1 "Simulating..." for (a, b, mu, sigma) in parameter_space
        for r in 1:n_realizations
            g = erdos_renyi(n, z/n)
            threshold = rand(TruncatedNormal(mu, sigma, 0, Inf), nv(g))
            N_externalities = Network(g, falses(nv(g)), threshold)
            N_no_externalities = Network(g, falses(nv(g)), threshold)
            
            for t in 1:T
                evolve!(N_externalities, a, b)
               
                evolve!(N_no_externalities, a)
                
                push!(output, [r, t, a, b, mu, sigma, sum(N_externalities.node_status), sum(N_no_externalities.node_status)])
            end
        end
    end
    
    return output
end

simulate (generic function with 1 method)

In [44]:
result = simulate(625, 8)
@rput result

[32mSimulating...100%|██████████████████████████████████████| Time: 0:01:02[39m


Unnamed: 0,r,t,a,b,mu,sigma,n_engaged_externalities,n_engaged_no_externalities
1,1,1,0.005,0.05,0.01,0.005,1,4
2,1,2,0.005,0.05,0.01,0.005,3,7
3,1,3,0.005,0.05,0.01,0.005,10,11
4,1,4,0.005,0.05,0.01,0.005,22,15
5,1,5,0.005,0.05,0.01,0.005,31,18
6,1,6,0.005,0.05,0.01,0.005,44,19
7,1,7,0.005,0.05,0.01,0.005,62,21
8,1,8,0.005,0.05,0.01,0.005,85,26
9,1,9,0.005,0.05,0.01,0.005,108,28
10,1,10,0.005,0.05,0.01,0.005,149,31


In [45]:
R"""
library(tidyverse)
result_summ <- result %>% group_by(a, b, mu, sigma, t) %>% 
                          summarize(Avg_Engaged_Externalities = mean(n_engaged_externalities), 
                                    Avg_Engaged_No_Externalities = mean(n_engaged_no_externalities))

"""

RCall.RObject{RCall.VecSxp}
# A tibble: 18,750 x 7
# Groups:   a, b, mu, sigma [?]
         a      b     mu   sigma     t Avg_Engaged_Extern~ Avg_Engaged_No_Exte~
     <dbl>  <dbl>  <dbl>   <dbl> <int>               <dbl>                <dbl>
 1 0.00500 0.0500 0.0100 0.00500     1                4.60                 3.40
 2 0.00500 0.0500 0.0100 0.00500     2                9.40                 5.80
 3 0.00500 0.0500 0.0100 0.00500     3               16.5                  9.40
 4 0.00500 0.0500 0.0100 0.00500     4               30.7                 12.7 
 5 0.00500 0.0500 0.0100 0.00500     5               47.1                 15.9 
 6 0.00500 0.0500 0.0100 0.00500     6               70.8                 18.6 
 7 0.00500 0.0500 0.0100 0.00500     7              100                   21.2 
 8 0.00500 0.0500 0.0100 0.00500     8              137                   24.6 
 9 0.00500 0.0500 0.0100 0.00500     9              181                   27.5 
10 0.00500 0.0500 0.0100 0.00500    1

In [46]:
R"""
Cash_Flow <- result_summ %>% group_by(a, b, mu, sigma) %>% 
                             mutate(Cash_Flow_Externalities = (Avg_Engaged_Externalities - lag(Avg_Engaged_Externalities))/(1.1^t),
                                    Cash_Flow_No_Externalities = (Avg_Engaged_No_Externalities - lag(Avg_Engaged_No_Externalities))/(1.1^t)) 
"""

RCall.RObject{RCall.VecSxp}
# A tibble: 18,750 x 9
# Groups:   a, b, mu, sigma [625]
         a      b     mu   sigma     t Avg_Engaged_Extern~ Avg_Engaged_No_Exte~
     <dbl>  <dbl>  <dbl>   <dbl> <int>               <dbl>                <dbl>
 1 0.00500 0.0500 0.0100 0.00500     1                4.60                 3.40
 2 0.00500 0.0500 0.0100 0.00500     2                9.40                 5.80
 3 0.00500 0.0500 0.0100 0.00500     3               16.5                  9.40
 4 0.00500 0.0500 0.0100 0.00500     4               30.7                 12.7 
 5 0.00500 0.0500 0.0100 0.00500     5               47.1                 15.9 
 6 0.00500 0.0500 0.0100 0.00500     6               70.8                 18.6 
 7 0.00500 0.0500 0.0100 0.00500     7              100                   21.2 
 8 0.00500 0.0500 0.0100 0.00500     8              137                   24.6 
 9 0.00500 0.0500 0.0100 0.00500     9              181                   27.5 
10 0.00500 0.0500 0.0100 0.00500   

In [48]:
R"""
NPV <- Cash_Flow %>% group_by(a, b, mu, sigma) %>% 
                     summarize(NPV_Externalities = sum(Cash_Flow_Externalities, na.rm = TRUE),
                               NPV_No_Externalities = sum(Cash_Flow_No_Externalities, na.rm = TRUE)) %>%
                     mutate(NPV_Ratio = NPV_Externalities/NPV_No_Externalities)

"""

RCall.RObject{RCall.VecSxp}
# A tibble: 625 x 7
# Groups:   a, b, mu [125]
         a      b     mu   sigma NPV_Externalities NPV_No_Externalit~ NPV_Ratio
     <dbl>  <dbl>  <dbl>   <dbl>             <dbl>              <dbl>     <dbl>
 1 0.00500 0.0500 0.0100 0.00500               209               25.5      8.20
 2 0.00500 0.0500 0.0100 0.0100                202               24.8      8.14
 3 0.00500 0.0500 0.0100 0.0150                186               24.9      7.49
 4 0.00500 0.0500 0.0100 0.0200                196               26.7      7.35
 5 0.00500 0.0500 0.0100 0.0250                180               25.9      6.95
 6 0.00500 0.0500 0.0575 0.00500               136               26.2      5.20
 7 0.00500 0.0500 0.0575 0.0100                120               25.6      4.67
 8 0.00500 0.0500 0.0575 0.0150                131               24.7      5.31
 9 0.00500 0.0500 0.0575 0.0200                131               25.2      5.20
10 0.00500 0.0500 0.0575 0.0250              

In [56]:
R"""
library(caret)

model1 <- train(NPV_Ratio ~ 0 + log(a) + log(b) + I(log(sigma/mu)), 
               method = "lm",
               data = NPV,
               preProcess = c("center", "scale"),
               trControl = trainControl(method = "repeatedcv"))
"""

RCall.RObject{RCall.VecSxp}
Linear Regression 

625 samples
  4 predictor

Pre-processing: centered (3), scaled (3) 
Resampling: Cross-Validated (10 fold, repeated 1 times) 
Summary of sample sizes: 563, 561, 563, 563, 563, 562, ... 
Resampling results:

  RMSE     Rsquared   MAE     
  1.78134  0.4643107  1.141438

Tuning parameter 'intercept' was held constant at a value of TRUE


In [62]:
R"""
summary(model)
"""

RCall.RObject{RCall.VecSxp}

Call:
lm(formula = .outcome ~ ., data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7048 -0.7037  0.0146  0.6228  9.7472 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         3.30738    0.07204  45.908  < 2e-16 ***
`log(a)`           -1.21038    0.07210 -16.787  < 2e-16 ***
`log(b)`            0.46173    0.07210   6.404 2.99e-10 ***
`I(log(sigma/mu))`  1.03794    0.07210  14.396  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.801 on 621 degrees of freedom
Multiple R-squared:  0.4605,	Adjusted R-squared:  0.4579 
F-statistic: 176.7 on 3 and 621 DF,  p-value: < 2.2e-16



In [65]:
R"""

model1 <- train(NPV_Ratio ~ a + b + mu + sigma,
                data = NPV,
                method = "rf",
                trControl = trainControl(method = "repeatedcv", repeats = 5))

"""

RCall.RObject{RCall.VecSxp}
Random Forest 

625 samples
  4 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 5 times) 
Summary of sample sizes: 561, 563, 561, 565, 563, 563, ... 
Resampling results across tuning parameters:

  mtry  RMSE       Rsquared   MAE      
  2     0.3978363  0.9822575  0.2020591
  3     0.2466893  0.9896299  0.1234395
  4     0.2251829  0.9909139  0.1163380

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was mtry = 4.
