## Simulate 1000 users
+ A user picks a number between 1 and 100
+ If the number is <= 3, the user "clicks"!  
  
  
+ Repeat the above 1000 times

In [168]:
# Every simulated user draws one integer uniformly from 1..100 (1000 users).
totalRes <- sample.int(100, size = 1000, replace = TRUE)
# A draw of 3 or less counts as a click.
clicks <- length(which(totalRes <= 3))
cat(sprintf("Out of 1000 users, we got %d clicks\n", clicks))

Out of 1000 users, we got 35 clicks


## Do that simulation for A and B
## And measure the difference in %

In [None]:
# Simulate one A/B test in which both groups have the same true click rate.
#
# Every user draws an integer uniformly from 1..100 and "clicks" when the
# draw is <= clickPct, so each user clicks with probability clickPct / 100.
#
# Args:
#   size:       number of users in each group.
#   fullResult: if TRUE, return a list with both click counts and the
#               relative difference; if FALSE, return only the difference.
#   clickPct:   largest draw that counts as a click (default 3, i.e. 3%).
#
# Returns:
#   The relative difference (clicks.B - clicks.A) / clicks.A, or
#   list(A = clicks.A, B = clicks.B, diff = difference) when fullResult is
#   TRUE. The difference is NA_real_ when group A has no clicks, because a
#   relative improvement over a zero baseline is undefined; use na.rm = TRUE
#   when aggregating if that case can occur.
oneABTest <- function(size, fullResult = FALSE, clickPct = 3) {
    # Number of clicks among `size` simulated users.
    simulateClicks <- function() {
        sum(sample.int(100L, size, replace = TRUE) <= clickPct)
    }

    # Group A is drawn before group B so the random-number stream is consumed
    # in a fixed order (reproducible under set.seed()).
    clicks.A <- simulateClicks()
    clicks.B <- simulateClicks()

    # Guard the zero baseline instead of silently producing Inf or NaN.
    diff <- if (clicks.A > 0) (clicks.B - clicks.A) / clicks.A else NA_real_

    if (fullResult) {
        return(list("A" = clicks.A, "B" = clicks.B, "diff" = diff))
    }
    diff
}

In [170]:
# Run a single A/B test with 1000 users per group, then report the click
# count of each group and how much better B did relative to A.
result <- oneABTest(size = 1000, fullResult = TRUE)
cat(
    sprintf("Number of clicks from A group: %d\n", result[["A"]]),
    sprintf("Number of clicks from B group: %d\n", result[["B"]]),
    sprintf("B is better than A by %.2f%%\n", 100 * result[["diff"]]),
    sep = ""
)

Number of clicks from A group: 22
Number of clicks from B group: 28
B is better than A by 27.27%


## Do the same A/B Test many times!

In [180]:
# Run 10,000 independent A/B tests (1000 users per group) and keep the
# relative difference reported by each one.
results <- vapply(
    seq_len(10000),
    function(trial) oneABTest(1000),
    numeric(1)
)

In [181]:
# The A/B test shown in the presentation had A: 300 clicks and B: 330 clicks,
# i.e. B beat A by 10% (0.1).
# Share of the simulated tests in which B looks at least that much better.
atLeast10pt <- mean(0.1 <= results)
cat(sprintf("You will see an 'at least ten percent improvement' %.f%% of the time!\n", atLeast10pt * 100))

You will see an 'at least ten percent improvement' 35% of the time!


## Repeat again, but increase the number of users from 1000 to 10,000

In [173]:
# Same experiment again: 10,000 A/B tests, now with 10,000 users per group.
results <- vapply(
    seq_len(10000),
    function(trial) oneABTest(10000),
    numeric(1)
)

In [174]:
# Share of the simulated tests in which B beat A by 10% (0.1) or more.
atLeast10pt <- mean(0.1 <= results)
cat(sprintf("You will see at least ten percents improvement %.f%% of the time!\n", atLeast10pt * 100))

You will see at least ten percents improvement 12% of the time!


## Some mathematical work below....

In [None]:
# Why ~35% above?

# X and Y are two independent random variables: the observed click rates
# (sample proportions) of groups A and B. Each is the mean of n independent
# Bernoulli trials with p = 0.03.
p <- 0.03
n <- 1000

# Variance of a sample proportion: Var(X) = p(1-p)/n, where n is the number
# of samples. (n*p*(1-p) would be the variance of the click *count*, which is
# the wrong scale for the 0.003 difference in click rate examined below.)
var.X <- p * (1 - p) / n
var.Y <- p * (1 - p) / n

In [None]:
# Define D = Y - X

# E[D] = E[Y] - E[X] = 0

# Because X and Y are independent,
# Var(D) = Var(Y-X) = Var(X) + Var(Y)

var.D <- var.X + var.Y
sd.D <- sqrt(var.D)

# Again, what is D?
# In X, suppose we flip the coin many times and on average we get 0.03 heads.
# In Y, suppose we flip the coin many times and on average we get 0.033 heads.
# That's a 10% increase, and D = 0.003 in here.

# Since D is approximately N(0, sd.D) (mean 0, standard deviation sd.D),
# let's find the probability of getting 0.003 or more
pnorm(0.003, mean = 0, sd = sd.D, lower.tail = FALSE)

# Closed form: for two independent sample proportions,
# sd(D) = sqrt(2 * p * (1-p) / n). Note that `sd` must be a standard
# deviation on the proportion scale, not the variance 2*n*p*(1-p) of the
# difference in click counts.
pnorm(0.003, mean = 0, sd = sqrt(2 * p * (1 - p) / n), lower.tail = FALSE)

In [None]:
# Open the documentation page for the binomial distribution function.
help("pbinom")