---
title: "Midterm Exam"
author: "Monikrishna Roy"
date: "2021-05-11"
---
The standard Laplace distribution has density $f(x) = \frac{1}{2} e^{-|x|}$, $x \in \mathbb{R}$.
A random sample of size 1000 is generated with the inverse transform method. Here, the histogram and the Laplace density curve are shown together to compare the results.
# Inverse transform method for the standard Laplace distribution.
# The quantile function is F^{-1}(u) = log(2u) for u <= 1/2 and
# -log(2(1 - u)) for u > 1/2, applied to u ~ Uniform(0, 1).
laplace_quantile <- function (size) {
  u <- runif(size)
  # vectorized branch (replaces an element-wise sapply loop)
  ifelse(u > 0.5, -log(-2*u + 2), log(2*u))
}
# Standard Laplace density: f(x) = exp(-|x|) / 2.
laplace_density <- function (v) {
  exp(-abs(v)) / 2
}
# -- Compare the simulated sample with the Laplace density ---------------
size <- 1000
# plotting window; renamed from `range` to avoid masking base::range()
plot_range <- c(-10, 10)
x_seq <- seq(plot_range[1], plot_range[2], length.out = size)
# density curve values and the simulated sample
x_density <- laplace_density(x_seq)
y_random <- laplace_quantile(size)
# histogram of the sample with the theoretical density overlaid
hist(y_random, probability = TRUE, breaks = 100, xlim = plot_range)
lines(x_seq, y = x_density)
The Rayleigh density
$$
f(x) = \frac{x}{\sigma^2} e^{-x^2 / (2\sigma^2)}, \quad x \ge 0, \; \sigma > 0.
$$
Develop an algorithm to generate random samples from a Rayleigh($\sigma$) distribution.
Generate random samples from the Rayleigh distribution. The quantile function for the Rayleigh distribution is used with the inverse transform method below.
# Inverse transform method for the Rayleigh(sigma) distribution.
# The quantile function is F^{-1}(u) = sqrt(-2 sigma^2 log(1 - u)).
# `sigma` must be positive (the density requires sigma > 0).
rayleigh_random <- function (size, sigma) {
  stopifnot(sigma > 0)
  u <- runif(size)
  sqrt(-2 * sigma ^ 2 * log(1 - u))
}
# -- Rayleigh samples for several values of sigma ------------------------
size <- 1000
# candidate scale parameters
sigmas <- 1:5
# one column of draws per sigma
xs <- sapply(sigmas, function (v) rayleigh_random(size, v))
# label the columns by their sigma and wrap in a data frame
mat <- matrix(xs, ncol = length(sigmas), dimnames = list(NULL, sigmas))
df <- data.frame(mat)
# one histogram per sigma, all arranged on a single page
plots <- lapply(seq_along(sigmas), function (i) {
  qplot(df[[i]], main = paste0("Sigma = ", sigmas[i]),
        binwidth = 0.1, geom = "histogram")
})
do.call(grid.arrange, plots)
# Estimate the mode as the most frequent distinct value.
# On ties, the value that appears first in `x` wins (which.max behavior).
mode <- function(x) {
  distinct <- unique(x)
  counts <- tabulate(match(x, distinct))
  distinct[which.max(counts)]
}
# Per-column mode estimates. NOTE(review): the samples are continuous draws,
# so most values are unique and the "mode" here mostly reflects incidental
# duplicates — treat these estimates with caution.
sapply(df, mode)
## X1 X2 X3 X4 X5
## 0.4807475 0.9488279 4.1856703 0.9161810 4.8222020
The rescaled Epanechnikov kernel [92] is a symmetric density function
A sample is generated from the Epanechnikov kernel and a histogram of the simulated sample is constructed.
# Generate iid variates from the Epanechnikov kernel (Devroye's algorithm):
# draw U1, U2, U3 ~ Uniform(-1, 1); deliver U2 when |U3| is the largest of
# the three absolute values, otherwise deliver U3.
iid_gen <- function (size) {
  u1 <- runif(size, min = -1, max = 1)
  u2 <- runif(size, min = -1, max = 1)
  u3 <- runif(size, min = -1, max = 1)
  # vectorized selection (replaces an element-wise sapply loop)
  ifelse(abs(u3) >= abs(u2) & abs(u3) >= abs(u1), u2, u3)
}
# -- Draw the Epanechnikov sample and inspect its histogram --------------
size <- 1000
random_sample <- iid_gen(size)
# density-scale histogram (freq = FALSE is equivalent to probability = TRUE)
hist(random_sample, freq = FALSE, breaks = 20)
Prove that the algorithm given in Exercise 3.9 generates variates from the density
Drawing the density curve using the given density function. The density curve over the histogram shows that the generated variates are consistent with the density.
# Epanechnikov kernel density (Exercise 3.9): f(x) = 3/4 (1 - x^2) on [-1, 1].
# Returns 0 outside the support instead of a meaningless negative value;
# on [-1, 1] the result is identical to the original 3/4 * (1 - x^2).
epanechnikov_density <- function (x) {
  ifelse(abs(x) <= 1, 3/4 * (1 - x^2), 0)
}
# plotting grid spanning the observed sample
sample_limits <- range(random_sample)
x_seq <- seq(from = sample_limits[1], to = sample_limits[2], by = 0.01)
# histogram of the sample with the theoretical kernel density overlaid
hist(random_sample, probability = TRUE, breaks = 20)
lines(x = x_seq, y = epanechnikov_density(x_seq))
Generate a random sample of size 1000 from a normal location mixture. The components of the mixture have $N(0, 1)$ and $N(3, 1)$ distributions, mixed with probabilities $p_1$ and $1 - p_1$.
Histograms are constructed below for several values of $p_1$.
# Normal location mixture: with probability p draw from N(0, 1),
# otherwise from N(3, 1).
mixture <- function(n, p) {
  comp_a <- rnorm(n, 0, 1)
  comp_b <- rnorm(n, 3, 1)
  # 0/1 component indicator: 1 selects N(0, 1), 0 selects N(3, 1)
  pick <- sample(c(0, 1), n, replace = TRUE, prob = c(1 - p, p))
  ifelse(pick == 1, comp_a, comp_b)
}
# -- Histograms of the mixture for several mixing probabilities ----------
size <- 1000
# single full-size histogram for p1 = 0.75
m_1 <- mixture(size, 0.75)
hist(m_1, main = expression(p == 0.75), xlab = '', ylab = '')
par(mfrow = c(2, 2)) # 2x2 grid for the remaining panels
# same RNG/plot order as before, written as a loop over p1
for (p_1 in c(0.25, 0.4, 0.5, 0.6)) {
  hist(mixture(size, p_1), main = bquote(p == .(p_1)), xlab = '', ylab = '')
}
Generate 200 random observations from the 3-dimensional multivariate normal distribution having mean vector $\mu = (0, 1, 2)^T$ and the covariance matrix $\Sigma$ given in the code below,
using the Choleski factorization method. Use the R pairs plot to graph an array of scatter plots for each pair of variables. For each pair of variables, (visually) check that the location and correlation approximately agree with the theoretical parameters of the corresponding bivariate normal distribution.
A program to generate multivariate normal samples via the Choleski factorization. The scatter plots show that the sample locations and pairwise correlation patterns agree with the theoretical parameters.
# Simulate `size` draws from N(mus, sigma) via the Choleski factorization.
# Returns a size x length(mus) matrix, one observation per row.
runCholeski <- function(size, mus, sigma) {
  dims <- length(mus)
  # lower-triangular factor L with L %*% t(L) equal to sigma
  lower <- t(chol(sigma))
  # standard normal draws, one column per observation
  std_normals <- matrix(rnorm(n = dims * size), ncol = size, nrow = dims)
  # mus recycles down the rows, shifting each coordinate by its mean
  t(lower %*% std_normals + mus)
}
# -- Draw 200 trivariate normal observations and inspect pairs -----------
size <- 200
mus <- 0:2
# covariance matrix (symmetric positive definite), filled column-wise
sigma <- matrix(c(1, -0.5, 0.5, -0.5, 2, -0.5, 0.5, -0.5, 3), nrow = 3)
ys <- runCholeski(size = size, mus = mus, sigma = sigma)
# scatter plot of every pair of coordinates
pairs(ys)
Efron and Tibshirani discuss the
Computed the covariance matrix of the transformed sample of test scores.
# Whiten `data`: center each column and rotate so that the sample covariance
# of the transformed observations is the identity matrix.
# With A = chol(solve(Sigma)) we have t(A) %*% A == solve(Sigma), hence
# cov(A %*% X_centered) == A %*% Sigma %*% t(A) == I.
normalize <- function (data) {
  Sigma <- cov(data)
  # column means (replaces an element-wise sapply over 1:ncol)
  mus <- colMeans(data)
  A <- chol(solve(Sigma))
  # t(data) is p x n; mus recycles down the rows, centering each variable
  t(A %*% (t(data) - mus))
}
# -- Whiten the two groups of test-score columns separately --------------
norm.scor <- scor
# transform columns 1:2 and columns 3:5 as separate groups
grp_a <- normalize(data.matrix(norm.scor[, 1:2]))
grp_b <- normalize(data.matrix(norm.scor[, 3:5]))
norm.scor[, 1:2] <- grp_a
norm.scor[, 3:5] <- grp_b
# covariance of the transformed scores: identity within each group,
# nonzero only between the two groups
round(cov(norm.scor), digits = 4)
## mec vec alg ana sta
## mec 1.0000 0.0000 0.1621 0.0776 0.1772
## vec 0.0000 1.0000 0.3323 0.2770 0.4364
## alg 0.1621 0.3323 1.0000 0.0000 0.0000
## ana 0.0776 0.2770 0.0000 1.0000 0.0000
## sta 0.1772 0.4364 0.0000 0.0000 1.0000
A compound Poisson process is a stochastic process $X(t) = \sum_{i=1}^{N(t)} Y_i$, where $N(t)$ is a Poisson process and the $Y_i$ are iid random variables (here Gamma distributed).
The estimated mean and variance are close to the theoretical values.
# -- Compound Poisson-Gamma process X(t) at t = 10 -----------------------
# X(t) = sum_{i=1}^{N(t)} Y_i, N(t) ~ Poisson(lambda * t),
# Y_i ~ Gamma(shape, scale).
lambda <- 2
shape <- 5
scale <- 2
size <- 10000
t <- 10
# number of jumps by time t for each replicate
ns <- rpois(size, t * lambda)
# X(t) per replicate; sum(rgamma(n = 0, ...)) is 0, so n == 0 is handled
# correctly (the old ys[1:n] indexing produced NA when n == 0, since 1:0
# is c(1, 0))
xs <- vapply(ns, function (n) sum(rgamma(n = n, shape = shape, scale = scale)),
             numeric(1))
# sample mean vs. theoretical E[X(t)] = lambda * t * E[Y]
(mean.s <- mean(xs))
## [1] 199.7574
(mean.t <- lambda * t * shape * scale)
## [1] 200
# sample variance vs. theoretical Var(X(t)) = lambda * t * E[Y^2], where
# E[Y^2] = Var(Y) + E[Y]^2 = shape * scale^2 + (shape * scale)^2.
# (The previous formula lambda * t * (shape * scale)^2 = 2000 omitted the
# Var(Y) term; the correct value 2400 matches the empirical 2377.8.)
(var.s <- var(xs))
## [1] 2377.782
(var.t <- lambda * t * (shape * scale^2 + (shape * scale)^2))
## [1] 2400