In [5]:
import sympy as sp

# Define symbolic variables
x, mu, theta, sigma = sp.symbols('x mu theta sigma', real=True, positive=True)

# Define the probability density functions for P ~ N(mu, sigma^2) and Q ~ N(theta, sigma^2)
p = (1 / (sp.sqrt(2 * sp.pi) * sigma)) * sp.exp(-((x - mu)**2) / (2 * sigma**2))
q = (1 / (sp.sqrt(2 * sp.pi) * sigma)) * sp.exp(-((x - theta)**2) / (2 * sigma**2))

# Compute the logarithm of the ratio p/q
log_ratio = sp.log(p / q)

# Simplify the log ratio
log_ratio_simplified = sp.simplify(log_ratio)
# log_ratio_simplified should simplify to ((theta - mu)*(2*x - mu - theta))/(2*sigma**2)
# However, it's easier to expand and simplify step by step

# Let's compute p * log(p/q)
kl_integrand = p * log_ratio

# Expand the log_ratio
log_p = -sp.log(sp.sqrt(2 * sp.pi) * sigma) - ((x - mu)**2) / (2 * sigma**2)
log_q = -sp.log(sp.sqrt(2 * sp.pi) * sigma) - ((x - theta)**2) / (2 * sigma**2)
log_p_over_q = log_p - log_q

# Now, kl_integrand = p * (log_p - log_q)
kl_integrand = p * (log_p_over_q)

# Simplify log_p_over_q
log_p_over_q = (-((x - mu)**2) / (2 * sigma**2)) - (-((x - theta)**2) / (2 * sigma**2))
log_p_over_q = ((x - theta)**2 - (x - mu)**2) / (2 * sigma**2)
# Expand the squares
log_p_over_q = ((x**2 - 2*x*theta + theta**2) - (x**2 - 2*x*mu + mu**2)) / (2 * sigma**2)
log_p_over_q = (-2*x*theta + theta**2 + 2*x*mu - mu**2) / (2 * sigma**2)
log_p_over_q = (2*x*(mu - theta) + (theta**2 - mu**2)) / (2 * sigma**2)
log_p_over_q = (x*(mu - theta) + (theta**2 - mu**2)/2) / sigma**2

# So kl_integrand = p * (x*(mu - theta) + (theta**2 - mu**2)/2) / sigma**2

# Now, integrate kl_integrand over x from -infty to +infty
# Due to linearity, split the integral
# D_KL = (mu - theta)/sigma**2 * ∫x p(x) dx + (theta**2 - mu**2)/(2*sigma**2) * ∫p(x) dx

# We know that ∫x p(x) dx = mu and ∫p(x) dx = 1

D_KL = (mu - theta)/sigma**2 * mu + (theta**2 - mu**2)/(2*sigma**2) * 1

# Simplify the expression
D_KL = (mu**2 - mu*theta) / sigma**2 + (theta**2 - mu**2) / (2 * sigma**2)
D_KL = (2*(mu**2 - mu*theta) + theta**2 - mu**2) / (2 * sigma**2)
D_KL = (mu**2 - 2*mu*theta + theta**2) / (2 * sigma**2)
D_KL = (mu - theta)**2 / (2 * sigma**2)

# Display the result
print("KL Divergence D_KL(N(mu, sigma^2) || N(theta, sigma^2)) =")
sp.simplify(D_KL)


KL Divergence D_KL(N(mu, sigma^2) || N(theta, sigma^2)) =


(mu - theta)**2/(2*sigma**2)

In [1]:
import sympy as sp

# Define the symbolic variables
lambda_, mu, x = sp.symbols('lambda_ mu x')

# Define the exponential distribution terms
p_x = lambda_ * sp.exp(-lambda_ * x)  # PDF of Exp(lambda)
q_x = mu * sp.exp(-mu * x)            # PDF of Exp(mu)

# Define the log term
log_term = sp.log(lambda_ / mu) + (mu - lambda_) * x

# Integrate the KL divergence
kl_integral = sp.integrate(p_x * log_term, (x, 0, sp.oo))

# Simplify the result
kl_divergence_simplified = sp.simplify(kl_integral)

# Display the result
kl_divergence_simplified


Piecewise((log(lambda_/mu) - 1 + mu/lambda_, Abs(arg(lambda_)) < pi/2), (lambda_*Integral((-x*(lambda_ - mu) + log(lambda_/mu))*exp(-lambda_*x), (x, 0, oo)), True))

In [7]:
import numpy as np
n=7
one=[-1.0,-0.8, -2.9, 1.4, 0.3, -0.8, 1.4]
np.abs(np.mean(one))*np.sqrt(n)
two=[-1.7, -0.1, -0.2, 0.3, 0.3, -0.9, -0.02]
np.abs(np.mean(two))*np.sqrt(n)
three=[-0.2,0.6,1.1,-0.9,0.1,-1.2,1.1]
np.abs(np.mean(three))*np.sqrt(n)

0.2267786838055364

In [15]:
import sympy as sp

# Define the variable and limits
theta = sp.Symbol('theta', real=True, positive=True)
t = sp.Symbol('t', real=True, positive=True)

# Define the integral
integral = sp.integrate(1/t, (t, 0.0001, 1/2))

integral

8.51719319141624

In [17]:
(0.2*13*125+81)/12

33.833333333333336

In [1]:
b180/0.9

200.0

In [2]:
5*0.8


4.0

In [3]:
5000/40

125.0

In [4]:
-125*40-4000

-9000

In [12]:
from scipy import stats
import numpy as np
# Given test statistic
z_statistic = (0.5-0.645)/(np.sqrt(0.654*(1-0.645))/np.sqrt(124))

# Compute the p-value for a two-tailed test
p_value_two_tailed = 2 * (1 - stats.norm.cdf(abs(z_statistic)))

# Compute the p-value for a one-tailed test
p_value_one_tailed = 1 - stats.norm.cdf(abs(z_statistic))

print(f"Two-tailed p-value: {p_value_two_tailed:.4f}")
print(f"One-tailed p-value: {p_value_one_tailed:.4f}")

Two-tailed p-value: 0.0008
One-tailed p-value: 0.0004


In [13]:
lambd=1/1.13

import math

(math.sqrt(42)/lambd)*(lambd-1)

-0.8424962907930215

In [34]:
from scipy import stats
import numpy as np
# Given test statistic
z_statistic = np.sqrt(1000)*np.abs((257-260)/np.sqrt(38**2+39**2))
# Compute the p-value for a two-tailed test
p_value_two_tailed = 2 * (1 - stats.norm.cdf(abs(z_statistic)))

# Compute the p-value for a one-tailed test
p_value_one_tailed = 1 - stats.norm.cdf(abs(z_statistic))

print(f"Two-tailed p-value: {p_value_two_tailed:.4f}")
print(f"One-tailed p-value: {p_value_one_tailed:.4f}")

Two-tailed p-value: 0.0815
One-tailed p-value: 0.0407


In [15]:
1/(np.log(0.362))**2

0.9685401680508671

In [19]:
stats.norm.cdf(1.96)

0.9750021048517795

In [27]:
from scipy.stats import norm

alpha = 0.05
quantile = norm.ppf()

quantile

1.32-1.959963984540054

-0.639963984540054

In [5]:
import numpy as np
a=[1/3,1/3,1/3]
a=[1/4,1/4,1/4,1/4]
print(np.sum([i**2 for i in a]))

0.25


In [7]:
dat=[1,3,1,2,2,2,1,1,3,1,1,2]
dat=[i/len(dat) for i in dat]

test_statistic = len(dat)*np.sum([(i-1/3)**2/(1/3) for i in dat])
test_statistic

17.999999999999996

In [9]:
import numpy as np
from scipy.stats import chi2

# Original data
dat = [1, 3, 1, 2, 2, 2, 1, 1, 3, 1, 1, 2]

# Observed counts for each category (1, 2, 3)
observed_counts = [dat.count(1), dat.count(2), dat.count(3)]
total_count = len(dat)

# Expected counts (assuming equal probability for each category)
expected_count = total_count / 3

# Chi-squared test statistic
test_statistic = sum((obs - expected_count)**2 / expected_count for obs in observed_counts)

# Degrees of freedom (K-1, where K is the number of categories)
degrees_of_freedom = len(observed_counts) - 1

# P-value for the chi-squared test
p_value = 1 - chi2.cdf(test_statistic, degrees_of_freedom)

print("Chi-squared test statistic:", test_statistic)
print("P-value:", p_value)


Chi-squared test statistic: 2.0
P-value: 0.36787944117144233


In [21]:
from scipy.stats import chi2

# Degrees of freedom and significance level
degrees_of_freedom = 2
alpha = 0.05

# Calculate the critical value for the 95th percentile
critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)

print("Chi-squared critical value at 5% significance level:", critical_value)


Chi-squared critical value at 5% significance level: 5.991464547107979


In [12]:
dat=[5,6,1,6,4,1,2,4,6,6,1,6,6,3,5]
observed_counts = [dat.count(i) for i in range(1, 7)]
expected_count = total_count / 3

# Chi-squared test statistic
test_statistic = sum((obs - expected_count)**2 / expected_count for obs in observed_counts)
test_statistic

# Degrees of freedom (K-1, where K is the number of categories)
degrees_of_freedom = len(observed_counts) - 1

# P-value for the chi-squared test
p_value = 1 - chi2.cdf(test_statistic, degrees_of_freedom)

print("Chi-squared test statistic:", test_statistic)
print("P-value:", p_value)



Chi-squared test statistic: 7.75
P-value: 0.17056249096296938


In [19]:
from scipy.stats import chi2

# Proportions and total count
observed_proportions = [20/100, 30/100, 50/100]
expected_proportions = [1/10, 4/10, 5/10]
total_count = 100

# Convert proportions to counts
observed_counts = [obs * total_count for obs in observed_proportions]
expected_counts = [exp * total_count for exp in expected_proportions]

# Chi-squared test statistic
test_statistic = sum((obs - exp)**2 / exp for obs, exp in zip(observed_counts, expected_counts))

# Degrees of freedom (K-1, where K is the number of categories)
degrees_of_freedom = len(observed_counts) - 1

# P-value for the chi-squared test
p_value = 1 - chi2.cdf(test_statistic, degrees_of_freedom)

print("Chi-squared test statistic:", test_statistic)
print("P-value:", p_value)


Chi-squared test statistic: 12.5
P-value: 0.0019304541362277217


In [29]:
dat=[339,455,180,26]
MLE=(1/1000*3)*np.sum(dat)
import numpy as np
from scipy.stats import chi2

# Given data
dat = [339, 455, 180, 26]

# Calculate the MLE for the binomial parameter p
MLE = (1 / (1000 * 3)) * np.sum(dat)


print(MLE)
# Total count (sum of observed data)
total_count = sum(dat)
print(total_count)
# Expected counts for each bin (0, 1, 2, 3) in a binomial distribution with parameters n=3, p=MLE
n = 3
expected_counts = [total_count * (np.math.comb(n, k) * MLE**k * (1 - MLE)**(n - k)) for k in range(0,4)]

# Chi-squared test statistic
test_statistic = sum((obs - exp)**2 / exp for obs, exp in zip(dat, expected_counts))

print("Chi-squared test statistic:", test_statistic)
degrees_of_freedom = 2
p_value = 1 - chi2.cdf(test_statistic, degrees_of_freedom)
print("P-value:", p_value)


0.3333333333333333
1000
Chi-squared test statistic: 17.716624999999958
P-value: 0.00014219481556709823


  expected_counts = [total_count * (np.math.comb(n, k) * MLE**k * (1 - MLE)**(n - k)) for k in range(4)]


In [30]:
import numpy as np
from scipy.stats import chi2, binom

# Given data
dat = [339, 455, 180, 26]

# Total count (sum of observed data)
total_count = sum(dat)
n = 3

# Calculate total successes
total_successes = sum(k * count for k, count in enumerate(dat))

# Calculate the MLE for the binomial parameter p
MLE = total_successes / (n * total_count)

print("MLE of p:", MLE)

# Expected counts for each bin (0, 1, 2, 3) in a binomial distribution with parameters n=3, p=MLE
expected_counts = [total_count * binom.pmf(k, n, MLE) for k in range(n+1)]

print("Expected counts:", expected_counts)

# Chi-squared test statistic
test_statistic = sum((obs - exp)**2 / exp for obs, exp in zip(dat, expected_counts))

print("Chi-squared test statistic:", test_statistic)

degrees_of_freedom = len(dat) - 1 - 1  # number of bins - 1 - number of estimated parameters
p_value = 1 - chi2.cdf(test_statistic, degrees_of_freedom)
print("P-value:", p_value)


MLE of p: 0.2976666666666667
Expected counts: [346.44144603703694, 440.4919952222221, 186.69167144444452, 26.374887296296304]
Chi-squared test statistic: 0.8828551921498904
P-value: 0.6431176531870422


In [15]:
zero_two=0.2**4*0.8**2*0.2
zero_four=0.4**4*0.6**2*0.4
zero_six=0.6**4*0.4**2*0.2
zero_eight=0.8**4*0.2**2*0.2
normalizer=zero_two+zero_four+zero_six+zero_eight
p=[zero_two/normalizer,zero_four/normalizer,zero_six/normalizer,zero_eight/normalizer]
p
priors=[0.2,0.4,0.6,0.8]
sum([priors[i]*p[i] for i in range(4)])
#import numpy as np
np.median(p,axis=0)
p

[0.01809954751131222,
 0.32579185520361986,
 0.3665158371040724,
 0.2895927601809955]