# Statistics

Concentrate on the sections marked with a star (\*).

## Manual

### Descriptive Stats and **Variability**\*

``` python
# Manual Calculation of Mean, Median, Mode, Range, Variance, Standard Deviation, and IQR

def mean(data):
    """Return the arithmetic mean of ``data``.

    ``data`` must support both ``sum()`` and ``len()``. An empty input
    raises ``ZeroDivisionError``.
    """
    return sum(data) / len(data)

def median(data):
    """Return the median of ``data``.

    The values are sorted first. For an odd count the middle value is
    returned as-is; for an even count the two middle values are averaged.
    """
    ordered = sorted(data)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2

def mode(data):
    """Return a list of the most frequent value(s) in ``data``.

    Ties are all returned, in order of first appearance. An empty input
    raises ``ValueError`` (from ``max`` on an empty sequence).
    """
    counts = {}
    for value in data:
        if value in counts:
            counts[value] += 1
        else:
            counts[value] = 1
    highest = max(counts.values())
    return [value for value in counts if counts[value] == highest]

def data_range(data):
    """Return the spread of ``data``: largest value minus smallest value."""
    largest = max(data)
    smallest = min(data)
    return largest - smallest

def variance(data):
    """Return the sample variance of ``data`` (divides by ``n - 1``).

    Relies on the ``mean`` helper defined alongside it. A single
    observation raises ``ZeroDivisionError`` because ``n - 1`` is zero.
    """
    centre = mean(data)
    squared_deviations = [(x - centre) ** 2 for x in data]
    return sum(squared_deviations) / (len(data) - 1)  # Sample variance

def standard_deviation(data):
    """Return the sample standard deviation: the square root of ``variance(data)``."""
    sample_variance = variance(data)
    return sample_variance ** 0.5

def calculate_iqr(data):
    """Return ``(iqr, q1, q3)`` using the median-of-halves method.

    The sorted data is split into a lower and an upper half; when the
    number of observations is odd the overall median element is left out
    of both halves. Q1 and Q3 are the medians of those halves.
    """
    ordered = sorted(data)
    half = len(ordered) // 2

    def _middle(values):
        # Median of an already-sorted list.
        centre, is_odd = divmod(len(values), 2)
        if is_odd:
            return values[centre]
        return (values[centre - 1] + values[centre]) / 2

    lower = ordered[:half]
    # For an odd count, start one past the overall median element.
    upper = ordered[half + len(ordered) % 2:]
    first_quartile = _middle(lower)
    third_quartile = _middle(upper)
    return third_quartile - first_quartile, first_quartile, third_quartile

# Sample Data
data = [5, 2, 3, 6, 74, 9, 86, 21, 22, 385]

# Print every single-valued statistic, in the order the helpers are defined.
for _label, _statistic in (
    ("Mean:", mean),
    ("Median:", median),
    ("Mode:", mode),
    ("Range:", data_range),
    ("Variance:", variance),
    ("Standard Deviation:", standard_deviation),
):
    print(_label, _statistic(data))

# calculate_iqr returns the IQR together with both quartiles.
iqr, q1, q3 = calculate_iqr(data)
print(f"IQR: {iqr}, Q1: {q1}, Q3: {q3}")
```

### Covariance and Correlation \*

``` python
# Manual Calculation of Covariance & Correlation

def mean(data):
    """Return the arithmetic mean of ``data`` (sum divided by count)."""
    total = sum(data)
    return total / len(data)

def covariance(X, Y):
    """Return the sample covariance of ``X`` and ``Y`` (divides by ``n - 1``).

    ``n`` is taken from ``X``; values are paired with ``zip``, so extra
    elements in the longer sequence are ignored.
    """
    n = len(X)
    x_bar = mean(X)
    y_bar = mean(Y)
    cross_products = [(x - x_bar) * (y - y_bar) for x, y in zip(X, Y)]
    return sum(cross_products) / (n - 1)  # Sample covariance

def population_covariance(X, Y):
    """Return the population covariance of ``X`` and ``Y`` (divides by ``n``).

    ``n`` is the length of ``X``; pairs are formed with ``zip``.
    """
    n = len(X)
    x_bar, y_bar = mean(X), mean(Y)
    cross_products = ((a - x_bar) * (b - y_bar) for a, b in zip(X, Y))
    return sum(cross_products) / n

def variance(data):
    """Return the sample variance of ``data`` (``n - 1`` denominator)."""
    centre = mean(data)
    squared_deviations = [(x - centre) ** 2 for x in data]
    return sum(squared_deviations) / (len(data) - 1)  # Sample variance

def standard_deviation(data):
    """Return the sample standard deviation (square root of ``variance``)."""
    sample_variance = variance(data)
    return sample_variance ** 0.5

def correlation(X, Y):
    """Return the Pearson correlation of ``X`` and ``Y``.

    Computed as the sample covariance divided by the product of the two
    sample standard deviations.
    """
    joint_variation = covariance(X, Y)
    spread_product = standard_deviation(X) * standard_deviation(Y)
    return joint_variation / spread_product

# Sample Data
X = [3, 5, 7, 9, 11]
Y = [2, 4, 6, 8, 10]

# Each measure takes the same (X, Y) pair, so print them in one pass.
for _label, _measure in (
    ("Covariance:", covariance),
    ("Population Covariance:", population_covariance),
    ("Correlation:", correlation),
):
    print(_label, _measure(X, Y))
```

### Probability Basics\*

``` python
def addition_rule(p_A, p_B, p_A_and_B):
    """Return P(A or B) = P(A) + P(B) - P(A and B)."""
    either = p_A + p_B
    either -= p_A_and_B  # remove the overlap that was counted twice
    return either

def multiplication_rule_independent(p_A, p_B):
    """Return P(A and B) for independent events: the product P(A) * P(B)."""
    joint = p_A * p_B
    return joint

def expectation(values, probabilities):
    """Return the expected value: the sum of ``value * probability`` pairs.

    Pairs are formed with ``zip``, so the shorter input decides how many
    terms are used.
    """
    weighted_terms = [v * p for v, p in zip(values, probabilities)]
    return sum(weighted_terms)

# Example
p_either = addition_rule(0.4, 0.3, 0.1)
print("P(A or B):", p_either)

p_both = multiplication_rule_independent(0.4, 0.3)
print("P(A and B, independent):", p_both)

expected_value = expectation([0, 1, 2], [0.5, 0.3, 0.2])
print("Expectation:", expected_value)
```

### Hypothesis Testing \*

### **1️⃣ One-Sample t-Test**

``` python
# One-Sample t-Test (Manual)
# Compares the mean of one sample with a hypothesised population mean.

sample = [22, 24, 27, 23, 26, 30, 28, 25, 29, 24]
n = len(sample)
population_mean = 25

# Calculate mean
sample_mean = sum(sample) / n

# Calculate standard deviation (sample variance uses the n - 1 denominator).
# Stored as `sample_variance` so the name `variance` is not rebound to a number.
sample_variance = sum((x - sample_mean) ** 2 for x in sample) / (n - 1)
sample_std = sample_variance ** 0.5

# Calculate t-score: (sample mean - population mean) / standard error of the mean
t_score = (sample_mean - population_mean) / (sample_std / (n ** 0.5))

print("T-Statistic:", t_score)
```

------------------------------------------------------------------------

### **2️⃣ Independent t-Test**

``` python
# Independent t-Test (Manual)
# Student's two-sample t-test assuming equal variances (pooled variance).

sample1 = [22, 24, 27, 23, 26]
sample2 = [30, 28, 25, 29, 24]

n1, n2 = len(sample1), len(sample2)

mean1 = sum(sample1) / n1
mean2 = sum(sample2) / n2

# Calculate variances (sample variances, n - 1 denominator)
var1 = sum((x - mean1) ** 2 for x in sample1) / (n1 - 1)
var2 = sum((x - mean2) ** 2 for x in sample2) / (n2 - 1)

# Pooled variance: the two sample variances weighted by their degrees of freedom
pooled_variance = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)

# Pooled standard deviation and the standard error of the difference in means
pooled_std = pooled_variance ** 0.5
standard_error = pooled_std * (1 / n1 + 1 / n2) ** 0.5

# Compute t-score
t_score = (mean1 - mean2) / standard_error

print("T-Statistic:", t_score)
```

------------------------------------------------------------------------

### **3️⃣ Paired t-Test**

``` python
# Paired t-Test (Manual)
# Works on the per-pair differences (after - before), so a positive t means
# the "after" values are larger on average.

before = [22, 24, 28, 23, 26]
after = [30, 28, 25, 29, 24]

n = len(before)

# Calculate differences
differences = [a - b for a, b in zip(after, before)]

# Calculate mean of differences
mean_diff = sum(differences) / n

# Calculate standard deviation of differences (n - 1 denominator).
# Stored as `diff_variance` so the name `variance` is not rebound to a number.
diff_variance = sum((x - mean_diff) ** 2 for x in differences) / (n - 1)
std_diff = diff_variance ** 0.5

# Compute t-score: mean difference divided by its standard error
t_score = mean_diff / (std_diff / (n ** 0.5))

print("T-Statistic:", t_score)
```

------------------------------------------------------------------------

### **4️⃣ Chi-Square Test**

``` python
# Chi-Square Test (Manual)
# Pearson chi-square test of independence on a contingency table,
# computed without any continuity correction.

observed = [[50, 30], [20, 40]]

# Calculate row and column sums
row_sums = [sum(row) for row in observed]
col_sums = [sum(col) for col in zip(*observed)]
total = sum(row_sums)

# Expected values under independence: (row total * column total) / grand total
expected = [[(row_sum * col_sum) / total for col_sum in col_sums] for row_sum in row_sums]

# Compute Chi-Square statistic: sum of (observed - expected)^2 / expected over all cells
chi_square = sum(
    (obs - exp) ** 2 / exp
    for obs_row, exp_row in zip(observed, expected)
    for obs, exp in zip(obs_row, exp_row)
)

print("Chi-Square Statistic:", chi_square)
```

------------------------------------------------------------------------

### **5️⃣ One-Way ANOVA**

``` python
# One-Way ANOVA (Manual)
# F = (between-group mean square) / (within-group mean square).

group1 = [70, 75, 80, 85, 90]
group2 = [60, 65, 70, 75, 80]
group3 = [50, 55, 60, 65, 70]

anova_groups = [group1, group2, group3]
all_data = group1 + group2 + group3
k = len(anova_groups)  # Number of groups
n = len(all_data)  # Total number of observations

# Calculate means
group_means = [sum(g) / len(g) for g in anova_groups]
grand_mean = sum(all_data) / n

# Between-group sum of squares (SSB): group size times squared distance
# of the group mean from the grand mean
ssb = sum(len(g) * (group_mean - grand_mean) ** 2 for g, group_mean in zip(anova_groups, group_means))

# Within-group sum of squares (SSW): squared distance of each value from its group mean
ssw = sum(sum((x - group_mean) ** 2 for x in g) for g, group_mean in zip(anova_groups, group_means))

# Degrees of freedom
df_between = k - 1
df_within = n - k

# Mean Squares
msb = ssb / df_between
msw = ssw / df_within

# Compute F-statistic
f_statistic = msb / msw

print("F-Statistic:", f_statistic)
```

------------------------------------------------------------------------

### **6️⃣ Two-Way ANOVA (Manual Calculation)**

``` python
# Two-Way ANOVA (Manual)
# NOTE: this computes a single omnibus F-statistic across the six
# factor-combination cells (between-cell vs. within-cell variation). It does
# not split the variation into separate main-effect and interaction terms.

groups = {
    "A_short": [88, 85, 87],
    "A_long": [75, 78, 80],
    "B_short": [90, 92, 89],
    "B_long": [70, 72, 68],
    "C_short": [80, 78, 82],
    "C_long": [60, 63, 59],
}

# Flatten every cell's observations into one list
all_data = [score for scores in groups.values() for score in scores]
total_mean = sum(all_data) / len(all_data)

# Calculate means (one per cell, keyed by cell name)
group_means = {name: sum(scores) / len(scores) for name, scores in groups.items()}

# Between-group sum of squares (SSB): iterate the observations in `groups`
# and look the matching mean up by name
ssb = sum(len(scores) * (group_means[name] - total_mean) ** 2 for name, scores in groups.items())

# Within-group sum of squares (SSW)
ssw = sum((x - group_means[name]) ** 2 for name, scores in groups.items() for x in scores)

# Degrees of freedom
df_between = len(groups) - 1
df_within = len(all_data) - len(groups)

# Compute F-statistic
f_statistic = (ssb / df_between) / (ssw / df_within)

print("F-Statistic:", f_statistic)
```

------------------------------------------------------------------------

### **7️⃣ One-Sample Z-Test**

``` python
# One-Sample Z-Test (Manual)
# Used when the population standard deviation is known.

sample_mean = 50
population_mean = 50
population_sd = 3
n = 40

# Compute Z-score: difference in means divided by the standard error
z_score = (sample_mean - population_mean) / (population_sd / (n ** 0.5))

print("Z-Score:", z_score)
```

------------------------------------------------------------------------

### **8️⃣ Two-Sample Z-Test**

``` python
# Two-Sample Z-Test (Manual)
# Compares two means when both standard deviations are treated as known.

mean1 = 53
mean2 = 47
sd1 = 3
sd2 = 4

n1 = 40
n2 = 40

# Standard error of the difference: sqrt(sd1^2 / n1 + sd2^2 / n2)
standard_error = ((sd1 ** 2) / n1 + (sd2 ** 2) / n2) ** 0.5

# Compute Z-score
z_score = (mean1 - mean2) / standard_error

print("Z-Score:", z_score)
```

### skewness and kurtosis\*

``` python
# Manual Skewness and Kurtosis
def skewness(data):
    """Return the skewness of ``data``.

    Computed as the sum of cubed deviations from the mean divided by
    ``n * s**3``, where ``s`` comes from the ``standard_deviation`` helper.
    """
    centre = mean(data)
    spread = standard_deviation(data)
    cubed_deviations = sum((x - centre) ** 3 for x in data)
    return cubed_deviations / (len(data) * spread ** 3)

def kurtosis_manual(data):
    """Return the excess kurtosis of ``data``.

    Computed as the sum of fourth-power deviations from the mean divided
    by ``n * s**4`` (``s`` from ``standard_deviation``), minus 3 so that a
    normal distribution scores about zero.
    """
    centre = mean(data)
    spread = standard_deviation(data)
    fourth_powers = sum((x - centre) ** 4 for x in data)
    raw_kurtosis = fourth_powers / (len(data) * spread ** 4)
    return raw_kurtosis - 3  # Excess kurtosis

# Example
data = [5, 2, 3, 6, 74, 9, 86, 21, 22, 385]

skew_result = skewness(data)
print("Skewness:", skew_result)

kurtosis_result = kurtosis_manual(data)
print("Kurtosis:", kurtosis_result)
```

### Probability Distributions

``` python
# Manual Probability Distributions

def factorial(n):
    """Return ``n!`` for a non-negative integer ``n``.

    Any ``n <= 1`` (including negative values) yields 1, matching the
    recursive definition this replaces. The product is built iteratively
    so large ``n`` does not exhaust the interpreter's recursion limit.
    """
    result = 1
    factor = n
    while factor > 1:
        result *= factor
        factor -= 1
    return result

# Uniform Distribution
def uniform_probability(n):
    """Return the probability of one outcome among ``n`` equally likely outcomes."""
    share = 1 / n  # Each outcome has equal probability
    return share

# Binomial Distribution
# Formula: C(n, k) = n! / (k! * (n - k)!)
def combination(n, k):
    """Return the number of ways to choose ``k`` items from ``n``."""
    numerator = factorial(n)
    denominator = factorial(k) * factorial(n - k)
    return numerator // denominator
# Formula: P(X = k) = C(n, k) * p^k * (1 - p)^(n - k)
def binomial_probability(n, k, p):
    """Return the binomial probability of exactly ``k`` successes.

    Args:
        n: number of independent trials.
        k: number of successes.
        p: probability of success on a single trial.
    """
    return combination(n, k) * (p ** k) * ((1 - p) ** (n - k))

# Normal Distribution: f(x) = [1 / (σ × √(2π))] × e^[-((x - μ)^2) / (2σ^2)]
def normal_distribution(x, mean, std_dev):
    """Return the normal probability density at ``x``.

    Args:
        x: point at which to evaluate the density.
        mean: distribution mean (μ).
        std_dev: distribution standard deviation (σ); zero raises
            ``ZeroDivisionError``.

    Uses ``math.exp`` and ``math.pi`` instead of truncated constants so the
    result is accurate to floating-point precision.
    """
    import math

    exponent = -((x - mean) ** 2) / (2 * (std_dev ** 2))
    coefficient = 1 / (std_dev * math.sqrt(2 * math.pi))
    return coefficient * math.exp(exponent)

# Bernoulli Distribution
def bernoulli_pmf(k, p):
    """Return P(X = k) for a Bernoulli trial with success probability ``p``.

    ``k == 1`` gives ``p``, ``k == 0`` gives ``1 - p``, anything else gives 0.
    """
    if k not in (0, 1):
        return 0
    return p if k == 1 else 1 - p

# Poisson Distribution: P(X = k) = (e^(-λ) * λ^k) / k!
def poisson_pmf(k, mu):
    """Return the Poisson probability of observing exactly ``k`` events.

    Args:
        k: event count; negative or non-integer values are outside the
            distribution's support and return 0.
        mu: expected number of events (λ).

    Uses ``math.exp`` rather than a truncated constant for e.
    """
    import math

    if k < 0 or k != int(k):
        return 0
    return (mu ** k) * math.exp(-mu) / math.factorial(int(k))

# Exponential Distribution: f(x; λ) = λ * e^(-λx), for x ≥ 0
def exponential_pdf(x, scale):  # scale = 1/lambda
    """Return the exponential density at ``x``; 0 for negative ``x``.

    ``scale`` is the mean of the distribution (1 / λ). Uses ``math.exp``
    rather than a truncated constant for e.
    """
    import math

    if x >= 0:
        return (1 / scale) * math.exp(-x / scale)
    return 0

def exponential_cdf(x, scale):
    """Return P(X <= x) for the exponential distribution; 0 for negative ``x``.

    ``scale`` is the mean of the distribution (1 / λ). The value
    ``1 - e^(-x / scale)`` is computed with ``math.expm1`` so small
    ``x / scale`` ratios do not lose precision to cancellation.
    """
    import math

    if x >= 0:
        return -math.expm1(-x / scale)
    return 0

# Geometric Distribution
def geometric_pmf(k, p):
    """Return the probability that the first success occurs on trial ``k``.

    ``p`` is the per-trial success probability; ``k`` below 1 returns 0.
    """
    if k < 1:
        return 0
    failure = 1 - p
    return failure ** (k - 1) * p

def geometric_cdf(k, p):
    """Return the probability that the first success occurs within ``k`` trials.

    Equals one minus the chance of ``k`` straight failures; ``k`` below 1
    returns 0.
    """
    if k < 1:
        return 0
    all_failures = (1 - p) ** k
    return 1 - all_failures

# Beta Distribution (PDF only; the CDF requires numerical integration)
def beta_pdf(x, alpha, beta_param):
    """Return the Beta(alpha, beta_param) density at ``x``; 0 outside [0, 1].

    The normalising constant is B(a, b) = Γ(a)Γ(b) / Γ(a + b). It is
    evaluated with ``math.gamma``, so non-integer shape parameters work too.
    """
    import math

    if 0 <= x <= 1:
        B = math.gamma(alpha) * math.gamma(beta_param) / math.gamma(alpha + beta_param)
        return (x ** (alpha - 1)) * ((1 - x) ** (beta_param - 1)) / B
    return 0

# Gamma Distribution (PDF)
def gamma_pdf(x, shape, scale):
    """Return the Gamma(shape, scale) density at ``x``; 0 for negative ``x``.

    f(x) = x^(shape - 1) * e^(-x / scale) / (scale^shape * Γ(shape)).
    ``math.gamma`` supplies Γ(shape), which equals (shape - 1)! for integer
    shapes and also covers non-integer shapes.
    """
    import math

    if x >= 0:
        return (x ** (shape - 1)) * math.exp(-x / scale) / (scale ** shape * math.gamma(shape))
    return 0

# Multinomial Distribution
def multinomial_pmf(x, n, p):
    """Return the multinomial probability of the outcome counts ``x``.

    Args:
        x: counts per category.
        n: total number of trials.
        p: probability of each category.

    Returns 0 when the counts do not add up to ``n`` or when ``x`` and
    ``p`` differ in length.
    """
    if not (sum(x) == n and len(x) == len(p)):
        return 0
    repeats = 1      # product of the factorials of each count
    likelihood = 1   # product of p_i ** x_i
    for count, chance in zip(x, p):
        repeats *= factorial(count)
        likelihood *= chance ** count
    return factorial(n) / repeats * likelihood

# Examples
# Each entry pairs a caption with a deferred call, so every value is computed
# only when its own line is printed.
for _caption, _compute in (
    ("Uniform P(1 outcome, n=6):", lambda: uniform_probability(6)),
    ("Binomial P(k=2, n=3, p=0.5):", lambda: binomial_probability(3, 2, 0.5)),
    ("Normal f(x=1, mu=0, sigma=1):", lambda: normal_distribution(1, 0, 1)),
    ("Bernoulli P(k=1, p=0.6):", lambda: bernoulli_pmf(1, 0.6)),
    ("Poisson P(k=2, mu=3):", lambda: poisson_pmf(2, 3)),
    ("Exponential f(x=1, scale=2):", lambda: exponential_pdf(1, 2)),
    ("Geometric P(k=3, p=0.3):", lambda: geometric_pmf(3, 0.3)),
    ("Beta f(x=0.4, alpha=2, beta=5):", lambda: beta_pdf(0.4, 2, 5)),
    ("Gamma f(x=3, shape=2, scale=2):", lambda: gamma_pdf(3, 2, 2)),
    ("Multinomial P([1,1,3], n=5, p=[0.2,0.3,0.5]):", lambda: multinomial_pmf([1, 1, 3], 5, [0.2, 0.3, 0.5])),
):
    print(_caption, _compute())
```

### PMF,PDF and CDF

**Probability Mass Function (PMF)**

``` python
def calculate_pmf(data, x):
    """Return the empirical probability of ``x``: its share of the items in ``data``."""
    matches = [item for item in data if item == x]  # occurrences of x
    return len(matches) / len(data)

# Example dataset
data = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]  # Discrete values
x = 3  # Value whose probability is wanted

pmf_at_x = calculate_pmf(data, x)
print("Manual PMF for X=3:", pmf_at_x)
```

**Probability Density Function (PDF)**

``` python
def power(base, exp):
    """Return ``base`` raised to the integer power ``exp``.

    Uses repeated multiplication; a negative ``exp`` returns the reciprocal.
    """
    magnitude = 1
    for _ in range(abs(exp)):
        magnitude = magnitude * base
    if exp < 0:
        return 1 / magnitude  # Handle negative exponents
    return magnitude

def factorial(n):
    """Return ``n!`` for an integer ``n`` (1 for any ``n`` below 2)."""
    product = 1
    # Multiplying by 1 changes nothing, so start at 2.
    for factor in range(2, n + 1):
        product *= factor
    return product

def sqrt(n, precision=10):
    """Return the square root of ``n`` using the Babylonian (Newton) method.

    Args:
        n: non-negative number.
        precision: minimum number of refinement iterations to run. More
            iterations are performed afterwards if the estimate has not
            settled yet (needed for very large or very small ``n``).

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("sqrt() is not defined for negative numbers")
    if n == 0:
        return 0.0  # avoids the 0 / 0 division in the update step

    x = n
    for _ in range(precision):
        x = 0.5 * (x + n / x)  # Babylonian method

    # Keep refining until successive estimates agree to within a few ulps.
    # The loop is bounded so non-finite input cannot spin forever.
    for _ in range(2000):
        refined = 0.5 * (x + n / x)
        if abs(refined - x) <= 1e-15 * refined:
            break
        x = refined
    return x

def exp_manual(x, terms=20):
    """Return an approximation of ``e ** x`` from its Taylor series.

    Args:
        x: exponent.
        terms: number of series terms to use (the constant term included).

    A truncated Taylor series is only accurate for small ``|x|``, so ``x``
    is first halved until ``|x| <= 0.5``; the series result is then squared
    once per halving, using ``e**x == (e**(x/2))**2``.
    """
    reduced = x
    halvings = 0
    # Bounded so an infinite input cannot loop forever.
    while abs(reduced) > 0.5 and halvings < 2000:
        reduced /= 2
        halvings += 1

    total = 1  # e^0 = 1
    term = 1
    for n in range(1, terms):
        term *= reduced / n  # builds reduced**n / n! incrementally
        total += term

    for _ in range(halvings):
        total *= total
    return total

def normal_pdf(x, mean, std):
    """Return the normal density at ``x`` using the hand-written helpers.

    Args:
        x: point at which to evaluate the density.
        mean: distribution mean.
        std: distribution standard deviation.

    Relies on the ``sqrt`` and ``exp_manual`` helpers defined alongside it.
    """
    pi = 3.141592653589793

    coeff = 1 / (std * sqrt(2 * pi))
    # e raised to -(x - mean)^2 / (2 * std^2)
    exponential_factor = exp_manual(-((x - mean) ** 2) / (2 * std ** 2))
    return coeff * exponential_factor

# Example values
# NOTE(review): `mean` is bound to a number here. If this snippet shares a
# namespace with the earlier `mean()` helper, that function is replaced, and
# later code calling `mean(...)` will fail — confirm how the notes are run.
mean = 50
std = 10
x_value = 55  # Point at which the density is evaluated

print("Manual Normal PDF for X=55:", normal_pdf(x_value, mean, std))
```

**Cumulative Distribution Function (CDF)**

``` python
def erf_manual(x, terms=10):
    """Return an approximation of the error function erf(x).

    Uses the Maclaurin series
    erf(x) = 2/√π · Σ (-1)^n · x^(2n+1) / (n! · (2n+1)).

    Args:
        x: evaluation point.
        terms: minimum number of series terms. Further terms are added
            until they stop contributing, because a fixed short series is
            badly wrong once ``|x|`` exceeds roughly 2.

    For ``|x| >= 4`` the result is saturated to ±1 (the true value differs
    from ±1 by less than 2e-8 there), which avoids the cancellation error
    the alternating series suffers for large arguments.
    """
    pi = 3.141592653589793
    if abs(x) >= 4:
        return 1.0 if x > 0 else -1.0

    signed_power = x  # holds (-1)^n * x^(2n+1) / n!
    series = x        # n = 0 term
    for n in range(1, 200):
        signed_power *= -x * x / n
        contribution = signed_power / (2 * n + 1)
        series += contribution
        if n + 1 >= terms and abs(contribution) < 1e-17:
            break

    return (2 / pi ** 0.5) * series

def normal_cdf(x, mean, std):
    """Return P(X <= x) for a normal distribution with the given mean and std.

    Uses the identity CDF(x) = 0.5 * (1 + erf((x - mean) / (std * √2))),
    built on the ``erf_manual`` and ``sqrt`` helpers.
    """
    scaled = (x - mean) / (std * sqrt(2))
    return 0.5 * (1 + erf_manual(scaled))

# Reuses x_value, mean and std from the PDF example.
cdf_at_x = normal_cdf(x_value, mean, std)
print("Manual Normal CDF for X=55:", cdf_at_x)
```

### Bias/Variance Tradeoff

``` python
def bias_variance_simulation(true_value, samples):
    """Return ``(bias, variance)`` of the sample-mean estimator.

    Args:
        true_value: the quantity the estimator is meant to recover.
        samples: a sequence of samples, each a non-empty sequence of numbers.

    Returns:
        bias: average of the sample means minus ``true_value``.
        variance: sample variance (``n - 1`` denominator) of the sample means.

    The averages are computed inline so the function keeps working when the
    name ``mean`` has been rebound to a number elsewhere in the session.
    """
    n = len(samples)
    sample_means = [sum(sample) / len(sample) for sample in samples]
    estimator_mean = sum(sample_means) / n
    bias = estimator_mean - true_value
    variance = sum((m - estimator_mean) ** 2 for m in sample_means) / (n - 1)
    return bias, variance

# Example
true_mean = 10
samples = [[9, 10, 11], [8, 9, 12], [10, 11, 12]]

simulation_result = bias_variance_simulation(true_mean, samples)
bias, var = simulation_result
print(f"Bias: {bias}, Variance: {var}")
```

### Vector Calculus

``` python
def derivative_univariate(func, x, h=0.0001):
    """Approximate ``func'(x)`` with a forward difference of step ``h``."""
    rise = func(x + h) - func(x)
    return rise / h

def partial_derivative_x(func, x, y, h=0.0001):
    """Approximate ∂func/∂x at ``(x, y)`` with a forward difference of step ``h``."""
    rise = func(x + h, y) - func(x, y)
    return rise / h

def gradient(func, x, y, h=0.0001):
    """Return ``[∂f/∂x, ∂f/∂y]`` at ``(x, y)`` using forward differences of step ``h``."""
    slope_x = partial_derivative_x(func, x, y, h)
    rise_y = func(x, y + h) - func(x, y)
    return [slope_x, rise_y / h]

# Example
def f_univariate(x):
    """Return x squared."""
    return pow(x, 2)

def f_bivariate(x, y):
    """Return x squared plus y squared."""
    return pow(x, 2) + pow(y, 2)

slope_at_2 = derivative_univariate(f_univariate, 2)
print("Univariate Derivative f(x)=x^2 at x=2:", slope_at_2)

gradient_at_2_3 = gradient(f_bivariate, 2, 3)
print("Gradient f(x,y)=x^2+y^2 at (2,3):", gradient_at_2_3)
```

### Central Limit Theorem

``` python
def clt_simulation(population, sample_size, num_samples, seed=None):
    """Simulate the sampling distribution of the mean.

    Draws ``num_samples`` random samples of ``sample_size`` items each
    (with replacement) from ``population`` and summarises their means.

    Args:
        population: sequence of numbers to sample from.
        sample_size: number of draws per sample.
        num_samples: number of samples to draw (at least 2, because the
            variance uses an ``n - 1`` denominator).
        seed: optional seed for reproducible results.

    Returns:
        ``(mean_of_sample_means, sample_variance_of_sample_means)``.
    """
    import random

    rng = random.Random(seed)
    pool = list(population)
    sample_means = [
        sum(rng.choices(pool, k=sample_size)) / sample_size
        for _ in range(num_samples)
    ]
    centre = sum(sample_means) / num_samples
    spread = sum((m - centre) ** 2 for m in sample_means) / (num_samples - 1)
    return centre, spread

# Example
population = [1, 2, 3, 4, 5, 6]
clt_result = clt_simulation(population, 10, 100)
print("CLT Mean, Variance:", clt_result)
```

### Sampling

``` python
def simple_random_sample(population, sample_size, seed=None):
    """Return ``sample_size`` items drawn at random without replacement.

    Args:
        population: sequence of items to draw from.
        sample_size: number of items to draw.
        seed: optional seed for reproducible results.

    Raises:
        ValueError: if ``sample_size`` exceeds the population size
            (raised by ``random.Random.sample``).
    """
    import random

    rng = random.Random(seed)
    return rng.sample(list(population), sample_size)

# Example
population = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
drawn = simple_random_sample(population, 3)
print("Random Sample:", drawn)
```

### Optimization Techniques

``` python
def gradient_descent(func, start_x, learning_rate=0.1, iterations=100):
    """Minimise ``func`` by repeatedly stepping against its numerical slope.

    Starts at ``start_x`` and performs ``iterations`` updates of size
    ``learning_rate`` times the slope from ``derivative_univariate``.
    Returns the final position.
    """
    current = start_x
    for _step in range(iterations):
        current -= learning_rate * derivative_univariate(func, current)
    return current

# Example
def f(x):
    """Return x squared (minimum at x = 0)."""
    return pow(x, 2)

best_x = gradient_descent(f, 2)
print("Optimal x for f(x)=x^2:", best_x)
```

## Libraries

### Probability Distributions \*

``` python
from scipy import stats
# General argument order in SciPy
# Discrete distributions (PMF)   -> (k, n, p, loc)
# Continuous distributions (PDF) -> (x, shape, scale, loc)

# Probability Distributions using SciPy
# Each entry pairs a caption with a deferred call so values are computed one
# at a time, immediately before they are printed.
for _caption, _compute in (
    ("Uniform P(X=5, a=0, b=10):", lambda: stats.uniform.pdf(5, loc=0, scale=10)),
    ("Binomial P(k=2, n=3, p=0.5):", lambda: stats.binom.pmf(2, 3, 0.5)),
    ("Normal f(x=1, mu=0, sigma=1):", lambda: stats.norm.pdf(1, 0, 1)),
    ("Bernoulli P(k=1, p=0.6):", lambda: stats.bernoulli.pmf(1, 0.6)),
    ("Poisson P(k=2, mu=3):", lambda: stats.poisson.pmf(2, 3)),
    ("Exponential f(x=1, scale=2):", lambda: stats.expon.pdf(1, scale=2)),
    ("Geometric P(k=3, p=0.3):", lambda: stats.geom.pmf(3, 0.3)),
    ("Beta f(x=0.4, alpha=2, beta=5):", lambda: stats.beta.pdf(0.4, 2, 5)),
    ("Gamma f(x=3, shape=2, scale=2):", lambda: stats.gamma.pdf(3, 2, scale=2)),
    ("Multinomial P([1,1,3], n=5, p=[0.2,0.3,0.5]):", lambda: stats.multinomial.pmf([1, 1, 3], n=5, p=[0.2, 0.3, 0.5])),
):
    print(_caption, _compute())
```

### skewness and kurtosis\*

``` python
from scipy import stats
import numpy as np

data = np.array([5, 2, 3, 6, 74, 9, 86, 21, 22, 385])

# SciPy defaults are used for both measures.
skew_val, kurt_val = stats.skew(data), stats.kurtosis(data)

for _label, _value in (("Skewness:", skew_val), ("Kurtosis:", kurt_val)):
    print(_label, _value)
```

**Plot**

``` python
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew

# Sample data: 1000 draws from a gamma distribution (shape 2, scale 2)
data = np.random.gamma(2., 2., 1000)  # Example dataset

# Calculate kurtosis and skewness
data_kurtosis = kurtosis(data)
data_skewness = skew(data)

# Plot a histogram with a kernel-density overlay; the title reports both measures.
histogram_options = dict(kde=True, color='blue', alpha=0.6, bins=30)
sns.histplot(data, **histogram_options)
plt.title(f"Distribution with Skewness: {data_skewness:.2f} and Kurtosis: {data_kurtosis:.2f}")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()
```

### hypothesis \*

### **1️⃣ One-Sample t-Test** (Compare sample mean with population mean)

``` python
from scipy import stats

# One-sample t-test: is the sample mean different from the population mean?
data = [22, 24, 27, 23, 26, 30, 28, 25, 29, 24]
population_mean = 25

t_statistic, p_value = stats.ttest_1samp(data, population_mean)

print('T-statistic:', t_statistic)
print('p-value:', p_value)

# Reject the null hypothesis when the p-value falls below the significance level.
alpha = 0.05
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis.')
```

------------------------------------------------------------------------

### **2️⃣ Independent t-Test** (Compare means of two independent groups)

``` python
from scipy import stats

# Independent two-sample t-test (SciPy's default assumes equal variances).
sample1 = [22, 24, 27, 23, 26]
sample2 = [30, 28, 25, 29, 24]

t_statistic, p_value = stats.ttest_ind(sample1, sample2)

print('T-statistic:', t_statistic)
print('p-value:', p_value)

# Reject the null hypothesis when the p-value falls below the significance level.
alpha = 0.05
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

------------------------------------------------------------------------

### **3️⃣ Paired t-Test** (Compare before and after scores of the same group)

``` python
from scipy import stats

# Paired t-test on before/after measurements of the same subjects.
# ttest_rel(before, after) works on (before - after), so a negative statistic
# means the "after" values are larger on average.
before = [22, 24, 28, 23, 26]
after = [30, 28, 25, 29, 24]

t_statistic, p_value = stats.ttest_rel(before, after)

print('T-statistic:', t_statistic)
print('p-value:', p_value)

# Reject the null hypothesis when the p-value falls below the significance level.
alpha = 0.05
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

------------------------------------------------------------------------

### **4️⃣ Chi-Square Test** (Check independence between categorical variables)

``` python
from scipy.stats import chi2_contingency

# Contingency table (rows & columns)
data = [[50, 30], [20, 40]]

# For a 2x2 table chi2_contingency applies Yates' continuity correction by
# default, so the statistic is smaller than the uncorrected Pearson value.
chi2_stat, p_value, dof, expected = chi2_contingency(data)

print('Chi-Square statistic:', chi2_stat)
print('p-value:', p_value)

# Reject the null hypothesis when the p-value falls below the significance level.
alpha = 0.05
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

------------------------------------------------------------------------

### **5️⃣ One-Way ANOVA** (Compare means of 3+ independent groups)

``` python
from scipy.stats import f_oneway

# One-way ANOVA: do the three independent groups share the same mean?
group1 = [70, 75, 80, 85, 90]
group2 = [60, 65, 70, 75, 80]
group3 = [50, 55, 60, 65, 70]

f_statistic, p_value = f_oneway(group1, group2, group3)

print('F-statistic:', f_statistic)
print('p-value:', p_value)

# Reject the null hypothesis when the p-value falls below the significance level.
alpha = 0.05
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

------------------------------------------------------------------------

### **6️⃣ Two-Way ANOVA** (Check effects of 2 independent variables, using statsmodels)

``` python
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Sample Data: Two factors (Group & Condition)
# Scores are listed group by group (A, B, C); within each group the first
# three belong to the "Short" condition and the next three to "Long".
scores = [88, 85, 87, 75, 78, 80, 90, 92, 89, 70, 72, 68, 80, 78, 82, 60, 63, 59]
group_labels = [label for label in "ABC" for _ in range(6)]
condition_labels = (["Short"] * 3 + ["Long"] * 3) * 3

data = {
    "Score": scores,
    "Group": group_labels,
    "Condition": condition_labels,
}

df = pd.DataFrame(data)

# Fit Two-Way ANOVA model (both main effects plus their interaction)
model = smf.ols('Score ~ C(Group) + C(Condition) + C(Group):C(Condition)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)  # Type 2 ANOVA Table

print(anova_table)
```

### **7️⃣ One-Sample Z-Test**

``` python
from scipy import stats
import math

# One-sample z-test with a known population standard deviation.
sample_mean = 50
population_mean = 50
population_sd = 3
n = 40

z_score = (sample_mean - population_mean) / (population_sd / math.sqrt(n))

# Two-sided p-value. The survival function sf(z) equals 1 - cdf(z) but stays
# accurate far out in the tail, where 1 - cdf(z) rounds to zero.
p_value = 2 * stats.norm.sf(abs(z_score))

alpha = 0.05

# Decision
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

### **8️⃣ Two-Sample Z-Test**

``` python
from scipy import stats
import math

# Two-sample z-test with known standard deviations.
mean1 = 53
mean2 = 47
sd1 = 3
sd2 = 4

n1 = 40
n2 = 40

# Difference in means divided by its standard error
z_score = (mean1 - mean2) / math.sqrt((sd1 ** 2) / n1 + (sd2 ** 2) / n2)

# Two-sided p-value. The survival function sf(z) equals 1 - cdf(z) but stays
# accurate far out in the tail, where 1 - cdf(z) loses nearly all precision.
p_value = 2 * stats.norm.sf(abs(z_score))

# Significance level
alpha = 0.05

# Decision
if p_value < alpha:
    print('Reject the null hypothesis')
else:
    print('Fail to reject the null hypothesis')
```

### Descriptive Stats and **Variability-**

``` python
import numpy as np

# Descriptive Statistics and Variability using NumPy
data = np.array([5, 2, 3, 6, 74, 9, 86, 21, 22, 385])

mean_val = np.mean(data)
median_val = np.median(data)

# Mode: the distinct value with the highest count (first one wins on ties).
distinct_values, value_counts = np.unique(data, return_counts=True)
mode_val = distinct_values[np.argmax(value_counts)]

range_val = np.max(data) - np.min(data)
variance_val = np.var(data, ddof=1)  # Sample variance
std_dev_val = np.std(data, ddof=1)   # Sample standard deviation

# Quartiles from NumPy's default percentile interpolation
q1, q3 = np.percentile(data, [25, 75])
iqr_val = q3 - q1

for _label, _value in (
    ("Mean:", mean_val),
    ("Median:", median_val),
    ("Mode:", mode_val),
    ("Range:", range_val),
    ("Variance:", variance_val),
    ("Standard Deviation:", std_dev_val),
):
    print(_label, _value)
print(f"IQR: {iqr_val}, Q1: {q1}, Q3: {q3}")
```

### Covariance and Correlation -

``` python
import numpy as np
from scipy.stats import spearmanr, kendalltau

# Covariance and Correlation using NumPy
X = np.array([3, 5, 7, 9, 11])
Y = np.array([2, 4, 6, 8, 10])

# np.cov returns a 2x2 matrix; the off-diagonal entry is cov(X, Y).
cov_matrix = np.cov(X, Y, ddof=1)  # Sample covariance
cov_xy = cov_matrix[0, 1]
pop_cov_xy = np.cov(X, Y, ddof=0)[0, 1]  # Population covariance
pearson_corr = np.corrcoef(X, Y)[0, 1]

# Rank-based correlations; the second returned value (the p-value) is unused.
spearman_corr, _ = spearmanr(X, Y)
kendall_corr, _ = kendalltau(X, Y)

for _label, _value in (
    ("Sample Covariance:", cov_xy),
    ("Population Covariance:", pop_cov_xy),
    ("Pearson Correlation:", pearson_corr),
    ("Spearman Correlation:", spearman_corr),
    ("Kendall Correlation:", kendall_corr),
):
    print(_label, _value)
```

### PMF,PDF and CDF

``` python
from scipy import stats
import numpy as np

# PMF: relative frequency of each distinct value
data = np.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
unique, counts = np.unique(data, return_counts=True)
pmf = counts / len(data)
pmf_for_3 = pmf[unique == 3][0]
print("PMF for X=3:", pmf_for_3)

# PDF and CDF (Normal Distribution)
mean, std = 50, 10
x = 55
density_at_x = stats.norm.pdf(x, mean, std)
cumulative_at_x = stats.norm.cdf(x, mean, std)
print("Normal PDF for X=55:", density_at_x)
print("Normal CDF for X=55:", cumulative_at_x)
```

### Bias/Variance Tradeoff

``` python
from sklearn.metrics import mean_squared_error
import numpy as np

# Bias and Variance (Simplified Example)
# The estimator is the sample mean: bias is how far its average lies from the
# true value, variance is how much it varies from sample to sample.
true_value = 10
samples = [np.array([9, 10, 11]), np.array([8, 9, 12]), np.array([10, 11, 12])]
means = [np.mean(s) for s in samples]

average_estimate = np.mean(means)
bias = average_estimate - true_value
variance = np.var(means, ddof=1)
print(f"Bias: {bias}, Variance: {variance}")
```

### Vector Calculus

``` python
import numpy as np
from scipy.optimize import approx_fprime

# Finite-difference step shared by both examples
STEP = 1e-6

# Univariate Derivative
def f(x):
    return x ** 2

# approx_fprime expects a function of an array, so wrap the scalar function.
univariate_derivative = approx_fprime(np.array([2.0]), lambda v: f(v[0]), STEP)[0]
print("Univariate Derivative at x=2:", univariate_derivative)

# Gradient (Numerical)
def f_bivariate(x):
    return x[0] ** 2 + x[1] ** 2

x0 = np.array([2.0, 3.0])
# Forward-difference estimate of [df/dx, df/dy] at x0
grad = approx_fprime(x0, f_bivariate, STEP)
print("Gradient at (2,3):", grad)
```

### Probability Basics

``` python
import numpy as np
from scipy import stats

# Probability Rules and Expectation
p_A, p_B, p_A_and_B = 0.4, 0.3, 0.1

p_either = p_A + p_B - p_A_and_B  # addition rule
print("P(A or B):", p_either)

p_both_independent = p_A * p_B  # multiplication rule for independent events
print("P(A and B, independent):", p_both_independent)

# Expected value as a probability-weighted average
values = np.array([0, 1, 2])
probs = np.array([0.5, 0.3, 0.2])
expected_value = np.average(values, weights=probs)
print("Expectation:", expected_value)
```

### Central Limit Theorem

``` python
import numpy as np

# CLT Simulation
population = np.array([1, 2, 3, 4, 5, 6])
sample_size, num_samples = 10, 100

# Draw each sample with replacement and record its mean.
sample_means = []
for _ in range(num_samples):
    draw = np.random.choice(population, sample_size)
    sample_means.append(np.mean(draw))

print("CLT Mean, Variance:", np.mean(sample_means), np.var(sample_means, ddof=1))
```

### Sampling

``` python
import numpy as np

# Random Sampling
population = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# replace=False means no element can be picked twice.
sample = np.random.choice(population, size=3, replace=False)
print("Random Sample:", sample)
```

### Optimization Techniques

``` python
from scipy.optimize import minimize

# Numerical minimisation (Library-Based)
def f(x):
    """Return x squared (minimum at x = 0)."""
    return pow(x, 2)

result = minimize(f, x0=2)
best_x = result.x[0]  # result.x holds the located minimiser as an array
print("Optimal x for f(x)=x^2:", best_x)
```