# ---------------------------------------------
# QUESTION 1: Type I and Type II Errors
# ---------------------------------------------
# Type I Error (False Positive):
#   - The null hypothesis is TRUE, but we mistakenly reject it.
#   - Symbol: α
# Type II Error (False Negative):
#   - The null hypothesis is FALSE, but we mistakenly fail to reject it.
#   - Symbol: β
# Impact:
#   - Type I → unnecessary actions / false alarms
#   - Type II → important issues are missed

# ---------------------------------------------
# QUESTION 2: What is P-value?
# ---------------------------------------------
# P-value = probability of obtaining results at least as extreme as
#           those observed, ASSUMING the null hypothesis is TRUE.
# Interpretation:
#   - p < α → Reject H0 (evidence against null)
#   - p >= α → Fail to reject H0

# ---------------------------------------------
# QUESTION 3: Z-test vs T-test
# ---------------------------------------------
# Z-test:
#   - Population SD known
#   - Large sample (n > 30)
# T-test:
#   - Population SD unknown
#   - Small sample (n < 30)
# Z-test ≈ T-test when n large

# ---------------------------------------------
# QUESTION 4: Confidence Interval & Margin of Error
# ---------------------------------------------
# Confidence Interval (CI):
#   range that likely contains the population mean.
# CI = mean ± margin_of_error
# Margin of Error large → CI wide
# Margin of Error small → CI narrow

# ---------------------------------------------
# QUESTION 5: ANOVA Purpose & Assumptions
# ---------------------------------------------
# Purpose:
#   Compare means of 3 or more groups.
# Assumptions:
#   - Normality
#   - Equal variances
#   - Independent samples
# ANOVA extends t-test from 2 groups → multiple groups.

# ---------------------------------------------
# QUESTION 7: Binomial Simulation + Histogram
# ---------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

# Simulate 1000 experiments; each value is the number of successes in
# 10 trials with success probability 0.5.
data = np.random.binomial(n=10, p=0.5, size=1000)

# The possible outcomes are the 11 integers 0..10. Ten equal-width bins over
# the observed range do not line up with them (e.g. 9 and 10 end up sharing
# the closed last bin), so use unit-width bins centred on each integer.
plt.hist(data, bins=np.arange(-0.5, 11.5, 1), edgecolor='black')
plt.title("Binomial Distribution (n=10, p=0.5)")
plt.xlabel("Values")
plt.ylabel("Frequency")
plt.show()

# ---------------------------------------------
# QUESTION 8: Central Limit Theorem Example
# ---------------------------------------------
# Population to sample from: 100000 draws from an exponential distribution,
# which is clearly not normal.
data_non_normal = np.random.exponential(scale=1, size=100000)

# Mean of each of 500 random samples of 50 observations from that population.
sample_means = [
    np.mean(np.random.choice(data_non_normal, size=50))
    for _ in range(500)
]

# Histogram of the sample means, to show their distribution.
plt.hist(sample_means, bins=30, edgecolor="black")
plt.xlabel("Sample Means")
plt.ylabel("Frequency")
plt.title("Central Limit Theorem Demonstration")
plt.show()

# ---------------------------------------------
# QUESTION 9: Confidence Interval Function
# ---------------------------------------------
from scipy.stats import norm

def confidence_interval(data, confidence=0.95):
    """Print, plot and return a z-based confidence interval for the mean.

    The interval is ``mean ± z * sd / sqrt(n)``, where ``sd`` is the sample
    standard deviation (``ddof=1``) and ``z`` is the two-sided standard
    normal critical value for ``confidence``.

    Args:
        data: Sequence or array of numeric observations; at least two
            values are required so the sample standard deviation exists.
        confidence: Confidence level, strictly between 0 and 1.

    Returns:
        A tuple ``(lower, upper)`` with the bounds of the interval.

    Raises:
        ValueError: If ``data`` has fewer than two observations or
            ``confidence`` is not strictly between 0 and 1.
    """
    values = np.asarray(data)
    # Count every element: np.mean/np.std below work on the flattened data,
    # so len() would disagree with them for multi-dimensional input.
    n = values.size

    if n < 2:
        raise ValueError(
            "data must contain at least two observations to estimate "
            "the sample standard deviation"
        )
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")

    mean = np.mean(values)
    sd = np.std(values, ddof=1)

    # Two-sided critical value: leaves (1 - confidence) / 2 in each tail.
    z = norm.ppf((1 + confidence) / 2)
    se = sd / np.sqrt(n)

    lower = mean - z * se
    upper = mean + z * se

    print(f"Mean: {mean}")
    print(f"{confidence*100}% Confidence Interval: ({lower}, {upper})")

    # Mark the interval bounds (red) and the sample mean (blue).
    plt.axvline(lower, color='red', label="Lower Bound")
    plt.axvline(upper, color='red', label="Upper Bound")
    plt.axvline(mean, color='blue', label="Mean")
    plt.legend()
    plt.title("Confidence Interval")
    plt.show()

    return lower, upper

# Demo: interval for 100 draws from the standard normal distribution,
# using the default confidence level.
data_example = np.random.standard_normal(100)
confidence_interval(data_example)

# ---------------------------------------------
# QUESTION 10: Chi-Square Goodness-of-Fit Test
# ---------------------------------------------
from scipy.stats import chisquare

# Observed category counts and the counts expected under the null
# hypothesis (both sets total 100).
observed = np.array([30, 50, 20])
expected = np.array([33, 44, 23])

# Goodness-of-fit test of the observed counts against the expected counts.
chi_stat, p_value = chisquare(f_obs=observed, f_exp=expected)

print("Chi-square Statistic:", chi_stat)
print("P-value:", p_value)

# Decide at the 5% significance level.
print(
    "Reject Null: Observed distribution != Expected"
    if p_value < 0.05
    else "Fail to Reject Null: Observed distribution ≈ Expected"
)

