<a href="https://colab.research.google.com/github/kartikmane45/physics_wallah_skills_assignments/blob/main/stats_mod_adv2_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 **Write a Python program to perform a Z-test for comparing a sample mean to a known population mean and
interpret the results**

In [None]:
import numpy as np
from scipy.stats import norm

def z_test(sample, population_mean, population_std, alpha=0.05):
    """Run a two-tailed one-sample Z-test and print the outcome.

    Prints the sample mean, the Z-score, the two-sided p-value and a
    reject / fail-to-reject verdict at significance level ``alpha``.

    Args:
        sample: Sequence of numeric observations (must not be empty).
        population_mean: Mean under the null hypothesis.
        population_std: Known population standard deviation (must be > 0).
        alpha: Significance level used for the verdict.

    Raises:
        ValueError: If ``sample`` is empty or ``population_std`` is not
            positive.
    """
    n = len(sample)
    if n == 0:
        raise ValueError("sample must contain at least one observation")
    if population_std <= 0:
        raise ValueError("population_std must be positive")

    sample_mean = np.mean(sample)
    se = population_std / np.sqrt(n)  # Standard error of the mean

    z_score = (sample_mean - population_mean) / se
    # norm.sf gives the upper-tail probability directly. Computing it as
    # 1 - norm.cdf(...) cancels to exactly 0.0 once |z| is large, which
    # would report a p-value of zero.
    p_value = 2 * norm.sf(abs(z_score))  # Two-tailed test

    print(f"Sample Mean: {sample_mean}")
    print(f"Z-score: {z_score}")
    print(f"P-value: {p_value}")

    if p_value < alpha:
        print("Result: Reject the null hypothesis (significant difference)")
    else:
        print("Result: Fail to reject the null hypothesis (no significant difference)")

# Example usage: ten observations tested against a population whose mean (50)
# and standard deviation (3) are treated as known.
population_mean, population_std = 50, 3
sample_data = [52, 48, 50, 47, 53, 49, 51, 50, 48, 52]

z_test(sample_data, population_mean, population_std)


**Simulate random data to perform hypothesis testing and calculate the corresponding P-value using Python**

In [None]:
import numpy as np
from scipy.stats import norm

# Null-hypothesis parameters; the simulated data are drawn from this same
# distribution.
population_mean, population_std = 50, 5

# Fixed seed so the 30 simulated observations are reproducible.
np.random.seed(42)
sample = np.random.normal(loc=50, scale=5, size=30)

standard_error = population_std / np.sqrt(len(sample))
z_score = (np.mean(sample) - population_mean) / standard_error
upper_tail = 1 - norm.cdf(abs(z_score))
p_value = 2 * upper_tail  # two-tailed

print(f"Z-score: {z_score:.4f}, P-value: {p_value:.4f}")


**Implement a one-sample Z-test using Python to compare the sample mean with the population mean**

In [None]:
from scipy.stats import norm

def one_sample_z_test(sample, population_mean, population_std):
    """Return the Z-score and two-sided p-value of a one-sample Z-test.

    Args:
        sample: Sequence of numeric observations (must not be empty).
        population_mean: Mean under the null hypothesis.
        population_std: Known population standard deviation (must be > 0).

    Returns:
        Tuple ``(z_score, p_value)``.

    Raises:
        ValueError: If ``sample`` is empty or ``population_std`` is not
            positive.
    """
    n = len(sample)
    if n == 0:
        raise ValueError("sample must contain at least one observation")
    if population_std <= 0:
        raise ValueError("population_std must be positive")

    sample_mean = np.mean(sample)
    se = population_std / np.sqrt(n)
    z_score = (sample_mean - population_mean) / se
    # Survival function instead of 1 - cdf: the subtraction rounds to
    # exactly 0.0 for large |z|, losing the tail probability.
    p_value = 2 * norm.sf(abs(z_score))

    return z_score, p_value

# Example Usage: 30 simulated draws tested against mean 50 and sigma 5,
# the same values used to generate them.
sample_data = np.random.normal(loc=50, scale=5, size=30)
z, p = one_sample_z_test(sample_data, population_mean=50, population_std=5)
print(f"Z-score: {z:.4f}, P-value: {p:.4f}")


**Perform a two-tailed Z-test using Python and visualize the decision region on a plot**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# Two-tailed test: alpha is split evenly between the two tails, so the
# critical value is the (1 - alpha/2) quantile of the standard normal.
alpha = 0.05
critical_value = norm.ppf(1 - alpha/2)

x = np.linspace(-4, 4, 1000)
y = norm.pdf(x)

# Points lying beyond either critical value form the rejection region.
in_rejection_region = (x < -critical_value) | (x > critical_value)

plt.figure(figsize=(8,5))
plt.plot(x, y, label="Normal Distribution")
plt.fill_between(x, y, where=in_rejection_region, color='red', alpha=0.3, label="Rejection Region")
for boundary in (-critical_value, critical_value):
    plt.axvline(boundary, color='black', linestyle='dashed')
plt.title("Two-Tailed Z-test Decision Region")
plt.legend()
plt.show()


**Create a Python function that calculates and visualizes Type 1 and Type 2 errors during hypothesis testing**

In [None]:
def plot_type1_type2(alpha=0.05, beta=0.2):
    """Plot the Type I and Type II error regions of a one-sided Z-test.

    The null distribution is N(0, 1). The rejection threshold is its
    (1 - alpha) quantile. The alternative distribution is N(mu, 1) with mu
    placed so that exactly ``beta`` of its mass lies below that threshold,
    which makes the green area equal to the requested Type II error rate.

    Args:
        alpha: Type I error rate (area of the null beyond the threshold).
        beta: Type II error rate (area of the alternative below the
            threshold).
    """
    critical_value = norm.ppf(1 - alpha)
    # P(X < c) = beta for X ~ N(mu, 1)  <=>  mu = c + z_(1 - beta).
    alt_mean = critical_value + norm.ppf(1 - beta)

    # Extend the axis to the right so the alternative's upper tail is shown.
    x = np.linspace(-4, alt_mean + 4, 1000)
    null_dist = norm.pdf(x, 0, 1)
    alt_dist = norm.pdf(x, alt_mean, 1)  # Shifted mean for alternative hypothesis

    plt.figure(figsize=(8,5))
    plt.plot(x, null_dist, label="Null Distribution", color="blue")
    plt.plot(x, alt_dist, label="Alternative Distribution", color="orange")
    # Type I error: the null is true but the statistic falls in the rejection region.
    plt.fill_between(x, null_dist, where=(x > critical_value), color='red', alpha=0.3, label="Type 1 Error (α)")
    # Type II error: the alternative is true but the statistic stays below
    # the same threshold, so the null is not rejected.
    plt.fill_between(x, alt_dist, where=(x < critical_value), color='green', alpha=0.3, label="Type 2 Error (β)")

    plt.axvline(critical_value, color='black', linestyle='dashed')
    plt.legend()
    plt.title("Type 1 and Type 2 Errors")
    plt.show()

plot_type1_type2()


**Write a Python program to perform an independent T-test and interpret the results**

In [None]:
from scipy.stats import ttest_ind

# Two independent groups of 30 draws; the generating means differ by 2.
group1 = np.random.normal(loc=50, scale=5, size=30)
group2 = np.random.normal(loc=52, scale=5, size=30)

t_stat, p_value = ttest_ind(group1, group2)
print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")

# Interpret the result at the 5% significance level.
significant = p_value < 0.05
verdict = (
    "Reject the null hypothesis: The groups have significantly different means."
    if significant
    else "Fail to reject the null hypothesis: No significant difference between the groups."
)
print(verdict)


**Perform a paired sample T-test using Python and visualize the comparison results**

In [None]:
from scipy.stats import ttest_rel

# "After" is each "before" value plus independent noise (mean 0, sd 2), so
# the two series are paired observation by observation.
before = np.random.normal(loc=50, scale=5, size=30)
after = before + np.random.normal(loc=0, scale=2, size=30)  # Slight change

t_stat, p_value = ttest_rel(before, after)
print(f"Paired T-test: T-statistic={t_stat:.4f}, P-value={p_value:.4f}")

# Overlay both series against the observation index.
plt.figure(figsize=(6, 4))
for series, series_label in ((before, "Before"), (after, "After")):
    plt.plot(series, label=series_label, marker='o')
plt.legend()
plt.title("Paired Sample T-test Comparison")
plt.show()


**Simulate data and perform both Z-test and T-test, then compare the results using Python**

In [None]:
# Both tests below address the same null hypothesis (the two groups share a
# mean), so their statistics and p-values can be compared directly. The
# Z-test uses the known population standard deviation; the T-test estimates
# it from the data.
sample1 = np.random.normal(50, 5, 30)
sample2 = np.random.normal(52, 5, 30)
known_std = 5  # standard deviation used to simulate both samples

# Z-test (two-sample, sigma known): standard error of the difference in means
se_diff = known_std * np.sqrt(1 / len(sample1) + 1 / len(sample2))
z_score = (np.mean(sample1) - np.mean(sample2)) / se_diff
p_value_z = 2 * norm.sf(abs(z_score))

# T-test (two-sample, sigma estimated from the samples)
t_stat, p_value_t = ttest_ind(sample1, sample2)

print(f"Z-test: Z-score={z_score:.4f}, P-value={p_value_z:.4f}")
print(f"T-test: T-statistic={t_stat:.4f}, P-value={p_value_t:.4f}")


**Write a Python function to calculate the confidence interval for a sample mean and explain its significance**

import scipy.stats as stats

def confidence_interval(sample, confidence=0.95):
    """Return the (lower, upper) t-based confidence interval for the mean.

    The half-width is the two-sided Student-t critical value with
    ``len(sample) - 1`` degrees of freedom multiplied by the standard error
    of the mean (sample standard deviation with ``ddof=1``).
    """
    n_obs = len(sample)
    center = np.mean(sample)
    std_error = np.std(sample, ddof=1) / np.sqrt(n_obs)
    t_crit = stats.t.ppf((1 + confidence) / 2, n_obs - 1)
    half_width = t_crit * std_error
    return center - half_width, center + half_width

# Example Usage: 95% interval for the mean of 30 simulated observations.
sample_data = np.random.normal(loc=50, scale=5, size=30)
interval = confidence_interval(sample_data)
ci_lower, ci_upper = interval
print(f"95% Confidence Interval: ({ci_lower:.4f}, {ci_upper:.4f})")


**Calculate the margin of error for a given confidence level using sample data**

In [None]:
import scipy.stats as stats
import numpy as np

def margin_of_error(sample, confidence=0.95):
    """Return the t-based margin of error for the mean of ``sample``.

    This is the two-sided Student-t critical value (``len(sample) - 1``
    degrees of freedom) times the standard error of the mean.
    """
    n_obs = len(sample)
    std_error = np.std(sample, ddof=1) / np.sqrt(n_obs)
    t_critical = stats.t.ppf((1 + confidence) / 2, n_obs - 1)
    return t_critical * std_error

# Margin of error at the default 95% level for 30 simulated observations.
sample_data = np.random.normal(loc=50, scale=5, size=30)
me = margin_of_error(sample_data)
print(f"Margin of Error: {me:.4f}")


**Implement Bayesian inference using Bayes' Theorem in Python and explain the process**

In [None]:
def bayes_theorem(prior, likelihood, evidence):
    """Return the posterior P(H|E) = P(E|H) * P(H) / P(E).

    ``prior`` is P(H), ``likelihood`` is P(E|H) and ``evidence`` is P(E).
    """
    joint = likelihood * prior
    return joint / evidence

# Step 1: state the inputs.
prior_prob = 0.02           # P(H): belief in the hypothesis before seeing evidence
likelihood = 0.8            # P(E | H): chance of the evidence when H is true
# P(E | not H) is a separate quantity from 1 - P(E | H); it only coincides
# with it when the false-positive and false-negative rates happen to match.
false_positive_rate = 0.2   # P(E | not H): chance of the evidence when H is false

# Step 2: total probability of the evidence,
# P(E) = P(E | H) * P(H) + P(E | not H) * P(not H).
evidence = (likelihood * prior_prob) + (false_positive_rate * (1 - prior_prob))

# Step 3: update the prior into the posterior P(H | E).
posterior_prob = bayes_theorem(prior_prob, likelihood, evidence)
print(f"Posterior Probability: {posterior_prob:.4f}")


**3. Perform a Chi-square test for independence between two categorical variables in Python**

A chi-square test of independence checks whether two categorical variables are associated.





import pandas as pd
from scipy.stats import chi2_contingency

# 2x2 contingency table of observed counts (columns "A" and "B").
data = pd.DataFrame([[30, 20], [10, 40]], columns=["A", "B"])

# Only the statistic and p-value are reported; dof and expected counts are dropped.
chi2, p = chi2_contingency(data)[:2]
print(f"Chi-square Statistic: {chi2:.4f}, P-value: {p:.4f}")
**4. Calculate the expected frequencies for a Chi-square test based on observed data**

In [None]:
# Observed 2x2 counts; the expected counts are what independence of rows
# and columns would predict from the row and column totals.
observed = np.array([[30, 10], [20, 40]])
dof, expected = chi2_contingency(observed)[2:]

print("Expected Frequencies:\n", expected)
**5. Perform a goodness-of-fit test using Python to compare observed vs. expected distribution**

In [None]:
from scipy.stats import chisquare

# Category counts under the hypothesised distribution and as observed;
# both total 100.
expected = np.array([40, 40, 20])
observed = np.array([50, 30, 20])

chi2_stat, p_value = chisquare(f_obs=observed, f_exp=expected)
print(f"Chi-square Statistic: {chi2_stat:.4f}, P-value: {p_value:.4f}")


**6. Simulate and visualize the Chi-square distribution**

In [None]:
import matplotlib.pyplot as plt
import scipy.stats as stats

# Density of the chi-square distribution with 4 degrees of freedom on [0, 10].
x = np.linspace(start=0, stop=10, num=1000)
y = stats.chi2(df=4).pdf(x)

plt.plot(x, y, label="Chi-square Distribution (df=4)")
plt.title("Chi-square Distribution")
plt.legend()
plt.show()



**7. Implement an F-test to compare variances of two samples**

In [None]:
from scipy.stats import f

sample1 = np.random.normal(50, 5, 30)
sample2 = np.random.normal(50, 8, 30)

# Degrees of freedom of the numerator and denominator variance estimates.
dfn, dfd = len(sample1) - 1, len(sample2) - 1
f_stat = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)

# Two-sided p-value: the ratio can depart from 1 in either direction, so
# double the smaller tail. An upper-tail-only p-value is close to 1
# whenever sample1 has the smaller variance, regardless of how different
# the variances are.
p_value = min(1.0, 2 * min(f.cdf(f_stat, dfn, dfd), f.sf(f_stat, dfn, dfd)))

print(f"F-statistic: {f_stat:.4f}, P-value: {p_value:.4f}")




**8. Perform an ANOVA test to compare means between multiple groups**

In [None]:
from scipy.stats import f_oneway

# Three groups of 30 draws each; only the generating mean differs (50, 52, 55).
groups = [np.random.normal(center, 5, 30) for center in (50, 52, 55)]
group1, group2, group3 = groups

f_stat, p_value = f_oneway(*groups)
print(f"ANOVA F-statistic: {f_stat:.4f}, P-value: {p_value:.4f}")


**9. Perform a one-way ANOVA test and plot the results**

In [None]:
import seaborn as sns

# One box per group, drawn side by side to compare the distributions.
group_labels = ['Group 1', 'Group 2', 'Group 3']
data = [group1, group2, group3]

plt.boxplot(data, labels=group_labels)
plt.title("One-way ANOVA")
plt.show()


**10. Check assumptions for ANOVA (normality, independence, equal variance)**

In [None]:
from scipy.stats import shapiro, levene

# Normality: one Shapiro-Wilk p-value per group.
print("Shapiro-Wilk Normality Test:")
for idx, grp in enumerate([group1, group2, group3], start=1):
    normality_p = shapiro(grp)[1]
    print(f"Group {idx}: p-value = {normality_p:.4f}")

# Equal variance: a single Levene p-value across the three groups.
equal_variance_p = levene(group1, group2, group3)[1]
print("\nLevene’s Test for Equal Variance:")
print(f"P-value: {equal_variance_p:.4f}")


**11. Perform a two-way ANOVA test and visualize results**

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Balanced 2x2 factorial layout: FactorA alternates row by row while FactorB
# switches once halfway through, so all four A/B combinations occur (15 rows
# each). Tiling both factors with the same pattern would pair A only with X
# and B only with Y, leaving the two factors perfectly confounded and the
# interaction term impossible to estimate.
df = pd.DataFrame({"FactorA": np.tile(["A", "B"], 30),
                   "FactorB": np.repeat(["X", "Y"], 30),
                   "Response": np.random.normal(50, 5, 60)})

# Main effects of both factors plus their interaction.
model = ols('Response ~ C(FactorA) + C(FactorB) + C(FactorA):C(FactorB)', data=df).fit()
anova_results = sm.stats.anova_lm(model, typ=2)
print(anova_results)


**12. Visualize the F-distribution**

In [None]:
# Density of the F-distribution with (5, 10) degrees of freedom on [0, 5].
x = np.linspace(start=0, stop=5, num=1000)
y = stats.f(dfn=5, dfd=10).pdf(x)

plt.plot(x, y, label="F-distribution (df1=5, df2=10)")
plt.legend()
plt.show()



**13. One-way ANOVA with boxplots**

In [None]:
# Seaborn draws one box per list entry; the returned axes take the title.
ax = sns.boxplot(data=[group1, group2, group3])
ax.set_title("One-way ANOVA Boxplot")
plt.show()


**14. Simulate normal data and perform hypothesis testing**

In [None]:
# 30 simulated observations tested against a hypothesised mean of 50.
sample = np.random.normal(loc=50, scale=5, size=30)
t_stat, p_value = stats.ttest_1samp(sample, popmean=50)
print(f"T-test: T-stat={t_stat:.4f}, P-value={p_value:.4f}")


**15. Hypothesis test for population variance using a Chi-square test**

In [None]:
# Hypothesised population variance to test the sample variance against.
pop_variance = 25

n = len(sample)
degrees_of_freedom = n - 1
sample_variance = np.var(sample, ddof=1)

# Test statistic (n - 1) * s^2 / sigma0^2 with its upper-tail p-value.
chi2_stat = degrees_of_freedom * sample_variance / pop_variance
p_value = 1 - stats.chi2.cdf(chi2_stat, df=degrees_of_freedom)

print(f"Chi-square Statistic: {chi2_stat:.4f}, P-value: {p_value:.4f}")


**16. Perform a Z-test for comparing proportions between two groups**

In [None]:
from statsmodels.stats.proportion import proportions_ztest

# Group sizes and success counts: 30 of 100 versus 40 of 120.
nobs = np.array([100, 120])
count = np.array([30, 40])

z_stat, p_value = proportions_ztest(count=count, nobs=nobs)
print(f"Z-test for Proportions: Z={z_stat:.4f}, P-value={p_value:.4f}")

**17. F-test for comparing variances and visualization**

In [None]:
# Semi-transparent overlaid histograms so the spread of both samples is visible.
for values, series_name in ((sample1, "Sample 1"), (sample2, "Sample 2")):
    plt.hist(values, alpha=0.5, label=series_name)
plt.legend()
plt.show()


**18. Chi-square test for goodness of fit with simulated data**

In [None]:
# Five random category counts in [10, 50), tested against a uniform
# expectation in which every category has the mean observed count.
observed = np.random.randint(10, 50, size=5)
expected = np.full(observed.shape, np.mean(observed))

chi2_stat, p_value = chisquare(f_obs=observed, f_exp=expected)
print(f"Chi-square Statistic: {chi2_stat:.4f}, P-value: {p_value:.4f}")