# In [None]:
# Practical Questions

# In [None]:
# 1. Write a Python program to perform a Z-test for comparing a sample mean to a known population mean and interpret the results
# **Answer**:
import numpy as np
from statsmodels.stats.weightstats import ztest

# A one-sample Z-test assumes the population standard deviation is KNOWN, so
# the statistic is built from population_std instead of a sample estimate.
from scipy.stats import norm

# Sample data
sample_data = np.array([100, 102, 98, 101, 99, 100, 101, 102, 100, 99])
population_mean = 100
population_std = 2
sample_size = len(sample_data)

# Perform Z-test: z = (sample mean - mu0) / (sigma / sqrt(n))
standard_error = population_std / np.sqrt(sample_size)
z_stat = (np.mean(sample_data) - population_mean) / standard_error
# Two-sided p-value: probability of a |Z| at least this large under H0
p_value = 2 * norm.sf(abs(z_stat))

# Interpret results at the 5% significance level
alpha = 0.05
if p_value < alpha:
    result = "Reject the null hypothesis: The sample mean is significantly different from the population mean."
else:
    result = "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."

result


# 2. Simulate random data to perform hypothesis testing and calculate the corresponding P-value using Python
# **Answer**:
import numpy as np
from scipy import stats

# Draw a reproducible sample of 30 values from a normal distribution
# centred on 100 with standard deviation 15.
np.random.seed(0)
population_mean = 100
sample_data = np.random.normal(loc=100, scale=15, size=30)

# One-sample t-test of H0: the sample comes from a population with mean 100
t_stat, p_value = stats.ttest_1samp(sample_data, population_mean)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The sample mean is significantly different from the population mean."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."
)

result



# 3. Implement a one-sample Z-test using Python to compare the sample mean with the population mean
# **Answer**:
import numpy as np
from statsmodels.stats.weightstats import ztest

# Ten observations to be compared against a hypothesised mean of 100
population_mean = 100
sample_data = np.array([100, 102, 98, 101, 99, 100, 101, 102, 100, 99])

# Two-sided one-sample Z-test via statsmodels
z_stat, p_value = ztest(
    sample_data,
    value=population_mean,
    alternative='two-sided',
    usevar='pooled',
)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The sample mean is significantly different from the population mean."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."
)

result



# 4. Perform a two-tailed Z-test using Python and visualize the decision region on a plot
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.weightstats import ztest

# Two-tailed Z-test with a KNOWN population standard deviation, plus a plot of
# the standard normal curve showing the rejection region and the statistic.
from scipy.stats import norm

# Sample data
sample_data = np.array([100, 102, 98, 101, 99, 100, 101, 102, 100, 99])
population_mean = 100
population_std = 2
sample_size = len(sample_data)
alpha = 0.05

# Perform Z-test: z = (sample mean - mu0) / (sigma / sqrt(n))
standard_error = population_std / np.sqrt(sample_size)
z_stat = (np.mean(sample_data) - population_mean) / standard_error
p_value = 2 * norm.sf(abs(z_stat))

# Two-tailed critical value derived from alpha (about 1.96 when alpha = 0.05),
# so the shaded region always matches the significance level in use.
z_critical = norm.ppf(1 - alpha / 2)

# Plot
x = np.linspace(-4, 4, 1000)
y = norm.pdf(x)
plt.plot(x, y, label='Standard Normal Distribution')
plt.fill_between(x, y, where=(x < -z_critical) | (x > z_critical), color='red', alpha=0.5, label='Rejection Region')
plt.axvline(x=z_stat, color='black', linestyle='--', label=f'Z-statistic: {z_stat:.2f}')
plt.title('Two-Tailed Z-Test')
plt.xlabel('Z-Score')
plt.ylabel('Probability Density')
plt.legend()
plt.show()

# Interpret results
if p_value < alpha:
    result = "Reject the null hypothesis: The sample mean is significantly different from the population mean."
else:
    result = "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."

result



# 5. Create a Python function that calculates and visualizes Type 1 and Type 2 errors during hypothesis testing
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

def visualize_errors(pop_mean, sample_mean, std_dev, sample_size, alpha=0.05):
    """
    Plot the Type 1 and Type 2 error regions of a one-sided (upper-tail) Z-test.

    H0 places the mean at ``pop_mean``; ``sample_mean`` is used as the mean of
    the alternative (H1) sampling distribution.

    Parameters:
    pop_mean (float): Mean under the null hypothesis
    sample_mean (float): Mean used for the alternative distribution
    std_dev (float): Standard deviation of individual observations
    sample_size (int): Number of observations in the sample
    alpha (float): Type 1 error rate (default is 0.05)

    Returns:
    float: Type 2 error probability (beta)
    """
    # Standard error of the sample mean
    std_error = std_dev / np.sqrt(sample_size)

    # Critical z-score for the upper-tail test ...
    z_critical = norm.ppf(1 - alpha)
    # ... converted to the sample-mean scale, because the x-axis of the plot is
    # in sample-mean units, not z-units.
    critical_value = pop_mean + z_critical * std_error

    # Type 2 error: probability of staying below the critical value when the
    # alternative distribution is the true one.
    beta = norm.cdf(z_critical - (sample_mean - pop_mean) / std_error)

    # Cover both sampling distributions on the x-axis
    x_low = min(pop_mean, sample_mean) - 4 * std_error
    x_high = max(pop_mean, sample_mean) + 4 * std_error
    x = np.linspace(x_low, x_high, 1000)
    y_null = norm.pdf(x, loc=pop_mean, scale=std_error)
    y_alt = norm.pdf(x, loc=sample_mean, scale=std_error)

    # Plot
    plt.plot(x, y_null, label='Sampling Distribution under H0')
    plt.plot(x, y_alt, label='Sampling Distribution under H1')
    plt.fill_between(x, y_null, where=(x > critical_value), color='red', alpha=0.5, label='Type 1 Error Region')
    plt.fill_between(x, y_alt, where=(x <= critical_value), color='blue', alpha=0.3, label='Type 2 Error Region')
    plt.axvline(x=critical_value, color='black', linestyle='--', label=f'Critical Value: {critical_value:.2f}')
    plt.title('Type 1 and Type 2 Errors')
    plt.xlabel('Sample Mean')
    plt.ylabel('Probability Density')
    plt.legend()
    plt.show()

    return beta

# Example usage
pop_mean = 100
sample_mean = 102
std_dev = 15
sample_size = 30
alpha = 0.05

beta = visualize_errors(pop_mean, sample_mean, std_dev, sample_size, alpha)
f"Type 2 Error Probability (Beta): {beta:.2f}"




# 6. Write a Python program to perform an independent T-test and interpret the results
# **Answer**:
import numpy as np
from scipy import stats

# Two independent groups of ten measurements each
group2 = np.array([105, 107, 103, 106, 104, 105, 106, 107, 105, 104])
group1 = np.array([100, 102, 98, 101, 99, 100, 101, 102, 100, 99])

# Independent two-sample t-test (SciPy default: equal variances assumed)
t_stat, p_value = stats.ttest_ind(group1, group2)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The means of the two groups are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The means of the two groups are not significantly different."
)

result


# 7. Perform a paired sample T-test using Python and visualize the comparison results
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Paired scores (15 pairs): element i of each array belongs to the same pair
after_treatment = np.array([91, 84, 88, 90, 79, 80, 88, 90, 90, 96, 88, 89, 81, 74, 92])
before_treatment = np.array([88, 82, 84, 93, 75, 78, 84, 87, 95, 91, 83, 89, 77, 68, 91])

# Paired-sample t-test on the before/after measurements
t_stat, p_value = stats.ttest_rel(before_treatment, after_treatment)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The means are not significantly different."
)

# Visualization: both series plotted against the sample index
plt.figure(figsize=(8, 6))
for scores, series_label in (
    (before_treatment, 'Before Treatment'),
    (after_treatment, 'After Treatment'),
):
    plt.plot(scores, label=series_label, marker='o')
plt.title('Before and After Treatment Scores')
plt.xlabel('Sample Index')
plt.ylabel('Scores')
plt.legend()
plt.show()

result



# 8. Simulate data and perform both Z-test and T-test, then compare the results using Python
# **Answer**:
import numpy as np
from scipy import stats

# Reproducible sample of 30 draws from a normal distribution (mean 100, sd 15)
np.random.seed(0)
population_mean, population_std = 100, 15
sample_data = np.random.normal(loc=100, scale=15, size=30)

# One-sample Z-test using the known population standard deviation
standard_error = population_std / np.sqrt(len(sample_data))
z_stat = (np.mean(sample_data) - population_mean) / standard_error
p_value_z = 2 * (1 - stats.norm.cdf(abs(z_stat)))

# One-sample T-test (standard deviation estimated from the sample)
t_stat, p_value_t = stats.ttest_1samp(sample_data, population_mean)

# Apply the same 5% decision rule to both tests
alpha = 0.05
if p_value_z < alpha:
    result_z = "Reject the null hypothesis: The sample mean is significantly different from the population mean."
else:
    result_z = "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."
if p_value_t < alpha:
    result_t = "Reject the null hypothesis: The sample mean is significantly different from the population mean."
else:
    result_t = "Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean."

result_z, result_t



# 9. Write a Python function to calculate the confidence interval for a sample mean and explain its significance.
# **Answer**:
import numpy as np
from scipy import stats

def confidence_interval(data, confidence=0.95):
    """
    Compute a two-sided t-based confidence interval for the mean of a sample.

    Parameters:
    data (array-like): Sample observations
    confidence (float): Confidence level (default is 0.95)

    Returns:
    tuple: (lower bound, upper bound) of the interval
    """
    sample = np.array(data)
    dof = len(sample) - 1
    # Two-sided critical value of Student's t for the requested confidence
    t_critical = stats.t.ppf((1 + confidence) / 2., dof)
    half_width = stats.sem(sample) * t_critical
    center = np.mean(sample)
    return center - half_width, center + half_width

# Example usage
sample_data = [100, 102, 98, 101, 99, 100, 101, 102, 100, 99]
ci_lower, ci_upper = confidence_interval(sample_data)
f"Confidence Interval: ({ci_lower:.2f}, {ci_upper:.2f})"



# 10. Write a Python program to calculate the margin of error for a given confidence level using sample data
# **Answer**:
import numpy as np
from scipy import stats

def margin_of_error(data, confidence=0.95):
    """
    Compute the t-based margin of error for the mean of a sample.

    Parameters:
    data (array-like): Sample observations
    confidence (float): Confidence level (default is 0.95)

    Returns:
    float: Half-width of the two-sided confidence interval
    """
    sample = np.array(data)
    dof = len(sample) - 1
    # Two-sided critical value of Student's t for the requested confidence
    t_critical = stats.t.ppf((1 + confidence) / 2., dof)
    return stats.sem(sample) * t_critical

# Example usage
sample_data = [100, 102, 98, 101, 99, 100, 101, 102, 100, 99]
moe = margin_of_error(sample_data)
f"Margin of Error: {moe:.2f}"

# 11. Implement a Bayesian inference method using Bayes' Theorem in Python and explain the process
# **Answer**:
# Bayes' Theorem: P(A|B) = P(B|A) * P(A) / P(B)
# Example: Probability of having a disease given a positive test result

# Inputs
P_A = 0.01              # prior: probability of having the disease
P_B_given_A = 0.95      # likelihood: positive test given disease
P_B_given_not_A = 0.05  # false positive rate: positive test without disease

# Law of total probability: overall chance of a positive test,
# summed over the "disease" and "no disease" cases
P_B = P_A * P_B_given_A + (1 - P_A) * P_B_given_not_A

# Bayes' Theorem: posterior = likelihood * prior / evidence
P_A_given_B = (P_B_given_A * P_A) / P_B

f"Probability of having the disease given a positive test result: {P_A_given_B:.2f}"



# 12. Perform a Chi-square test for independence between two categorical variables in Python
# **Answer**:
import numpy as np
from scipy import stats

# 2 x 2 contingency table of observed counts
data = np.array([[30, 10], [20, 40]])

# Chi-square test of independence; also returns the degrees of freedom and
# the table of expected counts
chi2_stat, p_value, dof, expected = stats.chi2_contingency(data)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The variables are dependent."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The variables are independent."
)

result



# 13. Write a Python program to calculate the expected frequencies for a Chi-square test based on observed data
# **Answer**:
import numpy as np

# Observed contingency table
observed = np.array([[30, 10], [20, 40]])

# Marginal totals
grand_total = observed.sum()
col_totals = observed.sum(axis=0)
row_totals = observed.sum(axis=1)

# Expected count for cell (i, j) = row_total[i] * col_total[j] / grand_total
expected = row_totals[:, np.newaxis] * col_totals[np.newaxis, :] / grand_total

expected


# 14. Perform a goodness-of-fit test using Python to compare the observed data to an expected distribution
# **Answer**:
import numpy as np
from scipy import stats

# Observed counts in five categories
observed = np.array([50, 60, 40, 47, 53])

# Expected counts: 50 in every category (uniform distribution)
expected = np.full(observed.shape, 50)

# Chi-square goodness-of-fit test
chi2_stat, p_value = stats.chisquare(observed, f_exp=expected)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The observed data does not fit the expected distribution."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The observed data fits the expected distribution."
)

result



# 15. Create a Python script to simulate and visualize the Chi-square distribution and discuss its characteristics
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

# Parameters: degrees of freedom and number of simulated draws
df, size = 2, 1000

# Draw the random sample (unseeded, so every run differs)
data = chi2.rvs(df, size=size)

# Normalised histogram of the simulated values
plt.hist(data, bins=30, density=True, alpha=0.6, color='g')

# Overlay the theoretical density across the histogram's x-range
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = chi2.pdf(x, df)
plt.plot(x, p, 'k', linewidth=2)

title = f"Chi-Square Distribution (df={df})"
plt.title(title)
plt.show()



# 16. Implement an F-test using Python to compare the variances of two random samples
# **Answer**:
import numpy as np
from scipy import stats

# Sample data: two normal samples whose true standard deviations are 1 and 2
np.random.seed(0)
sample1 = np.random.normal(loc=0, scale=1, size=100)
sample2 = np.random.normal(loc=0, scale=2, size=100)

# Perform F-test: the statistic is the ratio of the unbiased sample variances.
# (Levene's test, used previously, is a different test of equal variances.)
var1 = np.var(sample1, ddof=1)
var2 = np.var(sample2, ddof=1)
f_stat = var1 / var2
dfn = len(sample1) - 1
dfd = len(sample2) - 1

# Two-sided p-value: double the smaller tail, because either a very small or
# a very large ratio is evidence against equal variances.
p_value = min(1.0, 2 * min(stats.f.cdf(f_stat, dfn, dfd), stats.f.sf(f_stat, dfn, dfd)))

# Interpret results
alpha = 0.05
if p_value < alpha:
    result = "Reject the null hypothesis: The variances are significantly different."
else:
    result = "Fail to reject the null hypothesis: The variances are not significantly different."

result



# 17. Write a Python program to perform an ANOVA test to compare means between multiple groups and interpret the results
# **Answer**:
import numpy as np
from scipy import stats

# Three reproducible groups of 30 standard-normal draws, generated in order
np.random.seed(0)
group1, group2, group3 = (
    np.random.normal(loc=0, scale=1, size=30) for _ in range(3)
)

# One-way ANOVA across the three groups
f_stat, p_value = stats.f_oneway(group1, group2, group3)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: At least one group mean is significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: All group means are equal."
)

result



# 18. Perform a one-way ANOVA test using Python to compare the means of different groups and plot the results
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Three reproducible groups of 30 standard-normal draws, generated in order
np.random.seed(0)
group1, group2, group3 = (
    np.random.normal(loc=0, scale=1, size=30) for _ in range(3)
)

# One-way ANOVA across the three groups
f_stat, p_value = stats.f_oneway(group1, group2, group3)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: At least one group mean is significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: All group means are equal."
)

# One box per group, side by side
plt.boxplot(
    [group1, group2, group3],
    labels=['Group 1', 'Group 2', 'Group 3'],
)
plt.title('Comparison of Group Means')
plt.ylabel('Values')
plt.show()

result



# 19. Write a Python function to check the assumptions (normality, independence, and equal variance) for ANOVA
# **Answer**:
import numpy as np
from scipy import stats

def check_anova_assumptions(*groups):
    """
    Run the statistical checks behind two ANOVA assumptions.

    Normality is checked per group with the Shapiro-Wilk test and equal
    variance across groups with Levene's test. Independence is not tested
    here.

    Parameters:
    *groups: One array-like of observations per group

    Returns:
    dict: 'normality' maps to a list with one Shapiro-Wilk result per group
          (in input order); 'equal_variance' maps to a (statistic, p-value)
          tuple from Levene's test
    """
    return {
        # Normality test (Shapiro-Wilk), one result per group
        'normality': list(map(stats.shapiro, groups)),
        # Equal variance test (Levene's test) across all groups
        'equal_variance': tuple(stats.levene(*groups)),
    }

# Example usage
group1 = np.random.normal(loc=0, scale=1, size=30)
group2 = np.random.normal(loc=0, scale=1, size=30)
group3 = np.random.normal(loc=0, scale=1, size=30)

assumptions = check_anova_assumptions(group1, group2, group3)
assumptions


# 20. Perform a two-way ANOVA test using Python to study the interaction between two factors and visualize the results
# **Answer**:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
import matplotlib.pyplot as plt

# Sample data: balanced 2 x 3 design with 20 observations per cell.
# All three columns must have the same length for the DataFrame to build.
np.random.seed(0)
factor1 = np.repeat(['A', 'B'], 60)
factor2 = np.tile(['X', 'Y', 'Z'], 40)
values = np.random.normal(loc=0, scale=1, size=len(factor1))

# Create DataFrame
df = pd.DataFrame({'Factor1': factor1, 'Factor2': factor2, 'Values': values})

# Fit the model with both main effects and their interaction
model = ols('Values ~ C(Factor1) + C(Factor2) + C(Factor1):C(Factor2)', data=df).fit()

# Perform two-way ANOVA
anova_table = sm.stats.anova_lm(model, typ=2)

# Interpret results for the interaction term
alpha = 0.05
interaction_p = anova_table.loc['C(Factor1):C(Factor2)', 'PR(>F)']
if interaction_p < alpha:
    result = "Reject the null hypothesis: There is a significant interaction between Factor1 and Factor2."
else:
    result = "Fail to reject the null hypothesis: There is no significant interaction between Factor1 and Factor2."

# Visualize: interaction plot of the cell means. Roughly parallel lines
# suggest no interaction; crossing or diverging lines suggest one.
sns.pointplot(data=df, x='Factor2', y='Values', hue='Factor1', dodge=True)
plt.title('Interaction Plot: Factor1 x Factor2')
plt.xlabel('Factor2')
plt.ylabel('Mean of Values')
plt.show()

result
 


# 21. Write a Python program to visualize the F-distribution and discuss its use in hypothesis testing
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f

# Parameters: numerator and denominator degrees of freedom
dfn, dfd = 2, 5

# Evaluate the F density on an evenly spaced grid over [0, 5]
x = np.linspace(0, 5, 1000)
y = f.pdf(x, dfn, dfd)

# Draw the density curve
curve_label = f'F-distribution (dfn={dfn}, dfd={dfd})'
plt.plot(x, y, label=curve_label)
plt.title('F-distribution')
plt.xlabel('x')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()

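# The F-distribution is the reference distribution for ratios of variances. In ANOVA the F statistic is the
# ratio of between-group to within-group variance (used to test whether group means are equal), and in the
# two-sample F-test it is the ratio of two sample variances (used to test whether the variances are equal).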


# 22. Perform a one-way ANOVA test in Python and visualize the results with boxplots to compare group means
# **Answer**:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Three reproducible groups of 30 standard-normal draws, generated in order
np.random.seed(0)
group1, group2, group3 = (
    np.random.normal(loc=0, scale=1, size=30) for _ in range(3)
)

# One-way ANOVA across the three groups
f_stat, p_value = stats.f_oneway(group1, group2, group3)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: At least one group mean is significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: All group means are equal."
)

# Boxplot: one box per group, side by side
plt.boxplot(
    [group1, group2, group3],
    labels=['Group 1', 'Group 2', 'Group 3'],
)
plt.title('Comparison of Group Means')
plt.ylabel('Values')
plt.show()

result


# 23. Simulate random data from a normal distribution, then perform hypothesis testing to evaluate the means
# **Answer**:
import numpy as np
from scipy import stats

# Two reproducible samples of 100 standard-normal draws, generated in order
np.random.seed(0)
sample1, sample2 = (
    np.random.normal(loc=0, scale=1, size=100) for _ in range(2)
)

# Independent two-sample t-test on the means
t_stat, p_value = stats.ttest_ind(sample1, sample2)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The means are not significantly different."
)

result


# 24. Perform a hypothesis test for population variance using a Chi-square distribution and interpret the results
# **Answer**:
import numpy as np
from scipy import stats

# Sample data
np.random.seed(0)
sample = np.random.normal(loc=0, scale=1, size=100)

# Sample variance (unbiased estimator)
s2 = np.var(sample, ddof=1)

# Population variance (hypothesized)
sigma2_0 = 1

# Degrees of freedom
df = len(sample) - 1

# Chi-square statistic: (n - 1) * s^2 / sigma0^2
chi2_stat = df * s2 / sigma2_0

# Two-sided p-value. The conclusion below speaks of the variance being
# "different" (either larger or smaller), so both tails must count: double
# the smaller tail probability and cap the result at 1.
lower_tail = stats.chi2.cdf(chi2_stat, df)
upper_tail = stats.chi2.sf(chi2_stat, df)
p_value = min(1.0, 2 * min(lower_tail, upper_tail))

# Interpret results
alpha = 0.05
if p_value < alpha:
    result = "Reject the null hypothesis: The sample variance is significantly different from the hypothesized population variance."
else:
    result = "Fail to reject the null hypothesis: The sample variance is not significantly different from the hypothesized population variance."

result


# 25. Write a Python script to perform a Z-test for comparing proportions between two datasets or groups
# **Answer**:
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Successes and trial counts for the two groups (50/100 versus 60/100)
totals = np.array([100, 100])
successes = np.array([50, 60])

# Two-sample Z-test for equality of proportions
stat, p_value = proportions_ztest(successes, totals)

# Decision at the 5% significance level
alpha = 0.05
result = (
    "Reject the null hypothesis: The proportions are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis: The proportions are not significantly different."
)

result


# 26. Implement an F-test for comparing the variances of two datasets, then interpret and visualize the results
# **Answer**:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Sample data: two normal samples whose true standard deviations are 1 and 2
np.random.seed(0)
sample1 = np.random.normal(loc=0, scale=1, size=100)
sample2 = np.random.normal(loc=0, scale=2, size=100)

# Variances (unbiased estimators)
var1 = np.var(sample1, ddof=1)
var2 = np.var(sample2, ddof=1)

# F-statistic
f_stat = var1 / var2

# Degrees of freedom
df1 = len(sample1) - 1
df2 = len(sample2) - 1

# Two-sided p-value. An upper-tail-only p-value misses the case var1 < var2
# (ratio below 1), which is exactly the case here, so double the smaller tail.
p_value = min(1.0, 2 * min(stats.f.cdf(f_stat, df1, df2), stats.f.sf(f_stat, df1, df2)))

# Interpret results
alpha = 0.05
if p_value < alpha:
    result = "Reject the null hypothesis: The variances are significantly different."
else:
    result = "Fail to reject the null hypothesis: The variances are not significantly different."

# Critical values bounding the two-sided rejection region
f_lower = stats.f.ppf(alpha / 2, df1, df2)
f_upper = stats.f.ppf(1 - alpha / 2, df1, df2)

# Plot F-distribution with the rejection region and the observed statistic
x = np.linspace(0, 5, 1000)
y = stats.f.pdf(x, df1, df2)
plt.plot(x, y, label=f'F-distribution (df1={df1}, df2={df2})')
plt.fill_between(x, y, where=(x < f_lower) | (x > f_upper), color='red', alpha=0.5, label='Rejection Region')
plt.axvline(x=f_stat, color='black', linestyle='--', label=f'F-statistic: {f_stat:.2f}')
plt.title('F-distribution')
plt.xlabel('x')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()

result



# 27. Perform a Chi-square test for goodness of fit with simulated data and analyze the results
# **Answer**:
import numpy as np
from scipy import stats

# Simulate observed data: 100 draws from categories 1, 2, 3
np.random.seed(0)
observed = np.random.choice([1, 2, 3], size=100, p=[0.2, 0.5, 0.3])

# Expected frequencies under the same category probabilities
expected = np.array([0.2, 0.5, 0.3]) * len(observed)

# Count the draws per category. minlength guarantees one bin per category
# even if the last category is never drawn, so the counts always line up
# with the expected frequencies.
observed_counts = np.bincount(observed - 1, minlength=len(expected))

# Perform Chi-square goodness-of-fit test
chi2_stat, p_value = stats.chisquare(observed_counts, expected)

# Interpret results
alpha = 0.05
if p_value < alpha:
    result = "Reject the null hypothesis: The observed data does not fit the expected distribution."
else:
    result = "Fail to reject the null hypothesis: The observed data fits the expected distribution."

result

