Q1. Calculate the 95% confidence interval for a sample of data with a mean of 50 and a standard deviation 
of 5 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Given data
sample_mean = 50
sample_std_dev = 5
sample_size = 30  # Assumed: the question gives no sample size; replace with the actual n
confidence_level = 0.95  # Single source of truth for both the critical value and the printed label

# Standard error of the mean (SEM): s / sqrt(n)
sem = sample_std_dev / (sample_size ** 0.5)

# Two-tailed critical value: (1 - confidence_level) / 2 of the probability is left in each tail.
# NOTE(review): a normal (z) critical value is used together with a sample standard deviation;
# for small samples a Student's t critical value with n - 1 degrees of freedom is the usual
# choice -- confirm which one is intended.
z_score = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error: critical value times the standard error
margin_of_error = z_score * sem

# Confidence interval: sample mean plus/minus the margin of error
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

# Interpretation: if the sampling were repeated many times, about confidence_level of the
# intervals built this way would be expected to contain the true population mean.
print(f'Sample Mean: {sample_mean}')
print(f'Standard Error of the Mean (SEM): {sem:.4f}')
print(f'Margin of Error: {margin_of_error:.4f}')
print(f'{confidence_level:.0%} Confidence Interval: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})')


Q2. Conduct a chi-square goodness of fit test to determine if the distribution of colors of M&Ms in a bag 
matches the expected distribution of 20% blue, 20% orange, 20% green, 10% yellow, 10% red, and 20% 
brown. Use Python to perform the test with a significance level of 0.05.

In [None]:
import numpy as np
import scipy.stats as stats

# Observed counts per color (assumed to be listed in the same color order as the
# expected proportions below)
observed_frequencies = np.array([25, 15, 30, 10, 10, 20])  # Replace with your actual data

# Hypothesised color proportions, scaled to the observed total so that both
# arrays sum to the same number of candies
expected_distribution = np.array([0.2, 0.2, 0.2, 0.1, 0.1, 0.2])
total_observation = np.sum(observed_frequencies)
expected_frequencies = total_observation * expected_distribution

# Chi-square goodness-of-fit test of observed counts against expected counts
chi2_stat, p_value = stats.chisquare(observed_frequencies, f_exp=expected_frequencies)

# Report the test statistic and its p-value
for label, value in (('Chi-square statistic', chi2_stat), ('P-value', p_value)):
    print(f'{label}: {value:.4f}')

# Decision at the chosen significance level
alpha = 0.05
verdict = (
    "Reject the null hypothesis. The distribution of M&M colors is significantly different from the expected distribution."
    if p_value < alpha
    else "Fail to reject the null hypothesis. There is not enough evidence to conclude a significant difference."
)
print(verdict)


Q3. Use Python to calculate the chi-square statistic and p-value for a contingency table with the following 
data:

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Example 2x3 contingency table of observed counts (rows and columns are the
# two categorical variables being compared)
observed_data = np.array([
    [30, 20, 10],
    [15, 25, 20],
])

# Chi-square test of independence; also yields the degrees of freedom and the
# table of counts expected under independence
test_result = chi2_contingency(observed_data)
chi2_stat, p_value, dof, expected = test_result

# Report the results
print(f'Chi-square statistic: {chi2_stat:.4f}')
print(f'P-value: {p_value:.4f}')
print(f'Degrees of Freedom: {dof}')
print('Expected Frequencies:', expected, sep='\n')


Q4. A study of the prevalence of smoking in a population of 500 individuals found that 60 individuals 
smoked. Use Python to calculate the 95% confidence interval for the true proportion of individuals in the 
population who smoke.

In [None]:
import math

# Imported here so this cell runs on its own instead of depending on another
# cell having already bound the name `stats`.
import scipy.stats as stats

# Given data
smokers = 60  # individuals in the sample who smoke
sample_size = 500  # individuals surveyed
sample_proportion = smokers / sample_size
confidence_level = 0.95

# Two-tailed z critical value: (1 - confidence_level) / 2 is left in each tail
z_score = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error for a proportion (normal approximation):
# z * sqrt(p * (1 - p) / n)
margin_of_error = z_score * math.sqrt((sample_proportion * (1 - sample_proportion)) / sample_size)

# Confidence interval: sample proportion plus/minus the margin of error
confidence_interval = (sample_proportion - margin_of_error, sample_proportion + margin_of_error)

print(f'Sample Proportion: {sample_proportion:.4f}')
print(f'Z-Score: {z_score:.2f}')
print(f'Margin of Error: {margin_of_error:.4f}')
print(f'{confidence_level:.0%} Confidence Interval: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})')


Q5. Calculate the 90% confidence interval for a sample of data with a mean of 75 and a standard deviation 
of 12 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Given data
sample_mean = 75
sample_std_dev = 12
sample_size = 30  # Assumed: the question gives no sample size; replace with the actual n
confidence_level = 0.90  # Single source of truth for both the critical value and the printed label

# Standard error of the mean (SEM): s / sqrt(n)
sem = sample_std_dev / (sample_size ** 0.5)

# Two-tailed critical value: (1 - confidence_level) / 2 of the probability is left in each tail.
# NOTE(review): a normal (z) critical value is used together with a sample standard deviation;
# for small samples a Student's t critical value with n - 1 degrees of freedom is the usual
# choice -- confirm which one is intended.
z_score = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error: critical value times the standard error
margin_of_error = z_score * sem

# Confidence interval: sample mean plus/minus the margin of error
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

# Interpretation: if the sampling were repeated many times, about confidence_level of the
# intervals built this way would be expected to contain the true population mean.
print(f'Sample Mean: {sample_mean}')
print(f'Standard Error of the Mean (SEM): {sem:.4f}')
print(f'Margin of Error: {margin_of_error:.4f}')
print(f'{confidence_level:.0%} Confidence Interval: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})')


Q6. Use Python to plot the chi-square distribution with 10 degrees of freedom. Label the axes and shade the 
area corresponding to a chi-square statistic of 15.

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Degrees of freedom of the chi-square distribution being plotted
df = 10

# Evaluation grids: the whole curve on [0, 30], and the sub-range from 15
# upward that gets shaded
x = np.linspace(0, 30, 1000)
x_fill = np.linspace(15, 30, 1000)

# Density curve of the chi-square distribution
curve_density = stats.chi2.pdf(x, df)
plt.plot(x, curve_density, label=f'Chi-square Distribution (df={df})')

# Shade the area under the curve from a chi-square statistic of 15 to the
# right edge of the plot
shaded_density = stats.chi2.pdf(x_fill, df)
plt.fill_between(
    x_fill,
    shaded_density,
    color='orange',
    alpha=0.3,
    label='Chi-square Statistic = 15',
)

# Axis labels and legend
plt.xlabel('Chi-square Statistic')
plt.ylabel('Probability Density Function (PDF)')
plt.legend()

# Render the figure
plt.show()


Q7. A random sample of 1000 people was asked if they preferred Coke or Pepsi. Of the sample, 520 
preferred Coke. Calculate a 99% confidence interval for the true proportion of people in the population who 
prefer Coke.

In [None]:
import math

# Imported here so this cell runs on its own instead of depending on another
# cell having already bound the name `stats`.
import scipy.stats as stats

# Given data
coke_count = 520  # respondents who preferred Coke
sample_size = 1000  # respondents surveyed
sample_proportion = coke_count / sample_size
confidence_level = 0.99

# Two-tailed z critical value: (1 - confidence_level) / 2 is left in each tail
z_score = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error for a proportion (normal approximation):
# z * sqrt(p * (1 - p) / n)
margin_of_error = z_score * math.sqrt((sample_proportion * (1 - sample_proportion)) / sample_size)

# Confidence interval: sample proportion plus/minus the margin of error
confidence_interval = (sample_proportion - margin_of_error, sample_proportion + margin_of_error)

print(f'Sample Proportion: {sample_proportion:.4f}')
print(f'Z-Score: {z_score:.3f}')
print(f'Margin of Error: {margin_of_error:.4f}')
print(f'{confidence_level:.0%} Confidence Interval: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})')


Q8. A researcher hypothesizes that a coin is biased towards tails. They flip the coin 100 times and observe 
45 tails. Conduct a chi-square goodness of fit test to determine if the observed frequencies match the 
expected frequencies of a fair coin. Use a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# chisquare is the one-way goodness-of-fit test. chi2_contingency is a test of
# independence and, on a single-row table, has 0 degrees of freedom, so it
# cannot compare observed counts against a fair-coin expectation.
from scipy.stats import chisquare

# Given data
observed_frequencies = np.array([45, 55])  # 45 tails, 55 heads
expected_frequencies = np.array([50, 50])  # Expected frequencies for a fair coin

# Perform chi-square goodness-of-fit test of observed vs. expected counts
chi2_stat, p_value = chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)

# Degrees of freedom for a goodness-of-fit test: number of categories minus one
dof = len(observed_frequencies) - 1

# Print results
print(f'Chi-square statistic: {chi2_stat:.4f}')
print(f'P-value: {p_value:.4f}')
print(f'Degrees of Freedom: {dof}')

# Compare p-value with significance level.
# NOTE(review): the chi-square goodness-of-fit test is non-directional; it only
# detects a departure from the fair-coin frequencies, not which side is favoured.
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. The coin is significantly biased towards tails.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to conclude bias towards tails.")


Q9. A study was conducted to determine if there is an association between smoking status (smoker or 
non-smoker) and lung cancer diagnosis (yes or no). The results are shown in the contingency table below. 
Conduct a chi-square test for independence to determine if there is a significant association between 
smoking status and lung cancer diagnosis.