Q1. Calculate the 95% confidence interval for a sample of data with a mean of 50 and a standard deviation
of 5 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Summary statistics given in the question.
sample_mean = 50
sample_std = 5

# The question does not state a sample size. n = 30 is assumed purely for
# illustration; replace it with the real sample size when it is known.
sample_size = 30

confidence_level = 0.95

# Two-sided critical value of the standard normal distribution
# (about 1.96 for a 95% confidence level).
z_critical = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error = critical value * standard error of the mean.
margin_of_error = z_critical * (sample_std / (sample_size ** 0.5))

# The interval is symmetric around the sample mean.
lower_limit = sample_mean - margin_of_error
upper_limit = sample_mean + margin_of_error

# Interpretation: under the assumed sample size, we are 95% confident that the
# true population mean lies between the two printed limits.
print(f"95% Confidence Interval: ({lower_limit:.2f}, {upper_limit:.2f})")


Q2. Conduct a chi-square goodness of fit test to determine if the distribution of colors of M&Ms in a bag
matches the expected distribution of 20% blue, 20% orange, 20% green, 10% yellow, 10% red, and 20%
brown. Use Python to perform the test with a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Observed colour counts in the bag, in the order:
# blue, orange, green, yellow, red, brown.
# The question does not supply observed counts, so these values are
# ILLUSTRATIVE ONLY -- replace them with the counts from your own bag.
observed_frequencies = np.array([22, 18, 21, 9, 11, 19])

# Expected proportions from the question (20/20/20/10/10/20 percent), scaled to
# the observed total so that both arrays sum to the same number of candies.
expected_proportions = np.array([0.2, 0.2, 0.2, 0.1, 0.1, 0.2])
expected_frequencies = expected_proportions * observed_frequencies.sum()

# Chi-square goodness of fit test of observed vs. expected counts.
chi2, p_value = chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)

# Print the results
print(f"Chi-square statistic: {chi2}")
print(f"P-value: {p_value}")

# Significance level required by the question.
alpha = 0.05
print(f"Significance level: {alpha}")

# Reject the null hypothesis only when the p-value falls below alpha.
if p_value < alpha:
    print("Reject the null hypothesis: The distribution of M&Ms colors is different from the expected distribution.")
else:
    print("Fail to reject the null hypothesis: The distribution of M&Ms colors matches the expected distribution.")


Q3. Use Python to calculate the chi-square statistic and p-value for a contingency table with the following
data:

                        Group A           Group B
Outcome 1                   20            15
Outcome 2                   10            25
Outcome 3                   15             20

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Observed counts: one row per outcome (1, 2, 3), one column per group (A, B).
data = np.array([
    [20, 15],
    [10, 25],
    [15, 20],
])

# Threshold used for the decision at the end of the cell.
alpha = 0.05

# Chi-square test on the table: statistic, p-value, degrees of freedom and the
# table of expected counts.
chi2_stat, p_value, dof, expected = chi2_contingency(data)

# Report the test output.
print(f"Chi-square statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

print(f"Significance level: {alpha}")

# Pick the conclusion that corresponds to the p-value / alpha comparison.
verdict = (
    "Reject the null hypothesis: There is a significant association between Group and Outcome."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant association between Group and Outcome."
)
print(verdict)


Q4. A study of the prevalence of smoking in a population of 500 individuals found that 60 individuals
smoked. Use Python to calculate the 95% confidence interval for the true proportion of individuals in the
population who smoke.

In [None]:
import scipy.stats as stats
import numpy as np

# Counts from the question: 60 smokers among the 500 individuals studied.
smokers = 60
total_population = 500

# Desired confidence level for the interval.
confidence_level = 0.95

# Point estimate of the proportion of smokers.
sample_proportion = smokers / total_population

# Standard error of a proportion: sqrt(p * (1 - p) / n).
proportion_variance = (sample_proportion * (1 - sample_proportion)) / total_population
standard_error = np.sqrt(proportion_variance)

# Two-sided standard normal critical value, scaled by the standard error.
z_critical = stats.norm.ppf((1 + confidence_level) / 2)
margin_of_error = z_critical * standard_error

# Symmetric interval around the point estimate.
lower_bound = sample_proportion - margin_of_error
upper_bound = sample_proportion + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Report the estimate, its margin of error and the interval.
print(f"Sample Proportion: {sample_proportion}")
print(f"Margin of Error: {margin_of_error}")
print(f"95% Confidence Interval: {confidence_interval}")


Q5. Calculate the 90% confidence interval for a sample of data with a mean of 75 and a standard deviation
of 12 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Summary statistics given in the question.
sample_mean = 75
sample_std_dev = 12

# The question does not state a sample size. n = 30 is assumed purely for
# illustration; replace it with the real sample size when it is known.
sample_size = 30

# Set the confidence level
confidence_level = 0.90

# Two-sided critical value of the standard normal distribution
# (about 1.645 for a 90% confidence level).
z_critical = stats.norm.ppf((1 + confidence_level) / 2)

# Margin of error = critical value * standard error of the mean.
margin_of_error = z_critical * (sample_std_dev / (sample_size ** 0.5))

# Symmetric interval around the sample mean.
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

# Interpretation: under the assumed sample size, we are 90% confident that the
# true population mean lies inside the printed interval.
print(f"Sample Mean: {sample_mean}")
print(f"Margin of Error: {margin_of_error}")
print(f"90% Confidence Interval: {confidence_interval}")


Q6. Use Python to plot the chi-square distribution with 10 degrees of freedom. Label the axes and shade the
area corresponding to a chi-square statistic of 15.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Degrees of freedom of the chi-square distribution to draw.
df = 10

# Grid of x values for the full curve, and its density on that grid.
x = np.linspace(0, 30, 1000)  # Adjust the range based on your data
curve_density = stats.chi2.pdf(x, df)

# Grid covering 0..15, used for the shaded region under the curve.
x_shaded = np.linspace(0, 15, 100)
shaded_density = stats.chi2.pdf(x_shaded, df)

# Draw the density curve, then fill the area beneath it from 0 up to 15.
plt.plot(x, curve_density, label=f'Chi-square Distribution (df={df})')
plt.fill_between(x_shaded, shaded_density, color='lightblue', label='Chi-square Statistic = 15', alpha=0.5)

# Title, axis labels, legend and grid, then render the figure.
plt.title(f'Chi-square Distribution with {df} Degrees of Freedom')
plt.xlabel('Chi-square Statistic')
plt.ylabel('Probability Density Function (PDF)')
plt.legend()
plt.grid(True)
plt.show()


Q7. A random sample of 1000 people was asked if they preferred Coke or Pepsi. Of the sample, 520
preferred Coke. Calculate a 99% confidence interval for the true proportion of people in the population who
prefer Coke.

In [None]:
import scipy.stats as stats
import numpy as np

# Survey result from the question: 520 of 1000 respondents preferred Coke.
preferred_coke = 520
sample_size = 1000

# Desired confidence level for the interval.
confidence_level = 0.99

# Point estimate of the proportion preferring Coke.
sample_proportion = preferred_coke / sample_size

# Standard error of a proportion: sqrt(p * (1 - p) / n).
proportion_variance = (sample_proportion * (1 - sample_proportion)) / sample_size
standard_error = np.sqrt(proportion_variance)

# Two-sided standard normal critical value, scaled by the standard error.
z_critical = stats.norm.ppf((1 + confidence_level) / 2)
margin_of_error = z_critical * standard_error

# Symmetric interval around the point estimate.
lower_bound = sample_proportion - margin_of_error
upper_bound = sample_proportion + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Report the estimate, its margin of error and the interval.
print(f"Sample Proportion: {sample_proportion}")
print(f"Margin of Error: {margin_of_error}")
print(f"99% Confidence Interval: {confidence_interval}")


Q8. A researcher hypothesizes that a coin is biased towards tails. They flip the coin 100 times and observe
45 tails. Conduct a chi-square goodness of fit test to determine if the observed frequencies match the
expected frequencies of a fair coin. Use a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Experiment from the question: 100 flips, 45 of which came up tails.
total_flips = 100
observed_tails = 45
observed_heads = total_flips - observed_tails

# Threshold used for the decision at the end of the cell.
alpha = 0.05

# Observed counts, ordered as [heads, tails].
observed_frequencies = np.array([observed_heads, observed_tails])

# A fair coin splits the flips evenly between heads and tails.
expected_heads = expected_tails = total_flips / 2
expected_frequencies = np.array([expected_heads, expected_tails])

# Chi-square goodness of fit test of observed vs. expected counts.
chi2, p_value = chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)

# Report the test output.
print(f"Chi-square statistic: {chi2}")
print(f"P-value: {p_value}")
print(f"Significance level: {alpha}")

# Pick the conclusion that corresponds to the p-value / alpha comparison.
verdict = (
    "Reject the null hypothesis: The coin is biased."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no evidence of bias in the coin."
)
print(verdict)


Q9. A study was conducted to determine if there is an association between smoking status (smoker or
non-smoker) and lung cancer diagnosis (yes or no). The results are shown in the contingency table below.
Conduct a chi-square test for independence to determine if there is a significant association between
smoking status and lung cancer diagnosis.


                 Lung Cancer: Yes    Lung Cancer: No
Smoker                  60                 140
Non-smoker              30                 170

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Contingency table from the question.
# Rows: smoking status; columns: lung cancer diagnosis (Yes, No).
contingency_table = np.array([[60, 140],   # Smoker:     60 yes, 140 no
                              [30, 170]])  # Non-smoker: 30 yes, 170 no

# Chi-square test for independence. For a 2x2 table, chi2_contingency applies
# Yates' continuity correction by default.
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)

# Print the results
print(f"Chi-square statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

# The question does not state a significance level; the conventional 0.05 is used.
alpha = 0.05
print(f"Significance level: {alpha}")

# Reject the null hypothesis of independence only when the p-value is below alpha.
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant association between smoking status and lung cancer diagnosis.")
else:
    print("Fail to reject the null hypothesis: There is no significant association between smoking status and lung cancer diagnosis.")


Q10. A study was conducted to determine if the proportion of people who prefer milk chocolate, dark
chocolate, or white chocolate is different in the U.S. versus the U.K. A random sample of 500 people from
the U.S. and a random sample of 500 people from the U.K. were surveyed. The results are shown in the
contingency table below. Conduct a chi-square test for independence to determine if there is a significant
association between chocolate preference and country of origin.



               Milk Chocolate  Dark Chocolate  White Chocolate
U.S. (n=500)        200             150              150
U.K. (n=500)        225             175              100

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Survey counts: one row per country, columns are Milk, Dark, White chocolate.
us_counts = [200, 150, 150]
uk_counts = [225, 175, 100]
contingency_table = np.array([us_counts, uk_counts])

# Threshold used for the decision at the end of the cell.
alpha = 0.01

# Chi-square test for independence: statistic, p-value, degrees of freedom and
# the table of expected counts.
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)

# Report the test output.
print(f"Chi-square statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

print(f"Significance level: {alpha}")

# Pick the conclusion that corresponds to the p-value / alpha comparison.
verdict = (
    "Reject the null hypothesis: There is a significant association between chocolate preference and country of origin."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant association between chocolate preference and country of origin."
)
print(verdict)


Q11. A random sample of 30 people was selected from a population with an unknown mean and standard
deviation. The sample mean was found to be 72 and the sample standard deviation was found to be 10.
Conduct a hypothesis test to determine if the population mean is significantly different from 70. Use a
significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import ttest_1samp

# Student's t distribution, used to turn the t statistic into a p-value.
from scipy.stats import t as t_dist

# Summary statistics given in the question.
sample_mean = 72
sample_std_dev = 10
sample_size = 30
population_mean = 70  # hypothesised mean under the null hypothesis

# Only summary statistics are available (no raw observations), so the
# one-sample t statistic is computed directly from them rather than from
# simulated data, which would give a different answer on every run.
standard_error = sample_std_dev / np.sqrt(sample_size)
t_stat = (sample_mean - population_mean) / standard_error

# Two-sided p-value from the t distribution with n - 1 degrees of freedom.
degrees_of_freedom = sample_size - 1
p_value = 2 * t_dist.sf(abs(t_stat), degrees_of_freedom)

# Print the results
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

# Significance level required by the question.
alpha = 0.05
print(f"Significance level: {alpha}")

# Reject the null hypothesis only when the p-value falls below alpha.
if p_value < alpha:
    print("Reject the null hypothesis: The population mean is significantly different from 70.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the population mean and 70.")
