In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from scipy.stats import chi2_contingency, chisquare

# Q1. Fair Coin Analysis
Suppose you have a coin. You toss the coin 100 times and get 48 heads and 52 tails.

Perform a test to check whether the coin is fair or biased.

You want to determine if the coin is fair at a 5% significance level.

In [4]:
# Chi-square goodness-of-fit test for the coin.
# H0: the coin is fair (heads and tails equally likely).
# H1: the coin is biased.
observed_tosses = [48, 52]  # heads, tails observed in 100 tosses
expected_tosses = [50, 50]  # heads, tails a fair coin would give in 100 tosses

chi_stat, p_value = chisquare(f_obs=observed_tosses, f_exp=expected_tosses)
print("Chi_Stat", chi_stat)
print("P-Value", p_value)

alpha = 0.05  # 5% level of significance

# Reject H0 only when the p-value falls below the significance level.
verdict = "Coin is Biased" if p_value < alpha else "Coin is Fair"
print(verdict)


Chi_Stat 0.16
P-Value 0.6891565167793516
Coin is Fair


# Q2. Exam scores distribution
Suppose you have data on 150 students' exam scores, and you want to test whether the observed distribution of scores across predefined categories matches an expected distribution.

The expected distribution is

30% in the "Excellent" category,
40% in the "Good" category, and
30% in the "Average" category.
Upon observation, you notice that:

45 students fall into the "Excellent" category,
50 students into the "Good" category, and
55 students into the "Average" category.
Conduct an appropriate test to see if the distribution matches expectations at a 5% significance level, and choose the correct option from below:

- a)

  P-value: 0.1625, 
  
  We fail to reject the null hypothesis, 
  
  Thus concluding that the distribution of exam scores matches the expected distribution.
- b)
  
  P-value: 0.1625
  
  The null hypothesis is rejected, 
  
  Thus concluding that the distribution of exam scores does not match the expected distribution. 
- c)

  P-value: 0.1430
  
  We fail to reject the null hypothesis
  
  Thus concluding that the distribution of exam scores matches the expected distribution.
- d)

  P-value: 0.1430
  
  The null hypothesis is rejected
  
  Thus concluding that the distribution of exam scores does not match the expected distribution.

In [6]:
# Chi-square goodness-of-fit test for the exam-score categories.
# H0: scores follow the expected 30% / 40% / 30% split (Excellent / Good / Average).
# H1: the observed distribution differs from that split.
observed_counts = np.array([45, 50, 55])  # Excellent, Good, Average
expected_proportions = np.array([0.30, 0.40, 0.30])
# Scale the proportions by the observed sample size so that f_exp sums to the
# same total as f_obs (chisquare requires the two totals to agree).
expected_counts = expected_proportions * observed_counts.sum()

chi_squared_stat, p_value = chisquare(f_obs=observed_counts, f_exp=expected_counts)

alpha = 0.05  # 5% level of significance

# Reject H0 only when the p-value falls below the significance level.
if p_value < alpha:
    conclusion = "Reject the null hypothesis: The distribution of exam scores does not match expectations."
else:
    conclusion = "Fail to reject the null hypothesis: The distribution of exam scores matches expectations."
print(conclusion)

print(f"Chi-Square Statistic: {chi_squared_stat}")
print(f"P-value: {p_value}")

Fail to reject the null hypothesis: The distribution of browsing times matches expectations.
Chi-Square Statistic: 3.8888888888888893
P-value: 0.1430666827544082


# Q3. Diabetes in Native Americans
Preventable chronic diseases are increasing rapidly in Native American populations, particularly diabetes.

Below is a contingency table showing the cross-classification of educational attainment and diabetic state.

![](https://d2beiqkhq929f0.cloudfront.net/public_assets/assets/000/032/896/original/Screenshot_2023-04-27_at_1.53.31_PM.png?1682589253)

At the 1% significance level, does the data provide sufficient evidence to conclude that an association exists between educational level and diabetic state for Native Americans?

Choose the correct option below :

In [7]:
# Chi-square test of independence on the education-by-diabetic-state table.
# H0: the two categorical variables are independent; H1: they are associated.
alpha = 0.01  # 1% level of significance

# One row per category of the first variable, one column per category of the
# second; presumably rows are educational levels and columns are diabetic
# state, transcribed from the table image -- TODO confirm ordering.
data = [[33, 218], [25, 389], [20, 393], [17, 178]]

chi_stat, p_value, dof, expected = chi2_contingency(data)

# Report each piece of the test result on its own line.
for label, value in (
    ("Chi_Stat", chi_stat),
    ("P-Value", p_value),
    ("Degree of Freedom", dof),
    ("Expected", expected),
):
    print(label, value)

# Reject H0 only when the p-value falls below the significance level.
verdict = (
    "Reject the null hypothesis: There is a relationship between the two categorical variables"
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no relationship between the two categorical variables"
)
print(verdict)

Chi_Stat 17.51186847271713
P-Value 0.000554511571355531
Degree of Freedom 3
Expected [[ 18.73134328 232.26865672]
 [ 30.89552239 383.10447761]
 [ 30.82089552 382.17910448]
 [ 14.55223881 180.44776119]]
Reject the null hypothesis: There is a relationship between the two categorical variables


# Q4. Gun Control
A nationwide survey was conducted in which an independent, random sample of adults residing in urban, suburban, and rural regions was asked the question: "Do you support or oppose the motion of requiring a background check, for all gun buyers?"

The survey results are in the table below:

![](https://d2beiqkhq929f0.cloudfront.net/public_assets/assets/000/032/903/original/Screenshot_2023-04-27_at_2.24.27_PM.png?1682592014)

At 1% significance level, does the data provide sufficient evidence to conclude that there is an association between the region that an adult resides in, and the response received from them?

Choose the correct option below :

In [8]:
# Chi-square test of independence on the region-by-response survey table.
# H0: the two categorical variables are independent; H1: they are associated.
alpha = 0.01  # 1% level of significance

# Two rows by three columns; presumably rows are the survey responses and
# columns are the regions, transcribed from the table image -- TODO confirm.
data = [[335, 348, 318], [35, 23, 50]]

chi_stat, p_value, dof, expected = chi2_contingency(data)

# Report each piece of the test result on its own line.
for label, value in (
    ("Chi_Stat", chi_stat),
    ("P-Value", p_value),
    ("Degree of Freedom", dof),
    ("Expected", expected),
):
    print(label, value)

# Reject H0 only when the p-value falls below the significance level.
verdict = (
    "Reject the null hypothesis: There is a relationship between the two categorical variables"
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no relationship between the two categorical variables"
)
print(verdict)

Chi_Stat 11.519544916042339
P-Value 0.003151828690194211
Degree of Freedom 2
Expected [[333.96753832 334.87015329 332.16230839]
 [ 36.03246168  36.12984671  35.83769161]]
Reject the null hypothesis: There is a relationship between the two categorical variables
