In [2]:
from scipy.stats import norm
import numpy as np

# Q1 Short State
A country has a population average height of 65 inches with a standard deviation of 2.5. A person feels people from his state are shorter. He takes the average of 20 people and sees that it is 64.5.

At a 5% significance level (or 95% confidence level), can we conclude that people from his state are shorter, using the Z-test? What is the p-value?

In [15]:
# Population parameters
population_mean = 65  # Population average height
population_stddev = 2.5  # Population standard deviation

# Sample statistics
sample_mean = 64.5  
sample_size = 20 # Average of 20 people

# Calculate the standard error of the sample mean
standard_error = population_stddev / (sample_size ** 0.5)

# Calculate the Z-score
z_score = (sample_mean - population_mean) / standard_error

# Calculate the p-value: Left Tail Test
p_value = norm.cdf(z_score)

# Significance level
alpha = 0.05

# Compare p-value with the significance level
if p_value < alpha:
    print(f"Reject the null hypothesis. People from the state are shorter.")
else:
    print(f"Fail to reject the null hypothesis. No significant evidence that people from the state are shorter.")

print(f"Z-score: {z_score}")
print(f"P-value: {p_value}")

Fail to reject the null hypothesis. No significant evidence that people from the state are shorter.
Z-score: -0.8944271909999159
P-value: 0.18554668476134878


# Q2. Pastries Produce Per Day
A French cafe has historically maintained that their average daily pastry production is at most 500.

With the installation of a new machine, they assert that the average daily pastry production has increased. The average number of pastries produced per day over a 70-day period was found to be 530.

Assume that the population standard deviation for the pastries produced per day is 125.

Perform a z-test with the critical z-value = 1.64 at the alpha (significance level) = 0.05 to evaluate if there's sufficient evidence to support their claim of the new machine producing more than 500 pastries daily.

Note: Round off the z-score to two decimal places.

In [8]:
sample_mean = 530
population_std = 125
sample_size = 70
population_mean = 500

# Calculate z-score
z_score = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))

# Round z-score to two decimal places
z_score = round(z_score, 2)
print(f"z-score: {z_score}")

# Set critical z-value and confidence level
confidence_level = 0.95
critical_z = norm.ppf(confidence_level)
print("critical z-value:",critical_z)

# Check if the z-score is greater than the critical z-value

if z_score > critical_z:
    print("Reject the null hypothesis. The shop's claim is supported by the data.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to support the shop's claim.")

z-score: 2.01
critical z-value: 1.6448536269514722
Reject the null hypothesis. The shop's claim is supported by the data.


# Q3. India runs on Chai

The Chai Point stall at Bengaluru airport estimates that each person visiting the store drinks an average of 1.7 small cups of tea.

Assume a population standard deviation of 0.5 small cups. A sample of 30 customers collected over a few days averaged 1.85 small cups of tea per person.

Test the claim using an appropriate test at an alpha = 0.05 significance value, with a critical z-score value of ±1.96.

Note: Round off the z-score to two decimal places.

In [7]:
sample_mean = 1.85
population_std = 0.5
sample_size = 30
population_mean = 1.7

# Calculate z-score
z_score = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))

# Round z-score to two decimal places
z_score = np.round(z_score, 2)

# Set alpha and critical z-score (use two-tailed since direction is unknown)
alpha = 0.05
critical_z = 1.96

# Check if the z-score is greater than the critical z-value
if abs(z_score) > critical_z:
   print(f"z-score: {z_score}")
   print("Reject the null hypothesis. The average tea consumption is likely different from the estimate.")
else:
   print(f"z-score: {z_score}")
   print("Fail to reject the null hypothesis. There is not enough evidence to support a difference from the estimated average.")

z-score: 1.64
z-score: 1.64
Fail to reject the null hypothesis. There is not enough evidence to support a difference from the estimated average.


# Q4. Web Application Response
A data scientist is looking at how a web application responds, with an average response time of 250 milliseconds and a standard deviation of 30 milliseconds.

Find the critical value for a 96% confidence level.


In [10]:
# Given values
confidence_level = 0.96
mean = 250  # Mean response time in milliseconds
std_deviation = 30  # Standard deviation in milliseconds

# A two-tailed test, considering both possibilities: the average response time could be higher or lower than 250 milliseconds.
# Calculate the critical Z-score for a 96% confidence level
critical_z = norm.ppf(1 - (1 - confidence_level) / 2)

# Calculate the critical value using Z-score formula
critical_value = (critical_z * std_deviation) + mean


print(f"Critical Value: {critical_value:.4f}")

Critical Value: 311.6125


# Q5. CI and Conclusion
A marketing team aims to estimate the average time, visitors spend on their website.

They gathered a random sample of 100 visitors and determined that the average time spent on the website was 4.5 minutes.

The team is working under the assumption that the population's mean time spent on the website is 4.0 minutes, with a standard deviation of 1.2 minutes.

Their goal is to estimate the true time spent on the website with a 95% confidence level. Calculate the confidence interval values and make a conclusion based on the calculated interval.


In [19]:
sample_size = 100
sample_mean = 4.5
population_std = 1.2
population_mean = 4



# Calculate z-score
z1 = norm.ppf(0.025)
print(f"z1: {z1:.2f}")


z2 = 1 - norm.ppf(0.975)
print(f"z2: {z2:.2f}")

# Calculate confidence interval
confidence_interval = (sample_mean + (z1 * (population_std / np.sqrt(sample_size))),
                       sample_mean - (z2 * (population_std / np.sqrt(sample_size))))

print(f"Confidence Interval: {confidence_interval}")

pvalue = 1 - norm.cdf(z_score)
print(f"P-value: ", pvalue)

alpha = 0.05 # 95% confidence level

if pvalue < alpha:
    print("Reject the null hypothesis. the average time spent on the website was more.")
else:
    print("Fail to reject the null hypothesis. the average time spent on the website was not more.")

z1: -1.96
z2: -0.96
Confidence Interval: (4.264804321855194, 4.615195678144807)
P-value:  0.75
Fail to reject the null hypothesis. the average time spent on the website was not more.


# Q7. Institution's claim


It is known that the mean IQ of high school students is 100, and the standard deviation is 15.

A coaching institute claims that candidates who study there have more IQ than an average high school student. When the IQ of 50 candidates was calculated, the average turned out to be 110

Conduct an appropriate hypothesis test to test the institute’s claim, with a significance level of 5%

In [22]:
population_mean = 100
population_std = 15
sample_size = 50
sample_mean = 110

# Calculate z-score
z_score = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))
print(f"z-score: {z_score:.2f}")

# Calculate p-value
p_value = 1 - norm.cdf(z_score)
print(f"P-value: {p_value}")

# Set alpha and confidence level
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. The Institute Students have more IQ.")
else:
    print("Fail to reject the null hypothesis.The Institute Students doesnot have more IQ.")


z-score: 4.71
P-value: 1.2142337364462463e-06
Reject the null hypothesis. The Institute Students have more IQ.


# Q8. Smokers
When smokers smoke, nicotine is transformed into cotinine, which can be tested.

The average cotinine level in a group of 50 smokers was 243.5 ng ml.

Assuming that the standard deviation is known to be 229.5 ng ml.

Test the assertion that the mean cotinine level of all smokers is equal to 300.0 ng ml, at 95% confidence.

In [25]:
sample_size = 50
sample_mean = 243.5
population_std = 229.5
population_mean = 300

# Calculate z-score
z_score = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))
print(f"z-score: {z_score:.2f}")

p_value = 2 * (1 - norm.cdf(abs(z_score)))
print(f"P-value: {p_value:.4f}")

alpha = 0.05 # 95% confidence level
if p_value < alpha:
    print("Reject the null hypothesis. The average weight of the population is less.")
else:
    print("Fail to reject the null hypothesis. The average weight of the population is not less.")

z-score: -1.74
P-value: 0.0817
Fail to reject the null hypothesis. The average weight of the population is not less.
