Statistics Assignment 2

In [None]:
# Question 1

import random
import statistics
import numpy as np
from scipy import stats

def generate_integers(count=100, low=90, high=130):
    """Return a list of random integers drawn from ``[low, high]``.

    The defaults reproduce the assignment's data set: 100 integers between
    90 and 130, with both endpoints included (``random.randint`` is
    inclusive on both sides).

    Args:
        count: How many integers to generate.
        low: Smallest value that may be drawn.
        high: Largest value that may be drawn.

    Returns:
        A list containing ``count`` integers.
    """
    return [random.randint(low, high) for _ in range(count)]

# Generate the list once so that every Question 1 statistic below is
# computed from the same sample.
int_list = generate_integers()

# (i) Arithmetic mean
def calculate_mean(numbers):
    """Return the arithmetic mean of ``numbers`` (their sum divided by their count)."""
    total = sum(numbers)
    count = len(numbers)
    return total / count

# (i) Median
def calculate_median(numbers):
    """Return the median of ``numbers``.

    The input is sorted into a new list (the argument is not modified).
    With an odd count the middle element is returned as-is; with an even
    count the two central elements are averaged.
    """
    ordered = sorted(numbers)
    middle, has_single_centre = divmod(len(ordered), 2)
    if has_single_centre:
        return ordered[middle]
    # Even count: average the two values on either side of the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2

# (ii) Mode
def calculate_mode(numbers):
    """Return the most frequent value in ``numbers`` using ``scipy.stats.mode``.

    ``keepdims=True`` makes SciPy return length-1 arrays, so the single
    modal value is taken from index 0.
    """
    modal_values, _counts = stats.mode(numbers, keepdims=True)
    return modal_values[0]

# (iii) Weighted mean
def calculate_weighted_mean(values, weights):
    """Return the weighted mean ``sum(v * w) / sum(w)``.

    Args:
        values: The observations.
        weights: One weight per observation, in the same order.

    Returns:
        The weighted mean as a float.

    Raises:
        ValueError: If ``values`` and ``weights`` differ in length.
        ZeroDivisionError: If the weights sum to zero.
    """
    values = list(values)
    weights = list(weights)
    # zip() would silently drop the unmatched tail while sum(weights) still
    # counted every weight, producing a wrong answer instead of an error.
    if len(values) != len(weights):
        raise ValueError("values and weights must have the same length.")
    return sum(v * w for v, w in zip(values, weights)) / sum(weights)

# (iv) Geometric mean
def calculate_geometric_mean(numbers):
    """Return the geometric mean of non-negative ``numbers``.

    The mean is computed as ``exp(mean(log(x)))`` instead of taking the
    n-th root of the running product: the product of a few hundred values
    is too large to convert to a float and raises ``OverflowError``.

    Args:
        numbers: Non-negative values.

    Returns:
        The geometric mean as a float; ``0.0`` if any value is zero.

    Raises:
        ValueError: If any value is negative.
        ZeroDivisionError: If ``numbers`` is empty.
    """
    import math  # local import: this cell does not import math at the top

    values = list(numbers)
    if any(v < 0 for v in values):
        raise ValueError("geometric mean is undefined for negative values.")
    if any(v == 0 for v in values):
        # log(0) is undefined, but a zero factor makes the product zero.
        return 0.0
    return math.exp(math.fsum(math.log(v) for v in values) / len(values))

# (v) Harmonic mean
def calculate_harmonic_mean(numbers):
    """Return the harmonic mean: the count divided by the sum of reciprocals.

    A zero in ``numbers`` raises ``ZeroDivisionError`` when its reciprocal
    is taken.
    """
    count = len(numbers)
    reciprocal_total = sum(1 / value for value in numbers)
    return count / reciprocal_total

# (vi) Midrange
def calculate_midrange(numbers):
    """Return the midrange: the average of the smallest and largest value."""
    lowest = min(numbers)
    highest = max(numbers)
    return (lowest + highest) / 2

# (vii) Trimmed mean
def calculate_trimmed_mean(numbers, trim_percentage):
    """Return the mean of ``numbers`` after trimming both tails.

    ``trim_percentage`` is handed to ``scipy.stats.trim_mean`` as its
    ``proportiontocut`` argument, so despite the name it is a fraction
    (``0.1`` cuts 10% from each end), not a value out of 100.
    """
    proportion_per_tail = trim_percentage
    trimmed = stats.trim_mean(numbers, proportion_per_tail)
    return trimmed

# Results: evaluate every Question 1 statistic on the shared sample.
mean = calculate_mean(int_list)
median = calculate_median(int_list)
mode = calculate_mode(int_list)
weighted_mean = calculate_weighted_mean(int_list, [1] * len(int_list))  # Equal weights, so this equals the plain mean
geometric_mean = calculate_geometric_mean(int_list)
harmonic_mean = calculate_harmonic_mean(int_list)
midrange = calculate_midrange(int_list)
trimmed_mean = calculate_trimmed_mean(int_list, 0.1)  # 0.1 is a proportion: exclude 10% from each side

# Print results: the raw sample first, then one line per statistic.
print("Generated List:", int_list)
print("Mean:", mean)
print("Median:", median)
print("Mode:", mode)
print("Weighted Mean (equal weights):", weighted_mean)
print("Geometric Mean:", geometric_mean)
print("Harmonic Mean:", harmonic_mean)
print("Midrange:", midrange)
print("Trimmed Mean (10%):", trimmed_mean)


In [None]:
#Question 2

import random
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Step 1: Generate a list of 500 integers between 200 and 300
# (random.randint includes both endpoints).
int_list2 = [random.randint(200, 300) for _ in range(500)]

# Step 2: Visualization
def visualize_data(data):
    """Show three plots comparing ``data`` with a Gaussian and with its KDE.

    1. Histogram with the fitted Gaussian (normal) curve.
    2. Histogram with seaborn's smoothed KDE overlay.
    3. Fitted Gaussian curve against the Gaussian-kernel KDE.

    The "Gaussian Distribution" curve is a normal density that uses the
    sample's own mean and population standard deviation. Each figure is
    displayed with ``plt.show()``; nothing is returned.

    Args:
        data: A sequence of numeric observations.
    """
    from scipy.stats import norm  # local import: the cell only imports gaussian_kde

    values = np.asarray(data, dtype=float)
    grid = np.linspace(values.min(), values.max(), 1000)
    gaussian_curve = norm.pdf(grid, loc=values.mean(), scale=values.std())

    # Frequency & Gaussian distribution
    # stat='density' puts the bars on the same scale as the density curve;
    # with raw counts the curve would be flattened against the x-axis.
    sns.histplot(values, kde=False, bins=30, stat='density', color='blue', label='Frequency')
    plt.plot(grid, gaussian_curve, color='red', label='Gaussian Distribution')
    plt.legend()
    plt.title("Frequency & Gaussian Distribution")
    plt.show()

    # Frequency smoothened KDE plot
    sns.histplot(values, kde=True, bins=30, color='blue', label='Frequency Smoothened KDE')
    plt.legend()
    plt.title("Frequency Smoothened KDE Plot")
    plt.show()

    # Gaussian distribution & Smoothened KDE plot
    kde = gaussian_kde(values)
    sns.lineplot(x=grid, y=kde(grid), label='KDE', color='green')
    plt.plot(grid, gaussian_curve, color='red', label='Gaussian Distribution')
    plt.legend()
    plt.title("Gaussian Distribution & Smoothened KDE Plot")
    plt.show()

# Render all three comparison plots for the Question 2 sample.
visualize_data(int_list2)

# Step 3: Range of a list
def calculate_range(numbers):
    """Return the range of ``numbers``: the largest value minus the smallest."""
    largest = max(numbers)
    smallest = min(numbers)
    return largest - smallest

# Report the spread between the largest and smallest Question 2 value.
range_of_list = calculate_range(int_list2)
print(f"Range of the list: {range_of_list}")

# Step 4: Variance and standard deviation
def calculate_variance_std(numbers):
    """Return ``(variance, standard_deviation)`` of ``numbers``.

    Both use NumPy's default ``ddof=0``, i.e. the population formulas that
    divide by ``n`` rather than ``n - 1``.
    """
    samples = np.asarray(numbers)
    return samples.var(), samples.std()

# Report the variance and standard deviation of the Question 2 sample.
variance, std_dev = calculate_variance_std(int_list2)
print(f"Variance: {variance}, Standard Deviation: {std_dev}")

# Step 5: Interquartile range
def calculate_iqr(numbers):
    """Return the interquartile range: the 75th percentile minus the 25th."""
    lower_quartile, upper_quartile = np.percentile(numbers, [25, 75])
    return upper_quartile - lower_quartile

# Report the interquartile range of the Question 2 sample.
iqr = calculate_iqr(int_list2)
print(f"Interquartile Range (IQR): {iqr}")

# Step 6: Coefficient of variation
def calculate_coefficient_of_variation(numbers):
    """Return the coefficient of variation of ``numbers`` as a percentage.

    This is the population standard deviation (NumPy's default ``ddof=0``)
    divided by the mean, multiplied by 100.
    """
    samples = np.asarray(numbers)
    relative_spread = samples.std() / samples.mean()
    return relative_spread * 100

# Report the relative spread (standard deviation as a percentage of the mean).
coefficient_of_variation = calculate_coefficient_of_variation(int_list2)
print(f"Coefficient of Variation: {coefficient_of_variation}%")

# Step 7: Mean absolute deviation (MAD)
def calculate_mad(numbers):
    """Return the mean absolute deviation of ``numbers`` around their mean."""
    samples = np.asarray(numbers)
    absolute_deviations = np.abs(samples - samples.mean())
    return absolute_deviations.mean()

# Report the mean absolute deviation of the Question 2 sample.
mad = calculate_mad(int_list2)
print(f"Mean Absolute Deviation (MAD): {mad}")

# Step 8: Quartile deviation
def calculate_quartile_deviation(numbers):
    """Return the quartile deviation: half the distance between Q3 and Q1."""
    upper_quartile, lower_quartile = np.percentile(numbers, [75, 25])
    return (upper_quartile - lower_quartile) / 2

# Report the quartile deviation (semi-interquartile range) of the sample.
quartile_deviation = calculate_quartile_deviation(int_list2)
print(f"Quartile Deviation: {quartile_deviation}")

# Step 9: Range-based coefficient of dispersion
def calculate_range_coefficient_of_dispersion(numbers):
    """Return the range of ``numbers`` as a percentage of their mean."""
    spread = max(numbers) - min(numbers)
    centre = np.mean(numbers)
    return (spread / centre) * 100

# Report the range as a percentage of the mean for the Question 2 sample.
range_coefficient_of_dispersion = calculate_range_coefficient_of_dispersion(int_list2)
print(f"Range-based Coefficient of Dispersion: {range_coefficient_of_dispersion}%")


Question 3

class DiscreteRandomVariable:
    """A discrete random variable described by an outcome -> probability map."""

    def __init__(self, probabilities):
        """
        Initializes the random variable with given probabilities.

        :param probabilities: A dictionary where keys are the outcomes (x) and values are their probabilities (P(x)).
        :raises ValueError: If ``probabilities`` is not a dict, if any probability lies
            outside [0, 1] (NaN included), or if the probabilities do not sum to 1
            within a tolerance of 1e-6.
        """
        if not isinstance(probabilities, dict):
            raise ValueError("Probabilities must be provided as a dictionary.")
        # A sum of 1 alone is not enough: {1: 1.5, 2: -0.5} also sums to 1.
        # The chained comparison is False for NaN, so NaN is rejected too.
        if not all(0 <= p <= 1 for p in probabilities.values()):
            raise ValueError("Each probability must be between 0 and 1.")
        if abs(sum(probabilities.values()) - 1.0) > 1e-6:
            raise ValueError("The probabilities must sum to 1.")
        # Store a copy so later changes to the caller's dict cannot bypass
        # the validation above.
        self.probabilities = dict(probabilities)

    def expected_value(self):
        """
        Calculates the expected value (mean) of the random variable.

        :return: The expected value, sum of x * P(x) over all outcomes.
        """
        return sum(x * p for x, p in self.probabilities.items())

    def variance(self):
        """
        Calculates the variance of the random variable.

        :return: The variance, sum of (x - mean)^2 * P(x) over all outcomes.
        """
        mean = self.expected_value()
        return sum((x - mean) ** 2 * p for x, p in self.probabilities.items())

# Example usage
if __name__ == "__main__":
    # Three outcomes whose probabilities add up to 1.
    rv = DiscreteRandomVariable({1: 0.2, 2: 0.5, 3: 0.3})

    # Report the mean and the spread of that distribution.
    print("Expected Value:", rv.expected_value())
    print("Variance:", rv.variance())

Expected Value: 2.1
Variance: 0.49


Question 4

import random

def roll_die(num_rolls, sides=6):
    """
    Simulate rolling a fair die num_rolls times.

    Args:
        num_rolls (int): Number of times the die is rolled.
        sides (int): Number of faces on the die; faces are numbered
            1..sides. Defaults to 6 (a standard die).

    Returns:
        list: Outcomes of the die rolls, each an integer in [1, sides].
    """
    return [random.randint(1, sides) for _ in range(num_rolls)]

def calculate_expected_value(outcomes):
    """
    Estimate the expected value of the die as the average observed roll.

    Args:
        outcomes (list): Outcomes of the die rolls.

    Returns:
        float: Sum of the outcomes divided by how many there are.
    """
    total = sum(outcomes)
    roll_count = len(outcomes)
    return total / roll_count

def calculate_variance(outcomes, expected_value):
    """
    Compute the variance of the rolls around a given expected value.

    The squared deviations are averaged over the number of rolls (a
    divide-by-n, population-style variance).

    Args:
        outcomes (list): Outcomes of the die rolls.
        expected_value (float): Expected value of the outcomes.

    Returns:
        float: Mean squared deviation of the outcomes from expected_value.
    """
    squared_deviations = ((roll - expected_value) ** 2 for roll in outcomes)
    return sum(squared_deviations) / len(outcomes)

if __name__ == "__main__":
    raw_rolls = input("Enter the number of times to roll the die: ")
    try:
        num_rolls = int(raw_rolls)
    except ValueError:
        # Not a whole number: fall through to the invalid-input message.
        num_rolls = 0

    if num_rolls < 1:
        # Zero rolls would make the mean and variance divide by zero.
        print("Please enter a whole number of rolls that is at least 1.")
    else:
        outcomes = roll_die(num_rolls)

        # Calculate expected value and variance
        expected_value = calculate_expected_value(outcomes)
        variance = calculate_variance(outcomes, expected_value)

        print(f"Results of rolling the die {num_rolls} times:")
        print(f"Outcomes: {outcomes}")
        print(f"Expected Value: {expected_value:.2f}")
        print(f"Variance: {variance:.2f}")

Question 5

import numpy as np

def generate_samples_and_stats(distribution, params, sample_size=1000):
    """
    Draw random samples from a binomial or Poisson distribution and summarise them.

    Parameters:
        distribution (str): Which distribution to sample: 'binomial' or 'poisson'.
        params (dict): Distribution parameters; missing keys fall back to defaults.
                       - 'binomial' reads 'n' (default 1) and 'p' (default 0.5)
                       - 'poisson' reads 'lambda' (default 1.0)
        sample_size (int): Number of samples to generate (default is 1000).

    Returns:
        dict: Keys 'samples', 'mean' and 'variance'. The variance uses
              NumPy's default ddof=0.

    Raises:
        ValueError: If distribution is neither 'binomial' nor 'poisson'.
    """
    # Reject unknown names up front so the sampling branches stay flat.
    if distribution not in ('binomial', 'poisson'):
        raise ValueError("Unsupported distribution type. Use 'binomial' or 'poisson'.")

    if distribution == 'binomial':
        trials = params.get('n', 1)
        success_probability = params.get('p', 0.5)
        samples = np.random.binomial(trials, success_probability, sample_size)
    else:
        rate = params.get('lambda', 1.0)
        samples = np.random.poisson(rate, sample_size)

    return {
        'samples': samples,
        'mean': np.mean(samples),
        'variance': np.var(samples),
    }

# Example usage: 1000 draws from Binomial(n=10, p=0.5).
binomial_result = generate_samples_and_stats('binomial', {'n': 10, 'p': 0.5}, 1000)
print("Binomial Distribution:", binomial_result)

# 1000 draws from Poisson(lambda=4.0).
poisson_result = generate_samples_and_stats('poisson', {'lambda': 4.0}, 1000)
print("Poisson Distribution:", poisson_result)

QUESTION 6

import numpy as np

def generate_gaussian_stats(mean=0, std_dev=1, sample_size=1000):
    """
    Sample a Gaussian distribution and summarise the draws.

    Parameters:
        mean (float): Mean of the Gaussian distribution to sample from.
        std_dev (float): Standard deviation of that distribution.
        sample_size (int): Number of random samples to generate.

    Returns:
        dict: Keys "Mean", "Variance" and "Standard Deviation", computed
              from the generated samples with NumPy's default ddof=0.
    """
    draws = np.random.normal(mean, std_dev, sample_size)

    return {
        "Mean": np.mean(draws),
        "Variance": np.var(draws),
        "Standard Deviation": np.std(draws),
    }

if __name__ == "__main__":
    # Example usage
    mean = 5
    std_dev = 2
    sample_size = 1000

    # Named gaussian_stats rather than `stats` so the scipy `stats` module
    # imported by other sections is not replaced by this result dict.
    gaussian_stats = generate_gaussian_stats(mean, std_dev, sample_size)

    print("Generated Gaussian Distribution Statistics:")
    print(f"Mean: {gaussian_stats['Mean']}")
    print(f"Variance: {gaussian_stats['Variance']}")
    print(f"Standard Deviation: {gaussian_stats['Standard Deviation']}")


QUESTION 7

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def calculate_skewness(column):
    """Return the skewness of ``column`` via its own ``skew()`` method."""
    skewness = column.skew()
    return skewness

def determine_skewness_type(skewness, tolerance=0.0):
    """Classify a skewness value as positive, negative or symmetric.

    Args:
        skewness: The skewness statistic to classify.
        tolerance: Values with ``abs(skewness) <= tolerance`` are reported
            as approximately symmetric. The default of 0.0 keeps the
            original behaviour, where only an exact zero is symmetric.

    Returns:
        "Positive skewness", "Negative skewness", "Approximately symmetric",
        or "Undefined skewness" when the input is NaN.
    """
    # NaN compares False to everything, so without this check it would fall
    # through to the "symmetric" label.
    if skewness != skewness:
        return "Undefined skewness"
    if skewness > tolerance:
        return "Positive skewness"
    if skewness < -tolerance:
        return "Negative skewness"
    return "Approximately symmetric"

def calculate_covariance(column1, column2):
    """Return the covariance of two columns.

    ``np.cov`` builds the 2x2 covariance matrix; the off-diagonal entry at
    row 0, column 1 is the covariance between the two inputs.
    """
    covariance_matrix = np.cov(column1, column2)
    return covariance_matrix[0][1]

def calculate_pearson_correlation(column1, column2):
    """Return the correlation of ``column1`` with ``column2`` via ``column1.corr``."""
    correlation = column1.corr(column2)
    return correlation

def visualize_correlation(df, column1, column2):
    """Show a scatter plot of ``df[column1]`` (x-axis) against ``df[column2]`` (y-axis)."""
    _, axes = plt.subplots(figsize=(8, 6))
    sns.scatterplot(data=df, x=column1, y=column2, ax=axes)
    axes.set_title(f"Scatter Plot of {column1} vs {column2}")
    axes.set_xlabel(column1)
    axes.set_ylabel(column2)
    plt.show()

# Load seaborn's example 'tips' dataset
data = sns.load_dataset('tips')

# Columns to analyze
col1 = 'total_bill'
col2 = 'tip'

# Task (i) & (ii): Skewness of each column and its direction
skew_col1 = calculate_skewness(data[col1])
skew_col2 = calculate_skewness(data[col2])

print(f"Skewness of {col1}: {skew_col1} ({determine_skewness_type(skew_col1)})")
print(f"Skewness of {col2}: {skew_col2} ({determine_skewness_type(skew_col2)})")

# Task (iii): Covariance
covariance = calculate_covariance(data[col1], data[col2])
print(f"Covariance between {col1} and {col2}: {covariance}")

# Task (iv): Pearson correlation
pearson_corr = calculate_pearson_correlation(data[col1], data[col2])
print(f"Pearson correlation coefficient between {col1} and {col2}: {pearson_corr}")

# Task (v): Scatter plot
visualize_correlation(data, col1, col2)

QUESTION 8

import math

def normal_pdf(x, mu=0, sigma=1):
    """
    Calculate the Probability Density Function (PDF) for a normal distribution.

    Parameters:
        x (float): The value at which to calculate the PDF.
        mu (float): The mean of the distribution. Default is 0.
        sigma (float): The standard deviation of the distribution; must be
            positive. Default is 1.

    Returns:
        float: The PDF value at x.

    Raises:
        ValueError: If sigma is not positive.
    """
    # A negative sigma would yield a negative "density" and sigma == 0 would
    # divide by zero, so reject both explicitly.
    if sigma <= 0:
        raise ValueError("sigma must be positive.")
    coeff = 1 / (math.sqrt(2 * math.pi) * sigma)
    exponent = math.exp(-((x - mu) ** 2) / (2 * sigma ** 2))
    return coeff * exponent

# Example usage: density of the standard normal distribution at its mean.
print(normal_pdf(0))  # Standard normal distribution


QUESTION 9

import math

def exponential_cdf(x, lambd=1):
    """
    Calculate the Cumulative Distribution Function (CDF) for an exponential distribution.

    Parameters:
        x (float): The value at which to calculate the CDF.
        lambd (float): The rate parameter (lambda); must be positive. Default is 1.

    Returns:
        float: The CDF value at x, which is 0.0 for x < 0.

    Raises:
        ValueError: If lambd is not positive.
    """
    # A non-positive rate does not define a distribution; a negative one
    # would produce negative "probabilities".
    if lambd <= 0:
        raise ValueError("lambd must be positive.")
    if x < 0:
        return 0.0  # CDF is 0 for x < 0 in exponential distribution
    # -expm1(-t) equals 1 - exp(-t) but keeps precision when t is tiny,
    # where the subtraction would round to exactly 0.
    return -math.expm1(-lambd * x)

# Example usage: CDF at x = 1 for an exponential distribution with rate 2.
print(exponential_cdf(1, lambd=2))  # Lambda = 2


QUESTION 10

import math

def poisson_pmf(k, lambd):
    """
    Calculate the Probability Mass Function (PMF) for a Poisson distribution.

    Parameters:
        k (int): The number of occurrences (non-negative integer; NumPy
            integer types are accepted as well).
        lambd (float): The expected rate of occurrence (lambda), non-negative.

    Returns:
        float: The PMF value at k.

    Raises:
        ValueError: If k is not a non-negative integer or lambd is negative.
    """
    from numbers import Integral  # local import: the cell only imports math

    # Check the type first so a non-numeric k gets the documented ValueError
    # rather than a TypeError from the comparison.
    if not isinstance(k, Integral) or k < 0:
        raise ValueError("k must be a non-negative integer.")
    if lambd < 0:
        raise ValueError("lambd must be non-negative.")
    if lambd == 0:
        # All mass sits on k == 0; log(0) below would be undefined.
        return 1.0 if k == 0 else 0.0
    # Work in log space: lambd ** k and k! overflow a float for large k even
    # though their ratio is an ordinary probability.
    return math.exp(k * math.log(lambd) - lambd - math.lgamma(k + 1))

# Example usage: probability of exactly 3 occurrences when the rate is 2.
print(poisson_pmf(3, lambd=2))  # Lambda = 2, k = 3


QUESTION 11

import numpy as np
from scipy import stats

# Two-proportion z-test: does the purchase rate differ between layouts?
# Each visitor is encoded as 1 (purchase) or 0 (no purchase).
old_layout = np.array([1]*50 + [0]*950)  # 50 purchases out of 1000 visitors
new_layout = np.array([1]*70 + [0]*930)  # 70 purchases out of 1000 visitors

# Proportion of success (purchase) for both layouts
p_old = np.mean(old_layout)
p_new = np.mean(new_layout)

# Sample sizes
n_old = len(old_layout)
n_new = len(new_layout)

# Pooled proportion (the null hypothesis assumes one common purchase rate)
p_pooled = (np.sum(old_layout) + np.sum(new_layout)) / (n_old + n_new)

# Standard error of the difference in proportions under the null
SE = np.sqrt(p_pooled * (1 - p_pooled) * (1/n_old + 1/n_new))

# Z-statistic (old minus new, so a better new layout gives a negative value)
z_stat = (p_old - p_new) / SE

# Two-tailed p-value. sf() is the upper-tail probability 1 - cdf, computed
# without the subtraction that rounds to 0 for large |z|.
p_value = 2 * stats.norm.sf(abs(z_stat))

print(f"Z-statistic: {z_stat}")
print(f"P-value: {p_value}")


QUESTION 12

# Imports repeated here so this section runs on its own, like the others.
import numpy as np
from scipy import stats

# Data for before and after program scores
# NOTE(review): the two arrays look like paired measurements (same length,
# before/after); the test below treats them as independent samples — confirm
# that this is what the assignment asks for.
before_program = np.array([75, 80, 85, 70, 90, 78, 92, 88, 82, 87])
after_program = np.array([80, 85, 90, 80, 92, 80, 95, 90, 85, 88])

# Mean and sample standard deviation (ddof=1) of both samples
mean_before = np.mean(before_program)
mean_after = np.mean(after_program)
std_before = np.std(before_program, ddof=1)
std_after = np.std(after_program, ddof=1)

# Sample sizes
n_before = len(before_program)
n_after = len(after_program)

# Standard error for the difference in means
SE = np.sqrt((std_before**2 / n_before) + (std_after**2 / n_after))

# Z-statistic for difference in means (after minus before)
z_stat = (mean_after - mean_before) / SE

# Two-tailed p-value. sf() is the upper-tail probability 1 - cdf, computed
# without the subtraction that rounds to 0 for large |z|.
p_value = 2 * stats.norm.sf(abs(z_stat))

print(f"Z-statistic: {z_stat}")
print(f"P-value: {p_value}")


QUESTION 13

# Imports repeated here so this section runs on its own, like the others.
import numpy as np
from scipy import stats

# Data for blood pressure before and after drug
# NOTE(review): the two arrays look like paired measurements (same length,
# before/after); the test below treats them as independent samples — confirm
# that this is what the assignment asks for.
before_drug = np.array([145, 150, 140, 135, 155, 160, 152, 148, 130, 138])
after_drug = np.array([130, 140, 132, 128, 145, 148, 138, 136, 125, 130])

# Mean and sample standard deviation (ddof=1) of both samples
mean_before = np.mean(before_drug)
mean_after = np.mean(after_drug)
std_before = np.std(before_drug, ddof=1)
std_after = np.std(after_drug, ddof=1)

# Sample sizes
n_before = len(before_drug)
n_after = len(after_drug)

# Standard error for the difference in means
SE = np.sqrt((std_before**2 / n_before) + (std_after**2 / n_after))

# Z-statistic for difference in means (after minus before, so a drop in
# blood pressure gives a negative value)
z_stat = (mean_after - mean_before) / SE

# Two-tailed p-value. sf() is the upper-tail probability 1 - cdf, computed
# without the subtraction that rounds to 0 for large |z|.
p_value = 2 * stats.norm.sf(abs(z_stat))

print(f"Z-statistic: {z_stat}")
print(f"P-value: {p_value}")


QUESTION 14

import numpy as np
from scipy import stats

# Observed response times
# NOTE(review): 14.3, 47, 42, 52 and 45 are roughly ten times the other
# values and may be missing a decimal point (4.7, 4.2, ...) — confirm
# against the source data before trusting the result.
response_times = np.array([14.3, 3.8, 5.1, 4.9, 47, 42, 52, 45, 4.6, 4.41])

# Sample mean and sample standard deviation (ddof=1)
mean_response = response_times.mean()
std_response = response_times.std(ddof=1)

# Number of observations
n = response_times.size

# Mean claimed under the null hypothesis
mu_0 = 5

# Standard error of the sample mean
SE = std_response / np.sqrt(n)

# How many standard errors the sample mean lies from the claimed mean
z_stat = (mean_response - mu_0) / SE

# Lower-tail p-value: the alternative is that the true mean is below mu_0
p_value = stats.norm.cdf(z_stat)

print(f"Z-statistic: {z_stat}")
print(f"P-value: {p_value}")


QUESTION 15

import numpy as np
from scipy import stats

def ab_test_analysis(layout_a_clicks, layout_b_clicks):
    """Run a pooled-variance two-sample t-test on two sets of click counts.

    The statistic is ``(mean_a - mean_b) / SE`` with the pooled standard
    deviation and ``n_a + n_b - 2`` degrees of freedom. The results are
    printed and also returned so callers can use them.

    Args:
        layout_a_clicks: Click counts observed for layout A.
        layout_b_clicks: Click counts observed for layout B.

    Returns:
        dict: Keys 't_stat', 'df' and 'p_value' (two-tailed).
    """
    # Data for clicks on layout A and layout B
    a = np.array(layout_a_clicks)
    b = np.array(layout_b_clicks)

    # Means and sample standard deviations (ddof=1)
    mean_a = np.mean(a)
    mean_b = np.mean(b)
    std_a = np.std(a, ddof=1)
    std_b = np.std(b, ddof=1)

    # Sample sizes and degrees of freedom
    n_a = len(a)
    n_b = len(b)
    df = n_a + n_b - 2

    # Pooled standard deviation
    pooled_std = np.sqrt(((n_a - 1) * std_a**2 + (n_b - 1) * std_b**2) / df)

    # Standard error of the difference in means
    SE = pooled_std * np.sqrt(1/n_a + 1/n_b)

    # T-statistic calculation
    t_stat = (mean_a - mean_b) / SE

    # Two-tailed p-value. sf() is the upper-tail probability 1 - cdf; the
    # explicit subtraction rounds to exactly 0 once |t| is large.
    p_value = 2 * stats.t.sf(abs(t_stat), df)

    print(f"T-statistic: {t_stat}")
    print(f"Degrees of freedom: {df}")
    print(f"P-value: {p_value}")

    return {'t_stat': t_stat, 'df': df, 'p_value': p_value}

# Data for layout A and layout B clicks (ten observations each)
layout_a_clicks = [28, 32, 33, 29, 31, 34, 30, 35, 36, 37]
layout_b_clicks = [40, 41, 38, 42, 39, 44, 43, 41, 45, 47]

# Perform A/B test analysis; the function prints its results
ab_test_analysis(layout_a_clicks, layout_b_clicks)



QUESTION 16

import numpy as np
from scipy import stats

# Data for cholesterol levels of patients using existing and new drugs
existing_drug_levels = np.array([180, 182, 175, 185, 178, 176, 172, 184, 179, 183])
new_drug_levels = np.array([170, 172, 165, 168, 175, 173, 170, 178, 172, 176])

# Means and sample standard deviations (ddof=1)
mean_existing = np.mean(existing_drug_levels)
mean_new = np.mean(new_drug_levels)
std_existing = np.std(existing_drug_levels, ddof=1)
std_new = np.std(new_drug_levels, ddof=1)

# Sample sizes
n_existing = len(existing_drug_levels)
n_new = len(new_drug_levels)

# Pooled standard deviation (assumes both groups share one variance)
pooled_std = np.sqrt(((n_existing - 1) * std_existing**2 + (n_new - 1) * std_new**2) / (n_existing + n_new - 2))

# Standard error of the difference in means
SE = pooled_std * np.sqrt(1/n_existing + 1/n_new)

# T-statistic calculation (existing minus new)
t_stat = (mean_existing - mean_new) / SE

# Degrees of freedom
df = n_existing + n_new - 2

# Two-tailed p-value. sf() is the upper-tail probability 1 - cdf, computed
# without the subtraction that rounds to 0 for large |t|.
p_value = 2 * stats.t.sf(abs(t_stat), df)

# Output results
print(f"T-statistic: {t_stat}")
print(f"Degrees of freedom: {df}")
print(f"P-value: {p_value}")


QUESTION 17

import numpy as np
from scipy import stats

# Test scores recorded before and after the intervention (same order in both)
pre_intervention_scores = np.array([80, 85, 90, 75, 88, 82, 92, 78, 85, 87])
post_intervention_scores = np.array([90, 92, 88, 92, 95, 91, 96, 93, 88, 93])

# Paired t-test on the per-position differences (pre minus post)
t_stat, p_value = stats.ttest_rel(a=pre_intervention_scores, b=post_intervention_scores)

# Report the test statistic and its p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")


QUESTION 18

import numpy as np
from scipy import stats

# Synthetic salaries: a fixed seed makes every run draw the same numbers
np.random.seed(0)

# 20 draws per group from normal distributions with different centres/spreads
male_salaries = np.random.normal(50000, 10000, 20)
female_salaries = np.random.normal(55000, 9000, 20)

# Independent two-sample t-test between the two groups
t_stat, p_value = stats.ttest_ind(a=male_salaries, b=female_salaries)

# Report the test statistic and its p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")


QUESTION 19
import numpy as np
from scipy import stats

# Quality scores for the two versions (25 observations each)
version1_scores = np.array([85, 88, 82, 80, 87, 84, 90, 88, 85, 86, 81, 83, 87, 84, 89, 86, 84, 88, 85, 86, 89, 90, 87, 88, 85])
version2_scores = np.array([80, 78, 83, 81, 79, 82, 76, 80, 78, 81, 77, 82, 80, 79, 82, 79, 80, 81, 79, 82, 79, 78, 80, 81, 82])

# Independent two-sample t-test (Version 1 minus Version 2)
t_stat, p_value = stats.ttest_ind(a=version1_scores, b=version2_scores)

# Report the test statistic and its p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")



QUESTION 20

import numpy as np
from scipy import stats

# Customer satisfaction scores per branch (the groups differ in size)
branch_a_scores = np.array([4, 5, 3, 4, 5, 3.4, 4.5, 4.4, 3.4, 5.5, 4.3, 4.5, 5, 4, 3, 5, 4, 4, 5, 3, 4, 5, 4])
branch_b_scores = np.array([3, 4, 2, 3, 4, 3, 2, 3, 4, 3, 2, 4, 3, 3, 4, 2, 3, 4, 3])

# Independent two-sample t-test (Branch A minus Branch B)
t_stat, p_value = stats.ttest_ind(a=branch_a_scores, b=branch_b_scores)

# Report the test statistic and its p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")


QUESTION 21

import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# Synthetic survey: a fixed seed makes every run draw the same respondents
np.random.seed(0)

# One age bracket and one preferred candidate per respondent (30 in total)
age_groups = np.random.choice(['18-30', '31-50', '51-60', '60+'], size=30)
voter_preferences = np.random.choice(['Candidate A', 'Candidate B'], size=30)

# Cross-tabulate: rows are age brackets, columns are candidates
data = pd.crosstab(index=age_groups, columns=voter_preferences)

# Chi-square test of independence on the contingency table
chi2_stat, p_value, dof, expected = chi2_contingency(observed=data)

# Report the table, the test results and the expected counts
print(f"Contingency Table:\n{data}")
print(f"\nChi-Square Statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies: \n{expected}")


QUESTION 22
import numpy as np
from scipy.stats import chi2_contingency

# Observed counts: satisfaction level (rows) by customer region (columns).
# Columns, left to right: East, West, North, South.
data = np.array([
    [150, 30, 40, 20],  # Satisfied
    [30, 40, 30, 50],   # Neutral
    [20, 30, 40, 30],   # Dissatisfied
])

# Chi-square test of independence between satisfaction and region
chi2_stat, p_value, dof, expected = chi2_contingency(observed=data)

# Report the table, the test results and the expected counts
print(f"Contingency Table:\n{data}")
print(f"\nChi-Square Statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies:\n{expected}")


QUESTION 23

import numpy as np
from scipy.stats import chi2_contingency

# Observed counts: job performance before training (rows) against job
# performance after training (columns).
# Columns, left to right: Effective, Neutral, Ineffective (after training).
# NOTE(review): row labels below follow the "before (rows)" description;
# confirm the intended row order against the source data.
data = np.array([
    [50, 30, 20],  # Before: Effective
    [30, 40, 30],  # Before: Neutral
    [20, 30, 40],  # Before: Ineffective
])

# Chi-square test of independence on the contingency table
chi2_stat, p_value, dof, expected = chi2_contingency(observed=data)

# Report the table, the test results and the expected counts
print(f"Contingency Table:\n{data}")
print(f"\nChi-Square Statistic: {chi2_stat}")
print(f"P-value: {p_value}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies:\n{expected}")


QUESTION 24

import numpy as np
from scipy.stats import f_oneway

# Customer satisfaction scores, ten per product version
standard_scores = [80, 85, 90, 78, 88, 82, 92, 78, 85, 87]
premium_scores = [90, 92, 88, 92, 95, 91, 96, 93, 89, 93]
deluxe_scores = [95, 98, 92, 97, 96, 94, 98, 97, 92, 99]

# One-way ANOVA across the three versions
f_stat, p_value = f_oneway(*(standard_scores, premium_scores, deluxe_scores))

# Report the F-statistic and its p-value
print(f"F-statistic: {f_stat}")
print(f"P-value: {p_value}")
