In [16]:
#3
class DiscreteRandomVariable:
    def __init__(self, values, probabilities):
        """
        Initialize the discrete random variable.

        :param values: A list of possible values the random variable can take.
        :param probabilities: A list of probabilities corresponding to each value.
        """
        if len(values) != len(probabilities):
            raise ValueError("The length of values and probabilities must be the same.")
        if not abs(sum(probabilities) - 1) < 1e-8:
            raise ValueError("The sum of the probabilities must be 1.")
        
        self.values = values
        self.probabilities = probabilities

    def expected_value(self):
        """
        Calculate the expected value (mean) of the random variable.

        :return: The expected value.
        """
        return sum(value * probability for value, probability in zip(self.values, self.probabilities))

    def variance(self):
        """
        Calculate the variance of the random variable.

        :return: The variance.
        """
        mean = self.expected_value()
        return sum(probability * (value - mean) ** 2 for value, probability in zip(self.values, self.probabilities))

# Example usage
values = [1, 2, 3, 4, 5, 6]
probabilities = [1/6] * 6  # Uniform distribution for a fair six-sided die
random_variable = DiscreteRandomVariable(values, probabilities)

# Calculating the expected value and variance
expected_value = random_variable.expected_value()
variance = random_variable.variance()

print(f"Expected Value: {expected_value}")
print(f"Variance: {variance}")


Expected Value: 3.5
Variance: 2.9166666666666665


In [17]:
#4
import random

def roll_die(n_rolls):
    """
    Simulate rolling a fair six-sided die n times.

    :param n_rolls: Number of rolls to simulate.
    :return: A list of outcomes from the die rolls.
    """
    return [random.randint(1, 6) for _ in range(n_rolls)]

def calculate_expected_value(rolls):
    """
    Calculate the expected value (mean) of the die rolls.

    :param rolls: A list of outcomes from the die rolls.
    :return: The expected value.
    """
    return sum(rolls) / len(rolls)

def calculate_variance(rolls):
    """
    Calculate the variance of the die rolls.

    :param rolls: A list of outcomes from the die rolls.
    :return: The variance.
    """
    mean = calculate_expected_value(rolls)
    return sum((x - mean) ** 2 for x in rolls) / len(rolls)

# Simulate rolling the die
n_rolls = 10000  # Number of rolls
die_rolls = roll_die(n_rolls)

# Calculate expected value and variance
expected_value = calculate_expected_value(die_rolls)
variance = calculate_variance(die_rolls)

# Output the results
print(f"Number of Rolls: {n_rolls}")
print(f"Expected Value: {expected_value}")
print(f"Variance: {variance}")


Number of Rolls: 10000
Expected Value: 3.5033
Variance: 2.921189109999905


In [18]:
#5
import numpy as np

def generate_samples(distribution, params, size):
    """
    Generate random samples from a specified probability distribution and calculate their mean and variance.

    :param distribution: The name of the distribution ('binomial', 'poisson', etc.).
    :param params: A dictionary of parameters specific to the distribution.
    :param size: The number of samples to generate.
    :return: A tuple containing the samples, their mean, and their variance.
    """
    if distribution == 'binomial':
        n = params['n']
        p = params['p']
        samples = np.random.binomial(n, p, size)
    elif distribution == 'poisson':
        lam = params['lam']
        samples = np.random.poisson(lam, size)
    else:
        raise ValueError("Unsupported distribution type.")
    
    mean = np.mean(samples)
    variance = np.var(samples)
    
    return samples, mean, variance

# Example usage
# Binomial distribution: n = 10, p = 0.5
binomial_params = {'n': 10, 'p': 0.5}
binomial_samples, binomial_mean, binomial_variance = generate_samples('binomial', binomial_params, 10000)
print(f"Binomial Distribution: Mean = {binomial_mean}, Variance = {binomial_variance}")

# Poisson distribution: λ = 3
poisson_params = {'lam': 3}
poisson_samples, poisson_mean, poisson_variance = generate_samples('poisson', poisson_params, 10000)
print(f"Poisson Distribution: Mean = {poisson_mean}, Variance = {poisson_variance}")


Binomial Distribution: Mean = 4.9612, Variance = 2.56889456
Poisson Distribution: Mean = 3.0057, Variance = 2.91286751


In [19]:
#6
import numpy as np

def generate_gaussian_samples(mean, std_dev, size):
    """
    Generate random samples from a Gaussian (normal) distribution and calculate their mean, variance, and standard deviation.

    :param mean: The mean (μ) of the Gaussian distribution.
    :param std_dev: The standard deviation (σ) of the Gaussian distribution.
    :param size: The number of samples to generate.
    :return: A tuple containing the samples, their mean, variance, and standard deviation.
    """
    samples = np.random.normal(mean, std_dev, size)
    sample_mean = np.mean(samples)
    sample_variance = np.var(samples)
    sample_std_dev = np.std(samples)
    
    return samples, sample_mean, sample_variance, sample_std_dev

# Example usage
mean = 0          # Mean of the Gaussian distribution
std_dev = 1       # Standard deviation of the Gaussian distribution
size = 10000      # Number of samples to generate

samples, sample_mean, sample_variance, sample_std_dev = generate_gaussian_samples(mean, std_dev, size)

# Output the results
print(f"Generated {size} samples from a Gaussian distribution with mean {mean} and standard deviation {std_dev}")
print(f"Sample Mean: {sample_mean}")
print(f"Sample Variance: {sample_variance}")
print(f"Sample Standard Deviation: {sample_std_dev}")


Generated 10000 samples from a Gaussian distribution with mean 0 and standard deviation 1
Sample Mean: 0.00891192492775449
Sample Variance: 0.9882564158571675
Sample Standard Deviation: 0.994110866984748


In [20]:
#8
import math

def normal_pdf(x, mean, std_dev):
    """
    Calculate the probability density function (PDF) of a normal distribution.

    :param x: The value at which to calculate the PDF.
    :param mean: The mean (μ) of the normal distribution.
    :param std_dev: The standard deviation (σ) of the normal distribution.
    :return: The PDF value at x.
    """
    exponent = math.exp(-((x - mean) ** 2) / (2 * std_dev ** 2))
    pdf_value = (1 / (math.sqrt(2 * math.pi) * std_dev)) * exponent
    return pdf_value

# Example usage
mean = 0          # Mean of the Gaussian distribution
std_dev = 1       # Standard deviation of the Gaussian distribution
x_value = 0       # The point at which to calculate the PDF

pdf_value = normal_pdf(x_value, mean, std_dev)

# Output the result
print(f"The PDF value at x = {x_value} for a normal distribution with mean = {mean} and std_dev = {std_dev} is {pdf_value}")


The PDF value at x = 0 for a normal distribution with mean = 0 and std_dev = 1 is 0.3989422804014327


In [21]:
#9
import math

def exponential_cdf(x, rate):
    """
    Calculate the cumulative distribution function (CDF) of an exponential distribution.

    :param x: The value at which to calculate the CDF.
    :param rate: The rate parameter (λ) of the exponential distribution (λ = 1/mean).
    :return: The CDF value at x.
    """
    if x < 0:
        return 0.0
    return 1 - math.exp(-rate * x)

# Example usage
rate = 0.5  # Rate parameter (λ)
x_value = 2  # The point at which to calculate the CDF

cdf_value = exponential_cdf(x_value, rate)

# Output the result
print(f"The CDF value at x = {x_value} for an exponential distribution with rate = {rate} is {cdf_value:.4f}")


The CDF value at x = 2 for an exponential distribution with rate = 0.5 is 0.6321


In [22]:
#10
import math

def poisson_pmf(k, lam):
    """
    Calculate the probability mass function (PMF) of a Poisson distribution.

    :param k: The number of occurrences (k) for which to calculate the PMF.
    :param lam: The average number of occurrences (λ) in the Poisson distribution.
    :return: The PMF value at k.
    """
    if k < 0:
        return 0.0
    return (lam ** k * math.exp(-lam)) / math.factorial(k)

# Example usage
lam = 3    # Average number of occurrences (λ)
k_value = 2  # Number of occurrences (k)

pmf_value = poisson_pmf(k_value, lam)

# Output the result
print(f"The PMF value at k = {k_value} for a Poisson distribution with λ = {lam} is {pmf_value:.4f}")


The PMF value at k = 2 for a Poisson distribution with λ = 3 is 0.2240


In [23]:
#11
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Generate the data
old_layout = np.array([1] * 50 + [0] * 950)  # 50 purchases out of 1000 visitors
new_layout = np.array([1] * 70 + [0] * 930)  # 70 purchases out of 1000 visitors

# Count the number of successes (purchases) and total observations (visitors)
success_counts = np.array([old_layout.sum(), new_layout.sum()])
n_obs = np.array([len(old_layout), len(new_layout)])

# Perform the two-proportion z-test
z_stat, p_value = proportions_ztest(success_counts, n_obs)

# Output the results
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant difference in conversion rates between the two layouts.")
    if z_stat > 0:
        print("The new layout has a significantly higher conversion rate.")
    else:
        print("The old layout has a significantly higher conversion rate.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in conversion rates between the two layouts.")


Z-statistic: -1.8831
P-value: 0.0597
Fail to reject the null hypothesis: There is no significant difference in conversion rates between the two layouts.


In [24]:
#12
import numpy as np
from scipy import stats

# Sample data
before_program = np.array([75, 80, 85, 70, 90, 78, 92, 88, 82, 87])
after_program = np.array([80, 85, 90, 80, 92, 80, 95, 90, 85, 88])

# Calculate the differences
differences = after_program - before_program

# Perform the paired t-test
t_stat, p_value = stats.ttest_rel(after_program, before_program)

# Output the results
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant improvement in scores after the program.")
else:
    print("Fail to reject the null hypothesis: There is no significant improvement in scores after the program.")


T-statistic: 4.5932
P-value: 0.0013
Reject the null hypothesis: There is a significant improvement in scores after the program.


In [25]:
#13
import numpy as np
from scipy import stats

# Sample data
before_program = np.array([75, 80, 85, 70, 90, 78, 92, 88, 82, 87])
after_program = np.array([80, 85, 90, 80, 92, 80, 95, 90, 85, 88])

# Calculate the differences
differences = after_program - before_program

# Perform the paired t-test
t_stat, p_value = stats.ttest_rel(after_program, before_program)

# Output the results
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant improvement in scores after the program.")
else:
    print("Fail to reject the null hypothesis: There is no significant improvement in scores after the program.")


T-statistic: 4.5932
P-value: 0.0013
Reject the null hypothesis: There is a significant improvement in scores after the program.


In [26]:
#14
import numpy as np
from scipy import stats

# Sample data
response_times = np.array([4.3, 3.8, 5.1, 4.9, 4.7, 4.2, 5.2, 4.5, 4.6, 4.4])

# Known parameters
mu_0 = 5  # Hypothesized mean response time

# Calculate sample mean and standard deviation
sample_mean = np.mean(response_times)
sample_std_dev = np.std(response_times, ddof=1)
n = len(response_times)

# Perform the z-test
z_stat = (sample_mean - mu_0) / (sample_std_dev / np.sqrt(n))
p_value = stats.norm.cdf(z_stat)

# Output the results
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: The average response time is significantly less than 5 minutes.")
else:
    print("Fail to reject the null hypothesis: The average response time is not significantly less than 5 minutes.")


Z-statistic: -3.1845
P-value: 0.0007
Reject the null hypothesis: The average response time is significantly less than 5 minutes.


In [27]:
#15
import numpy as np
from scipy import stats

def ab_test_analysis(layout_a_clicks, layout_b_clicks):
    """
    Perform an A/B test analysis to compare click-through rates between two layouts.

    :param layout_a_clicks: List of click counts for layout A.
    :param layout_b_clicks: List of click counts for layout B.
    :return: A dictionary containing the t-statistic, degrees of freedom, and p-value.
    """
    # Convert lists to numpy arrays
    layout_a = np.array(layout_a_clicks)
    layout_b = np.array(layout_b_clicks)
    
    # Calculate means and variances
    mean_a = np.mean(layout_a)
    mean_b = np.mean(layout_b)
    var_a = np.var(layout_a, ddof=1)
    var_b = np.var(layout_b, ddof=1)
    n_a = len(layout_a)
    n_b = len(layout_b)
    
    # Calculate the t-statistic and degrees of freedom
    pooled_var = ((n_a - 1) * var_a + (n_b - 1) * var_b) / (n_a + n_b - 2)
    t_stat = (mean_a - mean_b) / np.sqrt(pooled_var * (1 / n_a + 1 / n_b))
    df = n_a + n_b - 2
    
    # Calculate the p-value
    p_value = stats.t.sf(np.abs(t_stat), df) * 2  # Two-tailed test
    
    return {
        't-statistic': t_stat,
        'degrees of freedom': df,
        'p-value': p_value
    }

# Example data
layout_a_clicks = [28, 32, 33, 29, 31, 34, 30, 35, 36, 37]
layout_b_clicks = [40, 41, 38, 42, 39, 44, 43, 41, 45, 47]

# Perform A/B test analysis
result = ab_test_analysis(layout_a_clicks, layout_b_clicks)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: There is a significant difference in click-through rates between the two layouts.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in click-through rates between the two layouts.")


T-statistic: -7.2981
Degrees of freedom: 18
P-value: 0.0000
Reject the null hypothesis: There is a significant difference in click-through rates between the two layouts.


In [28]:
#16
import numpy as np
from scipy import stats

# Data for cholesterol levels
existing_drug_levels = [180, 182, 175, 185, 178, 176, 172, 184, 179, 183]
new_drug_levels = [170, 172, 165, 168, 175, 173, 170, 178, 172, 176]

def t_test_independent(group1, group2):
    """
    Perform an independent two-sample t-test to compare the means of two groups.

    :param group1: List of data for the first group.
    :param group2: List of data for the second group.
    :return: A dictionary containing the t-statistic, degrees of freedom, and p-value.
    """
    group1 = np.array(group1)
    group2 = np.array(group2)
    
    # Perform the independent t-test
    t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)  # Welch's t-test
    
    # Degrees of freedom for Welch's t-test
    df = len(group1) + len(group2) - 2
    
    return {
        't-statistic': t_stat,
        'degrees of freedom': df,
        'p-value': p_value
    }

# Perform the t-test
result = t_test_independent(existing_drug_levels, new_drug_levels)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: There is a significant difference in cholesterol levels between the two drugs.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in cholesterol levels between the two drugs.")


T-statistic: 4.1405
Degrees of freedom: 18
P-value: 0.0006
Reject the null hypothesis: There is a significant difference in cholesterol levels between the two drugs.


In [29]:
#17
import numpy as np
from scipy import stats

# Data for test scores
pre_intervention_scores = [80, 85, 90, 75, 88, 82, 92, 78, 85, 87]
post_intervention_scores = [90, 92, 88, 92, 95, 91, 96, 93, 89, 93]

def t_test_paired(before, after):
    """
    Perform a paired t-test to compare the means of two related groups.

    :param before: List of scores before the intervention.
    :param after: List of scores after the intervention.
    :return: A dictionary containing the t-statistic, degrees of freedom, and p-value.
    """
    before = np.array(before)
    after = np.array(after)
    
    # Perform the paired t-test
    t_stat, p_value = stats.ttest_rel(after, before)
    
    # Degrees of freedom
    df = len(before) - 1
    
    return {
        't-statistic': t_stat,
        'degrees of freedom': df,
        'p-value': p_value
    }

# Perform the t-test
result = t_test_paired(pre_intervention_scores, post_intervention_scores)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: The intervention had a significant impact on math scores.")
else:
    print("Fail to reject the null hypothesis: The intervention did not have a significant impact on math scores.")


T-statistic: 4.4284
Degrees of freedom: 9
P-value: 0.0017
Reject the null hypothesis: The intervention had a significant impact on math scores.


In [30]:
#18
import numpy as np
from scipy import stats

# Generate synthetic salary data for male and female employees
np.random.seed(0)  # For reproducibility

male_salaries = np.random.normal(loc=50000, scale=10000, size=20)
female_salaries = np.random.normal(loc=55000, scale=9000, size=20)

def t_test_independent(group1, group2):
    """
    Perform an independent two-sample t-test to compare the means of two groups.

    :param group1: List of data for the first group.
    :param group2: List of data for the second group.
    :return: A dictionary containing the t-statistic, degrees of freedom, and p-value.
    """
    group1 = np.array(group1)
    group2 = np.array(group2)
    
    # Perform the independent t-test
    t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)  # Welch's t-test
    
    # Degrees of freedom for Welch's t-test
    df = len(group1) + len(group2) - 2
    
    return {
        't-statistic': t_stat,
        'degrees of freedom': df,
        'p-value': p_value
    }

# Perform the t-test
result = t_test_independent(male_salaries, female_salaries)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: There is a significant gender-based salary gap.")
else:
    print("Fail to reject the null hypothesis: There is no significant gender-based salary gap.")


T-statistic: 0.0611
Degrees of freedom: 38
P-value: 0.9516
Fail to reject the null hypothesis: There is no significant gender-based salary gap.


In [31]:
#19
import numpy as np
from scipy import stats

# Data for quality scores
version1_scores = [85, 88, 82, 89, 87, 84, 90, 88, 85, 86, 91, 83, 87, 84, 89, 86, 84, 88, 85, 86, 89, 90, 87, 88, 85]
version2_scores = [80, 78, 83, 81, 79, 82, 76, 80, 78, 81, 77, 82, 80, 79, 82, 79, 80, 81, 79, 82, 79, 78, 80, 81, 82]

# Perform the t-test
result = t_test_independent(version1_scores, version2_scores)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: There is a significant difference in quality between the two versions.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in quality between the two versions.")


T-statistic: 11.3258
Degrees of freedom: 48
P-value: 0.0000
Reject the null hypothesis: There is a significant difference in quality between the two versions.


In [32]:
#20
import numpy as np
from scipy import stats

# Data for customer satisfaction scores
branch_a_scores = [4, 5, 3, 4, 5, 4, 5, 3, 4, 4, 5, 4, 4, 3, 4, 5, 5, 4, 3, 4, 5, 4, 3, 5, 4, 4, 5, 3, 4, 5, 4]
branch_b_scores = [3, 4, 2, 3, 4, 3, 4, 2, 3, 3, 4, 3, 3, 2, 3, 4, 4, 3, 2, 3, 4, 3, 2, 4, 3, 3, 4, 2, 3, 4, 3]

def t_test_independent(group1, group2):
    """
    Perform an independent two-sample t-test to compare the means of two groups.
    """
    group1 = np.array(group1)
    group2 = np.array(group2)
    
    # Perform the independent t-test
    t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)  # Welch's t-test
    
    # Degrees of freedom for Welch's t-test
    df = len(group1) + len(group2) - 2
    
    return {
        't-statistic': t_stat,
        'degrees of freedom': df,
        'p-value': p_value
    }

# Perform the t-test
result = t_test_independent(branch_a_scores, branch_b_scores)

# Output the results
print(f"T-statistic: {result['t-statistic']:.4f}")
print(f"Degrees of freedom: {result['degrees of freedom']}")
print(f"P-value: {result['p-value']:.4f}")

# Interpretation
alpha = 0.05
if result['p-value'] < alpha:
    print("Reject the null hypothesis: There is a significant difference in customer satisfaction between the two branches.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in customer satisfaction between the two branches.")


T-statistic: 5.4801
Degrees of freedom: 60
P-value: 0.0000
Reject the null hypothesis: There is a significant difference in customer satisfaction between the two branches.


In [33]:
#21
import numpy as np
from scipy import stats
import pandas as pd

# Generate synthetic data
np.random.seed(0)
age_groups = np.random.choice(['18-30', '31-50', '51+'], size=500)
voter_preferences = np.random.choice(['Candidate A', 'Candidate B'], size=500)

# Create a contingency table
contingency_table = pd.crosstab(age_groups, voter_preferences)

# Perform Chi-Square test
chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)

# Output the results
print(f"Chi-square Statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies Table:")
print(expected)

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant association between age groups and voter preferences.")
else:
    print("Fail to reject the null hypothesis: There is no significant association between age groups and voter preferences.")


Chi-square Statistic: 0.8780
P-value: 0.6447
Degrees of Freedom: 2
Expected Frequencies Table:
[[96.824 85.176]
 [89.908 79.092]
 [79.268 69.732]]
Fail to reject the null hypothesis: There is no significant association between age groups and voter preferences.


In [34]:
#22
import numpy as np
from scipy import stats

# Sample data: Product satisfaction levels (rows) vs. Customer regions (columns)
data = np.array([[50, 30, 40, 20], [30, 40, 30, 50], [20, 30, 40, 30]])

# Perform Chi-Square test
chi2_stat, p_value, dof, expected = stats.chi2_contingency(data)

# Output the results
print(f"Chi-square Statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies Table:")
print(expected)

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant relationship between product satisfaction levels and customer regions.")
else:
    print("Fail to reject the null hypothesis: There is no significant relationship between product satisfaction levels and customer regions.")


Chi-square Statistic: 27.7771
P-value: 0.0001
Degrees of Freedom: 6
Expected Frequencies Table:
[[34.14634146 34.14634146 37.56097561 34.14634146]
 [36.58536585 36.58536585 40.24390244 36.58536585]
 [29.26829268 29.26829268 32.19512195 29.26829268]]
Reject the null hypothesis: There is a significant relationship between product satisfaction levels and customer regions.


In [35]:
#23
import numpy as np
from scipy import stats

# Sample data: Job performance levels before (rows) and after (columns) training
data = np.array([[50, 30, 20], [30, 40, 30], [20, 30, 40]])

# Perform Chi-Square test
chi2_stat, p_value, dof, expected = stats.chi2_contingency(data)

# Output the results
print(f"Chi-square Statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies Table:")
print(expected)

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant difference in job performance levels before and after the training.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference in job performance levels before and after the training.")


Chi-square Statistic: 22.1617
P-value: 0.0002
Degrees of Freedom: 4
Expected Frequencies Table:
[[34.48275862 34.48275862 31.03448276]
 [34.48275862 34.48275862 31.03448276]
 [31.03448276 31.03448276 27.93103448]]
Reject the null hypothesis: There is a significant difference in job performance levels before and after the training.
