# ðŸ”¹ 1. Test if two product sales are significantly different (t-test)

In [2]:
import numpy as np
from scipy import stats

# Simulated sales data (units sold per week for 10 weeks)
product_A = np.array([120, 115, 123, 130, 128, 119, 117, 121, 125, 118])
product_B = np.array([110, 108, 112, 115, 113, 109, 111, 114, 116, 107])

print("Mean sales - Product A:", product_A.mean())
print("Mean sales - Product B:", product_B.mean())

# Independent two-sample t-test
t_stat, p_val = stats.ttest_ind(product_A, product_B, equal_var=False)

print("\nT-test results:")
print("t-statistic = %.3f, p-value = %.4f" % (t_stat, p_val))

if p_val < 0.05:
    print("Significant difference in sales between products.")
else:
    print("No significant difference in sales between products.")


Mean sales - Product A: 121.6
Mean sales - Product B: 111.5

T-test results:
t-statistic = 5.580, p-value = 0.0001
Significant difference in sales between products.


#### ðŸ‘‰ Interpretation:

- If p < 0.05, we conclude sales differ significantly.

- If p â‰¥ 0.05, we say thereâ€™s no strong evidence of difference.

# ðŸ”¹ 2. Analyze survey data: Does gender affect purchase behavior? (Chi-square test)

In [4]:
import pandas as pd
from scipy.stats import chi2_contingency

# Survey data: Gender Ã— Purchase Behavior
# Rows = Gender (Male/Female)
# Cols = Purchase (Yes/No)
data = [[50, 30],  # Male: 50 Yes, 30 No
        [70, 20]]  # Female: 70 Yes, 20 No

# Make into a DataFrame for clarity
df = pd.DataFrame(data, 
                  index=["Male", "Female"], 
                  columns=["Purchase_Yes", "Purchase_No"])

print("Survey Contingency Table:\n")
print(df)

# Chi-square test of independence
chi2, p, dof, expected = chi2_contingency(df)

print("\nChi-square results:")
print("Chi2 = %.3f, p-value = %.4f, dof = %d" % (chi2, p, dof))
print("Expected frequencies:\n", expected)

if p < 0.05:
    print("Gender significantly affects purchase behavior.")
else:
    print("No significant effect of gender on purchase behavior.")


Survey Contingency Table:

        Purchase_Yes  Purchase_No
Male              50           30
Female            70           20

Chi-square results:
Chi2 = 4.054, p-value = 0.0441, dof = 1
Expected frequencies:
 [[56.47058824 23.52941176]
 [63.52941176 26.47058824]]
Gender significantly affects purchase behavior.


#### ðŸ‘‰ Interpretation:

- If p < 0.05, gender and purchase behavior are not independent (gender matters).

- If p â‰¥ 0.05, no significant effect of gender on purchase behavior.

# ðŸ”¹ 3: Training Effect on Test Scores (Paired t-test)

In [5]:
from scipy import stats
import numpy as np

before = np.array([60, 65, 58, 62, 61, 59, 63, 64])
after  = np.array([68, 70, 65, 72, 69, 66, 71, 74])

t_stat, p_val = stats.ttest_rel(after, before)
print("t=%.3f, p=%.4f" % (t_stat, p_val))


t=13.564, p=0.0000


# ðŸ”¹4: Marketing Campaign Effectiveness (Two-sample t-test)

In [6]:
campaign_A = np.array([12, 15, 14, 16, 13, 17])
campaign_B = np.array([10, 9, 11, 12, 8, 9])

t_stat, p_val = stats.ttest_ind(campaign_A, campaign_B, equal_var=False)
print("t=%.3f, p=%.4f" % (t_stat, p_val))


t=4.802, p=0.0008


# ðŸ”¹5: Customer Satisfaction by Store (ANOVA)

In [7]:
from scipy import stats

store1 = [4, 5, 4, 3, 4]
store2 = [2, 3, 2, 3, 2]
store3 = [5, 4, 5, 4, 5]

f_stat, p_val = stats.f_oneway(store1, store2, store3)
print("F=%.3f, p=%.4f" % (f_stat, p_val))


F=17.636, p=0.0003


# ðŸ”¹6: Dice Fairness (Chi-square Goodness of Fit)

In [8]:
import numpy as np
from scipy.stats import chisquare

observed = np.array([8, 12, 10, 11, 9, 10])  # observed counts
expected = np.array([10, 10, 10, 10, 10, 10])  # fair die

chi2, p = chisquare(observed, expected)
print("Chi2=%.3f, p=%.4f" % (chi2, p))


Chi2=1.000, p=0.9626


# ðŸ”¹7: Titanic Dataset (Chi-square Test)

In [9]:
import seaborn as sns
import pandas as pd
from scipy.stats import chi2_contingency

titanic = sns.load_dataset("titanic")
table = pd.crosstab(titanic["sex"], titanic["survived"])

chi2, p, dof, expected = chi2_contingency(table)
print("Chi2=%.3f, p=%.4f" % (chi2, p))


Chi2=260.717, p=0.0000


# ðŸ”¹8: Confidence Interval for Average Height

In [10]:
import numpy as np
import scipy.stats as stats

heights = np.array([160, 165, 170, 175, 168, 172, 169, 174])
mean = np.mean(heights)
sem = stats.sem(heights)
ci = stats.t.interval(0.95, len(heights)-1, loc=mean, scale=sem)
print("Mean=%.2f, 95%% CI=%s" % (mean, ci))


Mean=169.12, 95% CI=(np.float64(165.01869875358156), np.float64(173.23130124641844))
