In [1]:
import pandas as pd
from math import factorial

In [2]:
# Read the sales CSV (path is relative to the notebook's working directory)
# into the DataFrame that every later cell works from.
dataset_path = "supermarket_sales -.csv"
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [3]:
# Row count of the dataset; used below as the denominator of every
# unconditional probability (each row is counted as one transaction).
total_transactions = df.shape[0]

In [4]:
# Events are represented as boolean masks with one entry per row of df.
A = df['Customer type'].eq('Member')   # Event A: the customer is a Member
B = df['Payment'].eq('Credit card')    # Event B: payment was made by credit card

In [12]:
# Estimate probabilities as relative frequencies over all transactions.
both_A_and_B = A & B  # rows where both events occur

P_A = A.sum() / total_transactions
P_B = B.sum() / total_transactions
P_A_inter_B = both_A_and_B.sum() / total_transactions

# Inclusion-exclusion: P(A ∪ B) = P(A) + P(B) - P(A ∩ B)
P_A_union_B = P_A + P_B - P_A_inter_B

# Conditional probability: P(A | B) = P(A ∩ B) / P(B)
P_A_given_B = P_A_inter_B / P_B

print(f"P(A): {P_A}")
print(f"P(B): {P_B}")
print(f"P(A ∪ B): {P_A_union_B}")
print(f"P(A ∩ B): {P_A_inter_B}")
print(f"P(A | B): {P_A_given_B}")

P(A): 0.501
P(B): 0.311
P(A ∪ B): 0.6400000000000001
P(A ∩ B): 0.172
P(A | B): 0.5530546623794211


In [16]:
# Fraction of all transactions whose product line is 'Health and beauty'.
is_health_beauty = df['Product line'] == 'Health and beauty'
P_health_beauty = is_health_beauty.sum() / total_transactions
print(f"P(Health & Beauty): {P_health_beauty}")

P(Health & Beauty): 0.152


In [17]:
# Conditional probability: among Ewallet transactions only, the fraction
# with a quantity strictly greater than 5.
paid_by_ewallet = df['Payment'] == 'Ewallet'
quantity_over_5 = df['Quantity'] > 5
P_more_than_5_given_ewallet = (quantity_over_5 & paid_by_ewallet).sum() / paid_by_ewallet.sum()
print(f"P(More than 5 | Ewallet): {P_more_than_5_given_ewallet}")

P(More than 5 | Ewallet): 0.48695652173913045


In [18]:
# Conditional probability: among Branch 'A' transactions only, the fraction
# paid in cash.
# NOTE(review): the result is labelled "Yangon" but the filter is Branch == 'A';
# presumably branch A is the Yangon store -- confirm against the dataset.
in_branch_a = df['Branch'] == 'A'
paid_in_cash = df['Payment'] == 'Cash'
P_cash_given_yangon = (in_branch_a & paid_in_cash).sum() / in_branch_a.sum()
print(f"P(Cash | Yangon): {P_cash_given_yangon}")

P(Cash | Yangon): 0.3235294117647059


In [19]:
# Joint probability over all transactions: the customer is a Member AND the
# rating is strictly above 8.
is_member = df['Customer type'] == 'Member'
rated_above_8 = df['Rating'] > 8
P_member_rating_above_8 = (is_member & rated_above_8).sum() / total_transactions
print(f"P(Member & Rating > 8): {P_member_rating_above_8}")

P(Member & Rating > 8): 0.157


In [7]:
# Functions for factorial, permutations, and combinations
def permutations(n, r):
    """Return the number of ordered arrangements of r items drawn from n (nPr).

    Computed as n! / (n - r)! using exact integer arithmetic.

    Parameters
    ----------
    n : int
        Size of the pool to choose from.
    r : int
        Number of items to arrange.

    Returns
    -------
    int
        n! / (n - r)!, or 0 when r > n (no such arrangement exists). This
        matches how combinations() treats r > n.

    Raises
    ------
    ValueError
        If r is negative, or if n is negative (the latter is raised by
        math.factorial).
    """
    if r < 0:
        # Without this check a negative r would silently yield a meaningless
        # value (e.g. n=0, r=-1 evaluates to 1).
        raise ValueError("r must be a non-negative integer")
    if r > n:
        # factorial(n - r) would raise on a negative argument; more items
        # than the pool holds simply means zero arrangements.
        return 0
    return factorial(n) // factorial(n - r)

In [8]:
def combinations(n, r):
    """Return the number of unordered selections of r items from n (nCr).

    Computed as n! / (r! * (n - r)!) using exact integer arithmetic.
    Returns 0 whenever r <= n does not hold, i.e. when more items are
    requested than the pool contains.
    """
    if not r <= n:
        return 0
    denominator = factorial(r) * factorial(n - r)
    return factorial(n) // denominator

In [20]:
# Evaluate the counting examples: ordered picks of 5 from 20, unordered picks
# of 3 from 6, and the number of orderings of the distinct branches in the data.
n_branches = df['Branch'].nunique()

perm_5_of_20 = permutations(n=20, r=5)
comb_3_of_6 = combinations(n=6, r=3)
fact_branches = factorial(n_branches)

print(f"Permutation (5 of 20): {perm_5_of_20}")
print(f"Combination (3 of 6): {comb_3_of_6}")
print(f"Factorial (Branches): {fact_branches}")

Permutation (5 of 20): 1860480
Combination (3 of 6): 20
Factorial (Branches): 6


In [21]:
# Number of ways to choose 5 items out of 15 when order does not matter.
comb_5_of_15 = combinations(n=15, r=5)
print(f"Combination (5 of 15): {comb_5_of_15}")

Combination (5 of 15): 3003


In [22]:
# Number of 6-letter codes built from 26 letters with no letter repeated
# (ordered selection without replacement).
perm_invoice_codes = permutations(n=26, r=6)
print(f"Permutation (6-letter Invoice): {perm_invoice_codes}")

Permutation (6-letter Invoice): 165765600
