In [1]:
import zipfile
import pandas as pd

# Location of the uploaded archive and the directory it is unpacked into
zip_path = "/content/archive (4).zip"
extract_path = "/mnt/data/"

# Open the archive read-only, record its member names, then unpack everything
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    file_names = zip_ref.namelist()  # names of the files stored in the archive
    zip_ref.extractall(extract_path)

# Show which files the archive contained (cell output)
file_names


['supermarket_sales -.csv']

In [2]:
# Read the extracted CSV into a DataFrame.
# extract_path already ends with "/", so the file name is appended directly.
file_path = f"{extract_path}supermarket_sales -.csv"
df = pd.read_csv(file_path)

# Preview the top rows to check the columns and value formats (cell output)
df.head()


Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax 5%,Total,Date,Time,Payment,cogs,gross margin percentage,gross income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,1/5/2019,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,3/8/2019,10:29,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,3/3/2019,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,1/27/2019,20:33,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2/8/2019,10:37,Ewallet,604.17,4.761905,30.2085,5.3


In [3]:
# Empirical probabilities: every probability below is a row count divided by
# the size of the sample space it is conditioned on.
total_transactions = len(df)

# Boolean row masks shared by several of the calculations
is_member = df["Customer type"] == "Member"
is_credit_card = df["Payment"] == "Credit card"

# Event A: the transaction was made by a Member customer
count_A = int(is_member.sum())
P_A = count_A / total_transactions

# Event B: the transaction was paid by Credit card
count_B = int(is_credit_card.sum())
P_B = count_B / total_transactions

# Intersection A ∩ B: Member customer AND Credit card payment
count_A_and_B = int((is_member & is_credit_card).sum())
P_A_and_B = count_A_and_B / total_transactions

# Union via inclusion-exclusion: P(A ∪ B) = P(A) + P(B) - P(A ∩ B)
P_A_union_B = P_A + P_B - P_A_and_B

# Conditional probability: P(A | B) = P(A ∩ B) / P(B); defined as 0 when P(B) is 0
if P_B != 0:
    P_A_given_B = P_A_and_B / P_B
else:
    P_A_given_B = 0

# Share of all transactions in the "Health and beauty" product line
count_health_beauty = int((df["Product line"] == "Health and beauty").sum())
P_health_beauty = count_health_beauty / total_transactions

# P(Quantity > 5 | Ewallet): restrict to Ewallet rows, then count large baskets
ewallet_transactions = df.loc[df["Payment"] == "Ewallet"]
count_ewallet_more_than_5 = int((ewallet_transactions["Quantity"] > 5).sum())
if len(ewallet_transactions) != 0:
    P_more_than_5_given_ewallet = count_ewallet_more_than_5 / len(ewallet_transactions)
else:
    P_more_than_5_given_ewallet = 0

# P(Cash | Yangon): restrict to Yangon rows, then count Cash payments
yangon_transactions = df.loc[df["City"] == "Yangon"]
count_yangon_cash = int((yangon_transactions["Payment"] == "Cash").sum())
if len(yangon_transactions) != 0:
    P_yangon_cash = count_yangon_cash / len(yangon_transactions)
else:
    P_yangon_cash = 0

# Joint probability (over all transactions) of Member AND Rating above 8
count_member_high_rating = int((is_member & (df["Rating"] > 8)).sum())
P_member_high_rating = count_member_high_rating / total_transactions

# Collect the results under human-readable labels
probabilities = {
    "P(A) - Member Transaction": P_A,
    "P(B) - Credit Card Payment": P_B,
    "P(A ∪ B) - Member or Credit Card": P_A_union_B,
    "P(A ∩ B) - Member and Credit Card": P_A_and_B,
    "P(A | B) - Member given Credit Card": P_A_given_B,
    "P(Health & Beauty)": P_health_beauty,
    "P(More than 5 | Ewallet)": P_more_than_5_given_ewallet,
    "P(Cash | Yangon)": P_yangon_cash,
    "P(Member & Rating > 8)": P_member_high_rating,
}

# Display the dictionary as the cell output
probabilities


{'P(A) - Member Transaction': 0.501,
 'P(B) - Credit Card Payment': 0.311,
 'P(A ∪ B) - Member or Credit Card': 0.6400000000000001,
 'P(A ∩ B) - Member and Credit Card': 0.172,
 'P(A | B) - Member given Credit Card': 0.5530546623794211,
 'P(Health & Beauty)': 0.152,
 'P(More than 5 | Ewallet)': 0.48695652173913045,
 'P(Cash | Yangon)': 0.3235294117647059,
 'P(Member & Rating > 8)': 0.157}

In [4]:
from math import factorial

# Function for permutations
def permutations(n, r):
    """Count the ordered arrangements of ``r`` items drawn from ``n`` (nPr).

    Computes ``n! / (n - r)!`` with exact integer arithmetic.

    Args:
        n: Number of items available (non-negative integer).
        r: Number of items arranged (non-negative integer).

    Returns:
        int: The number of arrangements; 0 when ``r > n`` because no
        arrangement can use more items than exist.

    Raises:
        ValueError: If ``n`` or ``r`` is negative.
    """
    # Reject negatives explicitly: a negative r would otherwise slip through
    # the formula and yield a meaningless value (e.g. 0! // 1! == 1).
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")
    # Without this guard factorial(n - r) would be called with a negative
    # argument and raise instead of reporting "zero ways".
    if r > n:
        return 0
    return factorial(n) // factorial(n - r)

# Function for combinations
def combinations(n, r):
    """Count the unordered selections of ``r`` items drawn from ``n`` (nCr).

    Computes ``n! / (r! * (n - r)!)`` with exact integer arithmetic.

    Args:
        n: Number of items available (non-negative integer).
        r: Number of items selected (non-negative integer).

    Returns:
        int: The number of selections; 0 when ``r > n`` because more items
        cannot be selected than exist.

    Raises:
        ValueError: If ``n`` or ``r`` is negative.
    """
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")
    # Without this guard factorial(n - r) would be called with a negative
    # argument and raise instead of reporting "zero ways".
    if r > n:
        return 0
    return factorial(n) // (factorial(r) * factorial(n - r))

# Permutation: ordered arrangements of 5 transactions drawn from 20
perm_5_of_20 = permutations(20, 5)

# Combination: unordered choice of 3 product lines out of 6
comb_3_of_6 = combinations(6, 3)

# Factorial: number of orderings of the 3 branches (A, B, C) -> 3!
fact_branches = factorial(3)

# Supermarket wants to display 4 payment methods but only 3 types exist.
# The operands are named once so the guard and the call always use the same
# numbers; when more items are requested than exist the answer is 0 and
# combinations() is not called at all.
payment_types_available = 3
payment_types_to_display = 4
if payment_types_to_display <= payment_types_available:
    comb_4_of_3 = combinations(payment_types_available, payment_types_to_display)
else:
    comb_4_of_3 = 0

# Combination: selecting 5 employees from 15 for a survey
comb_5_of_15 = combinations(15, 5)

# Unique invoice codes using 6 letters (26 choices).
# NOTE: a permutation counts codes in which no letter repeats; if repeated
# letters were allowed the count would be 26 ** 6 instead.
perm_6_letters = permutations(26, 6)

# Collect the results under human-readable labels
combinatorics_results = {
    "Permutation (5 from 20)": perm_5_of_20,
    "Combination (3 from 6)": comb_3_of_6,
    "Factorial (Branches)": fact_branches,
    "Combination (4 from 3, not possible)": comb_4_of_3,
    "Combination (5 from 15)": comb_5_of_15,
    "Permutation (6 letters from 26)": perm_6_letters,
}

# Display the dictionary as the cell output
combinatorics_results


{'Permutation (5 from 20)': 1860480,
 'Combination (3 from 6)': 20,
 'Factorial (Branches)': 6,
 'Combination (4 from 3, not possible)': 0,
 'Combination (5 from 15)': 3003,
 'Permutation (6 letters from 26)': 165765600}

In [6]:
from math import factorial

# Function for permutations
# NOTE: this cell redefines the helper that an earlier cell also defines; the
# definition is kept here so the cell can run on its own.
def permutations(n, r):
    """Count the ordered arrangements of ``r`` items drawn from ``n`` (nPr).

    Computes ``n! / (n - r)!`` with exact integer arithmetic.

    Args:
        n: Number of items available (non-negative integer).
        r: Number of items arranged (non-negative integer).

    Returns:
        int: The number of arrangements; 0 when ``r > n`` because no
        arrangement can use more items than exist.

    Raises:
        ValueError: If ``n`` or ``r`` is negative.
    """
    # Reject negatives explicitly: a negative r would otherwise slip through
    # the formula and yield a meaningless value (e.g. 0! // 1! == 1).
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")
    # Without this guard factorial(n - r) would be called with a negative
    # argument and raise instead of reporting "zero ways".
    if r > n:
        return 0
    return factorial(n) // factorial(n - r)

# Function for combinations
# NOTE: this cell redefines the helper that an earlier cell also defines; the
# definition is kept here so the cell can run on its own.
def combinations(n, r):
    """Count the unordered selections of ``r`` items drawn from ``n`` (nCr).

    Computes ``n! / (r! * (n - r)!)`` with exact integer arithmetic.

    Args:
        n: Number of items available (non-negative integer).
        r: Number of items selected (non-negative integer).

    Returns:
        int: The number of selections; 0 when ``r > n`` because more items
        cannot be selected than exist.

    Raises:
        ValueError: If ``n`` or ``r`` is negative.
    """
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")
    # Without this guard factorial(n - r) would be called with a negative
    # argument and raise instead of reporting "zero ways".
    if r > n:
        return 0
    return factorial(n) // (factorial(r) * factorial(n - r))

# Permutation: ordered arrangements of 5 transactions drawn from 20
perm_5_of_20 = permutations(20, 5)

# Combination: unordered choice of 3 product lines out of 6
comb_3_of_6 = combinations(6, 3)

# Factorial: number of orderings of the 3 branches (A, B, C) -> 3!
fact_branches = factorial(3)

# Supermarket wants to display 4 payment methods but only 3 types exist.
# The operands are named once so the guard and the call always use the same
# numbers; when more items are requested than exist the answer is 0 and
# combinations() is not called at all.
payment_types_available = 3
payment_types_to_display = 4
if payment_types_to_display <= payment_types_available:
    comb_4_of_3 = combinations(payment_types_available, payment_types_to_display)
else:
    comb_4_of_3 = 0

# Combination: selecting 5 employees from 15 for a survey
comb_5_of_15 = combinations(15, 5)

# Unique invoice codes using 6 letters (26 choices).
# NOTE: a permutation counts codes in which no letter repeats; if repeated
# letters were allowed the count would be 26 ** 6 instead.
perm_6_letters = permutations(26, 6)

# Print results: one "label: value" line per quantity, in a fixed order
labelled_results = [
    ("Permutation (5 from 20)", perm_5_of_20),
    ("Combination (3 from 6)", comb_3_of_6),
    ("Factorial (Branches)", fact_branches),
    ("Combination (4 from 3, not possible)", comb_4_of_3),
    ("Combination (5 from 15)", comb_5_of_15),
    ("Permutation (6 letters from 26)", perm_6_letters),
]
for label, value in labelled_results:
    print(label + ":", value)


Permutation (5 from 20): 1860480
Combination (3 from 6): 20
Factorial (Branches): 6
Combination (4 from 3, not possible): 0
Combination (5 from 15): 3003
Permutation (6 letters from 26): 165765600
