In [4]:
# Load the Supermarket Sales dataset, which includes attributes such as Invoice ID, Branch, City, Customer Type, Gender, Product Line, Unit Price, Quantity, Payment Method, and Rating.
# Use the file supermarket_sales.csv.

import pandas as pd

# Assuming the file is named 'supermarket_sales.csv' and is in the current directory
# Read the CSV into `df` and show a quick preview; problems are reported as
# messages instead of tracebacks.
try:
  df = pd.read_csv('supermarket_sales.csv')
  print("Successfully loaded the dataset.")
  # Preview the first rows of the dataset
  print(df.head())
  # DataFrame.info() writes its summary to stdout itself and returns None, so it
  # must not be wrapped in print() (that would emit an extra "None" line).
  df.info()
except FileNotFoundError:
  print("Error: 'supermarket_sales.csv' not found. Please ensure the file exists in the current directory or provide the correct path.")
except Exception as e:
  # Any other failure while reading or previewing the file is reported here.
  print(f"An error occurred: {e}")


Successfully loaded the dataset.
    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and travel       86.31         7  30.2085  634.3785   2/8/2019   

    Time      Payment    cogs  gross margin percentage  gross income  Rating  
0  13:08      Ewallet  522.83                 4.7619

In [5]:
# Define two events based on the dataset features. Examples:
# Event A: A transaction is made by a Member customer.
# Event B: A transaction was made using Credit Card payment.

# Define events based on dataset features
def event_a(row):
  """Event A: the transaction was made by a Member customer.

  Reads the 'Customer type' entry of ``row`` and returns the result of
  comparing it with the string 'Member'.
  """
  customer_type = row['Customer type']
  is_member = customer_type == 'Member'
  return is_member

def event_b(row):
  """Event B: the transaction was paid by credit card.

  Reads the 'Payment' entry of ``row`` and returns the result of comparing
  it with the string 'Credit card'.
  """
  payment_method = row['Payment']
  paid_by_credit_card = payment_method == 'Credit card'
  return paid_by_credit_card

# Apply the functions to create new columns indicating the events
# Evaluate both event predicates row by row and store the results as new
# boolean columns on `df` (this modifies `df` in place).
try:
  df['Event A'] = df.apply(event_a, axis=1)
  df['Event B'] = df.apply(event_b, axis=1)

  # Show the flags next to the columns they were derived from for a quick check
  print(df[['Customer type', 'Payment', 'Event A', 'Event B']].head())

except KeyError as e:
  # str(KeyError) is the repr of the key (already wrapped in quotes), which
  # would print as ''name''; use the raw key from e.args instead.
  print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame. Please ensure the file contains the required columns.")
except Exception as e:
  print(f"An error occurred: {e}")


  Customer type      Payment  Event A  Event B
0        Member      Ewallet     True    False
1        Normal         Cash    False    False
2        Normal  Credit card    False     True
3        Member      Ewallet     True    False
4        Normal      Ewallet    False    False


In [6]:
# Write a Python program to:
# Compute P(A), P(B), P(A ∪ B), P(A ∩ B), and P(A | B).
# Use the formulae:
# Union: P(A ∪ B) = P(A) + P(B) - P(A ∩ B)
# Intersection: P(A ∩ B) = count(A and B) / total transactions
# Conditional: P(A | B) = P(A ∩ B) / P(B)

# Calculate probabilities
# Counts: every row of df is one transaction; the 'Event A' / 'Event B' columns
# hold True/False flags, so summing them counts the rows where the event holds.
total_transactions = len(df)
count_a = df['Event A'].sum()
count_b = df['Event B'].sum()
# Element-wise AND of the two flag columns marks rows where both events hold
# (no need to compare against True or to build a filtered copy of df).
count_a_and_b = int((df['Event A'] & df['Event B']).sum())

# Relative frequencies
p_a = count_a / total_transactions
p_b = count_b / total_transactions
p_a_and_b = count_a_and_b / total_transactions
# Inclusion-exclusion: P(A ∪ B) = P(A) + P(B) - P(A ∩ B)
p_a_union_b = p_a + p_b - p_a_and_b

if p_b > 0:
  # Conditional probability: P(A | B) = P(A ∩ B) / P(B)
  p_a_given_b = p_a_and_b / p_b
else:
  # P(A | B) is undefined when P(B) = 0; report NaN rather than a misleading 0
  p_a_given_b = float('nan')


# Print the results
print(f"P(A): {p_a}")
print(f"P(B): {p_b}")
print(f"P(A ∩ B): {p_a_and_b}")
print(f"P(A ∪ B): {p_a_union_b}")
print(f"P(A | B): {p_a_given_b}")


P(A): 0.501
P(B): 0.311
P(A ∩ B): 0.172
P(A ∪ B): 0.6400000000000001
P(A | B): 0.5530546623794211


In [7]:
# What is the probability that a randomly chosen transaction belongs to the Health & Beauty product line?

# Calculate the probability of a transaction belonging to the Health & Beauty product line.
try:
    # Summing the boolean mask counts the matching rows without building a
    # filtered copy of the DataFrame.
    health_beauty_count = int((df['Product line'] == 'Health and beauty').sum())
    # total_transactions is the row count of df computed in the previous cell
    p_health_beauty = health_beauty_count / total_transactions
    print(f"P(Health & Beauty): {p_health_beauty}")
except KeyError as e:
    # str(KeyError) is the repr of the key (already quoted); use the raw key
    # so the message does not show doubled quotes.
    print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame. Please ensure the file contains the required columns.")
except Exception as e:
    print(f"An error occurred: {e}")


P(Health & Beauty): 0.152


In [8]:
# Given that a customer used Ewallet, what is the probability that they purchased more than 5 items?

# Calculate the probability that a customer purchased more than 5 items given they used Ewallet.

try:
    # Conditioning event: restrict the sample space to Ewallet payments
    ewallet_transactions = df[df['Payment'] == 'Ewallet']

    # Within that subset, count the transactions with more than 5 items
    more_than_5_and_ewallet = len(ewallet_transactions[ewallet_transactions['Quantity'] > 5])

    # The conditional probability is only defined when the conditioning
    # subset is non-empty.
    if len(ewallet_transactions) > 0:
        p_more_than_5_given_ewallet = more_than_5_and_ewallet / len(ewallet_transactions)
        print(f"P(Quantity > 5 | Payment = Ewallet): {p_more_than_5_given_ewallet}")
    else:
        print("No transactions found with Ewallet payment method.")

except KeyError as e:
    # str(KeyError) is the repr of the key (already quoted); use the raw key
    # so the message does not show doubled quotes.
    print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame. Please ensure the file contains the required columns.")
except Exception as e:
    print(f"An error occurred: {e}")


P(Quantity > 5 | Payment = Ewallet): 0.48695652173913045


In [9]:
# What is the probability that a randomly selected transaction from Yangon used Cash as the payment method?

# Calculate the probability that a randomly selected transaction from Yangon used Cash as the payment method.
try:
    # Conditioning event: transactions recorded in Yangon
    yangon_transactions = df[df['City'] == 'Yangon']
    # Of those, the ones paid in cash
    yangon_cash_transactions = yangon_transactions[yangon_transactions['Payment'] == 'Cash']

    # P(Cash | Yangon) is only defined when there is at least one Yangon transaction
    if len(yangon_transactions) > 0:
        p_yangon_cash = len(yangon_cash_transactions) / len(yangon_transactions)
        print(f"P(Cash | Yangon): {p_yangon_cash}")
    else:
        print("No transactions found for Yangon.")
except KeyError as e:
    # str(KeyError) is the repr of the key (already quoted); use the raw key
    # so the message does not show doubled quotes.
    print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame.")
except Exception as e:
    print(f"An error occurred: {e}")


P(Cash | Yangon): 0.3235294117647059


In [10]:
# Compute the probability of selecting a customer who is a Member and has given a rating above 8.

# Calculate the probability of selecting a customer who is a Member and has given a rating above 8.
try:
    # Joint event: the row is a Member transaction AND its rating is strictly above 8
    member_high_rating = len(df[(df['Customer type'] == 'Member') & (df['Rating'] > 8)])
    # The denominator is the number of rows in df (one row per transaction)
    total_customers = len(df)
    probability = member_high_rating / total_customers
    print(f"Probability of a Member with rating > 8: {probability}")
except KeyError as e:
    # str(KeyError) is the repr of the key (already quoted); use the raw key
    # so the message does not show doubled quotes.
    print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame.")
except Exception as e:
    print(f"An error occurred: {e}")


Probability of a Member with rating > 8: 0.157


In [11]:
# Implement functions in Python to compute:
# Factorial of a number.
# Permutations: P(n, r) = n! / (n-r)!
# Combinations: C(n, r) = n! / (r!(n-r)!)

def factorial(n):
  """Return n! for a non-negative integer n.

  A negative n does not raise: the function returns an explanatory message
  string instead of a number. 0! is 1.
  """
  if n < 0:
    return "Factorial is not defined for negative numbers."
  if n == 0:
    return 1
  product = 1
  # Multiplying by 1 changes nothing, so the running product starts at factor 2.
  for factor in range(2, n + 1):
    product *= factor
  return product

def permutations(n, r):
  """Return P(n, r) = n! / (n - r)!, the ordered selections of r items from n.

  When n or r is negative, or r exceeds n, a message string is returned
  instead of a number.
  """
  if n < 0 or r < 0 or r > n:
    return "Invalid input for permutations."
  numerator = factorial(n)
  denominator = factorial(n - r)
  # Floor division keeps the result an exact integer.
  return numerator // denominator

def combinations(n, r):
  """Return C(n, r) = n! / (r! (n - r)!), the unordered selections of r items from n.

  When n or r is negative, or r exceeds n, a message string is returned
  instead of a number.
  """
  if n < 0 or r < 0 or r > n:
    return "Invalid input for combinations."
  numerator = factorial(n)
  denominator = factorial(r) * factorial(n - r)
  # Floor division keeps the result an exact integer.
  return numerator // denominator


In [12]:
# Use your functions to solve dataset-based scenarios.
# Permutation: In how many different ways can 5 different transactions be arranged from a day's sales of 20 transactions?
# Combination: How many ways can 3 different product lines be selected from the 6 available product categories?
# Factorial: Compute the factorial of the total number of different branches in the dataset.

# Calculate Permutation: In how many different ways can 5 different transactions be arranged from a day's sales of 20 transactions?
n_transactions = 20
r_transactions = 5
# Ordered arrangements of 5 out of 20 transactions: P(20, 5)
permutation_result = permutations(n_transactions, r_transactions)
print(f"Permutation of transactions: {permutation_result}")

# Calculate Combination: How many ways can 3 different product lines be selected from the 6 available product categories?
n_product_lines = 6
r_product_lines = 3
# Unordered selections of 3 out of 6 product lines: C(6, 3)
combination_result = combinations(n_product_lines, r_product_lines)
print(f"Combination of product lines: {combination_result}")

# Calculate Factorial: Compute the factorial of the total number of different branches in the dataset.
try:
  # nunique() counts the distinct values in the 'Branch' column
  unique_branches = df['Branch'].nunique()
  factorial_branches = factorial(unique_branches)
  print(f"Factorial of unique branches: {factorial_branches}")
except KeyError as e:
  # str(KeyError) is the repr of the key (already quoted); use the raw key
  # so the message does not show doubled quotes.
  print(f"Error: Column '{e.args[0] if e.args else e}' not found in the DataFrame.")
except Exception as e:
  print(f"An error occurred: {e}")


Permutation of transactions: 1860480
Combination of product lines: 20
Factorial of unique branches: 6


In [13]:
# If the supermarket wants to display 4 different payment methods on a promotion banner from the available 3 types, in how many ways can this be done?

def combinations(n, r):
  """Return C(n, r) = n! / (r! (n - r)!), the unordered selections of r items from n.

  When n or r is negative, or r exceeds n, a message string is returned
  instead of a number.
  """
  # NOTE: this repeats the combinations() definition from an earlier cell.
  if n < 0 or r < 0 or r > n:
    return "Invalid input for combinations."
  numerator = factorial(n)
  denominator = factorial(r) * factorial(n - r)
  # Floor division keeps the result an exact integer.
  return numerator // denominator

def factorial(n):
  """Return n! for a non-negative integer n.

  A negative n does not raise: the function returns an explanatory message
  string instead of a number. 0! is 1.
  """
  # NOTE: this repeats the factorial() definition from an earlier cell.
  if n < 0:
    return "Factorial is not defined for negative numbers."
  if n == 0:
    return 1
  product = 1
  # Multiplying by 1 changes nothing, so the running product starts at factor 2.
  for factor in range(2, n + 1):
    product *= factor
  return product

# Count the selections of 4 payment methods drawn from 3 available types.
# The same type may be picked more than once here, i.e. this counts
# combinations with repetition (multisets).
# NOTE(review): the prompt asks for 4 *different* methods out of 3 types, which
# has no valid selection without repetition - confirm that repetition is intended.
n, r = 3, 4  # n: payment method types available, r: methods to display

# Combinations with repetition: C(n + r - 1, r)
ways = combinations(n + r - 1, r)
print(f"The number of ways to display 4 payment methods from 3 types is: {ways}")


The number of ways to display 4 payment methods from 3 types is: 15


In [14]:
# In how many different ways can 5 employees be selected to participate in a customer feedback survey from a total of 15 employees?

def combinations(n, r):
  """Return C(n, r) = n! / (r! (n - r)!), the unordered selections of r items from n.

  When n or r is negative, or r exceeds n, a message string is returned
  instead of a number.
  """
  # NOTE: this repeats the combinations() definition from earlier cells.
  if n < 0 or r < 0 or r > n:
    return "Invalid input for combinations."
  numerator = factorial(n)
  denominator = factorial(r) * factorial(n - r)
  # Floor division keeps the result an exact integer.
  return numerator // denominator

def factorial(n):
  """Return n! for a non-negative integer n.

  A negative n does not raise: the function returns an explanatory message
  string instead of a number. 0! is 1.
  """
  # NOTE: this repeats the factorial() definition from earlier cells.
  if n < 0:
    return "Factorial is not defined for negative numbers."
  if n == 0:
    return 1
  product = 1
  # Multiplying by 1 changes nothing, so the running product starts at factor 2.
  for factor in range(2, n + 1):
    product *= factor
  return product

# Selecting 5 of 15 employees: the order of selection is irrelevant, so this
# is a plain combination C(15, 5).
n, r = 15, 5  # n: total number of employees, r: employees to select

ways = combinations(n, r)
print(f"The number of ways to select 5 employees from 15 is: {ways}")


The number of ways to select 5 employees from 15 is: 3003


In [15]:
# If the supermarket wants to create unique invoice codes using 6 letters from the English alphabet, in how many ways can this be achieved?

import math

# Number of distinct 6-letter invoice codes that can be formed from the English alphabet
num_letters = 26  # There are 26 letters in the English alphabet
code_length = 6    # Each invoice code has 6 letters

# Every position independently takes any of the num_letters letters (order
# matters and letters may repeat), so the count is num_letters ** code_length.
# Integer exponentiation keeps the count exact for any code length, whereas
# math.pow works in floating point and loses precision for large results.
num_ways = num_letters ** code_length

print(f"The number of ways to create unique invoice codes is: {num_ways}")


The number of ways to create unique invoice codes is: 308915776
