In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
sales_df = pd.read_csv('/content/gdrive/MyDrive/Hackniche24/sales.csv')
sales_df.head()

Unnamed: 0,Date,Timestamp,Invoice No.,Payment Type,Order Type,Area,Item Name,Price,Qty.,Sub Total,...,Assign To,Non Taxable,CGST Rate,CGST Amount,SGST Rate,SGST Amount,VAT Rate,VAT Amount,Service Charge Rate,Service Charge Amount
0,2024-01-22,2024-01-22 23:57:19,21006,CARD,Dine In,Dining,Aeropress,247.62,1,247.62,...,,0.0,2.5,6.19,2.5,6.19,10,24.76,,
1,2024-01-22,2024-01-22 23:57:19,21006,CARD,Dine In,Dining,Iced Americano (350 ML),176.19,1,176.19,...,,0.0,2.5,4.4,2.5,4.4,10,17.62,,
2,2024-01-22,2024-01-22 23:57:19,21006,CARD,Dine In,Dining,Add On Syrup (Add On Tiramisu Syrup),47.62,1,47.62,...,,0.0,2.5,1.19,2.5,1.19,10,4.76,,
3,2024-01-22,2024-01-22 23:57:19,21006,CARD,Dine In,Dining,Baked Vada Pav,76.19,2,152.38,...,,0.0,2.5,3.81,2.5,3.81,10,15.24,,
4,2024-01-22,2024-01-22 23:43:28,21005,CARD,Dine In,Dining,South Indian Filter Kaapi (250 ML),176.19,2,352.38,...,NARESH RATHOD,0.0,2.5,8.81,2.5,8.81,10,35.24,,


In [4]:
sales_df.shape

(686, 34)

In [5]:
sales_df.columns

Index(['Date', 'Timestamp', 'Invoice No.', 'Payment Type', 'Order Type',
       'Area', 'Item Name', 'Price', 'Qty.', 'Sub Total', 'Discount', 'Tax',
       'Final Total', 'Status', 'Table No.', 'Server Name', 'Covers',
       'Variation', 'Category', 'HSN', 'Phone', 'Name', 'Address', 'GST',
       'Assign To', 'Non Taxable', 'CGST Rate', 'CGST Amount', 'SGST Rate',
       'SGST Amount', 'VAT Rate', 'VAT Amount', 'Service Charge Rate',
       'Service Charge Amount'],
      dtype='object')

In [6]:
sales_df['Category'].unique()

array(['Manual Brew', 'Cold Coffee', 'Extra Toppings', 'Food Menu',
       'Hot Coffee', 'Hot Chocolate', 'Milk', 'Savouries', 'Sweet',
       'SEASONAL MENU', 'Coffee Coolers'], dtype=object)

In [7]:
for index, row in sales_df.iterrows():
    # Access row data
    invoice_no = row['Invoice No.']
    if (invoice_no == '20746'):
      item = row['Item Name']
      # Print or process row data
      print("Invoice Number:", invoice_no)
      print("Item Bought:", item)
      print()
    # break

Invoice Number: 20746
Item Bought: Papparoti (Add On Nutella sauce)

Invoice Number: 20746
Item Bought: Baked Pav Bhaji

Invoice Number: 20746
Item Bought: South Indian Filter Kaapi (250 ML)



In [9]:
import re
def remove_parentheses(df):
    # Define a regular expression pattern to match parentheses and their contents
    pattern = re.compile(r'\s*\([^)]*\)\s*')

    # Apply the regular expression pattern to remove parentheses and their contents from each string in the 'Transaction' column
    df['Transaction'] = df['Transaction'].apply(lambda x: [re.sub(pattern, '', item) for item in x])

    return df

In [10]:
import re

def post_process_columns(df):

    # Define a regular expression pattern to match parentheses and their contents
    pattern = re.compile(r'\([^)]*\)')

    # Apply the regular expression pattern to remove parentheses and their contents from each cell in the DataFrame
    df = df.applymap(lambda x: re.sub(pattern, '', str(x)))

    # Remove parentheses and their contents from column names
    df.columns = [re.sub(r'\([^)]*\)', '', col).strip() for col in df.columns]

    # Remove `Add On` from end
    df.columns = [col.replace(' Add On', '') for col in df.columns]

    # Remove `y Gm` from end
    df.columns = [re.sub(r'\s*\d+\s*Gm$', '', col) for col in df.columns]

    # Remove `y Pc` from end
    df.columns = [re.sub(r'\s*\d+\s*Pc(s)?$', '', col) for col in df.columns]

    # Remove word `With` and after `With`
    df.columns = [re.sub(r'\s*With.*', '', col) for col in df.columns]

    return df


In [11]:
import pandas as pd

def create_transaction_df(df,col_name):
    # Group items by Invoice_no and aggregate them into lists
    transaction_df = df.groupby('Invoice No.')[col_name].apply(list).reset_index()
    # Rename the columns
    transaction_df.columns = ['Transaction_id', 'Transaction']
    return transaction_df

# Apply the function
item_df = create_transaction_df(sales_df,'Item Name')
cleaned_item_df = post_process_columns(item_df)
print(cleaned_item_df)


    Transaction_id                                        Transaction
0            20744                                ['Classic Frappe ']
1            20745                                      ['Cold Brew']
2            20746  ['Papparoti ', 'Baked Pav Bhaji', 'South India...
3            20747  ['Cappucino ', 'Origanal South Indian Frappe '...
4            20748                                    ['Vietnamese ']
..             ...                                                ...
297          C2311              ['Baked Pav Bhaji', 'Baked Vada Pav']
298          C2312                  ['Origanal South Indian Frappe ']
299          C2313  ['Rosella Jam With Filter Coffee Ganache Macar...
300          C2314  ['Baked Pav Bhaji', 'Hyderabadi Chicken Keema ...
301          C2315  ['Rosella Jam With Filter Coffee Ganache Macar...

[302 rows x 2 columns]


In [12]:
cleaned_item_df.to_csv('/content/gdrive/MyDrive/Hackniche24/cleaned_item_df.csv', index=False)

In [13]:
cat_df = create_transaction_df(sales_df,'Category')
print(cat_df)

    Transaction_id                                        Transaction
0            20744                                      [Cold Coffee]
1            20745                                      [Manual Brew]
2            20746                     [Sweet, Food Menu, Hot Coffee]
3            20747  [Hot Coffee, Cold Coffee, Hot Coffee, Hot Choc...
4            20748                                      [Cold Coffee]
..             ...                                                ...
297          C2311                             [Food Menu, Food Menu]
298          C2312                                      [Cold Coffee]
299          C2313                     [SEASONAL MENU, SEASONAL MENU]
300          C2314     [Food Menu, Food Menu, Hot Coffee, Hot Coffee]
301          C2315                                    [SEASONAL MENU]

[302 rows x 2 columns]


In [14]:
def get_nominal_data(col_name):
  items = list(sales_df[col_name].unique())
  grouped = sales_df.groupby('Invoice No.')
  transaction_level = grouped.aggregate(lambda x: tuple(x)).reset_index()[['Invoice No.',col_name]]
  transaction_dict = {item:0 for item in items}
  output_dict = dict()
  temp = dict()
  for rec in transaction_level.to_dict('records'):
      invoice_num = rec['Invoice No.']
      items_list = rec[col_name]
      transaction_dict = {item:0 for item in items}
      transaction_dict.update({item:1 for item in items if item in items_list})
      temp.update({invoice_num:transaction_dict})

  new = [v for k,v in temp.items()]
  transaction_df2 = pd.DataFrame(new)
  return transaction_df2

In [15]:
item_nominal = get_nominal_data('Item Name')
item_nominal = post_process_columns(item_nominal)
item_nominal = item_nominal.astype(str).replace({'1':True,'0':False})
item_nominal.head()

Unnamed: 0,Aeropress,Iced Americano,Add On Syrup,Baked Vada Pav,South Indian Filter Kaapi,Origanal South Indian Frappe,Origanal South Indian Frappe.1,Spicy Banana Chips,Classic Frappe,Cold Brew,...,Almond Frappe,Madagascar Hot Chocolate,Coconut Nankhatai,Irish Americano,Hazelnut Frappe,Kaapicino,Calzone Mix,Cashew Nuts Nankhatai Veg,ALMOND MILK,Cafe Mocha
0,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [16]:
item_nominal.shape

(302, 113)

In [17]:
item_nominal.columns.unique()

Index(['Aeropress', 'Iced Americano', 'Add On Syrup', 'Baked Vada Pav',
       'South Indian Filter Kaapi', 'Origanal South Indian Frappe',
       'Spicy Banana Chips', 'Classic Frappe', 'Cold Brew', 'Berliners',
       'Cafe Latte', 'Cappucino', 'Almond Frappe', 'Madagascar Hot Chocolate',
       'Sea Salt Dark Mocha', 'ALMOND MILK', 'Whole Wheat Ladi Pav',
       'Madagascar Chocochip Frappe', 'Caramel Frappe', 'Chicken Calzone',
       'Mix Tartlet', 'Bon Bon', 'Almond Honey Latte', 'Hazelnut Frappe',
       'Papparoti', 'Nariyal Irish Cream Frappe', 'Kaapicino',
       'Choco-crinkle-cookies', 'Rosella Jam', 'Hyderabadi Soya Keema Pav',
       'Yellow Banana Chips', 'Vietnamese', 'Iced Latte', 'Espresso Tonic',
       'OAT MILK', 'Baked Pav Bhaji', 'Hyderabadi Chicken Keema Pav',
       'Americano', 'Calzones Veg', 'Mix Berliner', 'Hot Chocolate',
       'Strawberry White Chocolate Ganache', 'White Loaf Bread',
       'Flat White 250 Ml', 'Cafe Mocha', 'Tartlets', 'Berliner Mix',
 

In [18]:
cat_nomial = get_nominal_data('Category')
cat_nominal = cat_nomial.astype(str).replace({'1':True,'0':False})
cat_nominal.head()

Unnamed: 0,Manual Brew,Cold Coffee,Extra Toppings,Food Menu,Hot Coffee,Hot Chocolate,Milk,Savouries,Sweet,SEASONAL MENU,Coffee Coolers
0,False,True,False,False,False,False,False,False,False,False,False
1,True,False,False,False,False,False,False,False,False,False,False
2,False,False,False,True,True,False,False,False,True,False,False
3,False,True,False,True,True,True,False,False,False,True,False
4,False,True,False,False,False,False,False,False,False,False,False


In [21]:
import mlxtend
from mlxtend.frequent_patterns import fpgrowth, apriori,association_rules

# Create an FP-Growth object
# support is used to measure the abundance or frequency (often interpreted as significance or importance) of an itemset in a database.
fp = fpgrowth(item_nominal, min_support=0.008, use_colnames=True, verbose=1, max_len=5)

# Filter frequent itemsets to keep only those with length > 1
fp_filtered = fp[fp['itemsets'].apply(lambda x: len(x)) > 1]

association_rules = association_rules(fp_filtered, min_threshold=0.01, support_only=True, metric='confidence')

print("\n\n------------------------------------------\n\n")


# Assuming 'frequent_itemsets_df' is your DataFrame containing the frequent itemsets
for index, row in fp_filtered.iterrows():
    support = row['support']
    itemset = list(row['itemsets'])[0]
    print(f"Frequent Itemset: {itemset}, Support: {support}")

print("\n\n------------------------------------------\n\n")

def print_association_rules(association_rules_df):
    for index, rule in association_rules_df.iterrows():
        antecedents = ', '.join(rule['antecedents'])
        consequents = ', '.join(rule['consequents'])
        support = rule['support']
        confidence = rule['confidence']
        lift = rule['lift']

        print(f"Rule: {antecedents} -> {consequents}")
        # print(f" - Support: {confidence:.4f}")
        print(f" - Confidence: {support:.4f}")
        # print(f" - Lift: {lift:.4f}")
        print()

# Example usage:
print_association_rules(association_rules)



65 itemset(s) from tree conditioned on items ()
0 itemset(s) from tree conditioned on items (Classic Frappe)
0 itemset(s) from tree conditioned on items (Cold Brew)
1 itemset(s) from tree conditioned on items (South Indian Filter Kaapi)
2 itemset(s) from tree conditioned on items (Baked Pav Bhaji)
0 itemset(s) from tree conditioned on items (Baked Pav Bhaji, Baked Vada Pav)
0 itemset(s) from tree conditioned on items (Baked Pav Bhaji, South Indian Filter Kaapi)
0 itemset(s) from tree conditioned on items (Papparoti)
0 itemset(s) from tree conditioned on items (Cappucino)
0 itemset(s) from tree conditioned on items (Hot Chocolate)
0 itemset(s) from tree conditioned on items (Origanal South Indian Frappe)
1 itemset(s) from tree conditioned on items (Rosella Cheesecake Berliner)
0 itemset(s) from tree conditioned on items (Vietnamese)
0 itemset(s) from tree conditioned on items (Cafe Latte)
0 itemset(s) from tree conditioned on items (Add On Syrup)
0 itemset(s) from tree c

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Formatting

In [22]:
frequent_itemsets = []
for index, row in fp_filtered.iterrows():
    support = row['support']
    itemsets = list(row['itemsets'])
    for item in itemsets:
      frequent_itemsets.append({"itemset":item, "support":support})
print(frequent_itemsets)

[{'itemset': 'South Indian Filter Kaapi', 'support': 0.009933774834437087}, {'itemset': 'Origanal South Indian Frappe', 'support': 0.009933774834437087}, {'itemset': 'Baked Vada Pav', 'support': 0.013245033112582781}, {'itemset': 'Baked Pav Bhaji', 'support': 0.013245033112582781}, {'itemset': 'South Indian Filter Kaapi', 'support': 0.013245033112582781}, {'itemset': 'Baked Pav Bhaji', 'support': 0.013245033112582781}, {'itemset': 'Papparoti', 'support': 0.009933774834437087}, {'itemset': 'Rosella Cheesecake Berliner', 'support': 0.009933774834437087}, {'itemset': 'Baked Vada Pav', 'support': 0.009933774834437087}, {'itemset': 'Hyderabadi Chicken Keema Pav', 'support': 0.009933774834437087}, {'itemset': 'Chicken Calzone', 'support': 0.009933774834437087}, {'itemset': 'Almond Honey Latte', 'support': 0.009933774834437087}, {'itemset': 'Baked Vada Pav', 'support': 0.009933774834437087}, {'itemset': 'South Indian Filter Kaapi', 'support': 0.009933774834437087}, {'itemset': 'Cappucino', 's

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [23]:
import random

def generate_random_color():
    # Generate random RGB values
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    # Convert RGB values to hexadecimal color code
    color_code = "#{:02x}{:02x}{:02x}".format(r, g, b)
    return color_code

def generate_color_map(itemsets):
    color_map = {}
    for item in itemsets:
        if item['itemset'] not in color_map:
            color_map[item['itemset']] = generate_random_color()
    return color_map

color_map = generate_color_map(frequent_itemsets)
print(color_map)


{'South Indian Filter Kaapi': '#022def', 'Origanal South Indian Frappe': '#b3a73e', 'Baked Vada Pav': '#29b2dd', 'Baked Pav Bhaji': '#66fe15', 'Papparoti': '#6a9896', 'Rosella Cheesecake Berliner': '#d7d64f', 'Hyderabadi Chicken Keema Pav': '#7a77f4', 'Chicken Calzone': '#79c7e6', 'Almond Honey Latte': '#ba8cd3', 'Cappucino': '#293340', 'Kaapicino': '#1dd9dd', 'Iced Americano': '#a9a995', 'Madagascar Chocochip Frappe': '#454e49', 'Calzones Veg': '#dc3f9a', 'Strawberry White Chocolate Ganache': '#87694f', 'Rosella Jam': '#37f9e9'}


  and should_run_async(code)


In [24]:
import json

# Dictionary to store node data
nodes = []

# Assigning color to each node
color_map = generate_color_map(frequent_itemsets)

# Mapping support to node size
min_support = min(item['support'] for item in frequent_itemsets)
max_support = max(item['support'] for item in frequent_itemsets)

for item in frequent_itemsets:
    node = {
        "color": color_map.get(item['itemset'], "#000000"),
        "label": item['itemset'],
        "attributes": {},
        "y": random.randint(-1000,1000),  # You can assign y-coordinate as per your requirement
        "x": random.randint(-1000,1000),  # You can assign x-coordinate as per your requirement
        "id": item['itemset'],  # Unique identifier for the node
        "size": ((item['support'] - min_support) / (max_support - min_support)) + 5
    }
    nodes.append(node)

# Constructing the final dictionary
final_data = {"nodes": nodes}

# Converting to JSON format
# final_json = json.dumps(data, indent=4)
print(final_data)


{'nodes': [{'color': '#fe28e2', 'label': 'South Indian Filter Kaapi', 'attributes': {}, 'y': -321, 'x': 80, 'id': 'South Indian Filter Kaapi', 'size': 5.0}, {'color': '#5d8fe6', 'label': 'Origanal South Indian Frappe', 'attributes': {}, 'y': 852, 'x': 614, 'id': 'Origanal South Indian Frappe', 'size': 5.0}, {'color': '#62a75f', 'label': 'Baked Vada Pav', 'attributes': {}, 'y': -904, 'x': -258, 'id': 'Baked Vada Pav', 'size': 5.166666666666667}, {'color': '#185570', 'label': 'Baked Pav Bhaji', 'attributes': {}, 'y': 487, 'x': -296, 'id': 'Baked Pav Bhaji', 'size': 5.166666666666667}, {'color': '#fe28e2', 'label': 'South Indian Filter Kaapi', 'attributes': {}, 'y': -917, 'x': 128, 'id': 'South Indian Filter Kaapi', 'size': 5.166666666666667}, {'color': '#185570', 'label': 'Baked Pav Bhaji', 'attributes': {}, 'y': 196, 'x': 767, 'id': 'Baked Pav Bhaji', 'size': 5.166666666666667}, {'color': '#1dc2ef', 'label': 'Papparoti', 'attributes': {}, 'y': 204, 'x': -953, 'id': 'Papparoti', 'size': 

  and should_run_async(code)


In [25]:
association_rules = [
    {'antecedent': 'Iced Americano', 'consequent': 'Baked Vada Pav', 'confidence': 0.0166},
    {'antecedent': 'Baked Vada Pav', 'consequent': 'Iced Americano', 'confidence': 0.0166},
    {'antecedent': 'Origanal South Indian Frappe', 'consequent': 'Baked Vada Pav', 'confidence': 0.0132},
    {'antecedent': 'Baked Vada Pav', 'consequent': 'Origanal South Indian Frappe', 'confidence': 0.0132},
    {'antecedent': 'Baked Vada Pav', 'consequent': 'Baked Pav Bhaji', 'confidence': 0.0132},
    {'antecedent': 'Baked Pav Bhaji', 'consequent': 'Baked Vada Pav', 'confidence': 0.0132},
    {'antecedent': 'Cappucino', 'consequent': 'Madagascar Chocochip Frappe', 'confidence': 0.0132},
    {'antecedent': 'Madagascar Chocochip Frappe', 'consequent': 'Cappucino', 'confidence': 0.0132},
    {'antecedent': 'Papparoti', 'consequent': 'Kaapicino', 'confidence': 0.0132},
    {'antecedent': 'Kaapicino', 'consequent': 'Papparoti', 'confidence': 0.0132},
    {'antecedent': 'South Indian Filter Kaapi', 'consequent': 'Papparoti', 'confidence': 0.0132},
    {'antecedent': 'Papparoti', 'consequent': 'South Indian Filter Kaapi', 'confidence': 0.0132},
    {'antecedent': 'Rosella Jam', 'consequent': 'Strawberry White Chocolate Ganache', 'confidence': 0.0298},
    {'antecedent': 'Strawberry White Chocolate Ganache', 'consequent': 'Rosella Jam', 'confidence': 0.0298},
    {'antecedent': 'South Indian Filter Kaapi', 'consequent': 'Baked Pav Bhaji', 'confidence': 0.0132},
    {'antecedent': 'Baked Pav Bhaji', 'consequent': 'South Indian Filter Kaapi', 'confidence': 0.0132}
]

edges = []
for rule in association_rules:
    edges.append({
        "sourceID": rule['antecedent'],
        "targetID": rule['consequent'],
        "size": 1,
        "confidence": rule['confidence'] * 500  # Adjusting confidence for visualization
    })

association_rules_data = {"edges": edges}

# Converting to JSON format
# association_rules_data = json.dumps(association_rules_data, indent=4)
print(association_rules_data["edges"])


[{'sourceID': 'Iced Americano', 'targetID': 'Baked Vada Pav', 'size': 1, 'confidence': 8.3}, {'sourceID': 'Baked Vada Pav', 'targetID': 'Iced Americano', 'size': 1, 'confidence': 8.3}, {'sourceID': 'Origanal South Indian Frappe', 'targetID': 'Baked Vada Pav', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Baked Vada Pav', 'targetID': 'Origanal South Indian Frappe', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Baked Vada Pav', 'targetID': 'Baked Pav Bhaji', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Baked Pav Bhaji', 'targetID': 'Baked Vada Pav', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Cappucino', 'targetID': 'Madagascar Chocochip Frappe', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Madagascar Chocochip Frappe', 'targetID': 'Cappucino', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Papparoti', 'targetID': 'Kaapicino', 'size': 1, 'confidence': 6.6}, {'sourceID': 'Kaapicino', 'targetID': 'Papparoti', 'size': 1, 'confidence': 6.6}, {'sourceID': 'South Indian Filter Kaapi', 'targetID':

  and should_run_async(code)


In [26]:
final_data["edges"] = association_rules_data["edges"]

  and should_run_async(code)


In [27]:
final_json = json.dumps(final_data, indent=4)
print(final_json)

{
    "nodes": [
        {
            "color": "#fe28e2",
            "label": "South Indian Filter Kaapi",
            "attributes": {},
            "y": -321,
            "x": 80,
            "id": "South Indian Filter Kaapi",
            "size": 5.0
        },
        {
            "color": "#5d8fe6",
            "label": "Origanal South Indian Frappe",
            "attributes": {},
            "y": 852,
            "x": 614,
            "id": "Origanal South Indian Frappe",
            "size": 5.0
        },
        {
            "color": "#62a75f",
            "label": "Baked Vada Pav",
            "attributes": {},
            "y": -904,
            "x": -258,
            "id": "Baked Vada Pav",
            "size": 5.166666666666667
        },
        {
            "color": "#185570",
            "label": "Baked Pav Bhaji",
            "attributes": {},
            "y": 487,
            "x": -296,
            "id": "Baked Pav Bhaji",
            "size": 5.166666666666667
       

  and should_run_async(code)


In [309]:
file_path = "/content/gdrive/MyDrive/Hackniche24/association_rules_data.json"

# Write JSON data to the file
with open(file_path, "w") as file:
    json.dump(final_data, file)

print("JSON data saved to:", file_path)

JSON data saved to: /content/gdrive/MyDrive/Hackniche24/association_rules_data.json


  and should_run_async(code)
