In [1]:
import redis
import json

# Define connection variables
host = 'localhost'
port =  6379
password = None #'password'

# Connect to Redis
# decode_responses=True makes the client return str values instead of bytes.
r = redis.Redis(host=host, port=port, password=password, decode_responses=True)
# Constructing the client does not contact the server, so issue a PING to
# verify the connection before reporting success; this raises right here if
# the server is unreachable or the credentials are rejected.
r.ping()
print('Connected to Redis')

# WARNING: flushdb() removes every key in the currently selected database so
# the notebook starts from a clean state. Only run this against a
# scratch/development instance.
r.flushdb()

Connected to Redis


True

# Efficient Duplicate Product Detection

## Description of the use case
This use case involves detecting duplicate product listings efficiently. By identifying duplicates, businesses can maintain a clean catalog and improve the user experience.

## Redis Data Structures Used
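- **Bloom Filters**: To efficiently check for the existence of product identifiers or attributes and detect duplicates. This notebook keys the filter on each product's name and category combination, since the product IDs themselves are unique even for duplicate listings. A Bloom filter is probabilistic: it never misses an item that was added, but it can occasionally report a false positive, so flagged products should be treated as *likely* duplicates.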

In [2]:
# Sample catalog for the use case, written as (product_id, name, category)
# rows and expanded into one dict per product. prod_004 and prod_006 repeat
# the name/category of prod_001 and prod_002 respectively.
mock_data = [
    {'product_id': pid, 'name': name, 'category': category}
    for pid, name, category in (
        ('prod_001', 'Product 1', 'home_loan'),
        ('prod_002', 'Product 2', 'personal_loan'),
        ('prod_003', 'Product 3', 'FD(short duration)'),
        ('prod_004', 'Product 1', 'home_loan'),  # Duplicate
        ('prod_005', 'Product 4', 'FD(long duration)'),
        ('prod_006', 'Product 2', 'personal_loan'),  # Duplicate
    )
]

# Redis key under which the Bloom filter for this use case is stored
bloom_filter_key = "product_bloom_filter"
# Drop any filter left over from an earlier run so detection starts empty
r.delete(bloom_filter_key)

# Function to add products to the Bloom Filter
def add_to_bloom_filter(product):
    """Record a product in the Bloom filter and report whether it looks like a duplicate.

    The product is identified by its ``name`` and ``category`` joined with a
    colon; ``product_id`` is not part of the identifier.

    Args:
        product: Mapping with at least the keys ``'name'`` and ``'category'``.

    Returns:
        bool: ``False`` if the identifier was newly added to the filter (not a
        duplicate), ``True`` if the filter reports it as already present.
        Because Bloom filters are probabilistic, ``True`` may occasionally be
        a false positive.

    Raises:
        KeyError: If ``product`` lacks ``'name'`` or ``'category'``.
    """
    product_identifier = f"{product['name']}:{product['category']}"
    # BF.ADD itself reports whether the item was newly inserted (truthy) or
    # may already have been present (falsy). Using its reply replaces the
    # separate BF.EXISTS + BF.ADD pair with a single command, which halves the
    # round trips and avoids a check-then-act race between concurrent writers.
    newly_added = r.bf().add(bloom_filter_key, product_identifier)
    return not newly_added

# Feed every product through the filter in catalog order; the ones reported
# as already present are collected as duplicates
duplicates = [product for product in mock_data if add_to_bloom_filter(product)]

# Report the products that were flagged
print('Detected duplicates:')
for duplicate in duplicates:
    print(duplicate)

# Show the filter's membership answer for each catalog entry
print('Bloom Filter contains:')
for product in mock_data:
    product_identifier = f"{product['name']}:{product['category']}"
    is_member = r.bf().exists(bloom_filter_key, product_identifier)
    print(f"{product_identifier}: {is_member}")


Detected duplicates:
{'product_id': 'prod_004', 'name': 'Product 1', 'category': 'home_loan'}
{'product_id': 'prod_006', 'name': 'Product 2', 'category': 'personal_loan'}
Bloom Filter contains:
Product 1:home_loan: 1
Product 2:personal_loan: 1
Product 3:FD(short duration): 1
Product 1:home_loan: 1
Product 4:FD(long duration): 1
Product 2:personal_loan: 1
