📒 1. Personalized Offers Notebook

In [18]:
import redis, numpy as np, json, hashlib
from redis.commands.search.query import Query
from redis.commands.search.field import VectorField, TextField, TagField
from redis.commands.search.index_definition import IndexDefinition, IndexType

# Shared Redis client for this notebook; decode_responses=True makes replies
# come back as str instead of bytes.
r = redis.Redis(host='localhost', port=6379, decode_responses=True)

# Schema for vector + tags
schema = [
    TextField("$.category", as_name="category"),
    TagField("$.city", as_name="city"),
    TagField("$.gender", as_name="gender"),
    VectorField("$.embedding", "HNSW", {
        "TYPE": "FLOAT32",
        "DIM": 384,
        "DISTANCE_METRIC": "COSINE"
    }, as_name="embedding")
]

INDEX_NAME = "idx:offers"

# Drop index if exists (documents are kept; only the index is rebuilt)
try:
    r.ft(INDEX_NAME).dropindex(delete_documents=False)
except redis.exceptions.ResponseError:
    # The server rejects the drop when the index does not exist yet, which is
    # the expected case on a first run. Only that server-side error is
    # ignored; connection failures and interrupts now propagate instead of
    # being silently swallowed.
    pass

# Create index over JSON documents whose key starts with "offer:"
r.ft(INDEX_NAME).create_index(
    schema, definition=IndexDefinition(prefix=["offer:"], index_type=IndexType.JSON)
)


# --------------------------------------
# Helper: deterministic embedding generator
# based on city+gender+category string
# --------------------------------------
def generate_embedding(city, gender, category, dim=384):
    """Return a reproducible pseudo-embedding for a (city, gender, category) profile.

    The three values are joined with underscores, hashed with MD5, and the
    digest (reduced modulo 2**32) seeds a fresh NumPy generator, so equal
    profiles always map to the same vector.

    Args:
        city: City part of the profile.
        gender: Gender part of the profile.
        category: Category part of the profile.
        dim: Number of components in the returned vector.

    Returns:
        A list of ``dim`` Python floats drawn uniformly from [0, 1).
    """
    profile_key = "{}_{}_{}".format(city, gender, category)
    digest = hashlib.md5(profile_key.encode()).digest()
    # Interpreting the digest as a big-endian integer matches parsing its
    # hex representation in base 16.
    seed = int.from_bytes(digest, "big") % (2**32)
    return np.random.default_rng(seed).random(dim).tolist()


# --------------------------------------
# Function to generate dummy offers
# --------------------------------------
def generate_dummy_offers(num_offers: int = 10):
    """Insert ``num_offers`` randomly generated offers into Redis as JSON documents.

    Each offer gets a random city, gender and category (drawn with the global
    ``np.random`` state) plus the embedding that ``generate_embedding`` derives
    from those three values. Documents are stored under keys ``offer:001``,
    ``offer:002``, ... so existing keys with the same number are overwritten.

    Args:
        num_offers: How many offers to write.
    """
    cities = ["Mumbai", "Delhi", "Bangalore", "Chennai", "Hyderabad"]
    genders = ["male", "female"]
    categories = ["lifestyle", "fashion", "travel", "electronics", "dining", "grocery"]

    # Queue all JSON.SET commands and send them in one batch instead of paying
    # a network round trip per offer. transaction=False: plain batching, the
    # writes do not need MULTI/EXEC atomicity.
    with r.pipeline(transaction=False) as pipe:
        for i in range(1, num_offers + 1):
            city = np.random.choice(cities)
            gender = np.random.choice(genders)
            category = np.random.choice(categories)

            offer_id = f"offer:{i:03d}"
            offer_data = {
                "city": city,
                "gender": gender,
                "category": category,
                "embedding": generate_embedding(city, gender, category)
            }
            pipe.execute_command("JSON.SET", offer_id, "$", json.dumps(offer_data))
        pipe.execute()

    print(f"✅ Inserted {num_offers} dummy offers into Redis")


# --------------------------------------
# Function to query offers by user profile
# --------------------------------------
def query_offers(user_city, user_gender, user_category, k: int = 3):
    """Print and return the ``k`` stored offers nearest to a user profile.

    The profile is turned into a vector with ``generate_embedding`` and used
    for a KNN search on the ``embedding`` field of ``INDEX_NAME``. The index
    is declared with the COSINE distance metric, so a lower ``score`` means a
    closer match; results are sorted ascending by it.

    Args:
        user_city: City of the user profile.
        user_gender: Gender of the user profile.
        user_category: Preferred category of the user profile.
        k: Number of nearest offers to retrieve.

    Returns:
        The search result object returned by ``r.ft(INDEX_NAME).search``.
    """
    # Create query vector from profile (FLOAT32 bytes, matching the index TYPE)
    user_vector = np.array(
        generate_embedding(user_city, user_gender, user_category),
        dtype=np.float32
    ).tobytes()

    q = (
        Query(f"*=>[KNN {k} @embedding $vec AS score]")
        .sort_by("score")
        # Query defaults to LIMIT 0 10, which would silently cap the result
        # at 10 documents whenever k > 10; page explicitly to return all k.
        .paging(0, k)
        .return_fields("city", "gender", "category", "score")
        .dialect(2)
    )

    results = r.ft(INDEX_NAME).search(q, query_params={"vec": user_vector})
    print(f"🔎 Top {k} offers for profile [city={user_city}, gender={user_gender}, category={user_category}]:")
    for doc in results.docs:
        print(f"  - City: {doc.city}, Gender: {doc.gender}, Category: {doc.category}, Score: {doc.score}")
    return results


# --------------------------------------
# Main demo
# --------------------------------------
if __name__ == "__main__":
    # Step 1: seed Redis with 150 dummy offers
    generate_dummy_offers(num_offers=150)

    # Step 2: the simulated user profile (city, gender, preferred category)
    user_city, user_gender, user_category = "Mumbai", "male", "travel"

    # Step 3: look up the 5 offers closest to that profile
    query_offers(user_city, user_gender, user_category, k=5)


✅ Inserted 150 dummy offers into Redis
🔎 Top 5 offers for profile [city=Mumbai, gender=male, category=travel]:
  - City: Mumbai, Gender: male, Category: travel, Score: -3.57627868652e-07
  - City: Mumbai, Gender: male, Category: travel, Score: -3.57627868652e-07
  - City: Mumbai, Gender: female, Category: travel, Score: 0.207103610039
  - City: Mumbai, Gender: female, Category: fashion, Score: 0.21957886219
  - City: Mumbai, Gender: female, Category: fashion, Score: 0.21957886219


📒 2. Fraud Detection Notebook

In [27]:
#!/usr/bin/env python3
import redis, numpy as np, json, random, time
from redis.commands.search.field import VectorField, TextField, NumericField, TagField
from redis.commands.search.index_definition import IndexDefinition, IndexType
from redis.commands.search.query import Query

# -----------------------------
# Redis Connection
# -----------------------------
r = redis.Redis(host="localhost", port=6379, decode_responses=False)

# -----------------------------
# Constants
# -----------------------------
INDEX_NAME = "idx:txns"
VECTOR_DIM = 128   # embedding size

# -----------------------------
# Drop and recreate index
# -----------------------------
try:
    # delete_documents=True also removes the previously indexed txn documents,
    # so every run starts from an empty data set.
    r.ft(INDEX_NAME).dropindex(delete_documents=True)
except redis.exceptions.ResponseError:
    # The server rejects the drop when the index does not exist yet (first
    # run). Only that server-side error is ignored; connection problems now
    # surface here instead of being hidden until create_index fails.
    pass

schema = (
    TextField("$.txn_id", as_name="txn_id"),
    TextField("$.user_id", as_name="user_id"),
    TagField("$.user_city", as_name="user_city"),
    TagField("$.txn_city", as_name="txn_city"),
    TagField("$.gender", as_name="gender"),
    TextField("$.device", as_name="device"),
    TextField("$.ip_address", as_name="ip_address"),
    NumericField("$.amount", as_name="amount"),
    TagField("$.txn_type", as_name="txn_type"),
    TagField("$.is_fraud", as_name="is_fraud"),
    VectorField("$.embedding", "FLAT", {
        "TYPE": "FLOAT32",
        "DIM": VECTOR_DIM,
        "DISTANCE_METRIC": "COSINE",
        "INITIAL_CAP": 1000
    }, as_name="embedding")
)

# Index every JSON document whose key starts with "txn:"
definition = IndexDefinition(prefix=["txn:"], index_type=IndexType.JSON)
r.ft(INDEX_NAME).create_index(fields=schema, definition=definition)

# -----------------------------
# Embedding Creator
# -----------------------------
def create_embedding(txn, dim=128):
    """Encode a transaction dict as a feature vector of length ``dim``.

    Slot layout:
        0      1.0 when ``user_city`` differs from ``txn_city``
        1      ``amount`` / 100000, capped at 1.0
        2-5    one-hot transaction type (POS, ONLINE, ATM, TRANSFER)
        6-11   device keyword flags (case-insensitive substring match)
        12-14  one-hot gender (M, F, O)
        15     1.0 when ``is_fraud`` is "YES"
        16+    Gaussian noise (mean 0, std 0.05) from the global ``np.random``

    Args:
        txn: Mapping with keys ``user_city``, ``txn_city``, ``amount``,
            ``txn_type``, ``device``, ``gender`` and ``is_fraud``.
        dim: Total vector length.

    Returns:
        A list of ``dim`` Python floats (values rounded through float32).
    """
    features = np.zeros(dim, dtype=np.float32)

    # Slot 0: home city vs. transaction city mismatch
    cities_differ = txn["user_city"] != txn["txn_city"]
    features[0] = 1.0 if cities_differ else 0.0

    # Slot 1: amount scaled against an assumed 100k ceiling
    features[1] = min(txn["amount"] / 100000.0, 1.0)

    # Slots 2-5: transaction type; unknown types leave all four at zero
    type_slot = {"POS": 2, "ONLINE": 3, "ATM": 4, "TRANSFER": 5}.get(txn["txn_type"])
    if type_slot is not None:
        features[type_slot] = 1.0

    # Slots 6-11: every device keyword contained in the device string is flagged
    device_name = txn["device"].lower()
    device_slots = (
        ("iPhone", 6), ("Samsung", 7), ("Windows", 8),
        ("Macbook", 9), ("iPad", 10), ("Pixel", 11),
    )
    for keyword, slot in device_slots:
        if keyword.lower() in device_name:
            features[slot] = 1.0

    # Slots 12-14: gender; unknown codes leave all three at zero
    gender_slot = {"M": 12, "F": 13, "O": 14}.get(txn["gender"])
    if gender_slot is not None:
        features[gender_slot] = 1.0

    # Slot 15: historical fraud label
    if txn["is_fraud"] == "YES":
        features[15] = 1.0

    # Remaining slots: small random noise
    features[16:] = np.random.normal(0, 0.05, dim - 16).astype(np.float32)

    return features.tolist()

# -----------------------------
# Dummy Data Generator
# -----------------------------
def random_ip():
    """Return a random dotted-quad string; each of the four parts is in 0-255."""
    octets = tuple(random.randint(0, 255) for _ in range(4))
    return "%d.%d.%d.%d" % octets

def random_device():
    """Pick one device name at random from the fixed demo list."""
    device_names = (
        "iPhone 14",
        "Samsung S22",
        "Windows Laptop",
        "Macbook Pro",
        "iPad",
        "Pixel 7",
    )
    return random.choice(device_names)

def random_txn_type():
    """Pick one transaction type at random: POS, ONLINE, ATM or TRANSFER."""
    txn_types = ("POS", "ONLINE", "ATM", "TRANSFER")
    return random.choice(txn_types)

def generate_dummy_transactions(n=50):
    """Write ``n`` synthetic transactions to Redis as JSON documents.

    Keys are ``txn:1000`` ... ``txn:{1000 + n - 1}``. A transaction is labelled
    fraudulent when a 25% random draw hits or when the user's home city
    differs from the transaction city. Fraudulent transactions take one of a
    few fixed amounts; legitimate ones get a uniform amount between 10 and
    5000. Each document also stores the vector from ``create_embedding``.

    Args:
        n: Number of transactions to generate.
    """
    cities = ["Mumbai", "Delhi", "London", "New York", "Singapore"]
    genders = ["M", "F", "O"]

    # Queue all JSON.SET commands and send them in one batch instead of paying
    # a network round trip per transaction. transaction=False: plain batching,
    # the writes do not need MULTI/EXEC atomicity.
    with r.pipeline(transaction=False) as pipe:
        for i in range(n):
            txn_id = f"txn:{1000+i}"
            user_id = f"user_{random.randint(1, 20)}"
            user_city = random.choice(cities)
            txn_city = random.choice(cities)   # can differ
            gender = random.choice(genders)
            device = random_device()
            ip_address = random_ip()
            txn_type = random_txn_type()

            # Fraud logic
            is_fraud = False
            if random.random() < 0.25:  # baseline fraud probability
                is_fraud = True
            if user_city != txn_city:   # suspicious mismatch
                is_fraud = True

            if is_fraud:
                amount = random.choice([4999.99, 10000.0, 25000.5, 75000.75])
            else:
                amount = round(random.uniform(10.0, 5000.0), 2)

            txn_data = {
                "txn_id": txn_id,
                "user_id": user_id,
                "user_city": user_city,
                "txn_city": txn_city,
                "gender": gender,
                "device": device,
                "ip_address": ip_address,
                "txn_type": txn_type,
                "amount": amount,
                "timestamp": int(time.time()),
                "is_fraud": "YES" if is_fraud else "NO",
            }

            # meaningful embedding (derived from the fields above, so added last)
            txn_data["embedding"] = create_embedding(txn_data, VECTOR_DIM)

            pipe.execute_command("JSON.SET", txn_id, "$", json.dumps(txn_data))
        pipe.execute()

# -----------------------------
# Query Similar Transactions
# -----------------------------
def query_similar_transactions(input_txn, k=5, threshold=0.6):
    """Estimate fraud risk for a transaction from its nearest stored neighbours.

    Searches ``INDEX_NAME`` for the ``k`` transactions of the same
    ``txn_type`` whose embeddings are closest to ``input_txn["embedding"]``,
    then combines three signals into a score between 0 and 1:

        0.60 * share of returned neighbours labelled "YES"
      + 0.25 * (1 if user_city != txn_city else 0)
      + 0.15 * (1 if amount > 5000 else 0)

    Args:
        input_txn: Mapping with at least ``embedding``, ``txn_type``,
            ``user_city``, ``txn_city`` and ``amount``.
        k: Number of neighbours to retrieve.
        threshold: Score (0-1 scale) at or above which the verdict is
            "LIKELY FRAUD".

    Returns:
        dict with ``neighbors`` (the matched documents), ``fraud_score``
        (the score as a percentage, rounded to 2 decimals) and ``verdict``.
    """
    query_vector = np.array(input_txn["embedding"], dtype=np.float32).tobytes()

    # NOTE: txn_type is interpolated unescaped into the TAG filter; values
    # containing query-syntax characters would need escaping first.
    q = (
        Query("@txn_type:{%s} => [KNN %d @embedding $vec AS score]" % (input_txn["txn_type"], k))
        .sort_by("score")
        # Query defaults to LIMIT 0 10, which would silently cap the result
        # at 10 neighbours whenever k > 10 and skew fraud_ratio; page
        # explicitly so all k neighbours are returned.
        .paging(0, k)
        .return_fields("txn_id", "amount", "user_city", "txn_city", "is_fraud", "score")
        .dialect(2)
    )

    results = r.ft(INDEX_NAME).search(q, query_params={"vec": query_vector})

    # ---------------------------
    # Fraud Risk Calculation
    # ---------------------------
    fraud_neighbors = sum(1 for doc in results.docs if doc.is_fraud == "YES")
    # max(1, ...) avoids dividing by zero when no neighbour matched the filter
    fraud_ratio = fraud_neighbors / max(1, len(results.docs))

    # city mismatch
    city_mismatch = 1.0 if input_txn["user_city"] != input_txn["txn_city"] else 0.0

    # high amount anomaly
    amount_anomaly = 1.0 if input_txn["amount"] > 5000 else 0.0

    # final fraud risk score
    fraud_score = (0.6 * fraud_ratio) + (0.25 * city_mismatch) + (0.15 * amount_anomaly)

    verdict = "LIKELY FRAUD" if fraud_score >= threshold else "LIKELY LEGIT"

    return {
        "neighbors": results.docs,
        "fraud_score": round(fraud_score * 100, 2),
        "verdict": verdict
    }


# -----------------------------
# Main Execution
# -----------------------------
if __name__ == "__main__":
    # Populate Redis so the similarity search has neighbours to compare against
    generate_dummy_transactions(100)
    print("✅ Dummy transactions generated.")

    # Probe transaction: home city and transaction city differ on purpose,
    # and the fraud label is left unknown.
    test_txn = {
        "txn_id": "txn:test",
        "user_id": "user_999",
        "user_city": "Mumbai",   # expected home city
        "txn_city": "Delhi",    # different → suspicious
        "gender": "M",
        "device": "iPhone 14",
        "ip_address": "192.168.1.25",
        "txn_type": "ONLINE",
        "amount": 5000.0,
        "timestamp": int(time.time()),
        "is_fraud": "UNKNOWN",
    }

    # The embedding is computed from the fields above, so it is attached last
    test_txn["embedding"] = create_embedding(test_txn, VECTOR_DIM)
    print("\n🔍 Evaluating transaction...")
    result = query_similar_transactions(test_txn, k=5)

    print(f"Verdict: {result['verdict']}")
    print(f"Fraud Risk Score: {result['fraud_score']}%")

    print("\nTop Neighbors:")
    for doc in result["neighbors"]:
        # One line per neighbour, fields separated by ", "
        summary = (
            f"TxnID={doc.txn_id}",
            f"Amount={doc.amount}",
            f"UserCity={doc.user_city}",
            f"TxnCity={doc.txn_city}",
            f"Fraud={doc.is_fraud}",
            f"Score={doc.score}",
        )
        print(", ".join(summary))


✅ Dummy transactions generated.

🔍 Evaluating transaction...
Verdict: LIKELY FRAUD
Fraud Risk Score: 85.0%

Top Neighbors:
TxnID=txn:1083, Amount=10000, UserCity=Delhi, TxnCity=London, Fraud=YES, Score=0.36502790451
TxnID=txn:1061, Amount=10000, UserCity=Mumbai, TxnCity=New York, Fraud=YES, Score=0.365520834923
TxnID=txn:1056, Amount=25000.5, UserCity=London, TxnCity=Mumbai, Fraud=YES, Score=0.368060588837
TxnID=txn:1090, Amount=25000.5, UserCity=Mumbai, TxnCity=London, Fraud=YES, Score=0.374779224396
TxnID=txn:1078, Amount=75000.75, UserCity=Singapore, TxnCity=New York, Fraud=YES, Score=0.388195574284


📒 3. Customer Support Matching Notebook

In [34]:
import redis, numpy as np, json, hashlib
from redis.commands.search.query import Query
from redis.commands.search.field import VectorField, TextField
from redis.commands.search.index_definition import IndexDefinition, IndexType

# Connect to Redis
# Parameters
INDEX_NAME = "idx:qna"
VECTOR_DIM = 384  # embedding dimension

# Connect to Redis (decode_responses=True: replies come back as str)
r = redis.Redis(host="localhost", port=6379, decode_responses=True)

# Helper: Create deterministic embedding from text
def text_to_embedding(text, dim=VECTOR_DIM):
    """Convert text into a deterministic pseudo-embedding using hashing.

    The first four bytes of the text's SHA-256 digest seed a random number
    generator, so identical strings always produce identical vectors while
    any other string produces an unrelated one. This is a stand-in for a real
    embedding model: it carries no semantic similarity.

    Args:
        text: Input string; encoded as UTF-8 before hashing.
        dim: Number of components in the returned vector.

    Returns:
        A list of ``dim`` floats in [0, 1), rounded through float32.
    """
    h = hashlib.sha256(text.encode("utf-8")).digest()
    seed = int.from_bytes(h[:4], "little")  # deterministic 32-bit seed from hash
    # Use a private RandomState rather than np.random.seed(): it produces the
    # same values for the same seed, but does not reset the process-wide NumPy
    # RNG that other code may be relying on.
    rng = np.random.RandomState(seed)
    return rng.rand(dim).astype(np.float32).tolist()

# Reset index if exists
try:
    # Only the index is dropped; stored qna documents are kept.
    r.ft(INDEX_NAME).dropindex(delete_documents=False)
except redis.exceptions.ResponseError:
    # The server rejects the drop when the index does not exist yet, which is
    # the expected case on a first run. Only that server-side error is
    # ignored; connection failures and interrupts now propagate instead of
    # being silently swallowed.
    pass

# Define schema
schema = (
    TextField("$.question", as_name="question"),
    TextField("$.answer", as_name="answer"),
    VectorField("$.embedding", "FLAT", {
        "TYPE": "FLOAT32",
        "DIM": VECTOR_DIM,
        "DISTANCE_METRIC": "COSINE",
        "INITIAL_CAP": 1000
    }, as_name="embedding")
)
# Index every JSON document whose key starts with "qna:"
definition = IndexDefinition(prefix=["qna:"], index_type=IndexType.JSON)

# Create index
r.ft(INDEX_NAME).create_index(fields=schema, definition=definition)

# Function: Generate dummy Q&A pairs
def generate_dummy_qna():
    """Store a fixed set of card-support Q&A pairs in Redis as JSON documents.

    Documents are written under keys ``qna:01``, ``qna:02``, ... in list
    order. Each holds the question, its answer, and the embedding that
    ``text_to_embedding`` computes from the question text.
    """
    faq_pairs = (
        ("How to block my card?", "You can block your card instantly via mobile app or by calling customer support."),
        ("How to increase my credit card limit?", "Submit a request via internet banking or mobile app, subject to eligibility."),
        ("How to reset my credit card PIN?", "You can reset your PIN from the mobile app under 'Card Settings'."),
        ("How to check my reward points?", "Reward points can be checked via your monthly statement or app dashboard."),
        ("What should I do if my card is lost?", "Immediately block your card and request a replacement from customer support."),
        ("How can I dispute a fraudulent transaction?", "Raise a dispute through mobile app or helpline within 30 days of the transaction."),
        ("How to update my registered mobile number?", "Update mobile number by visiting the nearest branch or via internet banking."),
    )

    for position, pair in enumerate(faq_pairs, start=1):
        question, answer = pair
        # Only the question text is embedded; the answer is payload.
        payload = json.dumps({
            "question": question,
            "answer": answer,
            "embedding": text_to_embedding(question)
        })
        r.execute_command("JSON.SET", f"qna:{position:02d}", "$", payload)

# Function: Query for most similar Q&A
def query_similar_question(sample_question, top_k=2):
    """Return the ``top_k`` stored Q&A documents nearest to a question.

    The question is embedded with ``text_to_embedding`` and used for a KNN
    search on the ``embedding`` field of ``INDEX_NAME``. The index uses the
    COSINE distance metric, so a lower ``score`` means a closer match;
    results are sorted ascending by it.

    Args:
        sample_question: The user's question text.
        top_k: Number of nearest documents to retrieve.

    Returns:
        The search result object; each doc exposes ``question``, ``answer``
        and ``score``.
    """
    query_vector = np.array(text_to_embedding(sample_question), dtype=np.float32).tobytes()
    q = (
        Query(f"*=>[KNN {top_k} @embedding $vec AS score]")
        .sort_by("score")
        # Query defaults to LIMIT 0 10, which would silently cap the result
        # at 10 documents whenever top_k > 10; page explicitly instead.
        .paging(0, top_k)
        .return_fields("question", "answer", "score")
        .dialect(2)
    )
    results = r.ft(INDEX_NAME).search(q, query_params={"vec": query_vector})
    return results

# MAIN
if __name__ == "__main__":
    # Generate dummy Q&A set
    generate_dummy_qna()

    # Sample questions to try. Previously these were four successive
    # assignments to user_question, so only the last one ever took effect;
    # the selection is now explicit. Because text_to_embedding hashes the
    # exact text, only a question stored verbatim (the last entry) gets a
    # near-zero distance; the paraphrases map to unrelated vectors.
    sample_questions = (
        "I lost my credit card, what should I do?",
        "I want to block my card?",
        "Where can I see my reward points",
        "How to reset my credit card PIN?",
    )
    user_question = sample_questions[-1]
    results = query_similar_question(user_question, top_k=2)

    print("User Question:", user_question)
    for doc in results.docs:
        print(f"Matched Q: {doc.question}\nAnswer: {doc.answer}\nScore: {doc.score}\n")


User Question: How to reset my credit card PIN?
Matched Q: How to reset my credit card PIN?
Answer: You can reset your PIN from the mobile app under 'Card Settings'.
Score: -2.38418579102e-07

Matched Q: How to block my card?
Answer: You can block your card instantly via mobile app or by calling customer support.
Score: 0.250017523766

