<a href="https://colab.research.google.com/github/sudarshan-360/Machine-Learning/blob/main/Matching_Function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sentence-transformers faiss-cpu




In [None]:
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import random
from datetime import datetime, timedelta
from pprint import pprint

# --- Data generation ---
# Vocabularies that the synthetic report generator samples from at random.
ITEMS = [
    "wallet",
    "phone",
    "id card",
    "keys",
    "earbuds",
]
COLORS = [
    "black",
    "brown",
    "blue",
    "red",
]
LOCATIONS = [
    "Library",
    "Hostel",
    "Cafeteria",
    "Gym",
]

def fake_description(item):
    """Return a short synthetic description for ``item``.

    A colour is picked at random from the module-level ``COLORS`` list and
    placed in front of the item name; the trailing ", with mark" is fixed.
    """
    colour = random.choice(COLORS)
    return f"{colour} {item}, with mark"

def gen_reports(n_found=20, n_lost=5):
    """Create synthetic "found" and "lost" reports.

    Every report is a dict with the keys ``"id"``, ``"item"``,
    ``"description"``, ``"location"`` and ``"date"``. Found reports get ids
    ``F0, F1, ...`` and lost reports ``L0, L1, ...``. The date is an ISO-8601
    string lying 0 to 7 whole days before the moment this function is called.

    Args:
        n_found: Number of found reports to create.
        n_lost: Number of lost reports to create.

    Returns:
        A ``(found, lost)`` tuple of two lists of report dicts.
    """
    reference_time = datetime.now()

    def make_report(report_id):
        # Draw order (item, colour, location, day offset) mirrors the dict
        # layout so a seeded ``random`` yields the same sequence of reports.
        kind = random.choice(ITEMS)
        return {
            "id": report_id,
            "item": kind,
            "description": fake_description(kind),
            "location": random.choice(LOCATIONS),
            "date": (
                reference_time - timedelta(days=random.randint(0, 7))
            ).isoformat(),
        }

    # All found reports are generated before any lost report.
    found_reports = [make_report(f"F{n}") for n in range(n_found)]
    lost_reports = [make_report(f"L{n}") for n in range(n_lost)]
    return found_reports, lost_reports

# Generate one data set and dump both halves under their own heading.
found, lost = gen_reports()
for heading, reports in (
    ("===== FOUND ITEMS =====", found),
    ("\n===== LOST ITEMS =====", lost),
):
    print(heading)
    pprint(reports)

# --- Sentence embeddings + FAISS ---
# Load the sentence-embedding model by name.
model = SentenceTransformer("all-MiniLM-L6-v2")
# One text per found report: the item name followed by its description.
texts = [f"{it['item']} {it['description']}" for it in found]
embeddings = model.encode(texts, convert_to_numpy=True)
# L2-normalise the vectors. The return value is not used, so this relies on
# the call modifying `embeddings` in place. With unit-length vectors the
# inner product used by the index below equals cosine similarity.
faiss.normalize_L2(embeddings)

# Flat inner-product index sized to the embedding dimension. Vectors are
# added in the order of `found`, so index position i refers to found[i].
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

# --- Normalization helper ---
def normalize_scores(scores):
    """Min-max scale ``scores`` into the range [0, 1].

    Args:
        scores: Iterable of numeric scores (list, tuple, 1-D array,
            generator, ...).

    Returns:
        A list with one scaled value per input score, in input order.
        An empty input yields an empty list. When all scores are equal
        (including the single-element case) every entry is ``1.0``.
    """
    # Materialise once: min() and max() would otherwise exhaust a one-shot
    # iterable, and an explicit list makes the emptiness check unambiguous.
    scores = list(scores)
    if not scores:
        # min()/max() raise ValueError on an empty sequence.
        return []

    min_s, max_s = min(scores), max(scores)
    span = max_s - min_s
    if span == 0:
        # No spread to scale by; treat every score as a full match.
        return [1.0 for _ in scores]
    return [(s - min_s) / span for s in scores]

# --- Enhanced weighted matching ---
def weighted_match(lost_item, top_k=5):
    """Rank every found report against one lost report and print the best.

    The combined score is a weighted sum of three parts:

    * text similarity (weight 0.7): inner product between the normalised
      query embedding and each indexed embedding, min-max scaled over the
      search results;
    * location (weight 0.2): 1.0 when both reports name the same location,
      otherwise 0.0;
    * date (weight 0.1): ``1 - days_apart / 7`` clamped at 0, so reports a
      week or more apart contribute nothing.

    Uses the module-level ``model``, ``index``, ``found`` and
    ``normalize_scores``.

    Args:
        lost_item: Report dict with the keys ``"item"``, ``"description"``,
            ``"location"`` and ``"date"`` (ISO-8601 string).
        top_k: Number of best matches to print.

    Returns:
        None. The lost report and its top matches are printed.
    """
    # Text embedding of the query, normalised the same way as the index rows.
    query_text = f"{lost_item['item']} {lost_item['description']}"
    q_emb = model.encode([query_text], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)

    # Search the whole index (not just top_k) so that location and date can
    # still lift a report whose text similarity alone is mediocre.
    D, I = index.search(q_emb, k=len(found))

    # Raw similarities for the single query, rescaled to [0, 1].
    text_scores_norm = normalize_scores(list(D[0]))

    results = []
    lost_date = datetime.fromisoformat(lost_item['date'])

    for idx, ts in zip(I[0], text_scores_norm):
        f = found[idx]
        # Location score (0 or 1)
        loc_score = 1.0 if f['location'] == lost_item['location'] else 0.0

        # Date score: closer = higher.
        # Take abs() of the timedelta itself, not of its .days attribute:
        # negative timedeltas are normalised with a floored .days (a gap of
        # -1 hour has .days == -1), which would make the distance depend on
        # which of the two reports is the earlier one.
        f_date = datetime.fromisoformat(f['date'])
        days_diff = abs(lost_date - f_date).days
        date_score = max(0, 1 - (days_diff / 7))  # normalized 0–1

        # Weighted combined score
        combined_score = ts*0.7 + loc_score*0.2 + date_score*0.1
        results.append((combined_score, f))

    # Sort descending by score only; the report dicts are never compared.
    results.sort(reverse=True, key=lambda x: x[0])

    # Print top matches
    print("\n==============================")
    print("LOST ITEM BEING TESTED:")
    pprint(lost_item)
    print("\nTOP MATCHES:")
    for score, f in results[:top_k]:
        print(f"Match Score={score:.2f} | Found={f}")

# --- Test all lost items ---
# Run the matcher once per generated lost report; each call prints the
# report followed by its top matches.
for li in lost:
    weighted_match(li)


===== FOUND ITEMS =====
[{'date': '2025-09-05T13:43:51.279100',
  'description': 'red phone, with mark',
  'id': 'F0',
  'item': 'phone',
  'location': 'Gym'},
 {'date': '2025-09-06T13:43:51.279100',
  'description': 'black id card, with mark',
  'id': 'F1',
  'item': 'id card',
  'location': 'Library'},
 {'date': '2025-09-05T13:43:51.279100',
  'description': 'red wallet, with mark',
  'id': 'F2',
  'item': 'wallet',
  'location': 'Cafeteria'},
 {'date': '2025-09-01T13:43:51.279100',
  'description': 'red wallet, with mark',
  'id': 'F3',
  'item': 'wallet',
  'location': 'Gym'},
 {'date': '2025-09-07T13:43:51.279100',
  'description': 'red phone, with mark',
  'id': 'F4',
  'item': 'phone',
  'location': 'Library'},
 {'date': '2025-09-01T13:43:51.279100',
  'description': 'black keys, with mark',
  'id': 'F5',
  'item': 'keys',
  'location': 'Gym'},
 {'date': '2025-09-05T13:43:51.279100',
  'description': 'red keys, with mark',
  'id': 'F6',
  'item': 'keys',
  'location': 'Gym'},
 {