In [125]:
import pandas as pd

In [126]:
data = pd.read_json("Data.json")
data.head(5)

Unnamed: 0,id,city,budget_min,budget_max,smoking,drinking,food,sleep_schedule,cleanliness_level,work_style,personality,lifestyle_notes,roommate_expectations
0,1,Bangalore,8000,18000,False,False,non-veg,normal,4,office,Quiet and thoughtful,Enjoys yoga and cooking. Prefers calm mornings.,"Respectful, quiet after 10 PM"
1,2,Bangalore,9000,16000,False,True,non-veg,late,3,hybrid,Outgoing and social,Loves music and weekend hangouts with friends.,Someone open to occasional guests
2,3,Pune,7000,14000,False,False,veg,early,5,office,Disciplined and organized,"Marathon runner, strict routine, home by 9 PM.","Clean, structured, non-smoker"
3,4,Hyderabad,8500,17000,True,True,non-veg,late,2,remote,Casual and laid-back,"Works odd hours, game enthusiast, enjoys poker...","Flexible person, doesn't mind noise"
4,5,Mumbai,10000,20000,False,False,veg,normal,4,office,Friendly and helpful,"Volunteers on weekends, loves reading, early s...","Honest, reliable, values cleanliness"


In [127]:
new_user = {
  "id": 12,
  "city": "Bangalore",
  "budget_min": 9000,
  "budget_max": 13000,
  "smoking": "false",
  "drinking": "true",
  "food": "non-veg",
  "sleep_schedule": "early",
  "cleanliness_level": 4,
  "work_style": "hybrid",
  "personality": "Calm and organized, prefers quiet evenings",
  "lifestyle_notes": "Works weekdays, enjoys reading and light workouts",
  "roommate_expectations": "Looking for someone clean, respectful, and non-smoker"
}


In [128]:

def find_matching_users(old_users_df: pd.DataFrame, new_user: dict) -> pd.DataFrame:
    filtered_df = old_users_df.copy()
    # 1. City match (mandatory)
    filtered_df = filtered_df[
        filtered_df["city"].str.lower() == new_user["city"].lower()
    ]
    # 2. Food preference match
    filtered_df = filtered_df[
        filtered_df["food"].str.lower() == new_user["food"].lower()
    ]

    return filtered_df.reset_index(drop=True)


In [129]:
filtered_Roommates = find_matching_users(data,new_user)
filtered_Roommates

Unnamed: 0,id,city,budget_min,budget_max,smoking,drinking,food,sleep_schedule,cleanliness_level,work_style,personality,lifestyle_notes,roommate_expectations
0,1,Bangalore,8000,18000,False,False,non-veg,normal,4,office,Quiet and thoughtful,Enjoys yoga and cooking. Prefers calm mornings.,"Respectful, quiet after 10 PM"
1,2,Bangalore,9000,16000,False,True,non-veg,late,3,hybrid,Outgoing and social,Loves music and weekend hangouts with friends.,Someone open to occasional guests
2,6,Bangalore,9500,19000,False,True,non-veg,normal,3,hybrid,Ambitious and career-focused,"Fitness enthusiast, works long hours, weekends...","Independent person, respects privacy"
3,8,Bangalore,8000,15000,False,False,non-veg,late,3,remote,Creative and spontaneous,"Freelance designer, irregular hours, coffee ad...",Understanding about irregular schedule
4,13,Bangalore,8000,14000,False,False,non-veg,early,3,student,Energetic and friendly,"MBA student, morning person, occasional study ...","Supportive, doesn't mind academic discussions"
5,16,Bangalore,9000,16000,True,True,non-veg,late,2,office,Fun-loving and adventurous,"Works in startup, frequent travel, casual abou...","Flexible, adventure-seeking partner"
6,19,Bangalore,10000,19000,False,True,non-veg,late,3,hybrid,Ambitious startup founder,"Passionate entrepreneur, works irregular hours...",Understands startup hustle mentality
7,22,Bangalore,8500,16000,False,False,non-veg,late,2,remote,Introspective coder,"Systems engineer, works nights, minimalist lif...","Respects alone time, minimal interaction"
8,29,Bangalore,8500,15000,False,True,non-veg,normal,3,hybrid,Creative thinker,"UX designer, artistic, weekend gallery explorer.","Appreciates creativity, open-minded"
9,32,Bangalore,9500,17500,False,False,non-veg,early,4,office,Focused and determined,"Management consultant, frequent traveler, valu...","Respectful, keeps common areas clean"


In [130]:
def build_embedding_text(user: dict) -> str:
    return (
        f"Personality: {user.get('personality', '')}. "
        f"Lifestyle: {user.get('lifestyle_notes', '')}. "
        f"Roommate expectations: {user.get('roommate_expectations', '')}."
    ).strip()

embedding_texts = []

for _, row in filtered_Roommates.iterrows():
    user_dict = row.to_dict()
    text = build_embedding_text(user_dict)
    embedding_texts.append(text)

embedding_texts




['Personality: Quiet and thoughtful. Lifestyle: Enjoys yoga and cooking. Prefers calm mornings.. Roommate expectations: Respectful, quiet after 10 PM.',
 'Personality: Outgoing and social. Lifestyle: Loves music and weekend hangouts with friends.. Roommate expectations: Someone open to occasional guests.',
 'Personality: Ambitious and career-focused. Lifestyle: Fitness enthusiast, works long hours, weekends for relaxation.. Roommate expectations: Independent person, respects privacy.',
 'Personality: Creative and spontaneous. Lifestyle: Freelance designer, irregular hours, coffee addict.. Roommate expectations: Understanding about irregular schedule.',
 "Personality: Energetic and friendly. Lifestyle: MBA student, morning person, occasional study groups at home.. Roommate expectations: Supportive, doesn't mind academic discussions.",
 'Personality: Fun-loving and adventurous. Lifestyle: Works in startup, frequent travel, casual about home management.. Roommate expectations: Flexible, a

In [131]:
vectors = model.encode(
    embedding_texts,
    convert_to_numpy=True,
    normalize_embeddings=True
)


In [132]:
import faiss

dimension = vectors.shape[1]

index = faiss.IndexFlatIP(dimension)  # cosine similarity
index.add(vectors.astype("float32"))

print("Total vectors in index:", index.ntotal)


Total vectors in index: 14


In [133]:
id_map = filtered_Roommates["id"].tolist()
id_map


[1, 2, 6, 8, 13, 16, 19, 22, 29, 32, 34, 39, 44, 50]

In [134]:
new_user_text = build_embedding_text(new_user)

new_user_vector = model.encode(
    [new_user_text],
    convert_to_numpy=True,
    normalize_embeddings=True
).astype("float32")


In [135]:
TOP_K = 3

scores, indices = index.search(new_user_vector, TOP_K)


In [136]:
vector_results = []

for rank, idx in enumerate(indices[0]):
    vector_results.append({
        "userId": id_map[idx],
        "score": float(scores[0][rank])
    })
vector_results

[{'userId': 2, 'score': 0.8959038257598877},
 {'userId': 6, 'score': 0.8538482189178467},
 {'userId': 32, 'score': 0.8498193025588989}]

In [137]:
candidate_ids = [r["userId"] for r in vector_results]

candidates = filtered_Roommates[
    filtered_Roommates["id"].isin(candidate_ids)
].to_dict(orient="records")



candidate_ids


[2, 6, 32]