In [None]:
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install --upgrade transformers
!pip install diffusers

## Table of Contents
1. [Setup and Installation](#SETUP)
2. [LLM Prompt and Listing Generation](#LLM)
3. [Writing Listings to Database](#CHROMA-DB)
4. [Getting Buyer Profile](#PROFILE)
5. [Generate Personalized Listings](#P-LISTINGS)
6. [Image Search with CLIP](#CLIP)

## Setup and Installation <a id='SETUP'></a>

In [1]:
import os
import json
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

In [None]:
# Credentials are never written into the notebook: values already present in the
# environment are kept, and the key is requested interactively (without echo)
# only when it is missing or blank.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY", "").strip():
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ").strip()

# A missing or whitespace-only base URL is not a usable endpoint, so drop it
# instead of exporting a blank value. To use a custom endpoint, set
# OPENAI_API_BASE in the environment before starting the kernel.
# NOTE(review): presumably the OpenAI client falls back to its default endpoint
# when the variable is unset - confirm for the installed client version.
if not os.environ.get("OPENAI_API_BASE", "").strip():
    os.environ.pop("OPENAI_API_BASE", None)

In [3]:
# Instantiate every model used further down in the notebook.

# OpenAI-backed text generation (temperature 0.8) and text embeddings.
llm = OpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.8,
)
embedding_model = OpenAIEmbeddings()

# CLIP weights and the matching preprocessor, both from the same checkpoint.
clip_model = CLIPModel.from_pretrained(
    "openai/clip-vit-base-patch32"
)
clip_processor = CLIPProcessor.from_pretrained(
    "openai/clip-vit-base-patch32"
)

# Stable Diffusion pipeline is left commented out (it would need the
# StableDiffusionPipeline import and a CUDA device):
##sd_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to("cuda")



config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

## LLM Prompt and Listing Generation <a id='LLM'></a>

In [3]:
def generate_listings(llm, prompt_template, n=10):
    """Request ``n`` separate completions of the same prompt.

    Args:
        llm: Callable that receives the prompt text and returns the generated text.
        prompt_template: Prompt passed unchanged on every call.
        n: How many listings to generate.

    Returns:
        A list with ``n`` responses, each stripped of leading/trailing whitespace.
    """
    listings = []
    for _ in range(n):
        raw_listing = llm(prompt_template)
        listings.append(raw_listing.strip())
    return listings

In [4]:
# Prompt sent verbatim to the LLM for each generated listing. It fixes the
# output layout: five "Field: value" lines followed by two free-text paragraphs.
prompt_template = (
    "\n"
    "You are a helpful real estate assistant.\n"
    "\n"
    "Please generate a realistic real estate listing in the following format:\n"
    "Neighborhood: <neighborhood name>\n"
    "Price: <$xxx,xxx>\n"
    "Bedrooms: <int>\n"
    "Bathrooms: <int>\n"
    "House Size: <sqft>\n"
    "\n"
    "Description: <A paragraph with vivid and enticing details about the house>\n"
    "\n"
    "Neighborhood Description: <A paragraph that describes the neighborhood and nearby amenities>\n"
)

## Writing Listings to Database <a id='CHROMA-DB'></a>

In [5]:
def save_listings_to_json(listings, filename="listings.json"):
    """Serialize ``listings`` as JSON (2-space indent) into ``filename``.

    Args:
        listings: JSON-serializable object, typically a list of listing strings.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w") as json_file:
        json.dump(obj=listings, fp=json_file, indent=2)

def load_listings_from_json(filename="listings.json"):
    """Read listings back from a JSON file.

    The file is decoded explicitly as UTF-8 (the standard JSON encoding) so the
    result does not depend on the platform's default locale encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON content.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

def write_to_chromadb(listings, embedding_model, persist_directory="chromadb", collection_name="real_estate"):
    """Embed the listings and store them in a persistent Chroma collection.

    Args:
        listings: Iterable of listing texts. It is materialized once, so
            generators are accepted as well as lists.
        embedding_model: Embedding function handed to the Chroma collection.
        persist_directory: Directory holding the on-disk store; created if missing.
        collection_name: Name of the Chroma collection to write to.

    Returns:
        The ``Chroma`` store. When ``listings`` is empty the collection is
        opened/created but nothing is added.
    """
    listings = list(listings)
    os.makedirs(persist_directory, exist_ok=True)
    db = Chroma(
        persist_directory=persist_directory,
        collection_name=collection_name,
        embedding_function=embedding_model
    )
    # Skip the insert for an empty batch: there is nothing to embed, and the
    # vector store backend may reject an empty add.
    if listings:
        # Each listing carries its position in the input as metadata.
        metadatas = [{"id": i} for i in range(len(listings))]
        db.add_texts(listings, metadatas=metadatas)
    db.persist()
    return db

## Getting Buyer Profile <a id='PROFILE'></a>

In [6]:
def get_buyer_profile():
    """Interactively collect the buyer's preferences.

    Each question is shown as an ``input`` prompt (question text followed by a
    single space), in order.

    Returns:
        All answers joined into one string, separated by single spaces.
    """
    buyer_questions = (
        "How big do you want your house to be?",
        "What are 3 most important things for you in choosing this property?",
        "Which amenities would you like?",
        "Which transportation options are important to you?",
        "How urban do you want your neighborhood to be?",
    )
    collected = []
    for question in buyer_questions:
        collected.append(input(f"{question} "))
    return " ".join(collected)

## Generate Personalized Listings <a id='P-LISTINGS'></a>

In [7]:
def personalize_listings(llm, db, buyer_profile, k=3):
    """Rewrite the listings closest to the buyer profile so they speak to that buyer.

    Args:
        llm: Callable that receives a prompt string and returns the generated text.
        db: Store exposing ``similarity_search(query, k=...)`` whose results have
            a ``page_content`` attribute.
        buyer_profile: Text describing the buyer's preferences; used both as the
            search query and inside the rewrite prompt.
        k: Number of listings to retrieve and rewrite.

    Returns:
        A list of rewritten listings (whitespace-stripped), in retrieval order.
    """
    def build_prompt(listing_text):
        # Same prompt for every listing; only the listing text varies.
        return (
            "\n"
            "You are a helpful real estate assistant.\n"
            "\n"
            "Buyer's Preferences:\n"
            f"{buyer_profile}\n"
            "\n"
            "Original Listing:\n"
            f"{listing_text}\n"
            "\n"
            "Please rewrite the listing description and neighborhood description to subtly emphasize aspects aligned with the buyer's preferences, without changing facts.\n"
        )

    matches = db.similarity_search(buyer_profile, k=k)
    return [llm(build_prompt(match.page_content)).strip() for match in matches]

## Image Search with CLIP <a id='CLIP'></a>

In [9]:
def find_best_matching_image(buyer_profile, image_folder="images", clip_model=None,
                             clip_processor=None, extensions=(".jpg", ".jpeg")):
    """Return the image in ``image_folder`` whose CLIP embedding best matches the buyer profile.

    Args:
        buyer_profile: Text describing the buyer's preferences.
        image_folder: Directory that is scanned (non-recursively) for images.
        clip_model: Optional pre-loaded CLIP model. When omitted, the
            "openai/clip-vit-base-patch32" checkpoint is loaded on demand.
        clip_processor: Optional pre-loaded CLIP processor; loaded on demand
            from the same checkpoint when omitted.
        extensions: File-name suffixes treated as images (matched
            case-insensitively). A single string is accepted too.

    Returns:
        Path of the best-matching image, or ``None`` when the folder contains
        no matching files. The chosen path is also printed.

    Raises:
        FileNotFoundError: If ``image_folder`` does not exist.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir order is arbitrary; sorting makes tie-breaking reproducible.
    image_paths = [
        os.path.join(image_folder, fname)
        for fname in sorted(os.listdir(image_folder))
        if fname.lower().endswith(suffixes)
    ]

    # Nothing to rank: report it without loading any model weights.
    if not image_paths:
        top_image_path = None
        print("\n Top Matching Image:", top_image_path)
        return top_image_path

    if clip_model is None:
        clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    if clip_processor is None:
        clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # truncation=True keeps long profiles within CLIP's fixed text context
    # length instead of failing inside the text encoder.
    text_inputs = clip_processor(
        text=buyer_profile, return_tensors="pt", padding=True, truncation=True
    )
    with torch.no_grad():
        text_embedding = clip_model.get_text_features(**text_inputs)

    similarities = []
    for img_path in image_paths:
        # Preprocess inside the context manager so the file handle is released
        # as soon as the pixel data has been read.
        with Image.open(img_path) as image:
            inputs = clip_processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_emb = clip_model.get_image_features(**inputs)
        score = torch.nn.functional.cosine_similarity(text_embedding, image_emb).item()
        similarities.append((img_path, score))

    # max() returns the first of equally scored entries, like a stable descending sort.
    top_image_path = max(similarities, key=lambda pair: pair[1])[0]
    print("\n Top Matching Image:", top_image_path)
    return top_image_path

def save_personalized_listings(personalized, filename="personalized_listings.txt"):
    """Write the personalized listings to a text file, one numbered section each.

    The file is written as UTF-8 so that non-ASCII characters in generated text
    (accents, typographic quotes, dashes) are stored the same way on every
    platform instead of depending on the locale's default encoding.

    Args:
        personalized: Iterable of listing texts.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w", encoding="utf-8") as f:
        # Sections are numbered starting at 1.
        for number, listing in enumerate(personalized, start=1):
            f.write(f"\n=== Personalized Listing {number} ===\n{listing}\n")