# Load Dataset

In [None]:
import json

input_file = "real estate dataset.json" 
output_file = "real_estate_chatbot.jsonl"

examples = []

try:
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)
        for q in data:
            examples.append({
                "messages": [
                    {"role": "system", "content": "You are a professional global real estate assistant. Provide accurate and polite answers."},
                    {"role": "user", "content": q},
                    {"role": "assistant", "content": "Provide a clear, professional, and helpful answer to this question."}
                ]
            })
except json.JSONDecodeError:
    with open(input_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            q = json.loads(line)  
            if isinstance(q, dict) and "question" in q:
                question_text = q["question"]
            else:
                question_text = q
            examples.append({
                "messages": [
                    {"role": "system", "content": "You are a professional global real estate assistant. Provide accurate and polite answers."},
                    {"role": "user", "content": question_text},
                    {"role": "assistant", "content": "Provide a clear, professional, and helpful answer to this question."}
                ]
            })

# Save all examples as JSONL
with open(output_file, "w", encoding="utf-8") as f:
    for ex in examples:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

print(f"JSONL file saved successfully: {output_file}")


JSONL file saved successfully: real_estate_chatbot.jsonl


### API Key

In [18]:
open_ai_key="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"

In [19]:
import os
from openai import OpenAI
from time import sleep

client = OpenAI(api_key= open_ai_key)

### Models

In [20]:
models = client.models.list()
for m in models.data[:5]:
    print(m.id)

dall-e-2
gpt-4o-mini-search-preview-2025-03-11
omni-moderation-latest
gpt-4o-mini-search-preview
gpt-4-turbo


In [21]:
def upload_training_file(file_path):
    with open("real estate dataset.json" , "rb") as file:
        response = client.files.create(
            file = file,
            purpose = "fine-tune"
        )
        return response.id

In [22]:
training_file_id = upload_training_file("training_data.jsonl")
validation_file_id = upload_training_file("validation_data.jsonl")
training_file_id , validation_file_id

('file-1RuWBAghsLp9FTwWTVeKBt', 'file-7YeFw6Pfm6WD1zXJVgA9wp')

In [23]:
def create_fine_tuning_job(training_file_id , validation_file_id=None , model = "gpt-4o-mini-2024-07-18" ):
    response = client.fine_tuning.jobs.create(
        training_file = training_file_id,
        validation_file = validation_file_id,
        model = model
    )
    return response.id


In [24]:
model = "gpt-4o-mini-2024-07-18"
job_id = create_fine_tuning_job(training_file_id , validation_file_id , model)
job_id

'ftjob-E9F8Hm4jdqLtSKz4k6oeMZqg'

In [25]:
def monitor_job(job_id):
    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        print(f"Status: {job.status}")

        if job.status in ["succeeded" , "failed"]:
            return job
        
        # List latest event
        events = client.fine_tuning.jobs.list_events(
            fine_tuning_job_id=job_id,
            limit=5
        )
        
        for event in events.data:
            print(f"Event: {event.message}")

        sleep(30)

## Fine tuning

In [26]:
job = monitor_job(job_id)

if job.status == "succeeded":
    fine_tune_model = job.fine_tuned_model
    print(f"Fine-tuned Model ID: {fine_tune_model}")
else:
    print(f"Fine tuning failed.")

Status: validating_files
Event: Validating training file: file-1RuWBAghsLp9FTwWTVeKBt and validation file: file-7YeFw6Pfm6WD1zXJVgA9wp
Event: Created fine-tuning job: ftjob-E9F8Hm4jdqLtSKz4k6oeMZqg
Status: validating_files
Event: Validating training file: file-1RuWBAghsLp9FTwWTVeKBt and validation file: file-7YeFw6Pfm6WD1zXJVgA9wp
Event: Created fine-tuning job: ftjob-E9F8Hm4jdqLtSKz4k6oeMZqg
Status: validating_files
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-1RuWBAghsLp9FTwWTVeKBt and validation file: file-7YeFw6Pfm6WD1zXJVgA9wp
Event: Created fine-tuning job: ftjob-E9F8Hm4jdqLtSKz4k6oeMZqg
Status: running
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-1RuWBAghsLp9FTwWTVeKBt and validation file: file-7YeFw6Pfm6WD1zXJVgA9wp
Event: Created fine-tuning job: ftjob-E9F8Hm4jdqLtSKz4k6oeMZqg
Status: running
Event: Fine-tuning job started
Event:

In [27]:
def test_model(model_id , test_input):
    completion = client.chat.completions.create(
        model=model_id,
        messages=[
            {
                "role" : "system",
                "content" : "You are a professional global real estate assistant. Provide accurate and polite answers."
            },
            {"role" : "user", "content": test_input}
        ]
    )
    return completion.choices[0].message

# RESULT

In [28]:
result = test_model(fine_tune_model , "Is it better to rent or buy a home in todayâ€™s market?")
print(result)

ChatCompletionMessage(content='It depends on your financial goals and lifestyle. Buying builds equity over time, while renting offers flexibility and lower upfront costs. Evaluate interest rates, your savings, and long-term plans before deciding.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None)


In [29]:
result.content

'It depends on your financial goals and lifestyle. Buying builds equity over time, while renting offers flexibility and lower upfront costs. Evaluate interest rates, your savings, and long-term plans before deciding.'