In [1]:
# Prints a fixed greeting; presumably a quick check that the kernel executes cells.
print("hello")

hello


In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

# template = """Question: {question}

# Answer: Let's think step by step."""

# prompt = ChatPromptTemplate.from_template(template)

# model = OllamaLLM(model="llama3")

# chain = prompt | model

# response = chain.invoke({"question": "How do I pick outfit for today?"})

In [5]:
# print(response)

Let's break it down together.

Step 1: Consider the occasion. What are you planning to do today? Are you going to work, school, running errands, or something else?

Please respond with your answer, and we'll move on to the next step!


In [9]:
import sqlite3
import random
import string
from datetime import datetime, timedelta

# Create SQLite database and connection
# 'properties.db' is a relative path, so the file is resolved against the
# kernel's current working directory.
conn = sqlite3.connect('properties.db')
cursor = conn.cursor()

# Create table with the selected schema
# IF NOT EXISTS keeps an already-existing table (and its rows) untouched, so
# the INSERTs further down in this cell add to whatever earlier runs stored.
cursor.execute('''CREATE TABLE IF NOT EXISTS properties (
    property_id TEXT,
    list_price REAL,
    status TEXT,
    city TEXT,
    list_date TEXT
)''')

# Function to generate random dummy data
def random_string(length=10):
    """Return a random identifier made of uppercase ASCII letters and digits.

    Args:
        length: Number of characters to generate (default 10).

    Returns:
        A string of exactly ``length`` characters; characters may repeat.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def random_date(start, end):
    """Pick a random moment between ``start`` and ``end`` (both inclusive).

    Args:
        start: Lower bound as a ``datetime``.
        end: Upper bound as a ``datetime``.

    Returns:
        ``start`` shifted forward by a whole number of seconds chosen
        uniformly from the span between the two bounds.
    """
    span_seconds = int((end - start).total_seconds())
    shift_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=shift_seconds)

# Insert 20 dummy records
# The rows are generated first and written with a single executemany() call.
# try/finally guarantees the connection is closed even when an insert or the
# commit raises, so a failed run does not leave the database file handle open.
try:
    dummy_records = []
    for _ in range(20):
        property_id = random_string(10)
        list_price = round(random.uniform(100000, 1000000), 2)
        status = random.choice(["active", "sold", "pending"])
        city = random.choice(["Los Angeles", "New York", "Houston", "Miami"])
        list_date = random_date(datetime(2021, 1, 1), datetime(2024, 8, 1)).strftime("%Y-%m-%d")
        dummy_records.append((property_id, list_price, status, city, list_date))

    # Parameterized insert: values are bound by sqlite3, never formatted into the SQL text.
    cursor.executemany(
        '''INSERT INTO properties (property_id, list_price, status, city, list_date)
           VALUES (?, ?, ?, ?, ?)''',
        dummy_records,
    )

    # Persist the inserted rows
    conn.commit()
finally:
    # Always release the connection
    conn.close()


In [3]:
import requests
import json
import datetime
import pandas as pd
import sqlite3
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define constants
url = "https://www.realtor.com/api/v1/hulk?client_id=rdc-x&schema=vesta"
headers = {"content-type": "application/json"}
offset = 0
limit = 100
max_offset = 1000  # Set the maximum offset value
request_timeout = 30  # Seconds to wait on each HTTP call; without it a stalled server blocks the cell forever
file_name = str(datetime.date.today()) + ".csv"  # NOTE(review): not used anywhere in this cell
db_name = "real_estate_data.db"

# Initialize session
session = requests.Session()

# Initialize an empty list to collect rows
rows = []

# Request payload. Only variables.offset changes between pages, so the dict is
# built once here and just that field is updated inside the loop.
payload = {
    "query": "query ConsumerSearchQuery($query: HomeSearchCriteria!, $limit: Int, $offset: Int, $sort: [SearchAPISort], $sort_type: SearchSortType, $client_data: JSON, $bucket: SearchAPIBucket) {home_search: home_search(query: $query, sort: $sort, limit: $limit, offset: $offset, sort_type: $sort_type, client_data: $client_data, bucket: $bucket) {count total results {property_id list_price primary_photo (https: true) {href} listing_id virtual_tours{href type} status permalink price_reduced_amount description{beds baths baths_full baths_3qtr baths_half sqft lot_sqft baths_max baths_min beds_max sqft_min sqft_max type sold_price sold_date} location{street_view_url address{line postal_code state state_code city coordinate{lat lon}}} open_houses {start_date end_date} flags{is_coming_soon is_new_listing (days: 14) is_price_reduced (days: 30) is_foreclosure is_new_construction is_pending is_contingent} list_date photos(limit: 1, https: true) {href}}}}",
    "variables": {
        "query": {
            "status": ["for_sale", "ready_to_build"],
            "primary": True,
            "state_code": "CA",
            "boundary": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [-128.710184, 44.968352],
                        [-108.978738, 44.968352],
                        [-108.978738, 28.540688],
                        [-128.710184, 28.540688],
                        [-128.710184, 44.968352]
                    ]
                ]
            }
        },
        "client_data": {
            "device_data": {
                "device_type": "web"
            },
            "user_data": {
                "last_view_timestamp": -1
            }
        },
        "limit": limit,
        "offset": offset,
        "zohoQuery": {
            "silo": "search_result_page",
            "location": "California",
            "property_status": "for_sale",
            "filters": {}
        },
        "geoSupportedSlug": "",
        "zoom": 5,
        "sort": [
            {"field": "list_date", "direction": "desc"},
            {"field": "photo_count", "direction": "desc"}
        ],
        "by_prop_type": ["home"]
    },
    "callfrom": "SRP",
    "nrQueryType": "MAP_MAIN_SRP",
    "isClient": True
}


def _flatten_result(result):
    """Flatten one entry of home_search.results into a flat row dict.

    Every nested object (primary_photo, description, location.address,
    address.coordinate, flags) is treated as optional: when it is missing or
    null its columns are filled with "" instead of raising, so a single
    incomplete listing cannot abort the whole download.

    Args:
        result: One listing dict taken from the decoded JSON response.

    Returns:
        A dict with one key per output column.
    """
    primary_photo = result.get("primary_photo") or {}
    description = result.get("description") or {}
    address = (result.get("location") or {}).get("address") or {}
    coordinate = address.get("coordinate") or {}
    flags = result.get("flags") or {}

    return {
        "property_id": result.get("property_id", ""),
        "primary_photo_href": primary_photo.get("href", ""),
        "list_price": result.get("list_price", ""),
        "listing_id": result.get("listing_id", ""),
        "status": result.get("status", ""),
        "permalink": result.get("permalink", ""),
        "price_reduced_amount": result.get("price_reduced_amount", ""),
        "beds": description.get("beds", ""),
        "baths": description.get("baths", ""),
        "sqft": description.get("sqft", ""),
        "lot_sqft": description.get("lot_sqft", ""),
        "type": description.get("type", ""),
        "sold_price": description.get("sold_price", ""),
        "sold_date": description.get("sold_date", ""),
        "line": address.get("line", ""),
        "postal_code": address.get("postal_code", ""),
        "state": address.get("state", ""),
        "state_code": address.get("state_code", ""),
        "city": address.get("city", ""),
        "lat": coordinate.get("lat", ""),
        "lon": coordinate.get("lon", ""),
        "is_new_listing": flags.get("is_new_listing", ""),
        "is_price_reduced": flags.get("is_price_reduced", ""),
        "list_date": result.get("list_date", ""),
    }


try:
    while offset < max_offset:
        # Prepare the request payload for the current page
        payload["variables"]["offset"] = offset
        body = json.dumps(payload)

        # Make the POST request
        try:
            response = session.post(url=url, data=body, headers=headers, timeout=request_timeout)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            logging.error(f"Request failed at offset {offset}: {e}")
            break

        # Process the results; tolerate a missing/null data or home_search object
        results = ((data.get("data") or {}).get("home_search") or {}).get("results") or []
        if not results:
            # An empty page means there is nothing further to fetch.
            logging.info(f"No results returned at offset {offset}; stopping pagination")
            break
        rows.extend(_flatten_result(result) for result in results)

        logging.info(f"Processed offset {offset}")
        offset += limit

    # Convert the list of rows to a DataFrame
    df = pd.DataFrame(rows)

    if rows:
        # Save the DataFrame to an SQLite database.
        # `with conn` only commits or rolls back; the explicit close() in
        # finally is what actually releases the connection.
        conn = sqlite3.connect(db_name)
        try:
            with conn:
                df.to_sql('properties', conn, if_exists='replace', index=False)
        finally:
            conn.close()
        logging.info(f"Data successfully saved to {db_name}")
    else:
        # if_exists='replace' would swap the stored table for an empty one, so skip the write.
        logging.warning(f"No rows collected; existing data in {db_name} left untouched")

except Exception as e:
    # logging.exception keeps the traceback, which the broad except would otherwise hide.
    logging.exception(f"An error occurred: {e}")

finally:
    session.close()

2024-08-15 16:52:15,443 - INFO - Processed offset 0
2024-08-15 16:52:16,292 - INFO - Processed offset 100
2024-08-15 16:52:17,142 - INFO - Processed offset 200
2024-08-15 16:52:18,021 - INFO - Processed offset 300
2024-08-15 16:52:19,069 - INFO - Processed offset 400
2024-08-15 16:52:20,009 - INFO - Processed offset 500
2024-08-15 16:52:21,039 - INFO - Processed offset 600
2024-08-15 16:52:21,884 - INFO - Processed offset 700
2024-08-15 16:52:22,808 - INFO - Processed offset 800
2024-08-15 16:52:23,671 - INFO - Processed offset 900
2024-08-15 16:52:23,692 - INFO - Data successfully saved to real_estate_data.db


In [10]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('properties.db')

# try/finally guarantees the connection is closed even when the query fails
# (for example when the properties table does not exist yet).
try:
    cursor = conn.cursor()

    # Query to select all records from the properties table
    cursor.execute('''SELECT * FROM properties''')

    # Fetch all rows from the executed query
    rows = cursor.fetchall()

    # Print the column names (first item of each cursor.description entry)
    columns = [description[0] for description in cursor.description]
    print(" | ".join(columns))

    # Print each row in the table
    for row in rows:
        print(" | ".join(str(value) for value in row))
finally:
    # Close the connection
    conn.close()


property_id | list_price | status | city | list_date
EE3KUSWMD9 | 709533.83 | sold | Miami | 2021-06-09
3QW39WRTTL | 901498.28 | sold | Miami | 2024-02-09
YOA0336IMP | 390973.57 | active | New York | 2022-07-30
Y1LLDGS0C3 | 348274.1 | active | Miami | 2023-07-13
NQVXHOS74P | 207258.9 | sold | New York | 2022-02-11
69N9769441 | 395974.56 | pending | Houston | 2023-10-17
YYEFU7QPWH | 307788.58 | active | New York | 2022-01-09
V89LEPHSPR | 360683.07 | sold | Houston | 2021-07-15
P2GU7D5EK4 | 924844.74 | active | Los Angeles | 2024-05-20
KWCH2P2X0I | 581983.98 | active | New York | 2021-08-28
HCM4CHT8MQ | 128762.77 | sold | Miami | 2022-08-08
5P4FJD4ST0 | 475173.47 | active | Miami | 2024-06-08
WDAQWL7M40 | 831178.72 | sold | Los Angeles | 2021-12-30
P8YF99O7ZE | 784826.76 | pending | Miami | 2022-07-21
0AS8KV90R8 | 210300.21 | pending | Miami | 2023-10-18
DN7LFWJNGG | 516470.42 | active | Los Angeles | 2021-10-25
GVJXEH7AU0 | 327436.07 | pending | Los Angeles | 2021-03-10
52XD7PUXVP | 896

In [4]:
from langchain_community.utilities import SQLDatabase

# Point LangChain's SQLDatabase wrapper at the SQLite file real_estate_data.db
# (relative path, resolved against the kernel's working directory).
db = SQLDatabase.from_uri("sqlite:///real_estate_data.db")
# Sanity checks: show the SQL dialect and the table names the wrapper exposes.
print(db.dialect)
print(db.get_usable_table_names())
# Preview ten rows; kept as the cell's last expression so the notebook displays the result.
db.run("SELECT * FROM properties LIMIT 10;")

sqlite
['properties']


"[('2505288754', 'https://ap.rdcpix.com/0db6900101216a95d0877dd2ff3c7926l-m3582143663s.jpg', 990000, '2970762845', 'for_sale', '3099-Laurel-Dr_Sacramento_CA_95864_M25052-88754', None, 4.0, 3.0, 2322.0, 29673.0, 'single_family', None, None, '3099 Laurel Dr', '95864', 'California', 'CA', 'Sacramento', 38.576453, -121.392646, 1, None, '2024-08-15T14:48:46.000000Z'), ('9254812795', 'https://ap.rdcpix.com/003fc216a9294c9521c9796298114aafl-m984706222s.jpg', 550000, '2970762834', 'for_sale', '54020-Linger-Ln_Idyllwild_CA_92549_M92548-12795', None, 3.0, 3.0, 1150.0, 12197.0, 'single_family', None, None, '54020 Linger Ln', '92549', 'California', 'CA', 'Idyllwild', 33.73531, -116.719536, 1, None, '2024-08-15T14:48:46.000000Z'), ('2620081522', 'https://ap.rdcpix.com/2d345ab57ce5fad310a5f3f0feeb9d48l-m3447157138s.jpg', 60000000, '2970762847', 'for_sale', '1160-Nunneley-Rd_Paradise_CA_95969_M26200-81522', None, None, 0.0, None, 40511.0, 'land', 160000.0, '2011-10-21', '1160 Nunneley Rd', '95969', '

In [8]:
# Create the LLM used by the SQL chain, selecting the "llama3" model.
# OllamaLLM is imported in an earlier cell, so that cell must run first.
# NOTE(review): presumably needs a locally running Ollama server with llama3 available - confirm.
llm = OllamaLLM(model="llama3")

In [9]:
from langchain.chains import create_sql_query_chain


# Build a chain from the LLM and the database wrapper, then ask it a natural-language question.
# `response` is later treated as a string (it is split and stripped) to obtain the SQL text.
chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "Which is the most expensive property?"})


2024-08-15 16:54:49,693 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


In [10]:
def _extract_sql(llm_output):
    """Pull the SQL statement out of the raw text returned by the query chain.

    Splitting on every ":" truncates any statement that itself contains a
    colon (for example an ISO timestamp literal such as
    '2024-08-15T14:48:46'), so extraction is attempted in this order:

    1. Text after the last "SQLQuery:" label, when the label is present.
    2. Text starting at the first line that begins with SELECT or WITH.
    3. Fallback: text after the last ":" (the previous behavior).

    Args:
        llm_output: The string produced by the chain.

    Returns:
        The extracted statement with surrounding whitespace removed.
    """
    import re  # local import keeps this cell self-contained

    label = "SQLQuery:"
    if label in llm_output:
        return llm_output.rsplit(label, 1)[-1].strip()

    statement_start = re.search(r"^[ \t]*(?:SELECT|WITH)\b", llm_output, flags=re.MULTILINE)
    if statement_start:
        return llm_output[statement_start.start():].strip()

    return llm_output.split(":")[-1].strip()


x = _extract_sql(response)

In [11]:
# Execute the extracted SQL text against the database; as the cell's last
# expression, its return value is what the notebook displays.
# NOTE(review): x comes from model output and is executed as-is - consider
# checking that it is a read-only SELECT before running it.
db.run(x)

"[('2861542175', 144900000)]"