In [1]:
import pandas as pd
from pathlib import Path
import os
import sys
import json

# Resolve the project layout, taking the kernel's working directory as the
# notebook folder and its parent as the project root (local machine setup).
notebook_path = Path.cwd()
project_root = notebook_path.parent
src_dir = project_root / 'src'

# Put the source folder at the front of the module search path so the
# project modules can be imported; skip if the entry is already there
# (e.g. when this cell is re-run in the same kernel).
_src_entry = str(src_dir)
if _src_entry not in sys.path:
    sys.path.insert(0, _src_entry)

# Full path of the dataset file inside the project's data folder.
data_file = "AB_NYC_2019.csv"
final_data_path = project_root / 'data' / data_file

# Import components from source 
from data_processing import load_dataset, init_prompt_db
from train import AirbnbAgent, LogRecorder

In [2]:
# Set up the prompt database before loading any data
init_prompt_db()

# Read the listings file; load_dataset is given the complete file path
print("Loading Dataset...")
try:
    df = load_dataset(final_data_path)
    print("Dataset Head:")
    display(df.head())
except FileNotFoundError as missing_file_error:
    # Leave df as None so later cells can tell that loading failed
    df = None
    print(f"ERROR: {missing_file_error}")
    print("Please ensure 'AB_NYC_2019.csv' is in the 'data/' folder.")

✔ Prompt DB initialized: prompts.db
Loading Dataset...
[INFO] Loaded 48868 listings after cleaning.
Dataset Head:


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,text
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365,clean & quiet apt home by the park brooklyn ke...
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355,skylit midtown castle manhattan midtown entire...
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365,the village of harlem....new york ! manhattan ...
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194,cozy entire floor of brownstone brooklyn clint...
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0,entire apt: spacious studio/loft by central pa...


In [3]:
# Build the agent only when the dataset was loaded (df is None after a
# failed load in the previous cell).
_dataset_loaded = df is not None
if _dataset_loaded:
    print("Initializing AirbnbAgent (This will load the LLM/Tfidf model)...")
    agent = AirbnbAgent(df)
    print("Agent Initialized successfully.")

Initializing AirbnbAgent (This will load the LLM/Tfidf model)...


Device set to use cpu


Agent Initialized successfully.


In [4]:
# Analytical query: runs only if an agent exists in this session.
if 'agent' in locals():
    print("--- Running Analytical Query (Average Price) ---")
    query_1 = "What is the average price in Manhattan for a private room under $150?"
    print(f"Q: {query_1}")
    response_1 = agent.answer(query_1)
    # Header, separator, answer, separator -- one item per output line
    print(
        "\nAgent Response:",
        "--------------------------------------------------",
        response_1,
        "--------------------------------------------------",
        sep="\n",
    )

--- Running Analytical Query (Average Price) ---
Q: What is the average price in Manhattan for a private room under $150?


Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Agent Response:
--------------------------------------------------
The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 listings. The average price in Manhattan for a private room under $150 is $98, based on 11808 li

In [None]:
# Search query: runs only if an agent exists in this session.
if 'agent' in locals():
    print("\n--- Running Search Query (Cheapest Listings) ---")
    query_2 = "Show me the 5 cheapest listings in Brooklyn that have good reviews."
    print(f"Q: {query_2}")
    response_2 = agent.answer(query_2)
    # Header, separator, answer, separator -- one item per output line
    print(
        "\nAgent Response:",
        "--------------------------------------------------",
        response_2,
        "--------------------------------------------------",
        sep="\n",
    )

In [None]:
# exploration.ipynb (Cell 6: Run Statistical Query)
#
# Runs the statistical query, prints the agent's answer, and appends one
# record to `log_records` so that the export cell has something to write.

if 'agent' in locals():
    print("\n--- Running Statistical Query (High Demand) ---")
    query_3 = "Where are the highest demand areas in Queens?"
    print(f"Q: {query_3}")

    # No earlier cell creates `log_records`; create it on first use so a
    # fresh-kernel "Run All" does not fail with NameError. An existing list
    # (from a previous run of this cell) is kept and appended to.
    if 'log_records' not in locals():
        log_records = []

    # The earlier query cells print the result of agent.answer() directly as
    # text, while this cell reads it as a dict with a 'final_response' key.
    # Accept both shapes so the cell works with either.
    answer_3 = agent.answer(query_3)
    if isinstance(answer_3, dict):
        # Full log dictionary: keep it unchanged
        log_entry_3 = answer_3
        response_3 = log_entry_3['final_response']
    else:
        # Plain answer: wrap it so it can still be logged.
        # NOTE(review): the record layout expected by LogRecorder.record_run
        # is not visible from this notebook -- confirm these keys.
        response_3 = answer_3
        log_entry_3 = {'query': query_3, 'final_response': response_3}
    log_records.append(log_entry_3)

    print("Agent Response:")
    print("--------------------------------------------------")
    print(response_3)
    print("--------------------------------------------------")

In [None]:
# Create the recorder, pointing it at ../results/logs
recorder = LogRecorder(base_dir="../results/logs")

# Pick up the accumulated records if some earlier cell created them
# (None when the name does not exist in this session).
_collected_records = locals().get('log_records')
if not _collected_records:
    print("No log records found to export.")
else:
    # Hand the records to the recorder, which writes the file
    recorder.record_run(_collected_records)