In [11]:
import os
import pandas as pd
import json
from io import StringIO



data_dir = './'
# These are the standard names we will use for the DataFrame columns.
standard_columns = ['timestamp', 'co2', 'humidity', 'temperature']


def load_sensor_records(directory, columns):
    """Read every ``*.ndjson`` file in ``directory`` into a list of flat records.

    Each non-blank line must hold one JSON object with exactly ``len(columns)``
    fields. The source key names are ignored: values are mapped onto ``columns``
    by position (dict insertion order), so every file is assumed to list its
    fields in the same order as ``columns``. A ``'room'`` entry derived from the
    filename is added to every record, e.g. ``'room_a.ndjson'`` -> ``'Room A'``.

    Nothing is raised for bad input; problems are reported with ``print``:
    a missing directory yields an empty list, an unreadable file is skipped,
    and a bad line is skipped without abandoning the rest of its file.

    Args:
        directory: Path of the folder to scan (not recursive).
        columns: Standard column names, in the positional order of the fields.

    Returns:
        A list of dicts with the keys in ``columns`` plus ``'room'``. Files are
        visited in sorted filename order so the result is reproducible.
    """
    records = []
    if not os.path.isdir(directory):
        print(f"Error: Data directory '{directory}' not found.")
        return records

    # os.listdir() returns entries in arbitrary order; sort for stable row order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.ndjson'):
            continue

        # Create a clean room name, e.g., 'room_a.ndjson' -> 'Room A'
        room_name = filename.split('.')[0].replace('_', ' ').title()
        file_path = os.path.join(directory, filename)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line_number, line in enumerate(f, start=1):
                    # Skip empty or whitespace-only lines
                    if not line.strip():
                        continue

                    try:
                        data_dict = json.loads(line)
                    except json.JSONDecodeError as e:
                        print(f"Warning: Skipping corrupted line {line_number} in {filename}. Error: {e}")
                        continue

                    # Valid JSON that is not an object (list, number, string...)
                    # has no .values(); skip the line instead of losing the file.
                    if not isinstance(data_dict, dict):
                        print(f"Warning: Skipping malformed line {line_number} in {filename} (expected a JSON object, got {type(data_dict).__name__}).")
                        continue

                    # Values in insertion order (guaranteed for dicts in Python 3.7+)
                    values = list(data_dict.values())
                    if len(values) != len(columns):
                        print(f"Warning: Skipping malformed line {line_number} in {filename} (expected {len(columns)} fields, got {len(values)}).")
                        continue

                    record = dict(zip(columns, values))
                    record['room'] = room_name
                    records.append(record)

        except (OSError, UnicodeError) as e:
            # Unreadable or wrongly-encoded file: report it and move on.
            print(f"Error reading file {filename}: {e}")
            continue

    return records


standardized_data = load_sensor_records(data_dir, standard_columns)
if not standardized_data:
    print("Error: No data could be loaded. Check file contents and paths.")

# Passing `columns` guarantees the expected columns exist even when nothing was
# loaded, so the conversions below cannot fail with a KeyError on an empty frame.
df = pd.DataFrame(standardized_data, columns=standard_columns + ['room'])

# Convert relevant columns to the correct data types, coercing errors to NaT/NaN
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
for col in ['co2', 'humidity', 'temperature']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop any rows where critical data conversion failed (reassigned, not in-place,
# so re-running the cell is idempotent)
df = df.dropna(subset=['timestamp', 'co2', 'humidity', 'temperature'])



In [None]:
# NOTE(review): bare reference to the builtin `exec`; evaluating it runs no code.
# Looks like a leftover scratch cell — confirm and remove.
exec

Unnamed: 0,timestamp,co2,humidity,temperature,room
0,2025-07-03 13:21:58.171539+00:00,753.28,35.8,23.13,Sensor Data Room 1
1,2025-07-03 13:36:58.171539+00:00,955.72,57.85,23.53,Sensor Data Room 1
2,2025-07-03 13:51:58.171539+00:00,566.9,53.83,21.71,Sensor Data Room 1
3,2025-07-03 14:06:58.171539+00:00,849.97,31.39,25.68,Sensor Data Room 1
4,2025-07-03 14:21:58.171539+00:00,427.32,56.84,25.06,Sensor Data Room 1


In [17]:
import os
import io
import json
import pandas as pd
from contextlib import redirect_stdout
from thefuzz import process, fuzz

def _find_mentioned_rooms(query: str) -> list[str]:
    """Identify which known rooms are mentioned in ``query``, tolerating small typos.

    Known rooms are derived from the ``*.ndjson`` filenames in the module-level
    ``data_dir``: underscores become spaces and, when the name ends in
    ``room <id>``, only that tail is kept (``'sensor_data_room_1.ndjson'`` ->
    ``'Room 1'``); otherwise the whole stem is used.

    A room counts as mentioned when either
      * its bare id (the label without the ``'Room '`` prefix) appears in the
        query as a whole token, or
      * the full label fuzzy-matches the query (``fuzz.partial_ratio`` > 85).

    The bare id is matched as a whole token rather than fuzzily because a
    partial match on a one-character alias fires on any query containing that
    character (e.g. "co2" would select Room 2).

    NOTE(review): partial matching of the full label can still select
    'Room 1' for a query that says "room 10" — confirm whether that matters.
    NOTE(review): the labels returned here ('Room 1') differ from the `room`
    values built by the data-loading cell ('Sensor Data Room 1') — verify how
    callers join the two.

    Args:
        query: Free-text user question.

    Returns:
        Sorted list of matched room labels; empty when ``query`` is empty or
        ``data_dir`` is not a directory.
    """
    import re  # local import: `re` is not imported at the top of this notebook

    if not query or not os.path.isdir(data_dir):
        return []

    # Get canonical room names from filenames (e.g., 'Room 1', 'Room 2')
    available_rooms = set()
    for filename in os.listdir(data_dir):
        if not filename.endswith('.ndjson'):
            continue
        # Collapse repeated whitespace so the 'Room ' prefix is predictable.
        stem = ' '.join(filename.split('.')[0].replace('_', ' ').split())
        room_tail = re.search(r'room\s+\S+$', stem, flags=re.IGNORECASE)
        label = (room_tail.group(0) if room_tail else stem).title()
        if label:
            available_rooms.add(label)

    mentioned_rooms = set()
    for room in available_rooms:
        room_id = room.replace("Room ", "")

        # Bare id ('1'): require a standalone token, not a substring.
        if room_id != room and re.search(
            rf'(?<!\w){re.escape(room_id)}(?!\w)', query, flags=re.IGNORECASE
        ):
            mentioned_rooms.add(room)
            continue

        # Full label: fuzzy match so small typos ('rom 1') are still recognised.
        match = process.extractOne(query, [room], scorer=fuzz.partial_ratio)
        if match and match[1] > 85:  # Using a confidence threshold of 85
            mentioned_rooms.add(room)

    # Sorted for a deterministic result (set iteration order is not stable).
    return sorted(mentioned_rooms)


In [20]:
from google.adk.code_executors import VertexAiCodeExecutor

In [None]:
session_service = InMemorySessionService()

# Define constants for identifying the interaction context
APP_NAME = "data_analyst_app"
USER_ID = "user_1"
SESSION_ID = "session_001" # Using a fixed ID for simplicity

# Create the specific session where the conversation will happen
session = await session_service.create_session(
    app_name=APP_NAME,
    user_id=USER_ID,
    session_id=SESSION_ID
)
print(f"Session created: App='{APP_NAME}', User='{USER_ID}', Session='{SESSION_ID}'")

# --- Runner ---
# Key Concept: Runner orchestrates the agent execution loop.
runner = Runner(
    agent=data_analyst_agent, # The agent we want to run
    app_name=APP_NAME,   # Associates runs with our app
    session_service=session_service # Uses our session manager
)
print(f"Runner created for agent '{runner.agent.name}'.")