# Get Flood Contextual Data

### This notebook extracts locations from a user prompt and queries the FLAI_Map PostgreSQL database to retrieve flood event data, Social Vulnerability Index (SVI) data, and precipitation data.

In [104]:
# Import libraries
import json
import os
import time

import openai
import pandas as pd
import psycopg2
import requests
from dotenv import load_dotenv

In [105]:
# Populate the process environment (python-dotenv), then read every setting from it.
# Each value is None when the corresponding variable is not defined.
load_dotenv()

# PostgreSQL connection settings
PG_HOST = os.environ.get("POSTGRES_HOST")
PG_DB = os.environ.get("POSTGRES_DB")
PG_USER = os.environ.get("POSTGRES_USER")
PG_PASS = os.environ.get("POSTGRES_PASSWORD")

# External API credentials
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
GOOGLE_MAPS_API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY")

## Extract Geographic Locations from User Input

In [106]:
class GoogleMapsClient:
    """
    A client to interact with various Google Maps Platform APIs.

    Every public method builds the query parameters for one endpoint and
    delegates to ``_make_request``, which returns the decoded JSON payload on
    success or ``None`` (after printing a diagnostic) on failure.
    """
    GEOCODE_API_URL = "https://maps.googleapis.com/maps/api/geocode/json"
    ELEVATION_API_URL = "https://maps.googleapis.com/maps/api/elevation/json"
    TIMEZONE_API_URL = "https://maps.googleapis.com/maps/api/timezone/json"
    WEATHER_API_URL = "https://weather.googleapis.com/v1"

    # Seconds to wait for a response. Without a timeout, requests.get() can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, api_key):
        """
        Initializes the client with a Google Maps API Key.

        Raises:
            ValueError: if ``api_key`` is empty or None.
        """
        if not api_key:
            raise ValueError("Google Maps API Key not found. Ensure your .env file is set up correctly.")
        self.api_key = api_key

    def _make_request(self, url, params):
        """
        Internal method to perform API requests, handle errors, and return JSON.

        Args:
            url: Endpoint to query.
            params: Query parameters for the endpoint. The dict is copied, so
                the caller's object is never modified and never ends up
                holding the API key.

        Returns:
            The decoded JSON payload, or None when the request fails, the
            payload contains an 'error' entry, or its 'status' is not 'OK'.
        """
        query = dict(params or {})
        query['key'] = self.api_key
        try:
            response = requests.get(url, params=query, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()  # Raise an exception for HTTP error codes
            data = response.json()
            if 'error' in data:
                print(f"API Error: {data['error']['message']}")
                return None
            if 'status' in data and data['status'] != 'OK':
                print(f"API Error: {data['status']} - {data.get('error_message', '')}")
                return None
            return data
        except requests.exceptions.RequestException as e:
            print(f"Connection Error: {e}")
            return None
        except ValueError:
            print("Error: A valid JSON response was not received. Response received:")
            print(response.text)
            return None

    def geocode_by_address(self, address, language='en'):
        """
        Gets geolocation data from a text-based address.
        """
        params = {'address': address, 'language': language}
        return self._make_request(self.GEOCODE_API_URL, params)

    def reverse_geocode(self, lat, lng, language='en'):
        """
        Gets geolocation data (reverse geocoding) from coordinates.
        """
        params = {'latlng': f"{lat},{lng}", 'language': language}
        return self._make_request(self.GEOCODE_API_URL, params)

    def get_elevation(self, lat, lng):
        """
        Gets the elevation for a pair of coordinates.
        """
        params = {'locations': f"{lat},{lng}"}
        return self._make_request(self.ELEVATION_API_URL, params)

    def get_timezone(self, lat, lng):
        """
        Gets the time zone information for a pair of coordinates.

        The current Unix time (whole seconds) is sent as the 'timestamp'
        parameter.
        """
        params = {
            'location': f"{lat},{lng}",
            'timestamp': int(time.time())
        }
        return self._make_request(self.TIMEZONE_API_URL, params)

    def get_current_conditions(self, lat, lng, units='IMPERIAL'):
        """
        Gets the current weather conditions.
        Units can be 'IMPERIAL' or 'METRIC'.
        """
        url = f"{self.WEATHER_API_URL}/currentConditions:lookup"
        params = {'location.latitude': lat, 'location.longitude': lng, 'unitsSystem': units}
        return self._make_request(url, params)

    def get_daily_forecast(self, lat, lng, units='IMPERIAL', days=None):
        """
        Gets the daily weather forecast.
        Can specify the number of days (e.g., days=6).
        """
        url = f"{self.WEATHER_API_URL}/forecast/days:lookup"
        params = {'location.latitude': lat, 'location.longitude': lng, 'unitsSystem': units}
        # Add the 'days' parameter only if it's provided
        if days is not None:
            params['days'] = days
        return self._make_request(url, params)

    def get_hourly_forecast(self, lat, lng, units='IMPERIAL', hours=None):
        """
        Gets the hourly weather forecast.
        Can specify the number of hours (e.g., hours=25).
        """
        url = f"{self.WEATHER_API_URL}/forecast/hours:lookup"
        params = {'location.latitude': lat, 'location.longitude': lng, 'unitsSystem': units}
        # Add the 'hours' parameter only if it's provided
        if hours is not None:
            params['hours'] = hours
        return self._make_request(url, params)

    def get_hourly_history(self, lat, lng, units='IMPERIAL', hours=None):
        """
        Gets the hourly weather history.
        Can specify the number of hours (e.g., hours=3).
        """
        url = f"{self.WEATHER_API_URL}/history/hours:lookup"
        params = {'location.latitude': lat, 'location.longitude': lng, 'unitsSystem': units}
        # Add the 'hours' parameter only if it's provided
        if hours is not None:
            params['hours'] = hours
        return self._make_request(url, params)


In [107]:
# OpenAI client
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# Google Maps client. A missing API key is reported instead of raised, in
# which case `maps_client` is left undefined for the cells below.
try:
    maps_client = GoogleMapsClient(api_key=GOOGLE_MAPS_API_KEY)
except ValueError as missing_key_error:
    print(missing_key_error)


In [108]:
def extract_locations(user_input):
    """
    Ask the OpenAI chat API to pull location names out of free-form text.

    The prompt instructs the model to merge a specific place with its city or
    region into one geocodable string and to reply with a JSON object shaped
    like ``{"result": [...]}``.

    Returns the decoded JSON value, or None (after printing a diagnostic) when
    the API call fails or the reply is not valid JSON.
    """
    try:
        llm = openai.OpenAI(api_key=OPENAI_API_KEY)
        prompt = f"""
        You are an expert geographer at identifying and consolidating location information from text.
        Your task is to extract locations and combine them into the most specific strings possible
        for geocoding. If a specific place (like a building, park, or address) is mentioned
        with its city or region, you MUST combine them into a single string. Do not split
        a single conceptual place into multiple parts.

        Your answer MUST be a JSON object with a single key named "result", which contains an
        array of the final location strings.

        Example 1:
        - User query: 'What is the weather forecast for the area around the Northeast Medical Building in Tuscaloosa?'
        - Correct output: {{"result": ["Northeast Medical Building, Tuscaloosa"]}}

        Example 2:
        - User query: 'I want to know the elevation of the Eiffel Tower and the weather in Rome.'
        - Correct output: {{"result": ["Eiffel Tower, Paris", "Rome"]}}

        Now, process the following query:
        User query: '{user_input}'
        """

        chat_messages = [
            {"role": "system", "content": "You are a helpful and precise location extraction assistant that consolidates location information."},
            {"role": "user", "content": prompt},
        ]
        response = llm.chat.completions.create(
            model="gpt-4o",
            messages=chat_messages,
            response_format={"type": "json_object"},
        )

        # Named `content` so the JSON error handler below can echo the raw reply.
        content = response.choices[0].message.content
        return json.loads(content)
    except openai.APIError as api_error:
        print(f"OpenAI API Error: {api_error}")
    except json.JSONDecodeError:
        print(f"Error: OpenAI did not return valid JSON. Response was: {content}")
    except Exception as unexpected_error:
        print(f"An unexpected error occurred with OpenAI: {unexpected_error}")
    # Every failure path falls through to here.
    return None

In [109]:
def extract_coordinates(user_query):
    """
    Turn a natural-language query into a list of geocoded locations.

    Location names come from ``extract_locations``; each one is geocoded with
    the module-level ``maps_client`` and the first match is kept. Names that
    cannot be geocoded are reported and skipped.

    Returns a list of dicts with the keys 'name', 'formatted_address',
    'latitude' and 'longitude' (empty when nothing was identified).
    """
    print(f"Processing user query: '{user_query}'\n")

    extracted = extract_locations(user_query)
    # Nothing usable came back from the model, so there is nothing to geocode.
    if not extracted or 'result' not in extracted or not extracted['result']:
        print("No locations were identified in the user query.")
        return []

    candidate_names = extracted['result']
    print(f"Locations identified by OpenAI: {candidate_names}\n")

    geocoded = []
    for place in candidate_names:
        print(f"--- Geocoding: {place} ---")

        lookup = maps_client.geocode_by_address(place)
        if lookup and lookup.get('results'):
            best_match = lookup['results'][0]
            point = best_match['geometry']['location']
            lat, lng = point['lat'], point['lng']
            formatted_address = best_match.get('formatted_address', 'N/A')

            print(f"Coordinates: Lat={lat}, Lng={lng}")
            print(f"Formatted Address: {formatted_address}\n")

            geocoded.append({
                'name': place,
                'formatted_address': formatted_address,
                'latitude': lat,
                'longitude': lng
            })
        else:
            print(f"Could not geocode '{place}'. Moving to the next location.\n")

    return geocoded

## Query Database for Flood Contextual Data

In [110]:
# Database connection shared by the query helpers below
conn = psycopg2.connect(host=PG_HOST, database=PG_DB, user=PG_USER, password=PG_PASS)

def execute_query(conn, query, params=None, fetch=False):
    """Execute a SQL query with optional parameters.

    Args:
        conn: Open database connection providing cursor(), commit() and rollback().
        query: SQL text, optionally containing parameter placeholders.
        params: Values bound to the placeholders. When empty or None the
            query is executed without a parameter argument.
        fetch: When True, return all rows and do not commit. When False,
            commit and return the cursor's rowcount.

    Returns:
        The fetched rows (fetch=True), the affected row count (fetch=False),
        or None if executing the query raised. In that case the error is
        printed and the transaction is rolled back.
    """
    cur = conn.cursor()
    try:
        if params:
            cur.execute(query, params)
        else:
            cur.execute(query)
        if fetch:
            return cur.fetchall()
        conn.commit()
        return cur.rowcount
    except Exception as e:
        print(f"Error executing query: {e}")
        conn.rollback()
        return None
    finally:
        # Runs on success and failure alike, so the cursor is always released.
        cur.close()


In [111]:
def get_county_info(connection, lat, lon):
    """
    Finds the county that contains the given coordinates.

    Args:
        connection: Open database connection handed to ``execute_query``.
        lat: Latitude of the point (SRID 4326 in the query).
        lon: Longitude of the point (SRID 4326 in the query).

    Returns:
        A dict with 'fips_code', 'county_name', 'state_name' and 'area_sqmi'
        for the first matching county, or None when no county intersects the
        point or the query failed. 'area_sqmi' is None when the stored area
        is NULL.
    """
    query = """
        SELECT c.fips_county_code, c.County, s.State, c.areaSQMI
        FROM flai.TCLCounties c
        JOIN flai.TCLStates s ON c.idState = s.idState
        WHERE ST_Intersects(c.geometry, ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 4326), 5070));
    """
    # Note: ST_MakePoint expects (longitude, latitude)
    rows = execute_query(connection, query, params=(lon, lat), fetch=True)
    if not rows:
        return None

    fips_code, county_name, state_name, area_sqmi = rows[0]
    return {
        "fips_code": fips_code,
        "county_name": county_name,
        "state_name": state_name,
        # A NULL area would make float() raise; pass it through as None instead.
        "area_sqmi": float(area_sqmi) if area_sqmi is not None else None
    }

In [112]:
def get_precipitation_history(connection, fips_code):
    """
    Retrieves the monthly precipitation history for a given county.

    Args:
        connection: Open database connection handed to ``execute_query``.
        fips_code: County FIPS code to filter on.

    Returns:
        A list of dicts with 'year', 'month' and 'precipitation_in', ordered
        by year then month. The list is empty when the county has no rows or
        the query failed. 'precipitation_in' is None for NULL totals.
    """
    query = """
        SELECT year, month, totalPrecipitation_in
        FROM flai.TBLMonthlyPrecipitation
        WHERE fips_county_code = %s
        ORDER BY year, month;
    """
    rows = execute_query(connection, query, params=(fips_code,), fetch=True)
    # execute_query returns None on error; iterating that would raise TypeError.
    if not rows:
        return []
    return [
        {
            "year": year,
            "month": month,
            "precipitation_in": float(total) if total is not None else None,
        }
        for year, month, total in rows
    ]

In [113]:
def get_flood_history(connection, fips_code, maps_client, user_lat, user_lon):
    """
    Retrieves a detailed list of historical flood events for a given county,
    calculates the distance from a user-specified point, and sorts the results
    by proximity (nearest first).

    Args:
        connection: Open database connection handed to ``execute_query``.
        fips_code: County FIPS code whose events are returned.
        maps_client: Object exposing ``reverse_geocode(lat, lng)``.
        user_lat: Latitude of the reference point.
        user_lon: Longitude of the reference point.

    Returns:
        A list of event dicts in the order produced by the query. It is empty
        when there are no events or the query failed. 'date' and
        'distance_from_query_point_miles' are None when the database value is
        NULL. 'nearest_address' is "N/A" when the event has no coordinates or
        reverse geocoding returns nothing.
    """
    meters_to_miles = 0.000621371

    # This query calculates the distance in meters between the user's point
    # and each flood event's point, then sorts by that distance.
    query = """
        SELECT
            et.EventType,
            e.beginDate,
            e.warning_zone,
            c.County,
            ST_Y(e.geometry) AS latitude,
            ST_X(e.geometry) AS longitude,
            -- Calculate distance in meters using the geography type for accuracy
            ST_Distance(
                e.geometry::geography,
                ST_SetSRID(ST_MakePoint(%s, %s), 4326)::geography
            ) as distance_meters
        FROM flai.TBLFloodEvents e
        JOIN flai.TCLEventTypes et ON e.idEventType = et.idEventType
        LEFT JOIN flai.TCLCounties c ON e.fips_county_code = c.fips_county_code
        WHERE e.fips_county_code = %s
        -- Order by the calculated distance, nearest first
        ORDER BY distance_meters ASC;
    """
    # Parameter order matches the placeholders: user_lon, user_lat, then fips_code
    params = (user_lon, user_lat, fips_code)
    results = execute_query(connection, query, params=params, fetch=True)

    event_list = []
    if not results:
        return event_list

    print(f"Found {len(results)} historical flood events. Sorting by distance and reverse geocoding...")

    # Events at the same coordinates share one reverse-geocoding call.
    address_by_point = {}
    for row in results:
        event_type, begin_date, warning_zone, county, lat, lon, distance_meters = row

        # Convert meters to miles; a NULL distance stays None.
        distance_miles = (
            round(distance_meters * meters_to_miles, 2)
            if distance_meters is not None else None
        )

        address = "N/A"
        # Compare against None explicitly: 0.0 is a valid coordinate.
        if lat is not None and lon is not None:
            point = (lat, lon)
            if point not in address_by_point:
                geo_data = maps_client.reverse_geocode(lat, lon)
                if geo_data and geo_data.get('results'):
                    address_by_point[point] = geo_data['results'][0]['formatted_address']
                else:
                    address_by_point[point] = "N/A"
            address = address_by_point[point]

        event_list.append({
            "type": event_type,
            "date": begin_date.isoformat() if begin_date is not None else None,
            "distance_from_query_point_miles": distance_miles,
            "warning_zone": warning_zone,
            "county": county if county else "Not Assigned (e.g., Offshore)",
            "location": {
                "latitude": lat,
                "longitude": lon
            },
            "nearest_address": address
        })

    return event_list

In [114]:
def get_flood_history_old(connection, fips_code, maps_client):
    """
    Retrieves a detailed, chronological list of historical flood events for a given county,
    enriching each event with a reverse-geocoded address.

    Args:
        connection: Open database connection handed to ``execute_query``.
        fips_code: County FIPS code whose events are returned.
        maps_client: Object exposing ``reverse_geocode(lat, lng)``.

    Returns:
        A list of event dicts in the order produced by the query. It is empty
        when there are no events or the query failed. 'date' is None when the
        stored date is NULL. 'nearest_address' is "N/A" when the event has no
        coordinates or reverse geocoding returns nothing.
    """
    query = """
        SELECT
            et.EventType,
            e.beginDate,
            e.warning_zone,
            c.County,
            ST_Y(e.geometry) AS latitude,  -- Extract Latitude (Y coordinate)
            ST_X(e.geometry) AS longitude -- Extract Longitude (X coordinate)
        FROM flai.TBLFloodEvents e
        JOIN flai.TCLEventTypes et ON e.idEventType = et.idEventType
        -- Use LEFT JOIN to include events that may not have a county (e.g., offshore)
        LEFT JOIN flai.TCLCounties c ON e.fips_county_code = c.fips_county_code
        WHERE e.fips_county_code = %s
        ORDER BY e.beginDate ASC; -- Order chronologically
    """
    results = execute_query(connection, query, params=(fips_code,), fetch=True)

    event_list = []
    if not results:
        return event_list

    print(f"Found {len(results)} historical flood events. Reverse geocoding...")

    # Events at the same coordinates share one reverse-geocoding call.
    address_by_point = {}
    for row in results:
        event_type, begin_date, warning_zone, county, lat, lon = row

        address = "N/A"  # Default value
        # Compare against None explicitly: 0.0 is a valid coordinate.
        if lat is not None and lon is not None:
            point = (lat, lon)
            if point not in address_by_point:
                geo_data = maps_client.reverse_geocode(lat, lon)
                if geo_data and geo_data.get('results'):
                    # Get the first, most relevant address
                    address_by_point[point] = geo_data['results'][0]['formatted_address']
                else:
                    address_by_point[point] = "N/A"
            address = address_by_point[point]

        event_list.append({
            "type": event_type,
            # A NULL date stays None instead of raising AttributeError.
            "date": begin_date.isoformat() if begin_date is not None else None,
            "warning_zone": warning_zone,
            "county": county if county else "Not Assigned (e.g., Offshore)",
            "location": {
                "latitude": lat,
                "longitude": lon
            },
            "nearest_address": address
        })

    return event_list

In [115]:
def get_svi_data(connection, fips_code, release_year=2022):
    """
    Fetch the Social Vulnerability Index (SVI) records of one county for one
    release year and arrange them into a nested dictionary.

    Rows without a variable name are stored under 'themes' (keyed by theme);
    all other rows are stored under 'variables' (keyed by variable name). The
    overall national/state rankings are read from the first row.

    Returns None when the query yields no rows.
    """
    query = """
        SELECT
            s.overallNational,
            s.overallState,
            t.Theme,
            v.SVIVariable,
            s.SVIValue
        FROM flai.TBLSVI s
        JOIN flai.TCLSVIThemes t ON s.idSVITheme = t.idSVITheme
        LEFT JOIN flai.TCLSVIVariables v ON s.idSVIVariable = v.idSVIVariable
        WHERE s.fips_county_code = %s AND s.release_year = %s;
    """
    rows = execute_query(connection, query, params=(fips_code, release_year), fetch=True)
    if not rows:
        return None

    def _to_float(value):
        # NULLs pass through untouched; everything else becomes a float.
        return None if value is None else float(value)

    first_row = rows[0]
    overall_ranking = {
        "national": _to_float(first_row[0]),
        "state": _to_float(first_row[1])
    }

    themes = {}
    variables = {}
    for record in rows:
        theme_name, variable_name = record[2], record[3]
        svi_value = _to_float(record[4])
        if variable_name is not None:
            # Variable-level record
            variables[variable_name] = svi_value
        else:
            # Theme-level record
            themes[theme_name] = svi_value

    return {
        "release_year": release_year,
        "overall_ranking": overall_ranking,
        "themes": themes,
        "variables": variables
    }

In [116]:
def get_contextual_data_for_locations(geocoded_locations, maps_client, connection=None, release_year=2022):
    """
    Main orchestrator function. Takes a list of geocoded locations and
    enriches each with data from the local database.

    Args:
        geocoded_locations: Iterable of dicts with at least the keys 'name',
            'latitude' and 'longitude'.
        maps_client: Client passed on to ``get_flood_history`` for reverse
            geocoding.
        connection: Database connection to query. Defaults to the
            module-level ``conn`` so existing two-argument calls keep working.
        release_year: SVI release year to retrieve (default 2022).

    Returns:
        One dict per input location, in input order. A location inside a
        known county gets 'input_location', 'county_data',
        'precipitation_history', 'flood_event_history' and
        'social_vulnerability_index'. Any other location gets
        'input_location' and a 'status' of "No county found".
    """
    if connection is None:
        # Fall back to the notebook-wide connection.
        connection = conn

    enriched_data = []

    for location in geocoded_locations:
        lat = location['latitude']
        lon = location['longitude']
        print(f"--- Fetching contextual data for: {location['name']} ({lat}, {lon}) ---")

        county_info = get_county_info(connection, lat, lon)

        if not county_info:
            print(f"Location '{location['name']}' is not within a known county. Skipping.\n")
            enriched_data.append({
                "input_location": location,
                "status": "No county found"
            })
            continue

        fips_code = county_info['fips_code']
        print(f"Found County: {county_info['county_name']} ({fips_code})")

        # Assemble all the data into one object
        location_context = {
            "input_location": location,
            "county_data": county_info,
            "precipitation_history": get_precipitation_history(connection, fips_code),
            "flood_event_history": get_flood_history(connection, fips_code, maps_client, lat, lon),
            "social_vulnerability_index": get_svi_data(connection, fips_code, release_year=release_year)
        }

        enriched_data.append(location_context)
        print("Successfully fetched all data.\n")

    return enriched_data
    

In [117]:
# Alternative prompts for manual experimentation:
# user_query = "What is the weather forecast for the area around the Northeast Medical Building in Tuscaloosa and what was it like yesterday?"
# user_query = "Compare the current weather at the University of Alabama stadium with the forecast for Northeast Medical Building in Tuscaloosa."
user_query = "Search for information about flood history at 10 Thornbury Rd, Tuscaloosa, AL"

# Step 1: Extract the locations from the query and geocode them
geocoded_results = extract_coordinates(user_query)

print(
    "=" * 40,
    "Final object returned by the function:",
    json.dumps(geocoded_results, indent=2),
    "=" * 40,
    sep="\n",
)

if not geocoded_results:
    print("Could not get coordinates, unable to fetch contextual data.")
else:
    # Step 2: Use coordinates to get all contextual data from the database
    final_contextual_data = get_contextual_data_for_locations(geocoded_results, maps_client)

    # json.dumps pretty-prints the final nested object
    print(
        "=" * 50,
        "FINAL ENRICHED DATA OBJECT",
        "=" * 50,
        json.dumps(final_contextual_data, indent=2),
        sep="\n",
    )


Processing user query: 'Search for information about flood history at 10 Thornbury Rd, Tuscaloosa, AL'

Locations identified by OpenAI: ['10 Thornbury Rd, Tuscaloosa, AL']

--- Geocoding: 10 Thornbury Rd, Tuscaloosa, AL ---
Coordinates: Lat=33.2532583, Lng=-87.5124441
Formatted Address: 10 Thornbury Rd, Tuscaloosa, AL 35406, USA

Final object returned by the function:
[
  {
    "name": "10 Thornbury Rd, Tuscaloosa, AL",
    "formatted_address": "10 Thornbury Rd, Tuscaloosa, AL 35406, USA",
    "latitude": 33.2532583,
    "longitude": -87.5124441
  }
]
--- Fetching contextual data for: 10 Thornbury Rd, Tuscaloosa, AL (33.2532583, -87.5124441) ---
Found County: Tuscaloosa (01125)
Found 13 historical flood events. Sorting by distance and reverse geocoding...
Successfully fetched all data.

FINAL ENRICHED DATA OBJECT
[
  {
    "input_location": {
      "name": "10 Thornbury Rd, Tuscaloosa, AL",
      "formatted_address": "10 Thornbury Rd, Tuscaloosa, AL 35406, USA",
      "latitude": 33.25

In [118]:
# Close the shared database connection once all queries have finished.
# NOTE(review): cells that query through `conn` cannot be re-run after this
# one until the connection cell has been executed again.
if conn:
    conn.close()
    print("\nDatabase connection closed.")


Database connection closed.
