# Genie Spaces Integration

This notebook simulates the integration of Databricks Genie Spaces with the climate analytics solution, enabling natural language interaction with climate data and insights. It demonstrates how users can ask questions in plain language and receive data-driven responses about Singapore's climate resilience.

## Setup and Imports

In [None]:
# Import required libraries
import os
import pandas as pd
import numpy as np
import json
from datetime import datetime
import re

# Obtain a Spark session. NOTE: these two lines are live code, not a
# commented-out example, so running this cell requires pyspark to be importable.
# getOrCreate() reuses an already-running session when one exists.
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("GenieSpacesIntegration").getOrCreate()

# Data directories used by this notebook (on Databricks these would typically
# live in DBFS). PROCESSED_DIR and ANALYTICS_DIR are read by
# load_data_for_genie(); GENIE_DIR receives the JSON files written by
# create_knowledge_base() and create_query_patterns().
PROCESSED_DIR = "/dbfs/FileStore/climate_resilience/processed"
ANALYTICS_DIR = "/dbfs/FileStore/climate_resilience/analytics"
VISUALIZATION_DIR = "/dbfs/FileStore/climate_resilience/visualization"
GENIE_DIR = "/dbfs/FileStore/climate_resilience/genie_spaces"
# Create the output directory up front; exist_ok=True makes re-running the cell safe.
os.makedirs(GENIE_DIR, exist_ok=True)

print("Genie Spaces Integration environment initialized.")

## Load Data for Genie Spaces

This function loads all relevant data (processed data, analytics results) for Genie Spaces to use in answering queries.

In [None]:
def load_data_for_genie():
    """
    Read every available CSV input into a dictionary of DataFrames.

    Files that do not exist are skipped silently. The correlation matrix is
    read with its first column as the index; all other files use the pandas
    defaults.

    Returns:
        dict mapping a dataset key (e.g. "climate_data_long") to its
        DataFrame, or None when no file was found or any error occurred
        while loading.
    """
    print("Loading data for Genie Spaces...")

    loaded = {}

    # (result key, stored in the processed dir?, file name, log label, read_csv options)
    catalogue = (
        ("climate_data_long", True, "climate_data_long.csv", "long format data", {}),
        ("climate_data_wide", True, "climate_data_wide.csv", "wide format data", {}),
        ("trend_analysis", False, "trend_analysis_results.csv", "trend analysis", {}),
        ("correlations", False, "indicator_correlations.csv", "correlations", {"index_col": 0}),
        ("model_results", False, "predictive_model_results.csv", "model results", {}),
        ("future_predictions", False, "future_predictions.csv", "future predictions", {}),
        ("vulnerability_index", False, "climate_vulnerability_index.csv", "vulnerability index", {}),
    )

    try:
        for key, in_processed_dir, file_name, label, csv_options in catalogue:
            # Resolve the directory per file so the lookup order matches the
            # sequential version of this loader.
            base_dir = PROCESSED_DIR if in_processed_dir else ANALYTICS_DIR
            csv_path = os.path.join(base_dir, file_name)
            if not os.path.exists(csv_path):
                continue
            loaded[key] = pd.read_csv(csv_path, **csv_options)
            print(f"Loaded {label} from {csv_path}")

        if not loaded:
            print("No data files found for Genie Spaces")
            return None
        return loaded
    except Exception as e:
        # A single unreadable file aborts the whole load.
        print(f"Error loading data for Genie: {e}")
        return None

## Create Knowledge Base

This function creates a structured knowledge base from the loaded data for Genie Spaces to use.

In [None]:
def _to_float_or_none(value):
    """Return ``value`` as a built-in float, or None if it is missing, NaN or infinite."""
    if value is None or pd.isna(value):
        return None
    number = float(value)
    # Infinity would be written as the non-standard JSON token ``Infinity``.
    return number if np.isfinite(number) else None


def _with_numeric_year(df, keep_existing=False):
    """Return a copy of ``df`` with an integer ``Year_Numeric`` column.

    Rows whose year cannot be interpreted are dropped. The input frame is never
    modified. With ``keep_existing=True`` an already present ``Year_Numeric``
    column is used instead of being recomputed from ``Year``.
    """
    if keep_existing and "Year_Numeric" in df.columns:
        years = pd.to_numeric(df["Year_Numeric"], errors="coerce")
    elif pd.api.types.is_datetime64_any_dtype(df["Year"]):
        years = df["Year"].dt.year
    else:
        years = pd.to_numeric(df["Year"], errors="coerce")

    valid = years.notna()
    result = df.loc[valid].copy()
    result["Year_Numeric"] = years[valid].astype(int).to_numpy()
    return result


def _first_and_last(df, value_column):
    """Return (earliest_year, earliest_value, latest_year, latest_value) for a non-empty frame.

    When a year has several rows the first one is used. Values are returned as
    floats, or None when missing.
    """
    years = df["Year_Numeric"]
    earliest_year = int(years.min())
    latest_year = int(years.max())
    earliest_value = _to_float_or_none(df.loc[years == earliest_year, value_column].iloc[0])
    latest_value = _to_float_or_none(df.loc[years == latest_year, value_column].iloc[0])
    return earliest_year, earliest_value, latest_year, latest_value


def _change_between(start, end):
    """Return (absolute change, percent change); an entry is None when it cannot be computed."""
    if start is None or end is None:
        return None, None
    change = end - start
    # A percentage relative to a zero baseline is undefined.
    percent = (change / start) * 100 if start != 0 else None
    return _to_float_or_none(change), _to_float_or_none(percent)


def _correlation_strength(correlation):
    """Label a correlation coefficient whose absolute value already exceeds 0.5."""
    if correlation > 0.7:
        return "strong positive"
    if correlation > 0.3:
        return "moderate positive"
    if correlation < -0.7:
        return "strong negative"
    return "moderate negative"


def _risk_level(index_value):
    """Map a vulnerability index to "high" (> 75), "medium" (> 50), "low", or "unknown" if missing."""
    if index_value is None:
        return "unknown"
    if index_value > 75:
        return "high"
    if index_value > 50:
        return "medium"
    return "low"


def create_knowledge_base(data_dict):
    """
    Creates a knowledge base from the data for Genie Spaces to use

    Args:
        data_dict: mapping of dataset key to DataFrame as produced by
            load_data_for_genie(), or None. Recognised keys are
            "climate_data_long", "trend_analysis", "correlations",
            "future_predictions" and "vulnerability_index"; absent keys are
            skipped. The DataFrames are not modified.

    Returns:
        None when ``data_dict`` is None, otherwise a dict with the sections
        "indicators", "trends", "correlations", "predictions",
        "vulnerability" and "general_facts". All numbers are built-in
        int/float values; missing, NaN or infinite values are stored as None
        so the structure is valid JSON. Prediction years are int keys.

    The knowledge base is also written to ``knowledge_base.json`` inside
    GENIE_DIR. Saving is best-effort: a failure is printed and the in-memory
    knowledge base is still returned.
    """
    print("Creating knowledge base for Genie Spaces...")

    if data_dict is None:
        print("No data for knowledge base")
        return None

    knowledge_base = {
        "indicators": {},
        "trends": {},
        "correlations": {},
        "predictions": {},
        "vulnerability": {},
        "general_facts": []
    }

    # Extract indicator information
    df_long = data_dict.get("climate_data_long")
    if df_long is not None:
        yearly = _with_numeric_year(df_long)

        # sort=False keeps indicators in order of first appearance.
        for indicator, rows in yearly.groupby("Indicator", sort=False):
            earliest_year, earliest_value, latest_year, latest_value = _first_and_last(rows, "Value")
            total_change, percent_change = _change_between(earliest_value, latest_value)

            knowledge_base["indicators"][indicator] = {
                "latest_year": latest_year,
                "latest_value": latest_value,
                "earliest_year": earliest_year,
                "earliest_value": earliest_value,
                "total_change": total_change,
                "percent_change": percent_change
            }

    # Extract trend information
    trend_df = data_dict.get("trend_analysis")
    if trend_df is not None:
        trend_columns = {
            "slope": "Slope",
            "r_squared": "R_Squared",
            "avg_annual_change": "Avg_Annual_Change",
            "total_change": "Total_Change",
            "percent_change": "Percent_Change"
        }
        for _, row in trend_df.iterrows():
            knowledge_base["trends"][row["Indicator"]] = {
                key: _to_float_or_none(row[column]) for key, column in trend_columns.items()
            }

    # Extract correlation information
    corr_matrix = data_dict.get("correlations")
    if corr_matrix is not None:
        # Walk the lower triangle; bounding by both axes avoids an IndexError
        # if the matrix is not square.
        size = min(len(corr_matrix.columns), len(corr_matrix.index))
        for i in range(size):
            indicator1 = corr_matrix.columns[i]
            for j in range(i + 1, size):
                indicator2 = corr_matrix.index[j]
                correlation = corr_matrix.iloc[j, i]

                # Only strong correlations are kept; NaN fails this test too.
                if abs(correlation) > 0.5:
                    knowledge_base["correlations"][f"{indicator1}_{indicator2}"] = {
                        "indicator1": indicator1,
                        "indicator2": indicator2,
                        "correlation": _to_float_or_none(correlation),
                        "strength": _correlation_strength(correlation)
                    }

    # Extract prediction information
    future_df = data_dict.get("future_predictions")
    if future_df is not None:
        for indicator, rows in future_df.groupby("Indicator", sort=False):
            predictions = {}
            for year in sorted(rows["Year"].dropna().unique()):
                value = rows.loc[rows["Year"] == year, "Predicted_Value"].iloc[0]
                predictions[int(year)] = _to_float_or_none(value)

            knowledge_base["predictions"][indicator] = predictions

    # Extract vulnerability information
    df_index = data_dict.get("vulnerability_index")
    if df_index is not None and not df_index.empty:
        yearly_index = _with_numeric_year(df_index, keep_existing=True)

        # Without a single usable year there is nothing to report; the section
        # stays empty so handlers answer "no information".
        if not yearly_index.empty:
            earliest_year, earliest_index, latest_year, latest_index = _first_and_last(
                yearly_index, "Climate_Vulnerability_Index"
            )
            index_change, index_percent_change = _change_between(earliest_index, latest_index)

            knowledge_base["vulnerability"] = {
                "latest_year": latest_year,
                "latest_index": latest_index,
                "earliest_year": earliest_year,
                "earliest_index": earliest_index,
                "change": index_change,
                "percent_change": index_percent_change,
                "risk_level": _risk_level(latest_index)
            }

            # Get component contributions if available
            component_cols = [col for col in yearly_index.columns if "_Norm" in col]
            if component_cols:
                latest_data = yearly_index[yearly_index["Year_Numeric"] == latest_year].iloc[0]
                knowledge_base["vulnerability"]["components"] = {
                    col.replace("_Norm", ""): _to_float_or_none(latest_data[col])
                    for col in component_cols
                }

    # Add general facts
    knowledge_base["general_facts"] = [
        "Singapore is particularly vulnerable to climate change as an island nation.",
        "Rising sea levels pose a significant threat to Singapore's coastal infrastructure.",
        "Changing precipitation patterns can affect Singapore's water security.",
        "Increasing temperatures impact public health and energy consumption in Singapore.",
        "Climate resilience measures are critical for Singapore's sustainable development."
    ]

    # Save knowledge base. The path is built inside the try so that any problem
    # while persisting is reported without losing the in-memory result.
    try:
        kb_path = os.path.join(GENIE_DIR, "knowledge_base.json")
        with open(kb_path, "w", encoding="utf-8") as f:
            # Every value was already converted to a JSON-safe built-in type,
            # so no custom ``default`` hook is needed.
            json.dump(knowledge_base, f, indent=2)
        print(f"Saved knowledge base to {kb_path}")
    except Exception as e:
        print(f"Error saving knowledge base: {e}")

    return knowledge_base

## Create Query Patterns

This function defines the regular-expression patterns used to recognize natural language queries, together with the response template for each query type.

In [None]:
def create_query_patterns():
    """
    Creates patterns for recognizing and responding to natural language queries

    Returns:
        A list of pattern groups. Each group is a dict with
        "name" (query type), "patterns" (regular expressions intended for
        ``re.search`` with ``re.IGNORECASE``) and "response_template"
        (a ``str.format`` template).

    The list is also written to ``query_patterns.json`` inside GENIE_DIR.
    Saving is best-effort: a failure is printed and the patterns are still
    returned.
    """
    print("Creating query patterns for Genie Spaces...")

    # A lazy capture group that is the last mandatory element of a pattern
    # matches as little as possible. Anchoring such patterns to the end of the
    # query (after an optional "in Singapore" and trailing punctuation) forces
    # the group to capture the whole indicator phrase instead of nothing.
    tail = r"(?:\s+in Singapore)?\s*[?.!]*\s*$"

    patterns = [
        {
            "name": "indicator_value",
            "patterns": [
                # The lookahead leaves "current/latest climate vulnerability"
                # questions to the vulnerability_index group further down.
                r"what is the (?:current|latest) (?!climate vulnerability)(.+?)" + tail,
                r"what is the value of (.+?)" + tail,
                r"how much (.+?) does Singapore have",
                r"what are the current (.+?) levels"
            ],
            "response_template": "The latest {indicator} for Singapore is {value} (as of {year})."
        },
        {
            "name": "indicator_trend",
            "patterns": [
                r"how has (.+?) changed",
                r"what is the trend (?:for|of) (.+?)" + tail,
                r"is (.+?) increasing or decreasing",
                r"what is the rate of change (?:for|of) (.+?)" + tail
            ],
            "response_template": "The {indicator} in Singapore has {trend_direction} by {change} ({percent_change}%) from {start_year} to {end_year}. The average annual change is {annual_change}."
        },
        {
            "name": "indicator_prediction",
            "patterns": [
                r"what will (.+?) be in (\d{4})",
                r"predict (.+?) for (\d{4})",
                r"what is the projected (.+?) for (\d{4})",
                r"how much (.+?) will there be in (\d{4})"
            ],
            "response_template": "Based on our predictive models, the {indicator} in Singapore is projected to be {value} in {year}."
        },
        {
            "name": "indicator_correlation",
            "patterns": [
                r"(?:how|what) (?:is|are) (.+?) related to (.+?)" + tail,
                r"(?:is|are) there (?:a|any) correlation between (.+?) and (.+?)" + tail,
                r"do (.+?) and (.+?) correlate",
                # Single capture group: lists every correlation of one indicator.
                r"what indicators are correlated with (.+?)" + tail
            ],
            "response_template": "There is a {strength} correlation ({correlation}) between {indicator1} and {indicator2} in Singapore."
        },
        {
            "name": "vulnerability_index",
            "patterns": [
                r"what is the (?:current|latest) climate vulnerability(?: index)?",
                r"how vulnerable is Singapore to climate change",
                r"what is Singapore's climate risk(?: level)?",
                r"what is the climate vulnerability(?: index)? for Singapore"
            ],
            "response_template": "Singapore's Climate Vulnerability Index is currently {value} (as of {year}), which indicates a {risk_level} risk level. The index has {trend_direction} by {change} ({percent_change}%) since {start_year}."
        },
        {
            "name": "vulnerability_components",
            "patterns": [
                r"what (?:factors|components) contribute to (?:the|Singapore's) vulnerability",
                r"what are the main (?:drivers|causes) of climate vulnerability",
                r"what makes Singapore vulnerable to climate change",
                r"what components make up the vulnerability index"
            ],
            "response_template": "The main components of Singapore's Climate Vulnerability Index are: {components}. {top_component} currently has the highest contribution to the overall vulnerability."
        },
        {
            "name": "general_info",
            "patterns": [
                r"tell me about climate change in Singapore",
                r"what should I know about Singapore's climate",
                r"give me an overview of climate change impacts",
                r"what are the key climate issues for Singapore"
            ],
            "response_template": "Here are some key facts about climate change in Singapore:\n\n{facts}"
        }
    ]

    # Save patterns. The path is built inside the try so that any problem
    # while persisting is reported without losing the in-memory result.
    try:
        patterns_path = os.path.join(GENIE_DIR, "query_patterns.json")
        with open(patterns_path, "w", encoding="utf-8") as f:
            json.dump(patterns, f, indent=2)
        print(f"Saved query patterns to {patterns_path}")
    except Exception as e:
        print(f"Error saving query patterns: {e}")

    return patterns

## Process Query

This function processes a natural language query using the knowledge base and defined patterns.

In [None]:
def process_query(query, knowledge_base, patterns):
    """
    Answer a natural language query with the first pattern that matches it.

    Pattern groups are tried in list order and each pattern is searched
    case-insensitively. The first hit whose group name is recognised is
    dispatched to its handler; if nothing matches the query is passed to
    handle_fallback_query().

    Returns:
        The response string. When either the knowledge base or the patterns
        are None a fixed apology is returned.
    """
    print(f"Processing query: {query}")

    if knowledge_base is None or patterns is None:
        return "I'm sorry, but I don't have enough information to answer that question."

    # Marker for "this group name has no handler"; matching then continues.
    not_handled = object()

    def respond(group, hit):
        kind = group["name"]

        if kind == "indicator_value":
            return handle_indicator_value(
                hit.group(1).strip().lower(), knowledge_base, group["response_template"]
            )
        if kind == "indicator_trend":
            return handle_indicator_trend(
                hit.group(1).strip().lower(), knowledge_base, group["response_template"]
            )
        if kind == "indicator_prediction":
            return handle_indicator_prediction(
                hit.group(1).strip().lower(), int(hit.group(2)), knowledge_base, group["response_template"]
            )
        if kind == "indicator_correlation":
            # Two captures: a specific pair. One capture: list all partners.
            if len(hit.groups()) < 2:
                return handle_indicator_correlations_list(hit.group(1).strip().lower(), knowledge_base)
            return handle_indicator_correlation(
                hit.group(1).strip().lower(),
                hit.group(2).strip().lower(),
                knowledge_base,
                group["response_template"],
            )
        if kind == "vulnerability_index":
            return handle_vulnerability_index(knowledge_base, group["response_template"])
        if kind == "vulnerability_components":
            return handle_vulnerability_components(knowledge_base, group["response_template"])
        if kind == "general_info":
            return handle_general_info(knowledge_base, group["response_template"])
        return not_handled

    for group in patterns:
        for regex in group["patterns"]:
            hit = re.search(regex, query, re.IGNORECASE)
            if hit is None:
                continue

            print(f"Matched pattern: {regex}")
            answer = respond(group, hit)
            if answer is not not_handled:
                return answer

    # If no pattern matched, try to find the most relevant information
    return handle_fallback_query(query, knowledge_base)

## Helper Functions for Query Handling

These functions assist in finding matching indicators and formatting responses for different query types.

In [None]:
def find_matching_indicator(indicator_term, knowledge_base):
    """
    Finds the indicator in the knowledge base that best matches the query term

    Matching is case-insensitive and tried in three passes over the
    indicators, in knowledge-base order: exact name, term contained in the
    name, then any shared word (ignoring common filler words).

    Args:
        indicator_term: text captured from the user's query.
        knowledge_base: dict with an "indicators" mapping, or None.

    Returns:
        The indicator name as stored in the knowledge base, or None when the
        term is empty or nothing matches.
    """
    if not knowledge_base or not indicator_term:
        return None

    indicators = knowledge_base.get("indicators")
    if not indicators:
        return None

    # Drop surrounding whitespace and sentence punctuation ("sea level?").
    term = indicator_term.strip().strip("?.!,;:").strip().lower()
    if not term:
        # An empty term is a substring of every name and would otherwise
        # select the first indicator arbitrarily.
        return None

    lowered_names = [(name, name.lower()) for name in indicators]

    # Check for exact match (case-insensitive)
    for name, lowered in lowered_names:
        if term == lowered:
            return name

    # Check for partial matches (contains)
    for name, lowered in lowered_names:
        if term in lowered:
            return name

    # Check for partial matches (words); filler words carry no meaning and
    # would match unrelated indicators such as "Emissions of CO2".
    filler_words = {"a", "an", "and", "the", "of", "in", "for", "to", "on", "by", "per"}
    term_words = set(term.split()) - filler_words
    for name, lowered in lowered_names:
        if any(word in term_words for word in lowered.split()):
            return name

    return None

def handle_indicator_value(indicator_term, knowledge_base, template):
    """
    Build the answer to a "what is the latest value of X" question.

    The term is resolved to an indicator name first; the template is then
    filled with the indicator, its latest value rounded to two decimals and
    the year of that value. Missing values are replaced by placeholder text.
    """
    matched = find_matching_indicator(indicator_term, knowledge_base)
    if matched is None:
        return f"I'm sorry, but I don't have information about {indicator_term}."

    if matched not in knowledge_base["indicators"]:
        return f"I'm sorry, but I don't have current value information for {matched}."

    record = knowledge_base["indicators"][matched]

    if record["latest_value"] is None:
        shown_value = "unknown"
    else:
        shown_value = round(record["latest_value"], 2)

    if record["latest_year"] is None:
        shown_year = "the most recent year"
    else:
        shown_year = record["latest_year"]

    return template.format(indicator=matched, value=shown_value, year=shown_year)

def handle_indicator_trend(indicator_term, knowledge_base, template):
    """
    Handles queries about the trend of an indicator

    Args:
        indicator_term: text captured from the user's query.
        knowledge_base: dict with "trends" and "indicators" sections.
        template: ``str.format`` template using indicator, trend_direction,
            change, percent_change, start_year, end_year and annual_change.

    Returns:
        The formatted answer, or an apology when the indicator is unknown or
        has no trend data. The direction word carries the sign, so ``change``
        and ``percent_change`` are reported as magnitudes; ``annual_change``
        keeps its sign. When the direction cannot be determined (no data or
        zero change) the neutral word "changed" is used.
    """
    indicator = find_matching_indicator(indicator_term, knowledge_base)

    if indicator is None:
        return f"I'm sorry, but I don't have trend information about {indicator_term}."

    trends = knowledge_base.get("trends") or {}
    indicators = knowledge_base.get("indicators") or {}
    if indicator not in trends or indicator not in indicators:
        return f"I'm sorry, but I don't have trend information for {indicator}."

    trend_data = trends[indicator]
    indicator_data = indicators[indicator]

    total_change = trend_data.get("total_change")
    percent = trend_data.get("percent_change")
    avg_annual_change = trend_data.get("avg_annual_change")

    # The sentence reads "<direction> by <change>", so the direction follows
    # the total change and only falls back to the annual change when the
    # total is unavailable.
    reference = total_change if total_change is not None else avg_annual_change
    if reference is not None and reference > 0:
        trend_direction = "increased"
    elif reference is not None and reference < 0:
        trend_direction = "decreased"
    else:
        trend_direction = "changed"

    change = round(abs(total_change), 2) if total_change is not None else "an unknown amount"
    percent_change = round(abs(percent), 2) if percent is not None else "an unknown percentage"
    annual_change = round(avg_annual_change, 4) if avg_annual_change is not None else "unknown"

    start_year = indicator_data.get("earliest_year")
    if start_year is None:
        start_year = "the earliest year"
    end_year = indicator_data.get("latest_year")
    if end_year is None:
        end_year = "the latest year"

    return template.format(
        indicator=indicator,
        trend_direction=trend_direction,
        change=change,
        percent_change=percent_change,
        start_year=start_year,
        end_year=end_year,
        annual_change=annual_change
    )

def handle_indicator_prediction(indicator_term, year, knowledge_base, template):
    """
    Handles queries about future predictions for an indicator

    Args:
        indicator_term: text captured from the user's query.
        year: requested year as an int.
        knowledge_base: dict with a "predictions" section mapping indicator
            name to ``{year: value}``. Year keys may be ints (knowledge base
            built in memory) or strings (knowledge base reloaded from JSON).
        template: ``str.format`` template using indicator, value and year.

    Returns:
        The formatted answer for the requested year, or for the closest
        available year when the exact one is missing (the answer then names
        that closest year). An apology is returned when the indicator is
        unknown or has no predictions.
    """
    indicator = find_matching_indicator(indicator_term, knowledge_base)

    if indicator is None:
        return f"I'm sorry, but I don't have prediction information about {indicator_term}."

    no_data = f"I'm sorry, but I don't have prediction information for {indicator} in {year}."

    predictions = (knowledge_base.get("predictions") or {}).get(indicator)
    if not predictions:
        return no_data

    # Normalise the keys to int so both in-memory (int) and JSON-loaded (str)
    # knowledge bases are looked up the same way.
    by_year = {}
    for key, value in predictions.items():
        try:
            by_year[int(key)] = value
        except (TypeError, ValueError):
            continue  # ignore keys that are not years
    if not by_year:
        return no_data

    # Find the closest year if exact year not available
    if year not in by_year:
        year = min(by_year, key=lambda candidate: abs(candidate - year))
    value = by_year[year]

    value_formatted = round(value, 2) if value is not None else "unknown"

    return template.format(indicator=indicator, value=value_formatted, year=year)

def handle_indicator_correlation(indicator1_term, indicator2_term, knowledge_base, template):
    """
    Build the answer to a question about how two indicators correlate.

    Both terms are resolved to indicator names. The correlation entry is
    looked up under "<first>_<second>" and, failing that, under
    "<second>_<first>", because the knowledge base stores each pair once.
    """
    first = find_matching_indicator(indicator1_term, knowledge_base)
    second = find_matching_indicator(indicator2_term, knowledge_base)

    if first is None or second is None:
        return f"I'm sorry, but I don't have correlation information about {indicator1_term} and {indicator2_term}."

    table = knowledge_base["correlations"]
    entry = None
    for candidate_key in (f"{first}_{second}", f"{second}_{first}"):
        if candidate_key in table:
            entry = table[candidate_key]
            break

    if not entry:
        return f"I don't have information about the correlation between {first} and {second}."

    strength = entry["strength"]
    if entry["correlation"] is None:
        coefficient = "unknown"
    else:
        coefficient = round(entry["correlation"], 2)

    return template.format(
        indicator1=first,
        indicator2=second,
        strength=strength,
        correlation=coefficient
    )

def handle_indicator_correlations_list(indicator_term, knowledge_base):
    """
    List every stored correlation that involves the requested indicator.

    Partners are ordered by the absolute size of the coefficient, largest
    first; a missing coefficient is treated as 0 for sorting and display.
    """
    matched = find_matching_indicator(indicator_term, knowledge_base)
    if matched is None:
        return f"I'm sorry, but I don't have correlation information about {indicator_term}."

    # (partner indicator, coefficient, strength label)
    partners = []
    for entry in knowledge_base["correlations"].values():
        if matched == entry["indicator1"]:
            partner = entry["indicator2"]
        elif matched == entry["indicator2"]:
            partner = entry["indicator1"]
        else:
            continue

        coefficient = entry["correlation"]
        if coefficient is None:
            coefficient = 0
        partners.append((partner, coefficient, entry["strength"]))

    if not partners:
        return f"I don't have information about correlations for {matched}."

    # Sort by absolute correlation strength (stable, so ties keep their order)
    ranked = sorted(partners, key=lambda item: abs(item[1]), reverse=True)

    lines = [
        f"- {partner}: {strength} correlation ({round(coefficient, 2)})\n"
        for partner, coefficient, strength in ranked
    ]
    return f"Here are the indicators correlated with {matched}:\n\n" + "".join(lines)

def handle_vulnerability_index(knowledge_base, template):
    """
    Handles queries about the climate vulnerability index

    Args:
        knowledge_base: dict with a "vulnerability" section, or None.
        template: ``str.format`` template; it may use any subset of value,
            year, risk_level, trend_direction, change, percent_change and
            start_year.

    Returns:
        The formatted answer, or an apology when no vulnerability data is
        available. The direction word carries the sign, so ``change`` and
        ``percent_change`` are reported as magnitudes. When the direction
        cannot be determined (no data or zero change) the neutral word
        "changed" is used.
    """
    vuln_data = (knowledge_base or {}).get("vulnerability")
    if not vuln_data:
        return "I'm sorry, but I don't have information about the climate vulnerability index."

    latest_index = vuln_data.get("latest_index")
    index_change = vuln_data.get("change")
    index_percent = vuln_data.get("percent_change")

    if index_change is not None and index_change > 0:
        trend_direction = "increased"
    elif index_change is not None and index_change < 0:
        trend_direction = "decreased"
    else:
        trend_direction = "changed"

    value = round(latest_index, 2) if latest_index is not None else "unknown"
    change = round(abs(index_change), 2) if index_change is not None else "an unknown amount"
    percent_change = round(abs(index_percent), 2) if index_percent is not None else "an unknown percentage"

    year = vuln_data.get("latest_year")
    if year is None:
        year = "the most recent year"
    start_year = vuln_data.get("earliest_year")
    if start_year is None:
        start_year = "the earliest year"

    return template.format(
        value=value,
        year=year,
        risk_level=vuln_data.get("risk_level", "unknown"),
        trend_direction=trend_direction,
        change=change,
        percent_change=percent_change,
        start_year=start_year
    )

def handle_vulnerability_components(knowledge_base, template):
    """
    Build the answer describing what the vulnerability index is made of.

    The template receives a comma-separated list of all component names
    (underscores shown as spaces) and the name of the component with the
    largest value; "N/A" is used when no component has a value.
    """
    has_components = (
        "vulnerability" in knowledge_base
        and "components" in knowledge_base["vulnerability"]
    )
    if not has_components:
        return "I'm sorry, but I don't have information about the components of the climate vulnerability index."

    components = knowledge_base["vulnerability"]["components"]

    # Human-readable list of every component, valued or not
    readable_names = [name.replace('_', ' ') for name in components]
    component_list = ", ".join(readable_names)

    # Only components with a value can be ranked
    scored = [(name, score) for name, score in components.items() if score is not None]
    if not scored:
        top_component_name = "N/A"
    else:
        best_name, _ = max(scored, key=lambda pair: pair[1])
        top_component_name = best_name.replace('_', ' ')

    return template.format(components=component_list, top_component=top_component_name)

def handle_general_info(knowledge_base, template):
    """
    Build the answer to a general "tell me about climate change" question.

    The stored general facts are rendered as a dash-prefixed bullet list and
    inserted into the template's ``facts`` placeholder.
    """
    has_facts = "general_facts" in knowledge_base and knowledge_base["general_facts"]
    if not has_facts:
        return "I'm sorry, but I don't have general information about climate change in Singapore."

    bullet_lines = []
    for fact in knowledge_base["general_facts"]:
        bullet_lines.append(f"- {fact}")

    return template.format(facts="\n".join(bullet_lines))

def handle_fallback_query(query, knowledge_base):
    """
    Last-resort handler for queries that no specific pattern matched.

    Tries, in order:
      1. the first indicator name from knowledge_base["indicators"] that
         appears (case-insensitively) in the query -> its latest value;
      2. any vulnerability-related term in the query -> the vulnerability
         index summary;
      3. otherwise a generic "I don't understand" message.
    """
    lowered_query = query.lower()

    # 1. Did the user mention a known indicator by name?
    if "indicators" in knowledge_base:
        mentioned = next(
            (name for name in knowledge_base["indicators"] if name.lower() in lowered_query),
            None,
        )
        if mentioned is not None:
            return handle_indicator_value(mentioned, knowledge_base, "The latest {indicator} for Singapore is {value} (as of {year}).")

    # 2. Did the user use wording that points at the vulnerability index?
    vulnerability_terms = ("vulnerability", "risk", "resilience", "climate change")
    if any(term in lowered_query for term in vulnerability_terms):
        return handle_vulnerability_index(knowledge_base, "Singapore's Climate Vulnerability Index is currently {value} (as of {year}), which indicates a {risk_level} risk level.")

    # 3. Nothing recognisable in the query
    return "I'm sorry, but I don't understand your question. You can ask about specific climate indicators, trends, correlations, predictions, or the climate vulnerability index for Singapore."

## Create Example Queries

This function generates example queries based on the available data in the knowledge base.

In [None]:
def create_example_queries(knowledge_base):
    """
    Builds a list of example questions from what the knowledge base contains.

    Up to two examples each are derived from the "indicators", "trends" and
    "predictions" sections, two from the first entry of "correlations", and
    four fixed vulnerability/general questions are always appended. The list
    is also written to example_queries.json under GENIE_DIR (a write failure
    is reported and otherwise ignored).

    Returns the list of example strings. When `knowledge_base` is None a
    single default question is returned and nothing is written.
    """
    print("Creating example queries for Genie Spaces...")

    # Without a knowledge base there is nothing to derive examples from
    if knowledge_base is None:
        return ["What is the temperature?"]

    def section_keys(section):
        # Keys of a section, or an empty list when it is missing or empty
        if section in knowledge_base and knowledge_base[section]:
            return list(knowledge_base[section].keys())
        return []

    examples = []

    # Indicator value questions
    indicator_names = section_keys("indicators")
    if indicator_names:
        examples.append(f"What is the current {indicator_names[0]} in Singapore?")
    if len(indicator_names) > 1:
        examples.append(f"What is the value of {indicator_names[1]}?")

    # Trend questions
    trend_names = section_keys("trends")
    if trend_names:
        examples.append(f"How has {trend_names[0]} changed over time?")
    if len(trend_names) > 1:
        examples.append(f"Is {trend_names[1]} increasing or decreasing?")

    # Prediction questions
    prediction_names = section_keys("predictions")
    if prediction_names:
        examples.append(f"What will {prediction_names[0]} be in 2030?")
    if len(prediction_names) > 1:
        examples.append(f"Predict {prediction_names[1]} for 2025.")

    # Correlation questions, both built from the first stored pair
    pair_keys = section_keys("correlations")
    if pair_keys:
        first_pair = knowledge_base["correlations"][pair_keys[0]]
        examples.append(f"How is {first_pair['indicator1']} related to {first_pair['indicator2']}?")
        examples.append(f"What indicators are correlated with {first_pair['indicator1']}?")

    # Vulnerability and general questions are always offered
    examples.extend([
        "What is the current climate vulnerability index for Singapore?",
        "What factors contribute to Singapore's vulnerability to climate change?",
        "Tell me about climate change in Singapore.",
        "What are the key climate issues for Singapore?",
    ])

    # Persist the examples next to the other Genie artefacts
    examples_path = os.path.join(GENIE_DIR, "example_queries.json")
    try:
        with open(examples_path, "w") as out_file:
            json.dump(examples, out_file, indent=2)
        print(f"Saved example queries to {examples_path}")
    except Exception as e:
        print(f"Error saving example queries: {e}")

    return examples

## Create Simulated Genie Interface (HTML)

This function creates a simple HTML file to simulate the Genie Spaces interface for demonstration purposes.

In [None]:
def create_genie_interface(examples):
    """
    Creates a simulated Genie Spaces interface HTML file.

    Renders a self-contained HTML page (styles, a chat-like conversation box,
    clickable example questions and a small JavaScript responder with canned
    answers) and writes it to genie_interface.html under GENIE_DIR.

    Args:
        examples: Iterable of example question strings to list on the page.
            Each entry is HTML-escaped before being embedded. None is treated
            as an empty list.

    Returns:
        None. A failure to write the file is printed and otherwise ignored.
    """
    # Imported locally and under an alias so the block stays self-contained.
    from html import escape as escape_html

    print("Creating Genie Spaces interface...")

    # Example texts originate from data (indicator names), so escape them to
    # keep characters such as <, > and & from breaking the markup.
    example_divs = "\n".join(
        f'<div class="example-query" onclick="askExample(this.innerText)">{escape_html(str(example))}</div>'
        for example in (examples or [])
    )

    # NOTE: this is a regular (non-raw) f-string. Literal braces are doubled,
    # and newlines meant for the JavaScript strings are written as \\n so the
    # generated script contains the escape sequence rather than a raw line
    # break (which would be a syntax error inside a quoted JS string).
    page = f"""<!DOCTYPE html>
<html>
<head>
    <title>Genie Spaces - Singapore Climate Resilience</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {{ font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }}
        .header {{ background-color: #0072C6; color: white; padding: 20px; text-align: center; }}
        .container {{ max-width: 800px; margin: 0 auto; padding: 20px; }}
        .genie-interface {{ background-color: white; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); padding: 20px; margin-bottom: 20px; }}
        .query-input {{ display: flex; margin-bottom: 20px; }}
        .query-input input {{ flex: 1; padding: 10px; border: 1px solid #ccc; border-radius: 4px 0 0 4px; font-size: 16px; }}
        .query-input button {{ padding: 10px 15px; background-color: #0072C6; color: white; border: none; border-radius: 0 4px 4px 0; cursor: pointer; font-size: 16px; }}
        .conversation {{ max-height: 400px; overflow-y: auto; border: 1px solid #eee; padding: 10px; border-radius: 4px; }}
        .message {{ margin-bottom: 15px; padding: 10px; border-radius: 4px; }}
        .user-message {{ background-color: #e6f7ff; text-align: right; margin-left: 20%; }}
        .genie-message {{ background-color: #f0f0f0; margin-right: 20%; }}
        .examples {{ background-color: white; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); padding: 20px; }}
        .example-query {{ padding: 10px; margin: 5px 0; background-color: #f0f7ff; border-radius: 4px; cursor: pointer; }}
        .example-query:hover {{ background-color: #d0e7ff; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>Genie Spaces</h1>
        <p>Singapore Climate Resilience - Natural Language Analytics</p>
    </div>
    
    <div class="container">
        <div class="genie-interface">
            <h2>Ask Questions About Singapore's Climate Data</h2>
            <p>Use natural language to explore climate indicators, trends, and insights</p>
            
            <div class="query-input">
                <input type="text" id="query-input" placeholder="Ask a question about climate data...">
                <button id="query-button">Ask</button>
            </div>
            
            <div class="conversation" id="conversation">
                <div class="message genie-message">
                    Hello! I'm your climate data assistant. How can I help you understand Singapore's climate resilience data today?
                </div>
            </div>
        </div>
        
        <div class="examples">
            <h2>Example Questions</h2>
            <p>Click on any example to ask it:</p>
            
            <div id="example-queries">
                {example_divs}
            </div>
        </div>
    </div>
    
    <script>
        // Simulated responses based on query patterns
        function simulateResponse(query) {{ 
            // This is a simplified simulation - in a real implementation, this would call the Databricks Genie API
            // or use the process_query function defined in this notebook.
            // Branch order matters: the specific "components of vulnerability" check must run
            // before the general vulnerability/index check, which would otherwise shadow it.
            
            if (query.match(/temperature|temp/i)) {{ return "The latest Average Temperature for Singapore is 28.5°C (as of 2023). This represents an increase of 1.2°C since 1960."; }}
            else if (query.match(/co2|carbon|emissions/i)) {{ return "CO2 Emissions in Singapore have increased by 2.8 tons per capita (175%) from 1960 to 2023. The average annual increase is 0.044 tons per capita."; }}
            else if (query.match(/sea level/i) && query.match(/2030/i)) {{ return "Based on our predictive models, the Sea Level Rise in Singapore is projected to be 25.3 cm in 2030."; }}
            else if (query.match(/factors|components|contribute/i) && query.match(/vulnerability/i)) {{ return "The main components of Singapore's Climate Vulnerability Index are: Temperature, Rainfall, CO2 Emissions, and Sea Level Rise. Temperature currently has the highest contribution to the overall vulnerability."; }}
            else if (query.match(/vulnerability|index/i)) {{ return "Singapore's Climate Vulnerability Index is currently 68.5 (as of 2023), which indicates a medium risk level. The index has increased by 15.3 (28.7%) since 1990."; }}
            else if (query.match(/tell|about|overview|key/i) && query.match(/climate|change/i)) {{ return "Here are some key facts about climate change in Singapore:\\n\\n- Singapore is particularly vulnerable to climate change as an island nation.\\n- Rising sea levels pose a significant threat to Singapore's coastal infrastructure.\\n- Changing precipitation patterns can affect Singapore's water security.\\n- Increasing temperatures impact public health and energy consumption in Singapore.\\n- Climate resilience measures are critical for Singapore's sustainable development."; }}
            else if (query.match(/related|correlation|correlate/i)) {{ return "There is a strong positive correlation (0.85) between Temperature and CO2 Emissions in Singapore."; }}
            else {{ return "I understand you're asking about " + query + ". To provide a specific answer, I would need to analyze the relevant climate data. In a full implementation, this would be processed through Databricks Genie Spaces to give you precise insights based on the data."; }}
        }}
        
        function addMessage(text, isUser) {{ 
            const conversation = document.getElementById('conversation');
            const message = document.createElement('div');
            message.className = isUser ? 'message user-message' : 'message genie-message';
            message.innerText = text;
            conversation.appendChild(message);
            conversation.scrollTop = conversation.scrollHeight;
        }}
        
        function askQuery() {{ 
            const input = document.getElementById('query-input');
            const query = input.value.trim();
            if (query) {{ 
                addMessage(query, true);
                setTimeout(() => {{ const response = simulateResponse(query); addMessage(response, false); }}, 1000);
                input.value = '';
            }}
        }}
        
        function askExample(text) {{ document.getElementById('query-input').value = text; askQuery(); }}
        
        document.getElementById('query-button').addEventListener('click', askQuery);
        document.getElementById('query-input').addEventListener('keypress', function(e) {{ if (e.key === 'Enter') {{ askQuery(); }} }});
    </script>
</body>
</html>
"""
    
    # Save interface. The page declares charset UTF-8 and contains non-ASCII
    # characters (the degree sign), so write it as UTF-8 explicitly instead of
    # relying on the platform default encoding.
    interface_path = os.path.join(GENIE_DIR, "genie_interface.html")
    try:
        with open(interface_path, "w", encoding="utf-8") as f:
            f.write(page)
        print(f"Created Genie Spaces interface at {interface_path}")
    except Exception as e:
        # Best-effort: report the failure but do not abort the setup
        print(f"Error creating Genie interface: {e}")
## Main Function

This function orchestrates the Genie Spaces integration setup.

In [None]:
def main():
    """
    Runs the complete Genie Spaces integration setup.

    Loads the data, and if any is available builds the knowledge base and
    query patterns, generates the example queries, writes the simulated
    HTML interface, and prints the answers process_query gives for the
    first three examples. Progress and failures are reported via print.
    """
    print(f"Starting Genie Spaces integration setup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    data_dict = load_data_for_genie()

    # Nothing to build on when loading produced no data
    if data_dict is None:
        print("Genie Spaces integration setup failed: No data available")
        return

    # Build the artefacts in dependency order
    knowledge_base = create_knowledge_base(data_dict)
    patterns = create_query_patterns()
    examples = create_example_queries(knowledge_base)
    create_genie_interface(examples)

    # Smoke-test the query pipeline with the first three examples
    print("\nTesting example queries with process_query function:")
    for number, example in enumerate(examples[:3], start=1):
        print(f"\nQuery {number}: {example}")
        response = process_query(example, knowledge_base, patterns)
        print(f"Response: {response}")

    print(f"\nGenie Spaces integration setup completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("Genie Spaces knowledge base, patterns, and interface are ready.")

## Execute Genie Spaces Integration Setup

In [None]:
# Run the Genie Spaces integration setup.
# Executing this cell calls main(), which loads the data, builds the knowledge
# base and query patterns, generates the example queries, creates the HTML
# interface, and prints the responses to the first three example queries.
main()