# Climate Data Visualization

This notebook focuses on creating visualizations based on the processed and analyzed climate data. In a Databricks environment, these visualizations would typically be built using Databricks AI/BI Dashboards for interactivity. This notebook demonstrates how to generate key visualizations programmatically.

## Setup and Imports

In [None]:
# Import required libraries
import os
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Obtain the Spark session for this notebook. This import and call execute as
# written (they are live code, not an illustration), so pyspark must be
# importable when the cell runs.
# NOTE(review): `spark` is not referenced by any other cell shown in this
# notebook - confirm whether the session is still needed here.
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("ClimateDataVisualization").getOrCreate()

# Define data directories - in Databricks these would typically be in DBFS.
# PROCESSED_DIR and ANALYTICS_DIR are read by load_data_for_visualization();
# VISUALIZATION_DIR is where every generated HTML file is written.
PROCESSED_DIR = "/dbfs/FileStore/climate_resilience/processed"
ANALYTICS_DIR = "/dbfs/FileStore/climate_resilience/analytics"
VISUALIZATION_DIR = "/dbfs/FileStore/climate_resilience/visualization"
# Create the output directory up front; exist_ok=True makes re-running the
# cell harmless when the directory already exists.
os.makedirs(VISUALIZATION_DIR, exist_ok=True)

# Set up plotting style (global matplotlib style sheet and seaborn palette)
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("viridis")

print("Climate Data Visualization environment initialized.")

## Load Data for Visualization

This function loads the processed data and analytics results needed for visualization.

In [None]:
def load_data_for_visualization():
    """
    Collects every available processed-data and analytics CSV into a dict.

    Each known file is looked up under PROCESSED_DIR or ANALYTICS_DIR and,
    when it exists, parsed with pandas and stored under a fixed key:
    "long_data", "wide_data", "trend_results", "correlation_matrix"
    (read with its first column as the index), "future_predictions" and
    "vulnerability_index". Files that do not exist are skipped.

    Returns:
        A dict mapping those keys to DataFrames for the files that were
        found, or None when no file was found or when any exception was
        raised while loading (the exception text is printed).
    """
    print("Loading data for visualization...")

    def read_if_present(csv_path, **read_options):
        # Parsed frame for an existing file, None for a missing one.
        if not os.path.exists(csv_path):
            return None
        return pd.read_csv(csv_path, **read_options)

    data_dict = {}

    try:
        # Processed data, available in long and wide layouts
        long_path = os.path.join(PROCESSED_DIR, "climate_data_long.csv")
        wide_path = os.path.join(PROCESSED_DIR, "climate_data_wide.csv")

        long_frame = read_if_present(long_path)
        if long_frame is not None:
            data_dict["long_data"] = long_frame
            print(f"Loaded long format data from {long_path}")

        wide_frame = read_if_present(wide_path)
        if wide_frame is not None:
            data_dict["wide_data"] = wide_frame
            print(f"Loaded wide format data from {wide_path}")

        # Analytics outputs
        trend_path = os.path.join(ANALYTICS_DIR, "trend_analysis_results.csv")
        corr_path = os.path.join(ANALYTICS_DIR, "indicator_correlations.csv")
        future_path = os.path.join(ANALYTICS_DIR, "future_predictions.csv")
        vuln_path = os.path.join(ANALYTICS_DIR, "climate_vulnerability_index.csv")

        trend_frame = read_if_present(trend_path)
        if trend_frame is not None:
            data_dict["trend_results"] = trend_frame
            print(f"Loaded trend analysis results from {trend_path}")

        # The correlation matrix keeps its first column as row labels
        corr_frame = read_if_present(corr_path, index_col=0)
        if corr_frame is not None:
            data_dict["correlation_matrix"] = corr_frame
            print(f"Loaded correlation matrix from {corr_path}")

        future_frame = read_if_present(future_path)
        if future_frame is not None:
            data_dict["future_predictions"] = future_frame
            print(f"Loaded future predictions from {future_path}")

        vuln_frame = read_if_present(vuln_path)
        if vuln_frame is not None:
            data_dict["vulnerability_index"] = vuln_frame
            print(f"Loaded vulnerability index data from {vuln_path}")

        # Nothing on disk at all is reported the same way as a failure: None
        if not data_dict:
            print("No data files found for visualization")
            return None
        return data_dict
    except Exception as e:
        print(f"Error loading data for visualization: {e}")
        return None

## Create Executive Dashboard Visualizations

This function generates key visualizations for an executive dashboard summary.

In [None]:
def create_executive_dashboard_visuals(data_dict):
    """
    Generates key visualizations for an executive dashboard summary.

    HTML files written into VISUALIZATION_DIR:
      - executive_kpis.html: number tiles for the latest vulnerability index
        and the average annual temperature / CO2 change. Always written;
        it contains only the KPIs whose source data is available.
      - vulnerability_trend.html: line chart of Climate_Vulnerability_Index
        over Year_Numeric (only when "vulnerability_index" is present).
      - key_indicator_trends.html: one facet row per key indicator found in
        "long_data" (only when at least one key indicator has rows).

    Args:
        data_dict: Dictionary of DataFrames as produced by
            load_data_for_visualization(), or None. The keys
            "vulnerability_index", "trend_results" and "long_data" are all
            optional; a missing (or None) entry skips the visuals needing it.

    Returns:
        None. The input DataFrames are not modified.
    """
    print("Creating executive dashboard visualizations...")

    if data_dict is None:
        print("No data for executive dashboard")
        return

    vuln_df = data_dict.get("vulnerability_index")
    trend_df = data_dict.get("trend_results")
    df_long = data_dict.get("long_data")

    # Key Performance Indicators (KPIs)
    kpis = {}
    # An empty frame has no "latest" row; iloc[-1] would raise IndexError.
    if vuln_df is not None and not vuln_df.empty:
        latest_vuln = vuln_df.iloc[-1]["Climate_Vulnerability_Index"]
        kpis["Latest Vulnerability Index"] = round(latest_vuln, 2)

    if trend_df is not None:
        # na=False: a missing indicator name counts as "no match" instead of
        # putting NaN into the boolean mask, which pandas refuses to index by.
        temp_trend = trend_df[trend_df["Indicator"].str.contains("Temperature", na=False)]
        if not temp_trend.empty:
            kpis["Avg Annual Temp Change (°C)"] = round(temp_trend.iloc[0]["Avg_Annual_Change"], 3)

        co2_trend = trend_df[trend_df["Indicator"].str.contains("CO2", na=False)]
        if not co2_trend.empty:
            kpis["Avg Annual CO2 Change (tons/capita)"] = round(co2_trend.iloc[0]["Avg_Annual_Change"], 3)

    print(f"Executive KPIs: {kpis}")

    # Create KPI visualization (using Plotly for better dashboard integration)
    fig_kpi = go.Figure()

    # Left edge of each KPI tile; every tile is 0.25 wide. At most three KPIs
    # are produced above, so the last slot is never reached.
    domain_x = [0, 0.3, 0.6, 0.9]
    for x_start, (key, value) in zip(domain_x, kpis.items()):
        fig_kpi.add_trace(go.Indicator(
            mode="number",
            value=value,
            title={"text": key, "font": {"size": 14}},
            domain={"x": [x_start, x_start + 0.25], "y": [0, 1]},
            number={"font": {"size": 36}}
        ))

    fig_kpi.update_layout(
        title="Key Climate Resilience Indicators for Singapore",
        height=200,
        margin=dict(l=20, r=20, t=50, b=20)
    )

    kpi_path = os.path.join(VISUALIZATION_DIR, "executive_kpis.html")
    fig_kpi.write_html(kpi_path)
    print(f"Saved KPI visualization to {kpi_path}")

    # Create overall vulnerability trend plot
    if vuln_df is not None:
        fig_vuln = px.line(vuln_df, x="Year_Numeric", y="Climate_Vulnerability_Index",
                           title="Climate Vulnerability Index Trend for Singapore",
                           labels={"Year_Numeric": "Year", "Climate_Vulnerability_Index": "Vulnerability Index"})
        fig_vuln.update_layout(hovermode="x unified")

        vuln_trend_path = os.path.join(VISUALIZATION_DIR, "vulnerability_trend.html")
        fig_vuln.write_html(vuln_trend_path)
        print(f"Saved vulnerability trend plot to {vuln_trend_path}")

    # Create key indicator trends plot
    if df_long is not None:
        key_indicators = ["Average Temperature", "CO2 Emissions", "Rainfall", "Sea Level Rise"]
        # .copy(): the Year column is rewritten below, and assigning into a
        # filtered slice of the caller's frame triggers SettingWithCopyWarning.
        df_key = df_long[df_long["Indicator"].isin(key_indicators)].copy()

        if not df_key.empty:
            # Ensure Year is datetime. The dtype helper accepts any datetime
            # resolution, so an already-converted column is never re-parsed.
            if not pd.api.types.is_datetime64_any_dtype(df_key["Year"]):
                df_key["Year"] = pd.to_datetime(df_key["Year"].astype(str), format="%Y")

            fig_trends = px.line(df_key, x="Year", y="Value", color="Indicator", facet_row="Indicator",
                                 title="Trends of Key Climate Indicators for Singapore",
                                 labels={"Year": "Year", "Value": "Indicator Value"}, height=800)
            fig_trends.update_yaxes(matches=None)  # Allow different y-axis scales
            fig_trends.update_layout(hovermode="x unified")

            key_trends_path = os.path.join(VISUALIZATION_DIR, "key_indicator_trends.html")
            fig_trends.write_html(key_trends_path)
            print(f"Saved key indicator trends plot to {key_trends_path}")

## Create Trend Analysis Dashboard Visualizations

This function generates visualizations for a detailed trend analysis dashboard.

In [None]:
def create_trend_dashboard_visuals(data_dict):
    """
    Generates visualizations for a detailed trend analysis dashboard.

    HTML files written into VISUALIZATION_DIR:
      - all_indicator_trends.html: one line per indicator over time.
      - average_annual_change.html: bar chart of Avg_Annual_Change per
        indicator, sorted from largest to smallest.
      - r2_vs_slope.html: scatter of R_Squared against Slope, with marker
        size given by the magnitude of Total_Change.

    Args:
        data_dict: Dictionary of DataFrames, or None. Must contain
            "long_data" (columns Year, Indicator, Value) and "trend_results"
            (columns Indicator, Avg_Annual_Change, Slope, R_Squared,
            Total_Change); otherwise a message is printed and nothing is
            written.

    Returns:
        None. The input DataFrames are not modified.
    """
    print("Creating trend analysis dashboard visualizations...")

    if data_dict is None or "long_data" not in data_dict or "trend_results" not in data_dict:
        print("Missing data for trend dashboard")
        return

    # Copy before converting Year so the shared frame inside data_dict is not
    # rewritten as a side effect of building this dashboard.
    df_long = data_dict["long_data"].copy()
    trend_df = data_dict["trend_results"]

    # Ensure Year is datetime. The dtype helper accepts any datetime
    # resolution, so an already-converted column is never re-parsed.
    if not pd.api.types.is_datetime64_any_dtype(df_long["Year"]):
        df_long["Year"] = pd.to_datetime(df_long["Year"].astype(str), format="%Y")

    # Create interactive trend plot for all indicators
    fig_all_trends = px.line(df_long, x="Year", y="Value", color="Indicator",
                             title="Detailed Trends of Climate Indicators for Singapore",
                             labels={"Year": "Year", "Value": "Indicator Value"})
    fig_all_trends.update_layout(hovermode="x unified")

    all_trends_path = os.path.join(VISUALIZATION_DIR, "all_indicator_trends.html")
    fig_all_trends.write_html(all_trends_path)
    print(f"Saved detailed trends plot to {all_trends_path}")

    # Create bar chart of average annual change
    trend_df_sorted = trend_df.sort_values("Avg_Annual_Change", ascending=False)
    fig_annual_change = px.bar(trend_df_sorted, x="Indicator", y="Avg_Annual_Change",
                               title="Average Annual Change by Climate Indicator",
                               labels={"Indicator": "Climate Indicator", "Avg_Annual_Change": "Average Annual Change"})
    fig_annual_change.update_layout(xaxis_tickangle=-45)

    annual_change_path = os.path.join(VISUALIZATION_DIR, "average_annual_change.html")
    fig_annual_change.write_html(annual_change_path)
    print(f"Saved average annual change plot to {annual_change_path}")

    # Create scatter plot of R-squared vs Slope.
    # Marker sizes must be non-negative numbers, but a total change can be
    # negative (a declining indicator) or missing. Size the markers by the
    # magnitude and keep the signed value visible in the hover text.
    scatter_df = trend_df.assign(
        Abs_Total_Change=trend_df["Total_Change"].abs().fillna(0)
    )
    fig_r2_slope = px.scatter(scatter_df, x="Slope", y="R_Squared", color="Indicator",
                              hover_name="Indicator", size="Abs_Total_Change",
                              hover_data=["Total_Change"],
                              title="Trend Strength (R-Squared) vs. Trend Magnitude (Slope)",
                              labels={"Slope": "Trend Slope (Magnitude)",
                                      "R_Squared": "Trend Strength (R-Squared)",
                                      "Total_Change": "Total Change",
                                      "Abs_Total_Change": "Total Change (magnitude)"})

    r2_slope_path = os.path.join(VISUALIZATION_DIR, "r2_vs_slope.html")
    fig_r2_slope.write_html(r2_slope_path)
    print(f"Saved R-squared vs Slope plot to {r2_slope_path}")

## Create Predictive Dashboard Visualizations

This function generates visualizations for a predictive dashboard.

In [None]:
def create_predictive_dashboard_visuals(data_dict):
    """
    Generates visualizations for a predictive dashboard.

    Builds one interactive figure with two traces per indicator (historical
    values as lines+markers, predicted values as a dashed line) and a
    dropdown that shows one indicator at a time. The figure is written to
    future_predictions_interactive.html in VISUALIZATION_DIR.

    Args:
        data_dict: Dictionary of DataFrames, or None. Must contain
            "long_data" (columns Year, Indicator, Value) and
            "future_predictions" (columns Year, Indicator, Predicted_Value);
            otherwise a message is printed and nothing is written.
            Year columns that are not already datetime are parsed as
            four-digit years.

    Returns:
        None. The input DataFrames are not modified.
    """
    print("Creating predictive dashboard visualizations...")

    if data_dict is None or "long_data" not in data_dict or "future_predictions" not in data_dict:
        print("Missing data for predictive dashboard")
        return

    def _with_datetime_year(frame, columns):
        # Copy of the selected columns whose Year column is datetime.
        subset = frame[columns].copy()
        if not pd.api.types.is_datetime64_any_dtype(subset["Year"]):
            subset["Year"] = pd.to_datetime(subset["Year"].astype(str), format="%Y")
        return subset

    # Historical and predicted years are converted separately, each from its
    # own frame. Concatenating first turns the prediction years into floats
    # with NaN gaps, and a value such as 2030.0 cannot be parsed with "%Y".
    history_df = _with_datetime_year(data_dict["long_data"], ["Year", "Indicator", "Value"])
    forecast_df = _with_datetime_year(
        data_dict["future_predictions"], ["Year", "Indicator", "Predicted_Value"]
    )

    # Indicators in first-seen order: historical ones first, then any that
    # appear only in the predictions. Missing names are dropped because they
    # cannot label a trace or a dropdown entry.
    indicators = pd.concat(
        [history_df["Indicator"], forecast_df["Indicator"]], ignore_index=True
    ).dropna().unique()

    if len(indicators) == 0:
        print("No indicators available for predictive dashboard")
        return

    # Create interactive plot with historical data and future predictions
    fig_predictions = go.Figure()

    for position, indicator in enumerate(indicators):
        shown_by_default = position == 0  # Show first indicator by default
        indicator_history = history_df[history_df["Indicator"] == indicator]
        indicator_forecast = forecast_df[forecast_df["Indicator"] == indicator]

        # Add historical trace
        fig_predictions.add_trace(go.Scatter(
            x=indicator_history["Year"],
            y=indicator_history["Value"],
            mode="lines+markers",
            name=f"{indicator} (Historical)",
            visible=shown_by_default
        ))

        # Add prediction trace
        fig_predictions.add_trace(go.Scatter(
            x=indicator_forecast["Year"],
            y=indicator_forecast["Predicted_Value"],
            mode="lines",
            line=dict(dash="dash"),
            name=f"{indicator} (Predicted)",
            visible=shown_by_default
        ))

    # Create dropdown menu for indicator selection. Exactly two traces are
    # added per indicator (even when one of them has no points), so
    # indicator i owns trace positions 2*i and 2*i + 1.
    buttons = []
    for i, indicator in enumerate(indicators):
        visibility = [False] * (len(indicators) * 2)
        visibility[i * 2] = True      # Historical trace
        visibility[i * 2 + 1] = True  # Prediction trace
        buttons.append(dict(
            label=indicator,
            method="update",
            args=[{"visible": visibility}, {"title": f"Historical Data and Future Predictions: {indicator}"}]
        ))

    fig_predictions.update_layout(
        updatemenus=[dict(
            active=0,
            buttons=buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1, xanchor="left",
            y=1.15, yanchor="top"
        )],
        title=f"Historical Data and Future Predictions: {indicators[0]}",
        xaxis_title="Year",
        yaxis_title="Indicator Value",
        hovermode="x unified"
    )

    predictions_path = os.path.join(VISUALIZATION_DIR, "future_predictions_interactive.html")
    fig_predictions.write_html(predictions_path)
    print(f"Saved future predictions plot to {predictions_path}")

## Create Vulnerability Dashboard Visualizations

This function generates visualizations for a vulnerability assessment dashboard.

In [None]:
def create_vulnerability_dashboard_visuals(data_dict):
    """
    Generates visualizations for a vulnerability assessment dashboard.

    HTML files written into VISUALIZATION_DIR:
      - vulnerability_trend.html: written only when the file does not exist
        yet (the executive dashboard writes the same chart).
      - vulnerability_components.html: bar chart of the "_Norm" component
        columns in the last row of the data.
      - vulnerability_components_over_time.html: stacked area chart of the
        same components over Year.
    The two component charts are produced only when at least one column
    name contains "_Norm" and the frame has at least one row.

    Args:
        data_dict: Dictionary of DataFrames, or None. Must contain
            "vulnerability_index" with columns Year, Year_Numeric and
            Climate_Vulnerability_Index; otherwise a message is printed and
            nothing is written.

    Returns:
        None. The input DataFrame is not modified.
    """
    print("Creating vulnerability dashboard visualizations...")

    if data_dict is None or "vulnerability_index" not in data_dict:
        print("Missing data for vulnerability dashboard")
        return

    # Copy before converting Year so the shared frame inside data_dict is not
    # rewritten as a side effect of building this dashboard.
    vuln_df = data_dict["vulnerability_index"].copy()

    # Ensure Year is datetime. The dtype helper accepts any datetime
    # resolution, so an already-converted column is never re-parsed.
    if not pd.api.types.is_datetime64_any_dtype(vuln_df["Year"]):
        vuln_df["Year"] = pd.to_datetime(vuln_df["Year"].astype(str), format="%Y")

    # Create vulnerability index trend plot (already created in executive dashboard, reuse)
    vuln_trend_path = os.path.join(VISUALIZATION_DIR, "vulnerability_trend.html")
    if not os.path.exists(vuln_trend_path):
        fig_vuln = px.line(vuln_df, x="Year_Numeric", y="Climate_Vulnerability_Index",
                           title="Climate Vulnerability Index Trend for Singapore",
                           labels={"Year_Numeric": "Year", "Climate_Vulnerability_Index": "Vulnerability Index"})
        fig_vuln.update_layout(hovermode="x unified")
        fig_vuln.write_html(vuln_trend_path)
        print(f"Saved vulnerability trend plot to {vuln_trend_path}")

    # Create component contribution plot (if components exist).
    # An empty frame has no "latest" row; iloc[-1] would raise IndexError.
    component_cols = [col for col in vuln_df.columns if "_Norm" in col]
    if component_cols and not vuln_df.empty:
        # Get latest year data (the last row of the frame)
        latest_year_data = vuln_df.iloc[-1]
        # Selecting the component columns before taking the row keeps their
        # own dtype instead of the mixed dtype of a whole row.
        component_values = vuln_df[component_cols].iloc[-1]
        component_names = [col.replace("_Norm", "") for col in component_cols]

        fig_components = px.bar(x=component_names, y=component_values.values,
                                title=f"Component Contributions to Vulnerability Index ({latest_year_data['Year_Numeric']})",
                                labels={"x": "Component", "y": "Normalized Contribution"})

        components_path = os.path.join(VISUALIZATION_DIR, "vulnerability_components.html")
        fig_components.write_html(components_path)
        print(f"Saved vulnerability components plot to {components_path}")

        # Create stacked area plot of component contributions over time
        df_components_long = vuln_df.melt(id_vars=["Year", "Year_Numeric"], value_vars=component_cols,
                                          var_name="Component", value_name="Contribution")
        # regex=False: "_Norm" is a literal suffix, and the default regex
        # behaviour of Series.str.replace differs between pandas versions.
        df_components_long["Component"] = df_components_long["Component"].str.replace("_Norm", "", regex=False)

        fig_stacked_area = px.area(df_components_long, x="Year", y="Contribution", color="Component",
                                   title="Vulnerability Component Contributions Over Time",
                                   labels={"Year": "Year", "Contribution": "Normalized Contribution"})
        fig_stacked_area.update_layout(hovermode="x unified")

        stacked_area_path = os.path.join(VISUALIZATION_DIR, "vulnerability_components_over_time.html")
        fig_stacked_area.write_html(stacked_area_path)
        print(f"Saved stacked area plot of components to {stacked_area_path}")

## Main Function

This function orchestrates the creation of all visualizations.

In [None]:
def main():
    """
    Runs the whole visualization pipeline.

    Loads the input data once, then builds the executive, trend, predictive
    and vulnerability dashboards in that order. When no data could be
    loaded, a failure message is printed and no dashboard is built.
    Start and completion timestamps are printed around the run.
    """
    print(f"Starting visualization creation at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Load data
    data_dict = load_data_for_visualization()

    # Nothing to draw without data: report it and stop here
    if data_dict is None:
        print("Visualization creation failed: No data available")
        return

    # One builder per dashboard, all sharing the same loaded data
    create_executive_dashboard_visuals(data_dict)
    create_trend_dashboard_visuals(data_dict)
    create_predictive_dashboard_visuals(data_dict)
    create_vulnerability_dashboard_visuals(data_dict)

    print(f"Visualization creation completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("Visualizations are saved and ready for dashboard integration")

## Execute Visualization Creation

In [None]:
# Run the visualization creation process: load the data and write every
# dashboard's HTML files (see main() for the order of the steps).
main()