<a href="https://colab.research.google.com/github/kiogora-nk/PLP-final-project-/blob/main/finalproject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Africa Climate & Drought Risk Dashboard - Jupyter Notebook Version
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from IPython.display import display, HTML
import datetime
import warnings
warnings.filterwarnings('ignore')

print("üåç Africa Climate & Drought Risk Dashboard - Jupyter Notebook Version")

üåç Africa Climate & Drought Risk Dashboard - Jupyter Notebook Version


In [None]:
# Cell 2: Configuration and Data Loading
import pandas as pd
import numpy as np
import datetime

# Configuration
APP_CONFIG = {
    "default_start_date": datetime.date(2015, 1, 1),
    "default_end_date": datetime.date(2024, 12, 31),
    "default_bounds": {
        "min_lat": -35.0,
        "max_lat": 38.0,
        "min_lon": -20.0,
        "max_lon": 55.0
    },
    "risk_levels": ["Extreme", "High", "Medium", "Low", "Unknown"]
}

def create_sample_data():
    """Create synthetic sample data for testing"""
    print("üìä Generating sample data...")
    times = pd.date_range("2015-01-01", "2024-12-01", freq="MS")

    # Create multiple grid points across Africa
    grid_points = [
        (15, 18),   # Central Africa
        (5, 39),    # East Africa
        (-15, 25),  # Southern Africa
        (25, 15),   # North Africa
        (8, -5),    # West Africa
    ]

    data = []
    for lat, lon in grid_points:
        for time in times:
            # Seasonal patterns with some randomness
            month = time.month
            base_rain = 50 + 30 * np.sin(2 * np.pi * (month - 4) / 12)
            base_temp = 25 + 8 * np.sin(2 * np.pi * (month - 6) / 12)

            rainfall = max(0, base_rain + np.random.normal(0, 10))
            temperature = base_temp + np.random.normal(0, 2)
            solar_radiation = 5 + 2 * np.sin(2 * np.pi * (month - 6) / 12) + np.random.normal(0, 0.5)

            # Drought risk based on rainfall
            if rainfall < 30:
                drought_risk = "Extreme"
            elif rainfall < 60:
                drought_risk = "High"
            elif rainfall < 100:
                drought_risk = "Medium"
            else:
                drought_risk = "Low"

            data.append({
                "time": time,
                "lat": lat + np.random.normal(0, 1),
                "lon": lon + np.random.normal(0, 1),
                "Rainfall_mm": rainfall,
                "Temperature_C": temperature,
                "SolarRadiation_kWh_m2_day": solar_radiation,
                "Drought_Risk": drought_risk
            })

    return pd.DataFrame(data)  # Make sure this return statement is there!

# Load or create data
try:
    df = pd.read_csv("africa_grid_climate.csv", parse_dates=["time"])
    print("‚úÖ Loaded data from africa_grid_climate.csv")
except Exception as e:
    print(f"‚ö†Ô∏è Could not load africa_grid_climate.csv: {e}")
    print("Creating sample data...")
    df = create_sample_data()
    print(f"‚úÖ Created sample data with {len(df):,} rows")

print(f"\nüìà Data Overview:")
print(f"   Period: {df['time'].min().date()} to {df['time'].max().date()}")
print(f"   Grid points: {len(df[['lat', 'lon']].drop_duplicates())}")
print(f"   Variables: {', '.join(df.columns[3:])}")

‚ö†Ô∏è Could not load africa_grid_climate.csv: [Errno 2] No such file or directory: 'africa_grid_climate.csv'
Creating sample data...
üìä Generating sample data...
‚úÖ Created sample data with 600 rows

üìà Data Overview:
   Period: 2015-01-01 to 2024-12-01
   Grid points: 600
   Variables: Rainfall_mm, Temperature_C, SolarRadiation_kWh_m2_day, Drought_Risk


In [None]:
def apply_filters(df, start_date, end_date, risk_levels, lat_range, lon_range):
    # ... the filtering function from above ...
    pass

# Set initial filter values
start_date = APP_CONFIG["default_start_date"]
end_date = APP_CONFIG["default_end_date"]
risk_levels = ["Extreme", "High", "Medium", "Low"]
lat_range = (APP_CONFIG["default_bounds"]["min_lat"], APP_CONFIG["default_bounds"]["max_lat"])
lon_range = (APP_CONFIG["default_bounds"]["min_lon"], APP_CONFIG["default_bounds"]["max_lon"])

# Apply initial filters
df_filtered = apply_filters(df, start_date, end_date, risk_levels, lat_range, lon_range)
print(f"Rows after filtering: {len(df_filtered):,}")

TypeError: object of type 'NoneType' has no len()

In [None]:
# app_v4_1.py ‚Äî Africa Climate & Energy Dashboard (Water / Energy / Both)
import streamlit as st, pandas as pd, plotly.express as px, datetime

st.set_page_config(page_title="Africa Water & Energy Dashboard v4.1", layout="wide")

st.title("üåç Africa Climate ‚Ä¢ Water & Energy Dashboard ‚Äî v4.1")
st.markdown("Switch between **Water**, **Energy**, or **Both** modes to explore rainfall, drought risk, "
            "and solar-energy potential across Africa (2015‚Äì2024).")

# --- Sidebar controls ---
st.sidebar.header("Controls")
data_file = st.sidebar.text_input("Data file", "africa_climate_data.csv")
mode = st.sidebar.radio("Select Mode", ["Water", "Energy", "Both"])
start = st.sidebar.date_input("Start", datetime.date(2015,1,1))
end = st.sidebar.date_input("End", datetime.date(2024,12,31))

@st.cache_data
def load_data(path):
    df = pd.read_csv(path, parse_dates=["time"])
    return df

df = load_data(data_file)
df = df[(df["time"]>=pd.to_datetime(start)) & (df["time"]<=pd.to_datetime(end))]

# ------------- Layout -------------
if mode in ["Water","Both"]:
    st.subheader("üíß Water / Drought Overview")
    fig_rain = px.line(df, x="time", y="Rainfall_mm",
                       title="Average Rainfall (mm) ‚Äî Africa",
                       labels={"time":"Time","Rainfall_mm":"Rainfall (mm)"})
    st.plotly_chart(fig_rain, use_container_width=True)

    risk_counts = df["Drought_Risk"].value_counts().reindex(["Extreme","High","Medium","Low"]).fillna(0)
    fig_risk = px.bar(x=risk_counts.index, y=risk_counts.values, color=risk_counts.index,
                      title="Drought Risk Distribution (2015‚Äì2024)",
                      labels={"x":"Risk Level","y":"Months"})
    st.plotly_chart(fig_risk, use_container_width=True)

if mode in ["Energy","Both"]:
    st.subheader("‚ö° Energy / Solar Potential Overview")
    fig_solar = px.line(df, x="time", y="SolarRadiation_kWh_m2_day",
                        title="Solar Radiation Potential (kWh/m¬≤/day) ‚Äî Africa",
                        labels={"time":"Time","SolarRadiation_kWh_m2_day":"Solar (kWh/m¬≤/day)"})
    st.plotly_chart(fig_solar, use_container_width=True)

    if "Energy_Demand_kWh" in df.columns:
        fig_demand = px.line(df, x="time", y="Energy_Demand_kWh",
                             title="Energy Demand (kWh)",
                             labels={"time":"Time","Energy_Demand_kWh":"Demand (kWh)"})
        st.plotly_chart(fig_demand, use_container_width=True)
    else:
        st.info("No energy-demand column found; showing solar potential only.")

# --- Combined Summary (Both) ---
if mode == "Both":
    st.subheader("üåç Correlation Overview")
    corr = df[["Rainfall_mm","SolarRadiation_kWh_m2_day"]].corr().iloc[0,1]
    st.metric("Rainfall vs Solar correlation", f"{corr:.2f}",
              delta="Negative" if corr<0 else "Positive")

st.markdown("---")
st.caption("Built by Senior Kiogo Newton ‚Ä¢ v4.1 ‚Äî Powered by Senior Empire‚Ñ¢")


ModuleNotFoundError: No module named 'streamlit'

In [None]:
# Debug Cell - Run this first
import pandas as pd
import numpy as np
import datetime

print("Testing basic imports...")
print(f"Pandas version: {pd.__version__}")
print(f"NumPy version: {np.__version__}")

# Test if we can create a simple DataFrame
test_df = pd.DataFrame({
    'time': [pd.Timestamp('2020-01-01')],
    'lat': [0],
    'lon': [0],
    'Rainfall_mm': [50],
    'Temperature_C': [25],
    'SolarRadiation_kWh_m2_day': [5],
    'Drought_Risk': ['Low']
})

print(f"Test DataFrame created successfully with {len(test_df)} rows")
print(test_df.head())

Testing basic imports...
Pandas version: 2.2.2
NumPy version: 2.0.2
Test DataFrame created successfully with 1 rows
        time  lat  lon  Rainfall_mm  Temperature_C  SolarRadiation_kWh_m2_day  \
0 2020-01-01    0    0           50             25                          5   

  Drought_Risk  
0          Low  


In [None]:
# Cell 2 - Fixed Version
def create_sample_data():
    """Create synthetic sample data for testing - SIMPLIFIED VERSION"""
    print("üìä Generating sample data...")

    # Create simpler data first to test
    times = pd.date_range("2015-01-01", "2016-12-01", freq="MS")  # Just 2 years for testing

    data = []
    for time in times:
        # Simple deterministic data (no randomness for now)
        month = time.month

        # Basic seasonal pattern
        rainfall = 50 + 20 * np.sin(2 * np.pi * (month - 4) / 12)
        temperature = 25 + 5 * np.sin(2 * np.pi * (month - 6) / 12)
        solar_radiation = 5 + 1 * np.sin(2 * np.pi * (month - 6) / 12)

        # Ensure positive values
        rainfall = max(rainfall, 0)

        # Drought risk based on rainfall
        if rainfall < 30:
            drought_risk = "Extreme"
        elif rainfall < 60:
            drought_risk = "High"
        elif rainfall < 100:
            drought_risk = "Medium"
        else:
            drought_risk = "Low"

        data.append({
            "time": time,
            "lat": 15.0,  # Fixed location for testing
            "lon": 18.0,
            "Rainfall_mm": rainfall,
            "Temperature_C": temperature,
            "SolarRadiation_kWh_m2_day": solar_radiation,
            "Drought_Risk": drought_risk
        })

    df = pd.DataFrame(data)
    print(f"‚úÖ Sample data created with {len(df)} rows")
    return df

# Load or create data
try:
    df = pd.read_csv("africa_grid_climate.csv", parse_dates=["time"])
    print("‚úÖ Loaded data from africa_grid_climate.csv")
except Exception as e:
    print(f"‚ö†Ô∏è Could not load africa_grid_climate.csv: {e}")
    print("Creating sample data...")
    df = create_sample_data()

print(f"\nüìà Data Overview:")
print(f"   Data type: {type(df)}")
if df is not None:
    print(f"   Period: {df['time'].min().date()} to {df['time'].max().date()}")
    print(f"   Rows: {len(df):,}")
    print(f"   Columns: {list(df.columns)}")
    print(f"   First few rows:")
    print(df.head())
else:
    print("‚ùå ERROR: df is None!")

‚ö†Ô∏è Could not load africa_grid_climate.csv: [Errno 2] No such file or directory: 'africa_grid_climate.csv'
Creating sample data...
üìä Generating sample data...
‚úÖ Sample data created with 24 rows

üìà Data Overview:
   Data type: <class 'pandas.core.frame.DataFrame'>
   Period: 2015-01-01 to 2016-12-01
   Rows: 24
   Columns: ['time', 'lat', 'lon', 'Rainfall_mm', 'Temperature_C', 'SolarRadiation_kWh_m2_day', 'Drought_Risk']
   First few rows:
        time   lat   lon  Rainfall_mm  Temperature_C  \
0 2015-01-01  15.0  18.0    30.000000      22.500000   
1 2015-02-01  15.0  18.0    32.679492      20.669873   
2 2015-03-01  15.0  18.0    40.000000      20.000000   
3 2015-04-01  15.0  18.0    50.000000      20.669873   
4 2015-05-01  15.0  18.0    60.000000      22.500000   

   SolarRadiation_kWh_m2_day Drought_Risk  
0                   4.500000         High  
1                   4.133975         High  
2                   4.000000         High  
3                   4.133975     

In [None]:
# Cell 3: Filtering Functions and Initial Filtering

def apply_filters(df, start_date, end_date, risk_levels, lat_range, lon_range):
    """Apply all filters to the dataframe"""
    print(f"üîç Applying filters...")
    print(f"   Date range: {start_date} to {end_date}")
    print(f"   Risk levels: {risk_levels}")
    print(f"   Latitude: {lat_range[0]} to {lat_range[1]}")
    print(f"   Longitude: {lon_range[0]} to {lon_range[1]}")

    df_filtered = df.copy()

    # Date filter
    date_mask = (df_filtered['time'] >= pd.to_datetime(start_date)) & (df_filtered['time'] <= pd.to_datetime(end_date))
    df_filtered = df_filtered[date_mask]
    print(f"   After date filter: {len(df_filtered):,} rows")

    # Risk level filter
    if risk_levels:  # Only apply if risk_levels is not empty
        risk_mask = df_filtered['Drought_Risk'].isin(risk_levels)
        df_filtered = df_filtered[risk_mask]
        print(f"   After risk filter: {len(df_filtered):,} rows")

    # Geographic bounds
    lat_mask = (df_filtered['lat'] >= lat_range[0]) & (df_filtered['lat'] <= lat_range[1])
    lon_mask = (df_filtered['lon'] >= lon_range[0]) & (df_filtered['lon'] <= lon_range[1])
    df_filtered = df_filtered[lat_mask & lon_mask]
    print(f"   After geographic filter: {len(df_filtered):,} rows")

    return df_filtered

# Set initial filter values
start_date = APP_CONFIG["default_start_date"]
end_date = APP_CONFIG["default_end_date"]
risk_levels = ["Extreme", "High", "Medium", "Low"]
lat_range = (APP_CONFIG["default_bounds"]["min_lat"], APP_CONFIG["default_bounds"]["max_lat"])
lon_range = (APP_CONFIG["default_bounds"]["min_lon"], APP_CONFIG["default_bounds"]["max_lon"])

print("üéõÔ∏è  Setting initial filters:")
print(f"   Start date: {start_date}")
print(f"   End date: {end_date}")
print(f"   Risk levels: {risk_levels}")
print(f"   Latitude range: {lat_range}")
print(f"   Longitude range: {lon_range}")

# Apply initial filters
df_filtered = apply_filters(df, start_date, end_date, risk_levels, lat_range, lon_range)

print(f"\n‚úÖ Filtering complete!")
print(f"   Original data: {len(df):,} rows")
print(f"   Filtered data: {len(df_filtered):,} rows")
print(f"   Data reduction: {((len(df) - len(df_filtered)) / len(df) * 100):.1f}%")

# Show sample of filtered data
if len(df_filtered) > 0:
    print(f"\nüìã Sample of filtered data:")
    print(df_filtered.head())
else:
    print(f"\n‚ö†Ô∏è  No data remaining after filtering!")

    # Show why no data remains
    print("\nDebugging filter issues:")

    # Check date range
    date_min = df['time'].min().date()
    date_max = df['time'].max().date()
    print(f"   Data date range: {date_min} to {date_max}")

    # Check risk levels available
    available_risks = df['Drought_Risk'].unique()
    print(f"   Available risk levels: {list(available_risks)}")

    # Check geographic bounds
    lat_min = df['lat'].min()
    lat_max = df['lat'].max()
    lon_min = df['lon'].min()
    lon_max = df['lon'].max()
    print(f"   Data latitude range: {lat_min:.2f} to {lat_max:.2f}")
    print(f"   Data longitude range: {lon_min:.2f} to {lon_max:.2f}")

üéõÔ∏è  Setting initial filters:
   Start date: 2015-01-01
   End date: 2024-12-31
   Risk levels: ['Extreme', 'High', 'Medium', 'Low']
   Latitude range: (-35.0, 38.0)
   Longitude range: (-20.0, 55.0)
üîç Applying filters...
   Date range: 2015-01-01 to 2024-12-31
   Risk levels: ['Extreme', 'High', 'Medium', 'Low']
   Latitude: -35.0 to 38.0
   Longitude: -20.0 to 55.0
   After date filter: 24 rows
   After risk filter: 24 rows
   After geographic filter: 24 rows

‚úÖ Filtering complete!
   Original data: 24 rows
   Filtered data: 24 rows
   Data reduction: 0.0%

üìã Sample of filtered data:
        time   lat   lon  Rainfall_mm  Temperature_C  \
0 2015-01-01  15.0  18.0    30.000000      22.500000   
1 2015-02-01  15.0  18.0    32.679492      20.669873   
2 2015-03-01  15.0  18.0    40.000000      20.000000   
3 2015-04-01  15.0  18.0    50.000000      20.669873   
4 2015-05-01  15.0  18.0    60.000000      22.500000   

   SolarRadiation_kWh_m2_day Drought_Risk  
0             

In [None]:
# Cell 4: Visualization Functions

print("üìä Setting up visualization functions...")

def create_drought_risk_map(df_map, month_sel):
    """Create drought risk map for selected month"""
    if df_map.empty:
        print("‚ö†Ô∏è  No data available for the selected month and filters")
        return None

    print(f"üó∫Ô∏è  Creating map for {month_sel} with {len(df_map)} locations...")

    # Color mapping for drought risk
    color_discrete_map = {
        "Extreme": "red",
        "High": "orange",
        "Medium": "yellow",
        "Low": "green",
        "Unknown": "gray"
    }

    fig = px.scatter_geo(
        df_map,
        lat="lat",
        lon="lon",
        hover_name="Drought_Risk",
        hover_data={
            "Rainfall_mm": ":.1f",
            "Temperature_C": ":.1f",
            "SolarRadiation_kWh_m2_day": ":.2f",
            "lat": ":.2f",
            "lon": ":.2f",
            "Drought_Risk": True
        },
        size="Rainfall_mm",
        color="Drought_Risk",
        color_discrete_map=color_discrete_map,
        projection="natural earth",
        title=f"üåç Drought Risk Map ‚Äî {month_sel}",
        size_max=15
    )

    fig.update_layout(
        height=500,
        margin={"r": 0, "t": 60, "l": 0, "b": 0},
        geo=dict(
            showland=True,
            landcolor="lightgreen",
            showocean=True,
            oceancolor="lightblue",
            showcountries=True,
            countrycolor="white",
            showcoastlines=True,
            coastlinecolor="black"
        )
    )

    # Update marker appearance
    fig.update_traces(
        marker=dict(opacity=0.7, line=dict(width=1, color='darkgray')),
        selector=dict(mode='markers')
    )

    print("‚úÖ Map created successfully")
    return fig

def create_time_series(df, metric):
    """Create time series plot for selected metric"""
    if df.empty:
        print("‚ö†Ô∏è  No data available for time series")
        return None

    print(f"üìà Creating time series for {metric}...")

    # Aggregate over time
    ts = df.groupby("time")[metric].agg(['mean', 'std', 'min', 'max']).reset_index()
    ts = ts.rename(columns={'mean': metric})

    fig = px.line(
        ts,
        x="time",
        y=metric,
        title=f"üìä Mean {metric} over Time",
        markers=True
    )

    # Add confidence interval if we have multiple points per time
    if len(df) > len(ts) and 'std' in ts.columns:
        fig.add_trace(go.Scatter(
            x=ts['time'].tolist() + ts['time'].tolist()[::-1],
            y=(ts[metric] + ts['std']).tolist() + (ts[metric] - ts['std']).tolist()[::-1],
            fill='toself',
            fillcolor='rgba(0,100,80,0.2)',
            line=dict(color='rgba(255,255,255,0)'),
            hoverinfo="skip",
            showlegend=False,
            name='Std Dev'
        ))

    fig.update_layout(
        xaxis_title="Date",
        yaxis_title=metric,
        hovermode="x unified",
        showlegend=True
    )

    print(f"‚úÖ Time series created with {len(ts)} time points")
    return fig

def create_risk_breakdown(df):
    """Create drought risk breakdown bar chart"""
    if df.empty:
        print("‚ö†Ô∏è  No data available for risk breakdown")
        return None

    print("üìä Creating risk breakdown chart...")

    risk_counts = df["Drought_Risk"].value_counts()

    # Ensure all risk levels are present
    for risk in ["Extreme", "High", "Medium", "Low", "Unknown"]:
        if risk not in risk_counts:
            risk_counts[risk] = 0

    risk_counts = risk_counts.reindex(["Extreme", "High", "Medium", "Low", "Unknown"]).fillna(0)

    fig = px.bar(
        x=risk_counts.index,
        y=risk_counts.values,
        labels={"x": "Risk Level", "y": "Number of Records"},
        title="‚ö†Ô∏è Drought Risk Distribution",
        color=risk_counts.index,
        color_discrete_map={
            "Extreme": "red",
            "High": "orange",
            "Medium": "yellow",
            "Low": "green",
            "Unknown": "gray"
        }
    )

    # Add count labels on bars
    fig.update_traces(
        text=risk_counts.values,
        textposition='auto',
        hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>"
    )

    fig.update_layout(
        showlegend=False,
        xaxis_title="Drought Risk Level",
        yaxis_title="Number of Records"
    )

    print("‚úÖ Risk breakdown chart created")
    return fig

def show_correlations(df):
    """Show correlation matrix between variables"""
    if df.empty or len(df) < 2:
        print("‚ö†Ô∏è  Not enough data for correlation analysis")
        return None

    print("üîó Calculating correlations...")

    numeric_cols = ["Rainfall_mm", "Temperature_C", "SolarRadiation_kWh_m2_day"]
    available_cols = [col for col in numeric_cols if col in df.columns]

    if len(available_cols) < 2:
        print("‚ö†Ô∏è  Not enough numeric columns for correlation")
        return None

    corr_matrix = df[available_cols].corr().round(2)

    fig = px.imshow(
        corr_matrix,
        title="üîó Correlation Matrix Between Climate Variables",
        color_continuous_scale="RdBu_r",
        aspect="auto",
        text_auto=True,
        labels=dict(color="Correlation")
    )

    fig.update_layout(
        xaxis_title="Variable",
        yaxis_title="Variable"
    )

    print("‚úÖ Correlation matrix created")
    return fig

def seasonal_analysis(df):
    """Show seasonal patterns"""
    if df.empty:
        print("‚ö†Ô∏è  No data for seasonal analysis")
        return None

    print("üå§Ô∏è  Analyzing seasonal patterns...")

    df_seasonal = df.copy()
    df_seasonal['month'] = df_seasonal['time'].dt.month
    df_seasonal['year'] = df_seasonal['time'].dt.year

    monthly_avg = df_seasonal.groupby('month')[['Rainfall_mm', 'Temperature_C']].mean().reset_index()

    # Create subplots
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=('üåßÔ∏è Average Monthly Rainfall', 'üå°Ô∏è Average Monthly Temperature'),
        vertical_spacing=0.15
    )

    # Rainfall subplot
    fig.add_trace(
        go.Scatter(
            x=monthly_avg['month'],
            y=monthly_avg['Rainfall_mm'],
            name='Rainfall',
            line=dict(color='blue', width=3),
            marker=dict(size=8, color='blue')
        ),
        row=1, col=1
    )

    # Temperature subplot
    fig.add_trace(
        go.Scatter(
            x=monthly_avg['month'],
            y=monthly_avg['Temperature_C'],
            name='Temperature',
            line=dict(color='red', width=3),
            marker=dict(size=8, color='red')
        ),
        row=2, col=1
    )

    # Update axes
    fig.update_xaxes(
        title_text="Month",
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        row=2, col=1
    )

    fig.update_yaxes(title_text="Rainfall (mm)", row=1, col=1)
    fig.update_yaxes(title_text="Temperature (¬∞C)", row=2, col=1)

    fig.update_layout(
        height=600,
        showlegend=False,
        title_text="üìÖ Seasonal Climate Patterns"
    )

    print("‚úÖ Seasonal analysis completed")
    return fig

def show_alerts(df):
    """Show risk alerts based on current data"""
    if df.empty:
        return ["‚ùå No data available for alerts"]

    # Get the most recent data
    current_data = df[df['time'] == df['time'].max()]

    if current_data.empty:
        return ["‚ö†Ô∏è  No current data available for alerts"]

    extreme_risk_count = len(current_data[current_data['Drought_Risk'] == 'Extreme'])
    high_risk_count = len(current_data[current_data['Drought_Risk'] == 'High'])
    total_locations = len(current_data)

    alerts = []

    if extreme_risk_count > 0:
        alert_level = "üö® CRITICAL" if extreme_risk_count > 5 else "‚ö†Ô∏è  WARNING"
        alerts.append(f"{alert_level}: {extreme_risk_count} locations with EXTREME drought risk!")

    if high_risk_count > 0:
        alerts.append(f"‚ö†Ô∏è  WARNING: {high_risk_count} locations with HIGH drought risk")

    if not alerts:
        alerts.append("‚úÖ No high or extreme drought risk areas detected")

    # Add summary
    alerts.append(f"üìä Summary: {total_locations} locations analyzed")

    return alerts

def create_metrics_summary(df):
    """Create a summary of key metrics"""
    if df.empty:
        print("‚ö†Ô∏è  No data for metrics summary")
        return None

    print("üìã Creating metrics summary...")

    # Calculate basic statistics
    latest_data = df[df['time'] == df['time'].max()]

    if latest_data.empty:
        return "No current data available"

    metrics = {
        "Total Locations": len(latest_data),
        "Avg Rainfall (mm)": latest_data['Rainfall_mm'].mean(),
        "Avg Temperature (¬∞C)": latest_data['Temperature_C'].mean(),
        "Avg Solar Radiation (kWh/m¬≤/day)": latest_data['SolarRadiation_kWh_m2_day'].mean(),
        "Extreme Risk Locations": len(latest_data[latest_data['Drought_Risk'] == 'Extreme']),
        "High Risk Locations": len(latest_data[latest_data['Drought_Risk'] == 'High'])
    }

    # Create a nice display
    summary_html = """
    <div style="background-color:#f0f8ff; padding:15px; border-radius:10px; border:2px solid #e0e0e0;">
        <h3 style="color:#2c3e50; margin-top:0;">üìä Current Situation Summary</h3>
    """

    for key, value in metrics.items():
        if 'Avg' in key:
            value = f"{value:.1f}"
        summary_html += f'<p style="margin:8px 0;"><b>{key}:</b> {value}</p>'

    summary_html += "</div>"

    return summary_html

print("‚úÖ All visualization functions defined successfully!")
print("\nAvailable functions:")
print("  - create_drought_risk_map()")
print("  - create_time_series()")
print("  - create_risk_breakdown()")
print("  - show_correlations()")
print("  - seasonal_analysis()")
print("  - show_alerts()")
print("  - create_metrics_summary()")

üìä Setting up visualization functions...
‚úÖ All visualization functions defined successfully!

Available functions:
  - create_drought_risk_map()
  - create_time_series()
  - create_risk_breakdown()
  - show_correlations()
  - seasonal_analysis()
  - show_alerts()
  - create_metrics_summary()


In [None]:
!pip install streamlit pyngrok plotly pandas


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.1/10.1 MB[0m [31m75.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m113.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.0 streamlit-1.50.0


In [None]:
# Cell 5: Test Basic Visualizations

print("üé® Testing Visualizations with Current Data")
print("=" * 50)

if df_filtered.empty:
    print("‚ùå No data available for visualization testing")
    print("   Please check your filters in Cell 3")
else:
    print(f"‚úÖ Testing with {len(df_filtered):,} filtered records")
    print(f"   Date range: {df_filtered['time'].min().date()} to {df_filtered['time'].max().date()}")
    print(f"   Unique locations: {len(df_filtered[['lat', 'lon']].drop_duplicates())}")

    # 1. Show Alerts and Summary
    print("\n1. üîî RISK ALERTS")
    print("-" * 30)
    alerts = show_alerts(df_filtered)
    for alert in alerts:
        print(f"   {alert}")

    # Display metrics summary
    summary = create_metrics_summary(df_filtered)
    if summary:
        from IPython.display import display, HTML
        display(HTML(summary))

    # 2. Test Map Visualization (Latest Month)
    print("\n2. üó∫Ô∏è DROUGHT RISK MAP (Latest Month)")
    print("-" * 40)
    latest_month = df_filtered['time'].max().strftime('%Y-%m')
    df_latest = df_filtered[df_filtered['time'] == df_filtered['time'].max()]

    if not df_latest.empty:
        fig_map = create_drought_risk_map(df_latest, latest_month)
        if fig_map:
            fig_map.show()
        else:
            print("   ‚ùå Failed to create map")
    else:
        print("   ‚ö†Ô∏è  No data for latest month")

    # 3. Test Time Series
    print("\n3. üìà TIME SERIES ANALYSIS")
    print("-" * 30)

    # Test with Rainfall first
    fig_ts_rain = create_time_series(df_filtered, "Rainfall_mm")
    if fig_ts_rain:
        fig_ts_rain.show()
    else:
        print("   ‚ùå Failed to create rainfall time series")

    # Test with Temperature
    fig_ts_temp = create_time_series(df_filtered, "Temperature_C")
    if fig_ts_temp:
        fig_ts_temp.show()
    else:
        print("   ‚ùå Failed to create temperature time series")

    # 4. Test Risk Breakdown
    print("\n4. üìä RISK DISTRIBUTION")
    print("-" * 25)
    fig_risk = create_risk_breakdown(df_filtered)
    if fig_risk:
        fig_risk.show()
    else:
        print("   ‚ùå Failed to create risk breakdown")

    # 5. Test Seasonal Analysis (if we have multiple months)
    print("\n5. üå§Ô∏è SEASONAL PATTERNS")
    print("-" * 25)
    unique_months = df_filtered['time'].dt.to_period('M').nunique()
    if unique_months >= 12:  # Only show if we have at least a year of data
        fig_seasonal = seasonal_analysis(df_filtered)
        if fig_seasonal:
            fig_seasonal.show()
        else:
            print("   ‚ùå Failed to create seasonal analysis")
    else:
        print(f"   ‚è≥ Not enough data for seasonal analysis (have {unique_months} months, need 12+)")

    # 6. Test Correlations (if we have enough data)
    print("\n6. üîó VARIABLE CORRELATIONS")
    print("-" * 30)
    if len(df_filtered) >= 10:  # Only show if we have enough data points
        fig_corr = show_correlations(df_filtered)
        if fig_corr:
            fig_corr.show()
        else:
            print("   ‚ùå Failed to create correlation matrix")
    else:
        print(f"   üìä Not enough data for correlation analysis (have {len(df_filtered)} records, need 10+)")

    # 7. Data Quality Check
    print("\n7. ‚úÖ DATA QUALITY CHECK")
    print("-" * 25)
    print(f"   Total records: {len(df_filtered):,}")
    print(f"   Date range: {df_filtered['time'].min().date()} to {df_filtered['time'].max().date()}")
    print(f"   Unique months: {df_filtered['time'].dt.to_period('M').nunique()}")
    print(f"   Unique locations: {len(df_filtered[['lat', 'lon']].drop_duplicates())}")

    # Check for missing values
    missing_data = df_filtered.isnull().sum()
    if missing_data.sum() > 0:
        print(f"   ‚ö†Ô∏è  Missing values detected:")
        for col, count in missing_data.items():
            if count > 0:
                print(f"      {col}: {count} missing")
    else:
        print("   ‚úÖ No missing values")

    # Check data types
    print(f"   Data types:")
    for col in df_filtered.columns:
        print(f"      {col}: {df_filtered[col].dtype}")

print("\n" + "=" * 50)
print("üéâ VISUALIZATION TESTING COMPLETE!")
print("=" * 50)
print("\nNext steps:")
print("1. Check if all visualizations rendered correctly")
print("2. If any failed, check the error messages above")
print("3. Proceed to Cell 6 for interactive analysis scenarios")

üé® Testing Visualizations with Current Data
‚úÖ Testing with 24 filtered records
   Date range: 2015-01-01 to 2016-12-01
   Unique locations: 1

1. üîî RISK ALERTS
------------------------------
   üìä Summary: 1 locations analyzed
üìã Creating metrics summary...



2. üó∫Ô∏è DROUGHT RISK MAP (Latest Month)
----------------------------------------
üó∫Ô∏è  Creating map for 2016-12 with 1 locations...
‚úÖ Map created successfully



3. üìà TIME SERIES ANALYSIS
------------------------------
üìà Creating time series for Rainfall_mm...
‚úÖ Time series created with 24 time points


üìà Creating time series for Temperature_C...
‚úÖ Time series created with 24 time points



4. üìä RISK DISTRIBUTION
-------------------------
üìä Creating risk breakdown chart...
‚úÖ Risk breakdown chart created



5. üå§Ô∏è SEASONAL PATTERNS
-------------------------
üå§Ô∏è  Analyzing seasonal patterns...
‚úÖ Seasonal analysis completed



6. üîó VARIABLE CORRELATIONS
------------------------------
üîó Calculating correlations...
‚úÖ Correlation matrix created



7. ‚úÖ DATA QUALITY CHECK
-------------------------
   Total records: 24
   Date range: 2015-01-01 to 2016-12-01
   Unique months: 24
   Unique locations: 1
   ‚úÖ No missing values
   Data types:
      time: datetime64[ns]
      lat: float64
      lon: float64
      Rainfall_mm: float64
      Temperature_C: float64
      SolarRadiation_kWh_m2_day: float64
      Drought_Risk: object

üéâ VISUALIZATION TESTING COMPLETE!

Next steps:
1. Check if all visualizations rendered correctly
2. If any failed, check the error messages above
3. Proceed to Cell 6 for interactive analysis scenarios


In [None]:
# app_v4_1.py ‚Äî Africa Climate & Energy Dashboard (Water / Energy / Both)
import streamlit as st, pandas as pd, plotly.express as px, datetime

st.set_page_config(page_title="Africa Water & Energy Dashboard v4.1", layout="wide")

st.title("üåç Africa Climate ‚Ä¢ Water & Energy Dashboard ‚Äî v4.1")
st.markdown("Switch between **Water**, **Energy**, or **Both** modes to explore rainfall, drought risk, "
            "and solar-energy potential across Africa (2015‚Äì2024).")

# --- Sidebar controls ---
st.sidebar.header("Controls")
data_file = st.sidebar.text_input("Data file", "africa_climate_data.csv")
mode = st.sidebar.radio("Select Mode", ["Water", "Energy", "Both"])
start = st.sidebar.date_input("Start", datetime.date(2015,1,1))
end = st.sidebar.date_input("End", datetime.date(2024,12,31))

@st.cache_data
def load_data(path):
    df = pd.read_csv(path, parse_dates=["time"])
    return df

df = load_data(data_file)
df = df[(df["time"]>=pd.to_datetime(start)) & (df["time"]<=pd.to_datetime(end))]

# ------------- Layout -------------
if mode in ["Water","Both"]:
    st.subheader("üíß Water / Drought Overview")
    fig_rain = px.line(df, x="time", y="Rainfall_mm",
                       title="Average Rainfall (mm) ‚Äî Africa",
                       labels={"time":"Time","Rainfall_mm":"Rainfall (mm)"})
    st.plotly_chart(fig_rain, use_container_width=True)

    risk_counts = df["Drought_Risk"].value_counts().reindex(["Extreme","High","Medium","Low"]).fillna(0)
    fig_risk = px.bar(x=risk_counts.index, y=risk_counts.values, color=risk_counts.index,
                      title="Drought Risk Distribution (2015‚Äì2024)",
                      labels={"x":"Risk Level","y":"Months"})
    st.plotly_chart(fig_risk, use_container_width=True)

if mode in ["Energy","Both"]:
    st.subheader("‚ö° Energy / Solar Potential Overview")
    fig_solar = px.line(df, x="time", y="SolarRadiation_kWh_m2_day",
                        title="Solar Radiation Potential (kWh/m¬≤/day) ‚Äî Africa",
                        labels={"time":"Time","SolarRadiation_kWh_m2_day":"Solar (kWh/m¬≤/day)"})
    st.plotly_chart(fig_solar, use_container_width=True)

    if "Energy_Demand_kWh" in df.columns:
        fig_demand = px.line(df, x="time", y="Energy_Demand_kWh",
                             title="Energy Demand (kWh)",
                             labels={"time":"Time","Energy_Demand_kWh":"Demand (kWh)"})
        st.plotly_chart(fig_demand, use_container_width=True)
    else:
        st.info("No energy-demand column found; showing solar potential only.")

# --- Combined Summary (Both) ---
if mode == "Both":
    st.subheader("üåç Correlation Overview")
    corr = df[["Rainfall_mm","SolarRadiation_kWh_m2_day"]].corr().iloc[0,1]
    st.metric("Rainfall vs Solar correlation", f"{corr:.2f}",
              delta="Negative" if corr<0 else "Positive")

st.markdown("---")
st.caption("Built by Senior Kiogo Newton ‚Ä¢ v4.1 ‚Äî Powered by Senior Empire‚Ñ¢")


2025-10-16 21:30:08.864 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-10-16 21:30:08.872 Session state does not function when running a script without `streamlit run`
2025-10-16 21:30:08.889 No runtime found, using MemoryCacheStorageManager
2025-10-16 21:30:08.891 No runtime found, using MemoryCacheStorageManager


FileNotFoundError: [Errno 2] No such file or directory: 'africa_climate_data.csv'

In [None]:
# Cell 6: Interactive Analysis Scenarios

print("üîç INTERACTIVE ANALYSIS SCENARIOS")
print("=" * 60)

def run_scenario(scenario_name, filters, description):
    """Run a specific analysis scenario with given filters"""
    print(f"\n{'='*50}")
    print(f"üìã SCENARIO: {scenario_name}")
    print(f"üìù {description}")
    print(f"{'='*50}")

    # Apply filters for this scenario
    df_scenario = apply_filters(
        df,
        filters['start_date'],
        filters['end_date'],
        filters['risk_levels'],
        filters['lat_range'],
        filters['lon_range']
    )

    if df_scenario.empty:
        print("‚ùå No data for this scenario")
        return None

    print(f"üìä Scenario Data: {len(df_scenario):,} records")

    # Show alerts for this scenario
    alerts = show_alerts(df_scenario)
    print("üîî Alerts:", alerts[0])  # Show main alert

    return df_scenario

# Define different scenarios to test
scenarios = {
    "recent_analysis": {
        "name": "Recent Data Analysis (Last 2 Years)",
        "description": "Focus on most recent data to see current conditions",
        "filters": {
            "start_date": datetime.date(2023, 1, 1),
            "end_date": APP_CONFIG["default_end_date"],
            "risk_levels": ["Extreme", "High", "Medium", "Low"],
            "lat_range": (APP_CONFIG["default_bounds"]["min_lat"], APP_CONFIG["default_bounds"]["max_lat"]),
            "lon_range": (APP_CONFIG["default_bounds"]["min_lon"], APP_CONFIG["default_bounds"]["max_lon"])
        }
    },
    "high_risk_only": {
        "name": "High Risk Areas Only",
        "description": "Focus only on areas with Extreme or High drought risk",
        "filters": {
            "start_date": APP_CONFIG["default_start_date"],
            "end_date": APP_CONFIG["default_end_date"],
            "risk_levels": ["Extreme", "High"],
            "lat_range": (APP_CONFIG["default_bounds"]["min_lat"], APP_CONFIG["default_bounds"]["max_lat"]),
            "lon_range": (APP_CONFIG["default_bounds"]["min_lon"], APP_CONFIG["default_bounds"]["max_lon"])
        }
    },
    "east_africa": {
        "name": "East Africa Regional Analysis",
        "description": "Focus on East Africa region (Ethiopia, Kenya, Tanzania, etc.)",
        "filters": {
            "start_date": APP_CONFIG["default_start_date"],
            "end_date": APP_CONFIG["default_end_date"],
            "risk_levels": ["Extreme", "High", "Medium", "Low"],
            "lat_range": (-12, 12),   # East Africa latitude range
            "lon_range": (28, 52)     # East Africa longitude range
        }
    },
    "southern_africa": {
        "name": "Southern Africa Regional Analysis",
        "description": "Focus on Southern Africa region (South Africa, Namibia, Botswana, etc.)",
        "filters": {
            "start_date": APP_CONFIG["default_start_date"],
            "end_date": APP_CONFIG["default_end_date"],
            "risk_levels": ["Extreme", "High", "Medium", "Low"],
            "lat_range": (-35, -5),   # Southern Africa latitude range
            "lon_range": (10, 40)     # Southern Africa longitude range
        }
    },
    "dry_season": {
        "name": "Dry Season Analysis (Jan-Mar)",
        "description": "Analyze conditions during typical dry season months",
        "filters": {
            "start_date": APP_CONFIG["default_start_date"],
            "end_date": APP_CONFIG["default_end_date"],
            "risk_levels": ["Extreme", "High", "Medium", "Low"],
            "lat_range": (APP_CONFIG["default_bounds"]["min_lat"], APP_CONFIG["default_bounds"]["max_lat"]),
            "lon_range": (APP_CONFIG["default_bounds"]["min_lon"], APP_CONFIG["default_bounds"]["max_lon"])
        }
    }
}

# Store results from each scenario
scenario_results = {}

print("üöÄ RUNNING ANALYSIS SCENARIOS")
print("This will test different filter combinations to uncover insights")
print(f"Total scenarios to run: {len(scenarios)}")

# Run each scenario
for scenario_key, scenario_config in scenarios.items():
    df_scenario = run_scenario(
        scenario_config["name"],
        scenario_config["filters"],
        scenario_config["description"]
    )

    if df_scenario is not None:
        scenario_results[scenario_key] = df_scenario

        # Show quick visualization for each scenario
        if not df_scenario.empty:
            # Show risk breakdown
            risk_counts = df_scenario['Drought_Risk'].value_counts()
            print(f"   üìä Risk Distribution:")
            for risk, count in risk_counts.items():
                percentage = (count / len(df_scenario)) * 100
                print(f"      {risk}: {count:,} ({percentage:.1f}%)")

            # Show latest conditions
            latest_data = df_scenario[df_scenario['time'] == df_scenario['time'].max()]
            if not latest_data.empty:
                avg_rainfall = latest_data['Rainfall_mm'].mean()
                avg_temp = latest_data['Temperature_C'].mean()
                print(f"   üå°Ô∏è  Latest Conditions (avg):")
                print(f"      Rainfall: {avg_rainfall:.1f} mm")
                print(f"      Temperature: {avg_temp:.1f} ¬∞C")

            print("   ‚úÖ Scenario completed successfully")
    print()

# Comparative Analysis
print("\n" + "="*60)
print("üìä COMPARATIVE ANALYSIS ACROSS SCENARIOS")
print("="*60)

if scenario_results:
    comparison_data = []

    for scenario_key, df_scenario in scenario_results.items():
        if not df_scenario.empty:
            scenario_name = scenarios[scenario_key]["name"]
            total_records = len(df_scenario)

            # Risk analysis
            extreme_count = len(df_scenario[df_scenario['Drought_Risk'] == 'Extreme'])
            high_count = len(df_scenario[df_scenario['Drought_Risk'] == 'High'])
            risk_percentage = ((extreme_count + high_count) / total_records * 100) if total_records > 0 else 0

            # Latest conditions
            latest_data = df_scenario[df_scenario['time'] == df_scenario['time'].max()]
            if not latest_data.empty:
                avg_rainfall = latest_data['Rainfall_mm'].mean()
                avg_temp = latest_data['Temperature_C'].mean()
            else:
                avg_rainfall = avg_temp = 0

            comparison_data.append({
                'Scenario': scenario_name,
                'Records': total_records,
                'High+Extreme Risk %': risk_percentage,
                'Avg Rainfall (mm)': avg_rainfall,
                'Avg Temp (¬∞C)': avg_temp
            })

    # Create comparison DataFrame
    if comparison_data:
        comparison_df = pd.DataFrame(comparison_data)
        print("\nüìà Scenario Comparison Table:")
        print(comparison_df.to_string(index=False))

        # Create visual comparison
        print("\nüé® Creating comparison visualizations...")

        # Risk comparison chart
        fig_risk_comp = px.bar(
            comparison_df,
            x='Scenario',
            y='High+Extreme Risk %',
            title='üìä Percentage of High+Extreme Risk Records by Scenario',
            color='High+Extreme Risk %',
            color_continuous_scale='reds'
        )
        fig_risk_comp.show()

        # Conditions comparison
        fig_conditions = make_subplots(
            rows=2, cols=1,
            subplot_titles=('üåßÔ∏è Average Rainfall by Scenario', 'üå°Ô∏è Average Temperature by Scenario'),
            vertical_spacing=0.15
        )

        fig_conditions.add_trace(
            go.Bar(x=comparison_df['Scenario'], y=comparison_df['Avg Rainfall (mm)'],
                  name='Rainfall', marker_color='blue'),
            row=1, col=1
        )

        fig_conditions.add_trace(
            go.Bar(x=comparison_df['Scenario'], y=comparison_df['Avg Temp (¬∞C)'],
                  name='Temperature', marker_color='red'),
            row=2, col=1
        )

        fig_conditions.update_layout(height=600, showlegend=False)
        fig_conditions.show()

else:
    print("‚ùå No scenario results to compare")

# Interactive Scenario Explorer
print("\n" + "="*60)
print("üîß INTERACTIVE SCENARIO EXPLORER")
print("="*60)

def explore_custom_scenario():
    """Allow custom scenario exploration"""
    print("\nüéõÔ∏è  Create Your Own Scenario:")
    print("   (Modify the values below and re-run this cell)")

    # You can modify these values to create custom scenarios
    custom_filters = {
        "start_date": datetime.date(2020, 1, 1),  # Change this
        "end_date": datetime.date(2024, 12, 31),  # Change this
        "risk_levels": ["Extreme", "High"],       # Change this
        "lat_range": (-10, 10),                   # Change this
        "lon_range": (30, 45)                     # Change this
    }

    print(f"   Current custom filters:")
    print(f"   - Date: {custom_filters['start_date']} to {custom_filters['end_date']}")
    print(f"   - Risk levels: {custom_filters['risk_levels']}")
    print(f"   - Latitude: {custom_filters['lat_range'][0]} to {custom_filters['lat_range'][1]}")
    print(f"   - Longitude: {custom_filters['lon_range'][0]} to {custom_filters['lon_range'][1]}")

    df_custom = apply_filters(df, **custom_filters)

    if not df_custom.empty:
        print(f"\n   ‚úÖ Custom scenario data: {len(df_custom):,} records")

        # Show quick analysis
        latest_custom = df_custom[df_custom['time'] == df_custom['time'].max()]
        if not latest_custom.empty:
            print(f"   üìä Latest conditions:")
            print(f"      - Locations: {len(latest_custom)}")
            print(f"      - Avg Rainfall: {latest_custom['Rainfall_mm'].mean():.1f} mm")
            print(f"      - Avg Temperature: {latest_custom['Temperature_C'].mean():.1f} ¬∞C")

            # Show risk distribution
            risk_dist = latest_custom['Drought_Risk'].value_counts()
            print(f"      - Risk distribution:")
            for risk, count in risk_dist.items():
                print(f"          {risk}: {count}")
    else:
        print("   ‚ùå No data for custom filters")

    return df_custom

# Run custom scenario explorer
custom_df = explore_custom_scenario()

print("\n" + "="*60)
print("üéØ KEY INSIGHTS FROM SCENARIO ANALYSIS")
print("="*60)

if scenario_results:
    print("\nüí° Summary of Findings:")

    # Find scenario with highest risk
    max_risk_scenario = None
    max_risk_value = 0

    for scenario_key, df_scenario in scenario_results.items():
        if not df_scenario.empty:
            extreme_count = len(df_scenario[df_scenario['Drought_Risk'] == 'Extreme'])
            high_count = len(df_scenario[df_scenario['Drought_Risk'] == 'High'])
            total_records = len(df_scenario)
            risk_percentage = ((extreme_count + high_count) / total_records * 100) if total_records > 0 else 0

            if risk_percentage > max_risk_value:
                max_risk_value = risk_percentage
                max_risk_scenario = scenarios[scenario_key]["name"]

    if max_risk_scenario:
        print(f"   üî• Highest risk scenario: '{max_risk_scenario}' ({max_risk_value:.1f}% high+extreme risk)")

    # Count scenarios with data
    valid_scenarios = len([df for df in scenario_results.values() if not df.empty])
    print(f"   üìä {valid_scenarios} out of {len(scenarios)} scenarios had data")

    print("\nüéØ Recommended Next Steps:")
    print("   1. Focus on high-risk scenarios for detailed analysis")
    print("   2. Use custom scenario explorer to test specific regions")
    print("   3. Export interesting scenarios for further analysis")
    print("   4. Proceed to Cell 7 for trend analysis and exports")

else:
    print("   No insights available - check your data and filters")

print("\n‚úÖ Scenario analysis complete!")

üîç INTERACTIVE ANALYSIS SCENARIOS
üöÄ RUNNING ANALYSIS SCENARIOS
This will test different filter combinations to uncover insights
Total scenarios to run: 5

üìã SCENARIO: Recent Data Analysis (Last 2 Years)
üìù Focus on most recent data to see current conditions
üîç Applying filters...
   Date range: 2023-01-01 to 2024-12-31
   Risk levels: ['Extreme', 'High', 'Medium', 'Low']
   Latitude: -35.0 to 38.0
   Longitude: -20.0 to 55.0
   After date filter: 0 rows
   After risk filter: 0 rows
   After geographic filter: 0 rows
‚ùå No data for this scenario


üìã SCENARIO: High Risk Areas Only
üìù Focus only on areas with Extreme or High drought risk
üîç Applying filters...
   Date range: 2015-01-01 to 2024-12-31
   Risk levels: ['Extreme', 'High']
   Latitude: -35.0 to 38.0
   Longitude: -20.0 to 55.0
   After date filter: 24 rows
   After risk filter: 14 rows
   After geographic filter: 14 rows
üìä Scenario Data: 14 records
   üìä Risk Distribution:
      High: 14 (100.0%)
   ü


üîß INTERACTIVE SCENARIO EXPLORER

üéõÔ∏è  Create Your Own Scenario:
   (Modify the values below and re-run this cell)
   Current custom filters:
   - Date: 2020-01-01 to 2024-12-31
   - Risk levels: ['Extreme', 'High']
   - Latitude: -10 to 10
   - Longitude: 30 to 45
üîç Applying filters...
   Date range: 2020-01-01 to 2024-12-31
   Risk levels: ['Extreme', 'High']
   Latitude: -10 to 10
   Longitude: 30 to 45
   After date filter: 0 rows
   After risk filter: 0 rows
   After geographic filter: 0 rows
   ‚ùå No data for custom filters

üéØ KEY INSIGHTS FROM SCENARIO ANALYSIS

üí° Summary of Findings:
   üî• Highest risk scenario: 'High Risk Areas Only' (100.0% high+extreme risk)
   üìä 2 out of 5 scenarios had data

üéØ Recommended Next Steps:
   1. Focus on high-risk scenarios for detailed analysis
   2. Use custom scenario explorer to test specific regions
   3. Export interesting scenarios for further analysis
   4. Proceed to Cell 7 for trend analysis and exports

‚úÖ Sc

In [None]:
# Cell 7: Trend Analysis and Data Export - FIXED VERSION

import os
from datetime import datetime as dt # Import datetime with alias to avoid conflict

print("üìà TREND ANALYSIS AND DATA EXPORT")
print("=" * 60)

def analyze_trends(df):
    """Comprehensive trend analysis"""
    if df.empty:
        print("‚ùå No data for trend analysis")
        return None

    print("üîç Analyzing trends over time...")

    # Create yearly aggregates
    df_trend = df.copy()
    df_trend['year'] = df_trend['time'].dt.year
    df_trend['month'] = df_trend['time'].dt.month
    df_trend['season'] = df_trend['month'].apply(lambda x:
        'DJF' if x in [12, 1, 2] else
        'MAM' if x in [3, 4, 5] else
        'JJA' if x in [6, 7, 8] else 'SON'
    )

    # Annual trends
    annual_metrics = df_trend.groupby('year').agg({
        'Rainfall_mm': ['mean', 'std', 'count'],
        'Temperature_C': ['mean', 'std'],
        'SolarRadiation_kWh_m2_day': ['mean', 'std'],
        'Drought_Risk': lambda x: (x.isin(['Extreme', 'High'])).mean() * 100  # % high risk
    }).round(2)

    # Flatten column names
    annual_metrics.columns = ['_'.join(col).strip() for col in annual_metrics.columns.values]
    annual_metrics = annual_metrics.reset_index()

    return df_trend, annual_metrics

def create_trend_visualizations(df_trend, annual_metrics):
    """Create comprehensive trend visualizations"""
    if df_trend.empty:
        return None

    print("üìä Creating trend visualizations...")

    # 1. Annual Rainfall Trend
    fig_rain_trend = px.line(
        annual_metrics,
        x='year',
        y='Rainfall_mm_mean',
        title='üåßÔ∏è Annual Rainfall Trend',
        markers=True
    )

    # Add confidence interval if we have std data
    if 'Rainfall_mm_std' in annual_metrics.columns:
        fig_rain_trend.add_scatter(
            x=annual_metrics['year'],
            y=annual_metrics['Rainfall_mm_mean'] + annual_metrics['Rainfall_mm_std'],
            mode='lines',
            line=dict(width=0),
            showlegend=False,
            name='Upper Std'
        )
        fig_rain_trend.add_scatter(
            x=annual_metrics['year'],
            y=annual_metrics['Rainfall_mm_mean'] - annual_metrics['Rainfall_mm_std'],
            mode='lines',
            line=dict(width=0),
            fill='tonexty',
            showlegend=False,
            name='Lower Std'
        )

    fig_rain_trend.update_layout(yaxis_title="Rainfall (mm)")

    # 2. Annual Temperature Trend
    fig_temp_trend = px.line(
        annual_metrics,
        x='year',
        y='Temperature_C_mean',
        title='üå°Ô∏è Annual Temperature Trend',
        markers=True,
        color_discrete_sequence=['red']
    )
    fig_temp_trend.update_layout(yaxis_title="Temperature (¬∞C)")

    # 3. Drought Risk Trend
    risk_column = None
    for col in annual_metrics.columns:
        if 'Drought_Risk' in col:
            risk_column = col
            break

    if risk_column:
        fig_risk_trend = px.line(
            annual_metrics,
            x='year',
            y=risk_column,
            title='‚ö†Ô∏è Percentage of High+Extreme Risk Areas Over Time',
            markers=True,
            color_discrete_sequence=['orange']
        )
        fig_risk_trend.update_layout(yaxis_title="% High+Extreme Risk")
    else:
        fig_risk_trend = None

    # 4. Seasonal Analysis
    seasonal_avg = df_trend.groupby(['year', 'season']).agg({
        'Rainfall_mm': 'mean',
        'Temperature_C': 'mean'
    }).reset_index()

    fig_seasonal_trend = px.line(
        seasonal_avg,
        x='year',
        y='Rainfall_mm',
        color='season',
        title='üìÖ Seasonal Rainfall Patterns Over Years',
        markers=True
    )
    fig_seasonal_trend.update_layout(yaxis_title="Rainfall (mm)")

    return {
        'rain_trend': fig_rain_trend,
        'temp_trend': fig_temp_trend,
        'risk_trend': fig_risk_trend,
        'seasonal_trend': fig_seasonal_trend
    }

def perform_statistical_tests(df_trend):
    """Perform statistical tests on trends"""
    if len(df_trend) < 2:
        print("‚ö†Ô∏è Not enough data for statistical tests")
        return None

    print("üìä Performing statistical tests...")

    results = {}

    # Calculate linear trends using correlation
    years = df_trend['year'].unique()
    if len(years) >= 2:
        # Annual averages for trend calculation
        annual_avg = df_trend.groupby('year').agg({
            'Rainfall_mm': 'mean',
            'Temperature_C': 'mean'
        }).reset_index()

        # Rainfall trend
        try:
            rain_corr = annual_avg['Rainfall_mm'].corr(annual_avg['year'])
            if pd.isna(rain_corr):
                rain_corr = 0  # Handle NaN case
            results['rainfall_trend'] = {
                'correlation': rain_corr,
                'trend': 'increasing' if rain_corr > 0 else 'decreasing',
                'strength': 'strong' if abs(rain_corr) > 0.7 else 'moderate' if abs(rain_corr) > 0.3 else 'weak'
            }
        except:
            results['rainfall_trend'] = {
                'correlation': 0,
                'trend': 'no trend',
                'strength': 'insufficient data'
            }

        # Temperature trend
        try:
            temp_corr = annual_avg['Temperature_C'].corr(annual_avg['year'])
            if pd.isna(temp_corr):
                temp_corr = 0  # Handle NaN case
            results['temperature_trend'] = {
                'correlation': temp_corr,
                'trend': 'increasing' if temp_corr > 0 else 'decreasing',
                'strength': 'strong' if abs(temp_corr) > 0.7 else 'moderate' if abs(temp_corr) > 0.3 else 'weak'
            }
        except:
            results['temperature_trend'] = {
                'correlation': 0,
                'trend': 'no trend',
                'strength': 'insufficient data'
            }
    else:
        results['rainfall_trend'] = {
            'correlation': 0,
            'trend': 'no trend',
            'strength': 'insufficient data (need 2+ years)'
        }
        results['temperature_trend'] = {
            'correlation': 0,
            'trend': 'no trend',
            'strength': 'insufficient data (need 2+ years)'
        }

    return results

# Run trend analysis on filtered data
if not df_filtered.empty:
    print("1. üìà TREND ANALYSIS")
    print("-" * 30)

    df_trend, annual_metrics = analyze_trends(df_filtered)

    if df_trend is not None:
        # Display annual metrics
        print("\nüìä Annual Summary Statistics:")
        print(annual_metrics.to_string(index=False))

        # Create visualizations
        trends = create_trend_visualizations(df_trend, annual_metrics)

        # Display visualizations
        if trends:
            print("\nüé® Displaying trend visualizations...")
            for trend_name, fig in trends.items():
                if fig:
                    fig.show()

        # Statistical tests
        stats = perform_statistical_tests(df_trend)
        if stats:
            print("\nüìä Statistical Trend Analysis:")
            for metric, result in stats.items():
                print(f"   {metric.replace('_', ' ').title()}:")
                print(f"      Correlation: {result['correlation']:.3f}")
                print(f"      Trend: {result['trend']} ({result['strength']})")
else:
    print("‚ùå No data available for trend analysis")

print("\n2. üíæ DATA EXPORT AND REPORT GENERATION")
print("-" * 40)

def export_data_and_charts(df, df_filtered, trends=None):
    """Export data, charts, and generate reports"""
    from datetime import datetime as dt  # Rename to avoid conflict

    # Create export directory
    export_dir = f"africa_climate_export_{dt.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(export_dir, exist_ok=True)

    print(f"üìÅ Creating export directory: {export_dir}")

    # Export data files
    try:
        # Full dataset
        df.to_csv(f"{export_dir}/full_dataset.csv", index=False)

        # Filtered dataset
        df_filtered.to_csv(f"{export_dir}/filtered_data.csv", index=False)

        # Annual summary
        if 'annual_metrics' in locals():
            annual_metrics.to_csv(f"{export_dir}/annual_summary.csv", index=False)

        print("‚úÖ Data files exported successfully")
    except Exception as e:
        print(f"‚ùå Error exporting data: {e}")

    # Export summary statistics
    try:
        if not df_filtered.empty:
            summary_stats = df_filtered.describe()
            summary_stats.to_csv(f"{export_dir}/summary_statistics.csv")

            # Risk distribution
            risk_dist = df_filtered['Drought_Risk'].value_counts()
            risk_dist.to_csv(f"{export_dir}/risk_distribution.csv")

            print("‚úÖ Summary statistics exported")
    except Exception as e:
        print(f"‚ùå Error exporting statistics: {e}")

    return export_dir

def generate_report(export_dir, df_filtered, stats=None):
    """Generate a text report summary"""
    from datetime import datetime as dt  # Rename to avoid conflict

    report_path = f"{export_dir}/analysis_report.txt"

    with open(report_path, 'w') as f:
        f.write("AFRICA CLIMATE AND DROUGHT RISK ANALYSIS REPORT\n")
        f.write("=" * 50 + "\n\n")

        f.write(f"Report generated: {dt.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Data overview
        f.write("DATA OVERVIEW:\n")
        f.write(f"- Total records: {len(df_filtered):,}\n")
        f.write(f"- Date range: {df_filtered['time'].min().date()} to {df_filtered['time'].max().date()}\n")
        f.write(f"- Unique locations: {len(df_filtered[['lat', 'lon']].drop_duplicates())}\n")
        f.write(f"- Unique months: {df_filtered['time'].dt.to_period('M').nunique()}\n\n")

        # Current conditions
        latest_data = df_filtered[df_filtered['time'] == df_filtered['time'].max()]
        if not latest_data.empty:
            f.write("CURRENT CONDITIONS (Latest Data):\n")
            f.write(f"- Locations analyzed: {len(latest_data)}\n")
            f.write(f"- Average rainfall: {latest_data['Rainfall_mm'].mean():.1f} mm\n")
            f.write(f"- Average temperature: {latest_data['Temperature_C'].mean():.1f} ¬∞C\n")
            f.write(f"- Average solar radiation: {latest_data['SolarRadiation_kWh_m2_day'].mean():.2f} kWh/m¬≤/day\n\n")

            # Risk assessment
            f.write("RISK ASSESSMENT:\n")
            risk_counts = latest_data['Drought_Risk'].value_counts()
            for risk, count in risk_counts.items():
                percentage = (count / len(latest_data)) * 100
                f.write(f"- {risk} risk: {count} locations ({percentage:.1f}%)\n")
            f.write("\n")

        # Trend analysis summary
        if stats:
            f.write("TREND ANALYSIS:\n")
            for metric, result in stats.items():
                f.write(f"- {metric.replace('_', ' ').title()}: {result['trend']} trend ({result['strength']}, r={result['correlation']:.3f})\n")
            f.write("\n")

        # Key findings
        f.write("KEY FINDINGS:\n")
        if not latest_data.empty:
            extreme_high_count = len(latest_data[latest_data['Drought_Risk'].isin(['Extreme', 'High'])])
            if extreme_high_count > 0:
                f.write(f"- ‚ö†Ô∏è  {extreme_high_count} locations are at High or Extreme drought risk\n")
            else:
                f.write("- ‚úÖ No locations at High or Extreme drought risk\n")

        f.write("\nEXPORTED FILES:\n")
        for file in os.listdir(export_dir):
            f.write(f"- {file}\n")

    print(f"‚úÖ Analysis report generated: {report_path}")
    return report_path

# Export data and generate report
if not df_filtered.empty:
    export_dir = export_data_and_charts(df, df_filtered, trends if 'trends' in locals() else None)

    # Generate report
    report_path = generate_report(export_dir, df_filtered, stats if 'stats' in locals() else None)

    print(f"\nüì¶ EXPORT SUMMARY:")
    print(f"   Export directory: {export_dir}")
    print(f"   Total files: {len(os.listdir(export_dir))}")
    print(f"   Report: {report_path}")

    # List exported files
    print("\n   Exported files:")
    for file in os.listdir(export_dir):
        file_path = os.path.join(export_dir, file)
        file_size = os.path.getsize(file_path)
        print(f"   - {file} ({file_size:,} bytes)")
else:
    print("‚ùå No data to export")

print("\n3. üéØ RECOMMENDATIONS AND INSIGHTS")
print("-" * 40)

def generate_recommendations(df_filtered, stats=None):
    """Generate actionable recommendations based on analysis"""
    if df_filtered.empty:
        return ["No data available for recommendations"]

    recommendations = []

    # Get latest data
    latest_data = df_filtered[df_filtered['time'] == df_filtered['time'].max()]

    if not latest_data.empty:
        # Risk-based recommendations
        extreme_count = len(latest_data[latest_data['Drought_Risk'] == 'Extreme'])
        high_count = len(latest_data[latest_data['Drought_Risk'] == 'High'])

        if extreme_count > 0:
            recommendations.append("üö® IMMEDIATE ACTION: Areas with Extreme drought risk require emergency water management and drought response planning")

        if high_count > 0:
            recommendations.append("‚ö†Ô∏è  HIGH PRIORITY: Implement water conservation measures in High risk areas and monitor conditions closely")

        # Rainfall-based recommendations
        avg_rainfall = latest_data['Rainfall_mm'].mean()
        if avg_rainfall < 30:
            recommendations.append("üíß WATER STRESS: Very low rainfall conditions - consider implementing water restrictions and promoting water-efficient practices")
        elif avg_rainfall < 60:
            recommendations.append("üåßÔ∏è  MODERATE DROUGHT: Below average rainfall - monitor water resources and prepare contingency plans")

        # Trend-based recommendations
        if stats:
            rainfall_trend = stats.get('rainfall_trend', {})
            if rainfall_trend.get('trend') == 'decreasing' and rainfall_trend.get('strength') != 'insufficient data':
                recommendations.append("üìâ RAINFALL DECLINE: Rainfall shows decreasing trend - consider long-term water resource planning and drought-resistant agriculture")

            temperature_trend = stats.get('temperature_trend', {})
            if temperature_trend.get('trend') == 'increasing' and temperature_trend.get('strength') != 'insufficient data':
                recommendations.append("üå°Ô∏è  WARMING TREND: Temperatures show increasing trend - implement heat stress management and climate adaptation strategies")

        # General recommendations
        recommendations.append("üìä CONTINUOUS MONITORING: Maintain regular climate monitoring and early warning systems")
        recommendations.append("üå± SUSTAINABLE PRACTICES: Promote sustainable land and water management practices")

    else:
        recommendations.append("No current data available for specific recommendations")

    return recommendations

# Generate and display recommendations
recommendations = generate_recommendations(df_filtered, stats if 'stats' in locals() else None)

print("\nüí° ACTIONABLE RECOMMENDATIONS:")
for i, rec in enumerate(recommendations, 1):
    print(f"   {i}. {rec}")

print("\n4. üîÆ FUTURE ANALYSIS SUGGESTIONS")
print("-" * 40)

future_analysis_suggestions = [
    "üìÖ Expand date range to include longer-term climate patterns",
    "üåç Incorporate additional climate variables (humidity, wind speed, evaporation)",
    "üèûÔ∏è Add land use and vegetation index data",
    "üíß Integrate water availability and groundwater data",
    "üë• Include socioeconomic vulnerability indicators",
    "üîÆ Develop predictive models for drought forecasting",
    "üó∫Ô∏è Create higher resolution spatial analysis",
    "üì± Build real-time monitoring dashboard",
    "üå°Ô∏è Analyze climate change projections",
    "ü§ù Compare with historical drought events"
]

print("\nSuggested next steps for enhanced analysis:")
for i, suggestion in enumerate(future_analysis_suggestions, 1):
    print(f"   {i}. {suggestion}")

print("\n" + "=" * 60)
print("‚úÖ TREND ANALYSIS AND EXPORT COMPLETE!")
print("=" * 60)

print(f"\nüéâ NOTEBOOK EXECUTION SUMMARY:")
print(f"   ‚úÖ Cell 1: Imports and setup")
print(f"   ‚úÖ Cell 2: Data loading and configuration")
print(f"   ‚úÖ Cell 3: Filtering functions")
print(f"   ‚úÖ Cell 4: Visualization functions")
print(f"   ‚úÖ Cell 5: Basic visualization testing")
print(f"   ‚úÖ Cell 6: Interactive scenario analysis")
print(f"   ‚úÖ Cell 7: Trend analysis and export (current)")

print(f"\nüìä ANALYSIS COMPLETE!")
print(f"   Total records processed: {len(df):,}")
print(f"   Final filtered records: {len(df_filtered):,}")
print(f"   Visualizations created: Multiple charts and maps")
print(f"   Data exported: Yes (check export directory)")

if not df_filtered.empty:
    latest_data = df_filtered[df_filtered['time'] == df_filtered['time'].max()]
    extreme_high = len(latest_data[latest_data['Drought_Risk'].isin(['Extreme', 'High'])])
    print(f"   Current high+extreme risk locations: {extreme_high}")

print(f"\nüéØ Next steps:")
print(f"   1. Review exported data and reports")
print(f"   2. Implement recommendations as appropriate")
print(f"   3. Share findings with stakeholders")
print(f"   4. Consider implementing the Streamlit dashboard")

print(f"\nThank you for using the Africa Climate & Drought Risk Analysis! üåç")

üìà TREND ANALYSIS AND DATA EXPORT
1. üìà TREND ANALYSIS
------------------------------
üîç Analyzing trends over time...

üìä Annual Summary Statistics:
 year  Rainfall_mm_mean  Rainfall_mm_std  Rainfall_mm_count  Temperature_C_mean  Temperature_C_std  SolarRadiation_kWh_m2_day_mean  SolarRadiation_kWh_m2_day_std  Drought_Risk_<lambda>
 2015              50.0            14.77                 12                25.0               3.69                             5.0                           0.74                  58.33
 2016              50.0            14.77                 12                25.0               3.69                             5.0                           0.74                  58.33
üìä Creating trend visualizations...

üé® Displaying trend visualizations...


üìä Performing statistical tests...

üìä Statistical Trend Analysis:
   Rainfall Trend:
      Correlation: 0.000
      Trend: decreasing (weak)
   Temperature Trend:
      Correlation: 0.000
      Trend: decreasing (weak)

2. üíæ DATA EXPORT AND REPORT GENERATION
----------------------------------------
üìÅ Creating export directory: africa_climate_export_20251016_214004
‚úÖ Data files exported successfully
‚úÖ Summary statistics exported
‚úÖ Analysis report generated: africa_climate_export_20251016_214004/analysis_report.txt

üì¶ EXPORT SUMMARY:
   Export directory: africa_climate_export_20251016_214004
   Total files: 5
   Report: africa_climate_export_20251016_214004/analysis_report.txt

   Exported files:
   - summary_statistics.csv (541 bytes)
   - full_dataset.csv (1,410 bytes)
   - risk_distribution.csv (37 bytes)
   - filtered_data.csv (1,410 bytes)
   - analysis_report.txt (792 bytes)

3. üéØ RECOMMENDATIONS AND INSIGHTS
----------------------------------------

üí° ACT

In [None]:
# Cell 8: Summary and Next Steps

print("üéâ ANALYSIS COMPLETE - SUMMARY AND NEXT STEPS")
print("=" * 60)

print("\nüìä ANALYSIS SUMMARY")
print("-" * 30)

if not df_filtered.empty:
    # Final data summary
    total_records = len(df_filtered)
    date_range = f"{df_filtered['time'].min().date()} to {df_filtered['time'].max().date()}"
    unique_locations = len(df_filtered[['lat', 'lon']].drop_duplicates())
    unique_months = df_filtered['time'].dt.to_period('M').nunique()

    # Latest conditions
    latest_data = df_filtered[df_filtered['time'] == df_filtered['time'].max()]
    extreme_high_count = len(latest_data[latest_data['Drought_Risk'].isin(['Extreme', 'High'])])

    print(f"‚úÖ Data Analysis Completed Successfully!")
    print(f"   Total records analyzed: {total_records:,}")
    print(f"   Time period: {date_range}")
    print(f"   Unique locations: {unique_locations}")
    print(f"   Unique months: {unique_months}")
    print(f"   Current high/extreme risk areas: {extreme_high_count}")

    # Key metrics from latest data
    if not latest_data.empty:
        print(f"\nüå°Ô∏è Current Conditions (Latest Data):")
        print(f"   Average Rainfall: {latest_data['Rainfall_mm'].mean():.1f} mm")
        print(f"   Average Temperature: {latest_data['Temperature_C'].mean():.1f} ¬∞C")
        print(f"   Average Solar Radiation: {latest_data['SolarRadiation_kWh_m2_day'].mean():.2f} kWh/m¬≤/day")

        # Risk distribution
        risk_counts = latest_data['Drought_Risk'].value_counts()
        print(f"\n‚ö†Ô∏è Current Risk Distribution:")
        for risk, count in risk_counts.items():
            percentage = (count / len(latest_data)) * 100
            print(f"   {risk}: {count} locations ({percentage:.1f}%)")
else:
    print("‚ùå No data available for summary")

print("\nüîç KEY INSIGHTS DISCOVERED")
print("-" * 30)

# Display insights based on the analysis
if not df_filtered.empty:
    insights = []

    latest_data = df_filtered[df_filtered['time'] == df_filtered['time'].max()]

    # Risk insights
    extreme_count = len(latest_data[latest_data['Drought_Risk'] == 'Extreme'])
    high_count = len(latest_data[latest_data['Drought_Risk'] == 'High'])

    if extreme_count > 0:
        insights.append(f"üö® {extreme_count} locations are at EXTREME drought risk requiring immediate attention")
    if high_count > 0:
        insights.append(f"‚ö†Ô∏è {high_count} locations are at HIGH drought risk needing monitoring")

    # Rainfall insights
    avg_rainfall = latest_data['Rainfall_mm'].mean()
    if avg_rainfall < 30:
        insights.append("üíß Very low rainfall conditions detected across the region")
    elif avg_rainfall < 60:
        insights.append("üåßÔ∏è Below average rainfall conditions observed")

    # Trend insights (if available)
    if 'stats' in locals() and stats:
        rain_trend = stats.get('rainfall_trend', {})
        if rain_trend.get('trend') == 'decreasing' and rain_trend.get('strength') in ['moderate', 'strong']:
            insights.append("üìâ Significant decreasing rainfall trend identified")

        temp_trend = stats.get('temperature_trend', {})
        if temp_trend.get('trend') == 'increasing' and temp_trend.get('strength') in ['moderate', 'strong']:
            insights.append("üå°Ô∏è Significant increasing temperature trend detected")

    # Display insights
    if insights:
        for i, insight in enumerate(insights, 1):
            print(f"   {i}. {insight}")
    else:
        print("   üìä Analysis shows generally stable conditions with no major alerts")
else:
    print("   No insights available - check data availability")

print("\nüöÄ NEXT STEPS FOR DEPLOYMENT")
print("-" * 30)

next_steps = [
    "1. üéØ REVIEW EXPORTS: Check the exported data and reports in the export directory",
    "2. üìä VALIDATE FINDINGS: Verify analysis results with domain experts",
    "3. üåê STREAMLIT DEPLOYMENT: Implement the dashboard using the tested code",
    "4. üîÑ UPDATE DATA: Set up automated data updates for current analysis",
    "5. üì¢ SHARE INSIGHTS: Present findings to stakeholders and decision-makers",
    "6. üõ†Ô∏è ENHANCE FEATURES: Add more variables and analysis based on user feedback"
]

for step in next_steps:
    print(f"   {step}")

print("\nüí° STREAMLIT DASHBOARD IMPLEMENTATION TIPS")
print("-" * 40)

streamlit_tips = [
    "‚Ä¢ Use the tested functions from this notebook in your Streamlit app",
    "‚Ä¢ Start with the basic layout and gradually add interactive features",
    "‚Ä¢ Implement caching for data loading to improve performance",
    "‚Ä¢ Add error handling for missing data files",
    "‚Ä¢ Include tooltips and explanations for better user experience",
    "‚Ä¢ Test with different screen sizes and devices",
    "‚Ä¢ Consider deploying on Streamlit Cloud for easy sharing"
]

for tip in streamlit_tips:
    print(f"   {tip}")

print("\nüìÅ DATA MANAGEMENT RECOMMENDATIONS")
print("-" * 35)

data_recommendations = [
    "‚Ä¢ Maintain regular backups of your climate data",
    "‚Ä¢ Implement data validation checks for new data imports",
    "‚Ä¢ Consider using a database for larger datasets",
    "‚Ä¢ Set up automated data quality monitoring",
    "‚Ä¢ Document data sources and update frequencies"
]

for rec in data_recommendations:
    print(f"   {rec}")

print("\nüîß TECHNICAL IMPROVEMENTS FOR FUTURE")
print("-" * 35)

technical_improvements = [
    "‚Ä¢ Add machine learning models for drought prediction",
    "‚Ä¢ Implement real-time data streaming capabilities",
    "‚Ä¢ Create API endpoints for data access",
    "‚Ä¢ Develop mobile-friendly responsive design",
    "‚Ä¢ Add user authentication and access controls",
    "‚Ä¢ Implement automated reporting and alerts"
]

for improvement in technical_improvements:
    print(f"   {improvement}")

print("\nüìû SUPPORT AND MAINTENANCE")
print("-" * 25)

support_info = [
    "‚Ä¢ Document all functions and data sources",
    "‚Ä¢ Create user guides and tutorial materials",
    "‚Ä¢ Set up monitoring for system performance",
    "‚Ä¢ Plan regular updates and maintenance schedules",
    "‚Ä¢ Establish feedback collection mechanisms"
]

for info in support_info:
    print(f"   {info}")

print("\n" + "=" * 60)
print("üéØ NOTEBOOK EXECUTION COMPLETE!")
print("=" * 60)

print(f"""
‚ú® SUCCESS! Your Africa Climate & Drought Risk Analysis is complete.

What you've accomplished:
‚úì Loaded and validated climate data
‚úì Applied sophisticated filtering and analysis
‚úì Created interactive visualizations and maps
‚úì Conducted trend analysis and statistical tests
‚úì Generated actionable insights and recommendations
‚úì Exported comprehensive reports and datasets

Next immediate actions:
1. Locate your export directory: {export_dir if 'export_dir' in locals() else 'Check previous cell output'}
2. Review the analysis_report.txt for key findings
3. Begin Streamlit dashboard implementation using the tested code

Thank you for using this comprehensive climate analysis toolkit! üåç

Your data is now ready for decision-making and further development.
""")

# Final check for export directory
try:
    if 'export_dir' in locals():
        import os
        if os.path.exists(export_dir):
            print(f"üìÅ Your export directory exists: {export_dir}")
            files = os.listdir(export_dir)
            print(f"   Contains {len(files)} files for your use")
        else:
            print("‚ö†Ô∏è  Export directory not found - check Cell 7 output")
    else:
        print("‚ÑπÔ∏è  Run Cell 7 to generate data exports")
except:
    print("‚ÑπÔ∏è  Export directory status unavailable")

print("\n" + "üéâ" * 30)
print("ANALYSIS COMPLETE - READY FOR ACTION!")
print("üéâ" * 30)

üéâ ANALYSIS COMPLETE - SUMMARY AND NEXT STEPS

üìä ANALYSIS SUMMARY
------------------------------
‚úÖ Data Analysis Completed Successfully!
   Total records analyzed: 24
   Time period: 2015-01-01 to 2016-12-01
   Unique locations: 1
   Unique months: 24
   Current high/extreme risk areas: 1

üå°Ô∏è Current Conditions (Latest Data):
   Average Rainfall: 32.7 mm
   Average Temperature: 25.0 ¬∞C
   Average Solar Radiation: 5.00 kWh/m¬≤/day

‚ö†Ô∏è Current Risk Distribution:
   High: 1 locations (100.0%)

üîç KEY INSIGHTS DISCOVERED
------------------------------
   1. ‚ö†Ô∏è 1 locations are at HIGH drought risk needing monitoring
   2. üåßÔ∏è Below average rainfall conditions observed

üöÄ NEXT STEPS FOR DEPLOYMENT
------------------------------
   1. üéØ REVIEW EXPORTS: Check the exported data and reports in the export directory
   2. üìä VALIDATE FINDINGS: Verify analysis results with domain experts
   3. üåê STREAMLIT DEPLOYMENT: Implement the dashboard using the tested cod

In [None]:
import pandas as pd
import numpy as np
from datetime import date, timedelta

# =====================================================
# üåç  Africa Climate Synthetic Dataset (2015‚Äì2024)
#  Columns: time, Rainfall_mm, SolarRadiation_kWh_m2_day
# =====================================================

# 10 years of monthly data
dates = pd.date_range(start="2015-01-01", end="2024-12-31", freq="MS")

# create realistic rainfall & solar patterns
rainfall = np.random.normal(100, 40, len(dates))      # mm/month
rainfall = np.clip(rainfall, 5, 250)                  # keep within bounds
solar = 7 - (rainfall / 400) * 2 + np.random.normal(0, 0.2, len(dates))
solar = np.clip(solar, 3, 7)                          # kWh/m¬≤/day typical Africa range

df = pd.DataFrame({
    "time": dates,
    "Rainfall_mm": np.round(rainfall, 2),
    "SolarRadiation_kWh_m2_day": np.round(solar, 2)
})

df.to_csv("africa_climate_data.csv", index=False)
print("‚úÖ africa_climate_data.csv created successfully!")
print(df.head(), "\nRows:", len(df))


‚úÖ africa_climate_data.csv created successfully!
        time  Rainfall_mm  SolarRadiation_kWh_m2_day
0 2015-01-01       107.02                       6.19
1 2015-02-01       109.87                       6.30
2 2015-03-01       114.82                       6.61
3 2015-04-01        83.33                       6.54
4 2015-05-01        70.67                       6.69 
Rows: 120


In [None]:
!streamlit run app_v4_1.py &>/dev/null&
from pyngrok import ngrok
public_url = ngrok.connect(8501)
print("üåç Open your dashboard at:", public_url)



ERROR:pyngrok.process.ngrok:t=2025-10-17T11:08:33+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-10-17T11:08:33+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-10-17T11:08:33+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.0
