# 3. Future Scenario Analysis

This notebook uses the best-performing model (determined in `02_model_analysis.ipynb`) to predict future sea level rise based on different GHG emission scenarios. It performs the following steps:
1. Loads the future GHG emission scenario data.
2. Loads the chosen, pre-trained model from the `../models/` directory.
3. Prepares the input sequences for the model based on the future scenarios.
4. Generates predictions for sea level rise up to the year 2050 for each scenario.
5. Visualizes the results and presents them in a final summary table.

### 3.1 Setup and Data Preparation

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

# Add src directory to path to import neural_networks module
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
from neural_networks import NeuralNetwork_2hl # Assuming 2hl is the best model

# Set random seed for reproducibility
np.random.seed(42)

# Data Loading and Processing (Identical to other notebooks)

# Load historical data to get the correct normalization stats (mean/std).
# Only the 'World' rows are kept and the emissions column is rescaled by 1e-9
# (presumably tonnes -> gigatonnes CO2eq; confirm against the dataset units).
df_past_GHG2 = pd.read_csv("https://ourworldindata.org/grapher/total-ghg-emissions.csv?v=1&csvType=full&useColumnShortNames=true", storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
df_past_GHG2 = df_past_GHG2.loc[df_past_GHG2['Entity'] == 'World']
df_past_GHG2 = df_past_GHG2.drop(['Entity','Code'], axis=1)
df_past_GHG2 = df_past_GHG2.set_index('Year')
df_past_GHG2.annual_emissions_ghg_total_co2eq *= 10**(-9)
GHG_past_comb = df_past_GHG2.copy()

# Scenario workbook: keep only the rows used later and remove the helper
# column plus the 2019 column.
df_pred_raw = pd.read_excel("../data/AR6-SYR-LR-F2-5-Panel(a).xlsx", sheet_name="Data")
df_pred = df_pred_raw.drop([1,2,3,4,6,7,8,10,11,13,14,16])
df_pred = df_pred.drop(['Unnamed: 1',2019], axis=1)

# The first remaining row holds the past emissions; positions 2..4 of this
# slice overwrite the 2012-2014 values of the historical series
# (assumes the sliced columns start at 2010 -- TODO confirm against the sheet).
GHG_past_raw = df_pred.iloc[[0]].values[0,1:7]
GHG_past_comb.loc[[2012, 2013, 2014], 'annual_emissions_ghg_total_co2eq'] = [float(emi) for emi in GHG_past_raw[2:5]]

# Remove every historical year from 2015 onwards; those years are supplied by
# the scenario data. The labels are derived from the index itself because the
# CSV is fetched live: a hard-coded 2015..2023 range raises KeyError when the
# data ends earlier and leaves stray rows behind when it ends later.
del_years = GHG_past_comb.index[GHG_past_comb.index >= 2015]
GHG_past_comb = GHG_past_comb.drop(del_years)

# Fetch the sea level observations and reduce them to one mean value per year.
sea_level_source = "https://ourworldindata.org/grapher/sea-level.csv?v=1&csvType=full&useColumnShortNames=true"
sea_level_columns_to_remove = ['Entity','Code','sea_level_church_and_white_2011','sea_level_average']

df_sealevel = pd.read_csv(sea_level_source, storage_options = {'User-Agent': 'Our World In Data data fetch/1.0'})
df_sealevel = df_sealevel.drop(columns=sea_level_columns_to_remove).dropna()

# Parse the date strings so the calendar year can be used as the grouping key.
df_sealevel['Day'] = [np.datetime64(stamp) for stamp in df_sealevel['Day']]
observation_year = df_sealevel.Day.dt.year
df_sealevel = df_sealevel.groupby(observation_year).mean()

# The averaged 'Day' column carries no information once the year is the index.
df_sealevel = df_sealevel.drop(columns='Day', errors='ignore')

# Normalization statistics, kept so inputs can be scaled and model outputs
# mapped back to physical units. Each statistic is taken from the first
# column of the respective frame.
ghg_column_means = GHG_past_comb.mean()
ghg_column_stds = GHG_past_comb.std()
ghg_mean = ghg_column_means.iloc[0]
ghg_std = ghg_column_stds.iloc[0]

sealevel_column_means = df_sealevel.mean()
sealevel_column_stds = df_sealevel.std()
sealevel_mean = sealevel_column_means.iloc[0]
sealevel_std = sealevel_column_stds.iloc[0]

# Reshape the scenario table: years become the index, scenarios the columns.
scenario_short_names = {
    'Trend from implemented policies (Lowest bound of  red shading ) ': 'Trend from implemented policies',
    'Limit warming to 2°C (>67%) or return warming to 1.5°C (>50%) after a high overshoot, NDCs until 2030 (Median , dark navy blue line )': 'Limit warming to 2°C or return warming to 1.5°C after a high overshoot',
    'Limit warming to 2°C (>67%) (Median , dark green line )': 'Limit warming to 2°C',
    'Limit warming to 1.5°C (>50%) with no or limited overshoot ( Median ligh blue line ) ': 'Limit warming to 1.5°C',
}
df_pred = (
    df_pred
    .rename(columns={'spm_cat (year)': 'Year'})
    .set_index('Year')
    .transpose()
    .drop(columns='Past GHG emissions (Black line) ')
    .rename(columns=scenario_short_names)
    .drop(index=[2010, 2011, 2012, 2013, 2014])
)

# Fill the four years between consecutive scenario rows by linear
# interpolation; the rows are taken to be five years apart.
anchor_years = list(df_pred.index[:-1])
for anchor in anchor_years:
    span_change = df_pred.loc[anchor + 5] - df_pred.loc[anchor]
    for offset in range(1, 5):
        df_pred.loc[anchor + offset] = df_pred.loc[anchor] + offset*span_change/5

# Interpolated rows were appended at the end, so restore chronological order.
df_pred = df_pred.sort_index()

# Combine historical and future data for creating sequences.
# Result: `full_ghg_df` has one column per scenario, each holding the
# historical emissions followed by that scenario's yearly pathway.
future_scenarios = df_pred.columns
historical_ghg = GHG_past_comb['annual_emissions_ghg_total_co2eq']

# These anchors are identical for every scenario, so compute them once.
last_hist_year = GHG_past_comb.index.max()
last_hist_value = float(historical_ghg.loc[last_hist_year])
first_scenario_year = df_pred.index.min()

# Years strictly between the end of the history and the start of the
# scenarios. This is empty when the scenarios begin in the year right after
# the last historical year, in which case no bridging values are needed.
transition_years = np.arange(last_hist_year + 1, first_scenario_year)

scenario_columns = {}
for scenario in future_scenarios:
    first_scenario_value = float(df_pred[scenario].loc[first_scenario_year])

    # Linear bridge between the two anchor values (end points excluded).
    transition_values = np.linspace(last_hist_value, first_scenario_value, len(transition_years) + 2)[1:-1]
    transition_s = pd.Series(transition_values, index=transition_years, dtype=float)

    # Combine historical, transition, and future data; empty pieces are
    # skipped so they cannot influence the resulting dtype.
    pieces = [historical_ghg, transition_s, df_pred[scenario]]
    scenario_data = pd.concat([piece for piece in pieces if len(piece) > 0]).astype(float)

    # The scenario index originates from spreadsheet column labels and may be
    # of object dtype; integer years keep label slicing such as
    # .loc[2000:2014] well defined.
    scenario_data.index = scenario_data.index.astype(int)
    if not scenario_data.index.is_unique:
        raise ValueError(f"Historical and scenario years overlap for scenario '{scenario}'.")

    scenario_columns[scenario] = scenario_data.sort_index()

# Build the frame from the combined series so that its index is the union of
# all years. Assigning a Series to a column of a frame indexed only by the
# historical years would align to that index and silently discard every
# future year.
full_ghg_df = pd.DataFrame(scenario_columns)
full_ghg_df.index.name = 'Year'

# Normalize the full dataset with the historical statistics
full_ghg_norm = (full_ghg_df - ghg_mean) / ghg_std

### 3.2 Load Model and Generate Predictions

In [None]:
# Restore the trained network from disk
model = NeuralNetwork_2hl.load_model('../models/2hl_model.pkl')

timespan = 15
prediction_years = np.arange(2015, 2051)
results_denorm = {}

for scenario in future_scenarios:
    scenario_inputs = full_ghg_norm[scenario]

    # Initial input window: the `timespan` years ending in 2014, as one row.
    window = scenario_inputs.loc[2015-timespan:2014].values.reshape(1, -1)

    normalized_outputs = []
    for year in prediction_years:
        # One prediction per year from the current window
        normalized_outputs.append(model.predict(window)[0][0])

        # Slide the window: discard the oldest entry, append this year's emissions
        window = np.roll(window, -1, axis=1)
        window[0, -1] = scenario_inputs.loc[year]

    # Map the normalized outputs back to sea level units
    results_denorm[scenario] = (np.array(normalized_outputs) * sealevel_std) + sealevel_mean

print('Future predictions generated for all scenarios.')

### 3.3 Visualize and Summarize Results

In [None]:
# Figure: observed sea level together with one predicted curve per scenario
fig, ax = plt.subplots(figsize=(14, 8))

# Observed record
ax.plot(
    df_sealevel.index,
    df_sealevel['sea_level_nasa_global_average'],
    label='Historical Sea Level',
    color='black',
    linewidth=2,
)

# Model output for every emission scenario
for scenario_name, scenario_levels in results_denorm.items():
    ax.plot(prediction_years, scenario_levels, label=f'Prediction: {scenario_name}')

ax.set_title('Future Sea Level Rise Predictions Based on GHG Emission Scenarios')
ax.set_xlabel('Year')
ax.set_ylabel('Sea Level (mm)')
ax.legend()
ax.grid(True)
ax.set_xlim(1990, 2050)
plt.show()

# Create a summary table: one row per predicted year, one column per scenario
summary_data = {'Year': prediction_years}
for scenario, predictions in results_denorm.items():
    summary_data[scenario] = predictions

df_summary = pd.DataFrame(summary_data).set_index('Year')

# Years shown in the final table. `display_years` was previously never
# defined, so this cell failed with a NameError. Decade marks are used here;
# only years that are actually part of the prediction range are kept.
display_years = [year for year in (2020, 2030, 2040, 2050) if year in df_summary.index]

# Display the table for key years
# NOTE: DataFrame.to_markdown needs the optional `tabulate` package.
print("Predicted Sea Level Rise (mm) for Key Years:")
print(df_summary.loc[display_years].round(2).to_markdown())