In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import cuda
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, sampler
import torch.nn.functional as F
from torch_utils import AverageMeter
import math
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from numpy import inf
from merge_datasets import MergeDatasets
from merge_features import MergeFeatures
from sklearn.preprocessing import StandardScaler
from joblib import load

from sklearn.model_selection import train_test_split
from glob import glob
from torchvision import transforms
from torchvision import datasets
from torchvision import models
from torch import optim, cuda, Tensor

import numpy as np
import os
from PIL import Image
from timeit import default_timer as timer
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [4]:
# define multi-layer perceptron deep neural network model
# network includes two linear layers, one Relu layer, and one dropout layer
class MLP(nn.Module):
    def __init__(self, input_d, hidden_d, output_d):
        super().__init__()

        self.linear1 = nn.Linear(input_d, hidden_d)
        self.linear2 = nn.Linear(hidden_d, output_d)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):

        x = self.linear1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.linear2(x)

        return x

In [5]:
input_d = 13
hidden_d = 64
output_d = 1

model = MLP(input_d, hidden_d, output_d)
model.load_state_dict(torch.load('model_state.pth'))
model.eval()

<All keys matched successfully>

MLP(
  (linear1): Linear(in_features=13, out_features=64, bias=True)
  (linear2): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [6]:
co2_df = pd.read_csv('../predictions/co2_concentrations_predictions_2024_2124.csv')
forest_df = pd.read_csv('../predictions/forest_area_predictions_2024_2124.csv')
land_df = pd.read_csv('../predictions/land_use_predictions_2024_2124.csv')
temp_df = pd.read_csv('../predictions/temperature_anomaly_predictions_2024_2124.csv')
lpi_df = pd.read_csv('../datasets/global-living-planet-index.csv')

merged_features = MergeFeatures(co2_df, forest_df, land_df, temp_df)
merged_df = merged_features.merge()
merged_df = pd.merge(merged_df, lpi_df, on=['Year', 'Entity'], how='outer')
merged_df.drop(columns=['Code', 'Upper CI', 'Lower CI'])



Unnamed: 0,Year,Predicted CO2,Entity,Forest area,Land use: Built-up area,Land use: Grazingland,Land use: Cropland,Temperature anomaly,Living Planet Index
0,2024,416.005756,Africa,1.763424e+06,143490.498432,3.024114e+07,6.572303e+06,1.174406,
1,2024,416.005756,Africa,1.763424e+06,143490.498432,3.024114e+07,6.572303e+06,0.482284,
2,2024,416.005756,Africa,1.763424e+06,143490.498432,3.024114e+07,6.572303e+06,0.412299,
3,2024,416.005756,Africa,1.763424e+06,143490.498432,3.024114e+07,6.572303e+06,0.113485,
4,2024,416.005756,Africa,1.763424e+06,143490.498432,3.024114e+07,6.572303e+06,0.353911,
...,...,...,...,...,...,...,...,...,...
48672540,2014,,World,,,,,,31.669020
48672541,2015,,World,,,,,,31.627533
48672542,2016,,World,,,,,,31.145410
48672543,2017,,World,,,,,,30.885270


In [7]:
X_train_continuous = merged_df[['Year', 'Predicted CO2', 'Forest area', 'Land use: Built-up area', 'Land use: Grazingland', 'Land use: Cropland', 'Temperature anomaly']].values
entities_train = pd.get_dummies(merged_df['Entity'])
X_train = np.hstack((X_train_continuous, entities_train.values))

# Create and fit the scalers based on the training or representative data
scaler_X = StandardScaler().fit(X_train)

y_train = merged_df['Living Planet Index'].values.reshape(-1, 1)
scaler_y = StandardScaler().fit(y_train)

In [9]:
X_pred_continuous = merged_df[['Year', 'Predicted CO2', 'Forest area', 'Land use: Built-up area', 'Land use: Grazingland', 'Land use: Cropland', 'Temperature anomaly']].values
entities_pred = pd.get_dummies(merged_df['Entity'])
unique_entities = ['Africa', 'Asia and Pacific', 'Europe and Central Asia', 'Latin America and the Caribbean', 'North America', 'World']
entities_pred = pd.get_dummies(merged_df['Entity'], columns=unique_entities)

for entity in unique_entities:
    if entity not in entities_pred.columns:
        entities_pred[entity] = 0

entities_pred = entities_pred[unique_entities]
entities_pred = entities_pred.reindex(columns=unique_entities, fill_value=0)
print(f"Columns after reindexing: {entities_pred.shape[1]} (should match the number of unique entities from training)")
# Merge continuous and categorical features
X_pred = np.hstack([X_pred_continuous, entities_pred.values])

print("Features used for training:", scaler_X.n_features_in_)
print("Features attempting to scale:", X_pred.shape[1])


# # Ensure the number of features in X_pred matches what scaler_X expects
# X_pred_scaled = scaler_X.transform(X_pred)

# X_pred_tensor = torch.tensor(X_pred_scaled, dtype=torch.float32)

# model.eval()
# with torch.no_grad():
#     lpi_pred_scaled = model(X_pred_tensor).numpy()

# lpi_predictions = scaler_y.inverse_transform(lpi_pred_scaled.reshape(-1, 1)).flatten()
# lpi_predictions_adjusted = np.maximum(lpi_predictions, 0)

# predictions_df = pd.DataFrame({
#     'Year': merged_df['Year'],
#     'Entity': merged_df['Entity'],
#     'Predicted_LPI': lpi_predictions_adjusted
# })

# predictions_df.to_csv('predicted_lpi_2024_2124.csv', index=False)

Columns after reindexing: 6 (should match the number of unique entities from training)
Features used for training: 14
Features attempting to scale: 13
