In [None]:
!pip install fastapi uvicorn pyngrok nest_asyncio streamlit

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownload

In [None]:
import matplotlib.pyplot as plt
import nest_asyncio
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import uvicorn
from fastapi import FastAPI
from pyngrok import ngrok
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.losses import Huber
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# ASGI application object: the /predict route is registered on it via @app.get,
# and it is the object handed to uvicorn.run() in the last cell.
app = FastAPI()

In [None]:
import pandas as pd
import requests

# Both downloads hit the same Our World in Data grapher export with the same
# query string; only the file suffix differs, so the query is defined once.
OWID_GRAPHER_URL = "https://ourworldindata.org/grapher/annual-co2-emissions-per-country"
OWID_QUERY = (
    "v=1&csvType=filtered&useColumnShortNames=true&country="
    "IND~AFG~OWID_AFR~ALB~DZA~LUX~IRL~SGP~QAT~BMU~ARE~CHE~NOR~USA~DNK~NLD~BRN~HKG~AUT~ISL~SWE~DEU~BEL~BHR~MAC~AUS~SAU~KWT~CAN~FIN~MLT~GBR~FRA~KOR~NZL~CYP~ISR~ITA~JPN~ABW~CZE~SVN~ESP~LTU~EST~POL~PRT~HUN~GUY~OMN~BHS~HRV~PAN~SVK~TUR~LVA~ROU~GRC~KNA~MYS~RUS~ARG~KAZ~BGR~CHL~SYC~URY~TTO~CUW~MUS~ATG~MNE~CRI~MDV~SRB~MEX~LBY~DOM~BLR~CHN~THA~GEO~COL~BWA~IRN~BRB~BRA~AZE~ZAF~EGY~LKA~UKR~TUN~SLV~IRQ~PHL~UZB~BGD~PAK~PSE~MRT~KHM~MMR~TJK~NPL~ZWE"
)

# Fetch the data.
df = pd.read_csv(
    f"{OWID_GRAPHER_URL}.csv?{OWID_QUERY}",
    storage_options={'User-Agent': 'Our World In Data data fetch/1.0'},
)

# Fetch the metadata.
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of failing later inside .json().
metadata_response = requests.get(f"{OWID_GRAPHER_URL}.metadata.json?{OWID_QUERY}", timeout=30)
metadata_response.raise_for_status()
metadata = metadata_response.json()

In [None]:
# Data preprocessing
# Reduce the raw frame to the three columns used downstream. The emissions
# column is picked positionally (last column of the download) and renamed.
# NOTE: this cell overwrites `df` and re-fits `scaler`; re-running it without
# re-running the fetch cell fits the scaler on already-scaled values, after
# which scaler.inverse_transform no longer returns the original units.
df.columns = df.columns.str.strip()
df = (
    df.rename(columns={'Entity': 'Country', df.columns[-1]: 'CO2_Emissions'})
    [['Country', 'Year', 'CO2_Emissions']]
    .dropna()
)

# Normalize data
# One scaler is fitted across all countries and years; predict_emissions relies
# on this same object to map forecasts back with inverse_transform.
scaler = StandardScaler()
df['CO2_Emissions'] = scaler.fit_transform(df[['CO2_Emissions']]).ravel()

In [None]:
# Feature engineering: sliding windows over a 1-D series (20 steps by default)
def create_sequences(data, seq_length=20):
    """Split a series into overlapping input windows and their next-step targets.

    Args:
        data: Indexable, sliceable sequence of values (e.g. a 1-D NumPy array).
        seq_length: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``data[k:k + seq_length]``
        and ``y[k]`` is the value right after that window. Both arrays are empty
        when ``data`` has ``seq_length`` items or fewer.
    """
    window_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(window_count)]
    targets = [data[start + seq_length] for start in range(window_count)]
    return np.array(windows), np.array(targets)

# Window length shared by the model's input shape and the prediction endpoint.
seq_length = 20

In [None]:
# Build optimized LSTM model
def _bilstm_stage(units, return_sequences, **wrapper_kwargs):
    """Return one stage of the stack: Bidirectional LSTM, BatchNormalization, Dropout(0.25)."""
    return [
        Bidirectional(
            LSTM(units, activation='relu', return_sequences=return_sequences),
            **wrapper_kwargs,
        ),
        BatchNormalization(),
        Dropout(0.25),
    ]

# Three bidirectional LSTM stages of shrinking width. Only the last stage
# collapses the sequence, so the Dense head emits a single value per window.
lstm_model = Sequential(
    _bilstm_stage(512, True, input_shape=(seq_length, 1))
    + _bilstm_stage(256, True)
    + _bilstm_stage(128, False)
    + [Dense(1)]
)

optimizer = Adam(learning_rate=0.0005)
lstm_model.compile(optimizer=optimizer, loss=Huber(delta=1.0))

In [None]:
# Calculate evaluation metrics
# NOTE(review): y_test and y_pred_lstm are not defined by any cell visible in
# this notebook — confirm where they come from before relying on a clean re-run.
lstm_mae, lstm_mse, lstm_r2 = (
    metric(y_test, y_pred_lstm)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Emit the same four-line report in a single print call.
print(
    "LSTM Model:",
    f"Mean Absolute Error: {lstm_mae!s}",
    f"Mean Squared Error: {lstm_mse!s}",
    f"R-squared Score: {lstm_r2!s}",
    sep="\n",
)

LSTM Model:
Mean Absolute Error: 0.6907363756073053
Mean Squared Error: 0.6299393398449882
R-squared Score: -0.4836220194386138


In [None]:
@app.get("/predict")
def predict_emissions(country: str, baseline_year: int, target_year: int):
    """Forecast a country's CO2 emissions and derive carbon credits against a baseline.

    The module-level ``lstm_model`` is fitted on the country's scaled emission
    history and then rolled forward one step at a time, starting from the year
    after the last observation, until ``target_year``.

    Args:
        country: Value matched against the ``Country`` column of ``df``.
        baseline_year: Reference year for carbon credits. If it is an observed
            year the actual emissions are used; if it lies beyond the history
            the forecast for that year is used.
        target_year: Last year to forecast. Must be later than the last observed
            year and not earlier than ``baseline_year``.

    Returns:
        A list of records with ``Year``, ``Predicted_CO2_Emissions`` and
        ``Carbon_Credits`` for each forecast year from ``baseline_year`` (or the
        first forecast year, whichever is later) through ``target_year``, or a
        ``{"error": ...}`` dict when the request cannot be served.
    """
    if target_year < baseline_year:
        return {"error": "Target year must not be earlier than baseline year"}

    country_data = df[df['Country'] == country].groupby('Year')['CO2_Emissions'].sum().reset_index()
    country_data = country_data.sort_values(by="Year")

    # At least two windows are required: one window alone leaves the 80/20
    # split with an empty training set, and zero windows cannot be reshaped.
    if len(country_data) < seq_length + 2:
        return {"error": "Not enough data for prediction"}

    observed_years = country_data['Year'].to_numpy()
    last_observed_year = int(observed_years[-1])

    if target_year <= last_observed_year:
        return {"error": "Target year must be after the last observed year"}
    # Validate the baseline before the expensive training step.
    if baseline_year <= last_observed_year and baseline_year not in observed_years:
        return {"error": "Baseline year out of range"}

    scaled_series = country_data['CO2_Emissions'].to_numpy()
    X, y = create_sequences(scaled_series, seq_length)
    X = X.reshape((X.shape[0], X.shape[1], 1))

    # Chronological split (shuffle=False): only the older 80% is used for fitting.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

    # fit() receives no validation data, so both callbacks monitor training loss.
    # The patience/factor values are starting points and may need tuning.
    training_callbacks = [
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=10, min_lr=1e-6),
        EarlyStopping(monitor='loss', patience=20, restore_best_weights=True),
    ]
    # NOTE(review): lstm_model is shared module state, so every request keeps
    # training the same weights (and optimizer state) left by earlier requests.
    lstm_model.fit(X_train, y_train, epochs=500, batch_size=32, verbose=0, callbacks=training_callbacks)

    # Roll the window forward from the year after the last observation.
    # Assumes the observed years are consecutive — TODO confirm for all countries.
    steps = target_year - last_observed_year
    window = scaled_series[-seq_length:].astype(float).reshape(1, seq_length, 1)
    scaled_forecast = []
    for _ in range(steps):
        next_value = float(lstm_model.predict(window, verbose=0)[0, 0])
        scaled_forecast.append(next_value)
        window = np.roll(window, -1, axis=1)
        window[0, -1, 0] = next_value

    forecast_years = np.arange(last_observed_year + 1, target_year + 1)
    forecast = scaler.inverse_transform(np.array(scaled_forecast).reshape(-1, 1)).ravel()
    actual = scaler.inverse_transform(country_data[['CO2_Emissions']]).ravel()

    if baseline_year > last_observed_year:
        baseline_emissions = forecast[baseline_year - last_observed_year - 1]
    else:
        baseline_emissions = actual[observed_years == baseline_year][0]

    # Report only the forecast years from the baseline onwards.
    reported = forecast_years >= baseline_year
    future_years_df = pd.DataFrame({
        'Year': forecast_years[reported],
        'Predicted_CO2_Emissions': forecast[reported],
    })
    # Credits are the reduction relative to the baseline, floored at zero.
    future_years_df['Carbon_Credits'] = (
        (baseline_emissions - future_years_df['Predicted_CO2_Emissions']) / 1e6
    ).clip(lower=0)

    # Plot emissions prediction
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.lineplot(data=country_data, x='Year', y=actual, label='Actual Emissions', ax=ax)
    sns.lineplot(data=future_years_df, x='Year', y='Predicted_CO2_Emissions', label='Predicted Emissions', linestyle='dashed', ax=ax)
    ax.set_xlabel('Year')
    ax.set_ylabel('CO2 Emissions')
    ax.set_title(f'CO2 Emissions Prediction for {country}')
    ax.legend()
    plt.show()
    # Release the figure; otherwise one figure per request accumulates in memory.
    plt.close(fig)

    return future_years_df.to_dict(orient='records')

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# The ngrok authtoken is a credential and must not be stored in the notebook.
# Read it from the NGROK_AUTHTOKEN environment variable, or prompt for it.
# Any token that was ever committed here should be treated as compromised and rotated.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

if __name__ == "__main__":
    # Allow uvicorn to start its event loop inside the notebook's running loop.
    nest_asyncio.apply()
    public_url = ngrok.connect(8000)
    print("Public URL:", public_url)
    try:
        # Blocks until the server is interrupted.
        uvicorn.run(app, host="0.0.0.0", port=8000)
    finally:
        # Close the tunnel so the public URL does not outlive the server.
        ngrok.disconnect(public_url.public_url)

Public URL: NgrokTunnel: "https://51ba-34-106-217-217.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [272]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [272]
