# Energy Production forecast webapp

Built with the Our World in Data energy dataset: https://github.com/owid/energy-data?tab=readme-ov-file


In [10]:
pip install gradio



In [11]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import gradio as gr
import plotly.graph_objects as go

In [12]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# List the project folder on the mounted drive to check which dataset files are present.
!ls /content/drive/MyDrive/'Colab Notebooks'/'Time series'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
'Draft - Energy production & consumption forecast.ipynb'   owid-energy-data.csv
'Energy forecast webapp.ipynb'				   owid-energy-data.json
'Energy production & consumption forecast.ipynb'


In [13]:
import json

# Folder and file name of the energy dataset (JSON) on the mounted Google Drive.
# `path` ends with "/" because get_json() joins the two by plain string concatenation.
path = "/content/drive/MyDrive/Colab Notebooks/Time series/"
file = "owid-energy-data.json"

def get_json(path, file):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        path: Directory prefix. It is concatenated with ``file`` as-is, so it
            must already end with a path separator.
        file: Name of the JSON file inside ``path``.

    Returns:
        The Python object produced by ``json.load`` for that file.
    """
    json_location = path + file
    with open(json_location, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

def get_country_list(df, max_null_ratio=0.2):
    """Return the sorted regions whose fossil-fuel production data is mostly complete.

    A region is kept when the fraction of missing cells across its
    ``coal_production``, ``oil_production`` and ``gas_production`` columns is
    at most ``max_null_ratio``.

    Args:
        df: DataFrame with a ``region`` column and the three production columns.
        max_null_ratio: Largest accepted fraction of missing cells (0.0 - 1.0).

    Returns:
        Alphabetically sorted list of region names that pass the filter.
    """
    production_cols = ['coal_production', 'oil_production', 'gas_production']

    # 1.0 where a value is missing, 0.0 otherwise; averaging this mask gives a
    # true ratio. (Dividing a null *count* by the number of columns does not.)
    null_mask = df[production_cols].isnull().astype(float)

    # Group positionally by region; rows whose region is NaN are dropped by groupby.
    per_column_ratio = null_mask.groupby(df['region'].to_numpy()).mean()

    # Every column has the same number of rows within a region, so the mean of
    # the per-column ratios equals the overall fraction of missing cells.
    null_ratio = per_column_ratio.mean(axis=1)

    return sorted(null_ratio[null_ratio <= max_null_ratio].index)

def json_to_df(data):
    """Flatten the nested per-region JSON structure into a single DataFrame.

    Args:
        data: Mapping ``{region_name: {"data": [ {column: value, ...}, ... ]}}``.

    Returns:
        A DataFrame with one row per entry, a ``region`` column placed first,
        followed by the remaining columns in order of first appearance. An
        empty input yields an empty DataFrame with only the ``region`` column.
    """
    # Use the `data` argument that was passed in (no re-reading of the file via
    # globals), and copy each entry so the caller's structure is not mutated.
    records = [
        {**entry, "region": region}
        for region, region_data in data.items()
        for entry in region_data["data"]
    ]
    if not records:
        return pd.DataFrame(columns=['region'])

    df = pd.DataFrame(records)
    ordered_columns = ['region'] + [col for col in df.columns if col != 'region']
    return df[ordered_columns]

def format_df(df, country, column):
    """Extract one region's production series for one energy type, indexed by year.

    Args:
        df: DataFrame with ``region``, ``year`` and ``<column>_production`` columns.
        country: Value of ``region`` to keep.
        column: Energy type prefix; the series read is ``f'{column}_production'``.

    Returns:
        A single-column DataFrame (``<column>_production``) indexed by ``year``.
        Gaps are back-filled, then any remaining trailing gaps are forward-filled.
    """
    target = f'{column}_production'
    country_rows = df.loc[df['region'] == country, ['year', target]]

    # `fillna(method=...)` is deprecated in pandas; use the dedicated methods.
    # Back-fill alone leaves NaNs at the end of the series, which would then
    # flow into the scaler and the model, so forward-fill what remains.
    filled = country_rows.bfill().ffill()
    return filled.set_index('year')

In [14]:
def scale_split_data(dfPredict):
    """Min-max scale the series and cut it into train and test portions.

    Note that the scaler is fitted on the whole series, i.e. including the
    rows that end up in the test portion.

    Args:
        dfPredict: DataFrame whose ``.values`` are the observations to scale.

    Returns:
        Tuple ``(scaler, scaled_data, train_data, test_data)`` where the train
        portion is the first 80% of the scaled rows and the test portion is
        the remainder.
    """
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(dfPredict.values)

    # Row order is preserved: the leading 80% trains, the tail is held out.
    split_at = int(len(scaled_data) * 0.8)
    train_data = scaled_data[:split_at]
    test_data = scaled_data[split_at:]
    return scaler, scaled_data, train_data, test_data

def create_sequences(data, seq_length):
    """Build sliding-window inputs and next-step targets from column 0 of ``data``.

    Args:
        data: 2-D array; only its first column is read.
        seq_length: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays. ``sequences[k]`` holds
        rows ``k .. k + seq_length - 1`` and ``targets[k]`` is the row right
        after that window. Both arrays are empty when ``data`` has no more
        than ``seq_length`` rows.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length, 0] for start in range(n_windows)]
    next_values = [data[start + seq_length, 0] for start in range(n_windows)]
    return np.array(windows), np.array(next_values)

def train_model(train, test, seq_length):
    """Fit a single-layer LSTM regressor on sliding windows of the training data.

    Args:
        train: 2-D scaled array used to build the training windows.
        test: 2-D scaled array used to build the validation windows.
        seq_length: Number of past observations per input window.

    Returns:
        The fitted Keras ``Sequential`` model.

    Raises:
        ValueError: If ``train`` is too short to produce a single window.
    """
    seq_length = int(seq_length)

    X_train, y_train = create_sequences(train, seq_length)
    if len(X_train) == 0:
        raise ValueError(
            f"Not enough training data ({len(train)} points) for a sequence length of {seq_length}."
        )
    # LSTM layers expect (samples, timesteps, features).
    X_train = X_train.reshape(-1, seq_length, 1)

    # A short test slice may yield no window at all; in that case train without
    # validation instead of failing while reshaping an empty array.
    X_test, y_test = create_sequences(test, seq_length)
    validation_data = None
    if len(X_test) > 0:
        validation_data = (X_test.reshape(-1, seq_length, 1), y_test)

    model = Sequential([
        LSTM(50, activation='relu', input_shape=(seq_length, 1)),
        Dense(1),
    ])

    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=validation_data)
    return model


def predict(model, scaler, scaled_data, seq_length, future_steps):
    """Forecast ``future_steps`` values by feeding each prediction back as input.

    Args:
        model: Object whose ``predict`` accepts an array of shape
            ``(1, seq_length, 1)`` and returns a 2-D array; element ``[0, 0]``
            is used as the next value.
        scaler: Object whose ``inverse_transform`` maps scaled values back to
            the original scale.
        scaled_data: Scaled history; its last ``seq_length`` values seed the
            first prediction.
        seq_length: Size of the input window.
        future_steps: Number of values to forecast.

    Returns:
        1-D array with the forecasts on the original scale.
    """
    history = scaled_data.flatten()
    window = np.array(history[-seq_length:])
    scaled_forecasts = []

    for _step in range(future_steps):
        next_value = model.predict(window.reshape(1, seq_length, 1))[0, 0]
        scaled_forecasts.append(next_value)
        # Slide the window: drop the oldest value, append the newest prediction.
        window = np.append(window[1:], [next_value])

    forecast_column = np.array(scaled_forecasts).reshape(-1, 1)
    return scaler.inverse_transform(forecast_column).flatten()

def plot_predictions(df_predict, df_future, column):
    """Build a Plotly figure showing the historical series and its forecast.

    Args:
        df_predict: Historical data; its index is the x-axis and its
            ``f"{column}_production"`` column the y-axis.
        df_future: Forecast data; its index is the x-axis and its
            ``predictions`` column the y-axis.
        column: Energy type prefix used to pick the historical column.

    Returns:
        A ``plotly.graph_objects.Figure`` with two traces.
    """
    historical_trace = go.Scatter(
        x=df_predict.index,
        y=df_predict[f"{column}_production"],
        mode='lines',
        name="Historical Data",
        line={"color": "blue"},
    )
    forecast_trace = go.Scatter(
        x=df_future.index,
        y=df_future['predictions'],
        mode='lines+markers',
        name="Future Predictions",
        line={"color": "red", "dash": "dashdot"},
    )

    fig = go.Figure(data=[historical_trace, forecast_trace])
    fig.update_layout(
        title="Energy Production Forecast (TWh)",
        xaxis_title="Year",
        yaxis_title="Production (TWh)",
        legend={"x": 0, "y": 1},
        template="plotly_white",
    )
    return fig

In [15]:
def forecast(country, column, seq_length, future_steps, df):
    """Train a model for one region and energy type, then forecast and plot it.

    Args:
        country: Region to forecast (value of the ``region`` column).
        column: Energy type prefix, e.g. ``"oil"`` for ``oil_production``.
        seq_length: Number of past years per input window.
        future_steps: Number of years to forecast.
        df: Full dataset as returned by ``json_to_df``.

    Returns:
        Tuple ``(predictions, fig)``: the forecast values as a list and the
        Plotly figure combining history and forecast.

    Raises:
        ValueError: If no country or energy type is given, or if the selected
            series contains no data.
    """
    # UI widgets pass None until the user picks a value; fail early with a
    # clear message instead of an obscure error further down.
    if not country or not column:
        raise ValueError("Select both a country and an energy type before generating a forecast.")

    # Slider values may arrive as floats; slicing and range() need integers.
    seq_length = int(seq_length)
    future_steps = int(future_steps)

    df_predict = format_df(df, country, column)
    if df_predict.empty or df_predict[f'{column}_production'].isna().all():
        raise ValueError(f"No {column} production data available for {country}.")

    scaler, scaled_data, train_data, test_data = scale_split_data(df_predict)
    model = train_model(train_data, test_data, seq_length)
    predictions = predict(model, scaler, scaled_data, seq_length, future_steps)

    # Forecast years continue right after the last year present in the history.
    last_year = df_predict.index[-1]
    next_years = [last_year + offset for offset in range(1, future_steps + 1)]
    df_future = pd.DataFrame(data=predictions, index=next_years, columns=['predictions'])

    fig = plot_predictions(df_predict, df_future, column)

    return predictions.tolist(), fig

In [18]:
# Load the dataset once at start-up; every forecast request reuses `df`.
path = "/content/drive/MyDrive/Colab Notebooks/Time series/"
file = "owid-energy-data.json"
data = get_json(path, file)
df = json_to_df(data)
country_list = get_country_list(df)


def run_forecast(selected_country, energy_type, window_size, horizon):
    """Button callback: forward the widget values to ``forecast`` with the loaded dataset."""
    return forecast(selected_country, energy_type, window_size, horizon, df)


with gr.Blocks() as demo:
    gr.Markdown("## 📈 LSTM Time Series Forecaster")

    gr.Markdown("""
    This webapp uses a LSTM-based Time Series Forecaster to predict the future values for the production of different energy sources.
    This tool allows you to predict future energy production based on historical data using a **Long Short-Term Memory (LSTM) neural network**.

    ### 🔹 How to Use:
    1. **Select a Country** from the dropdown list.
    2. **Choose an Energy Type** (Oil, Gas, or Coal).
    3. **Set the Sequence Length** (how many past years to consider for prediction).
    4. **Select Future Steps** (how many years ahead to predict).
    5. Click **"Generate Forecast"** to view predictions and a graph of historical + forecasted values.

    📊 The output includes:
    - A **list of predicted values** for the selected future years.
    - A **graph** displaying both historical data and future predictions.
    """)

    # Input widgets.
    with gr.Column():
        country = gr.Dropdown(choices=country_list, label="Country")
        column = gr.Dropdown(choices=["oil", "gas", "coal"], label="Type of energy")
        seq_length = gr.Slider(minimum=1, maximum=5, step=1, value=3, label="Sequence Length")
        future_steps = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Future Steps")
        submit_button = gr.Button("Generate Forecast")

    # Output widgets.
    with gr.Column():
        predicted_values = gr.Textbox(label="Predicted Values")
        graph_output = gr.Plot(label="Historical & Forecasted Data")

    submit_button.click(
        fn=run_forecast,
        inputs=[country, column, seq_length, future_steps],
        outputs=[predicted_values, graph_output],
    )

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2ac04bb3e9cfa42de5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


