# Get Past 1 Month Predictions

**This notebook fetches the past month's weather and electricity data, gathers model predictions for each day, and saves them to `predictions.json` under the `history` key.**


In [1]:
import requests
import pandas as pd
import json
import numpy as np

# Get Weather Data


Geographic coordinates (latitude, longitude) for Dhaka, Chittagong, and Patuakhali.


In [2]:
# Each location is stored as a (latitude, longitude) tuple.
dhk_coords = (23.7104, 90.40744)  # Dhaka
cht_coords = (22.3384, 91.83168)  # Chittagong
pat_coords = (22.36833, 90.3458)  # Patuakhali

In [3]:
from datetime import datetime, timedelta

# Take a single snapshot of "now" so both ends of the window are derived from
# the same instant, even if the cell happens to run across midnight.
today = datetime.today()

# Query window as "YYYY-MM-DD" strings: from 31 days ago through yesterday.
start_date = (today - timedelta(days=31)).strftime("%Y-%m-%d")
end_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")

In [4]:
# Sanity check: show the first day of the query window.
start_date

'2025-07-29'

In [5]:
def get_city_weather(coords, start_date, end_date):
    """Fetch daily weather for one location from the Open-Meteo historical-forecast API.

    Args:
        coords: ``(latitude, longitude)`` tuple.
        start_date: First day of the requested window, as ``YYYY-MM-DD``.
        end_date: Last day of the requested window, as ``YYYY-MM-DD``.

    Returns:
        The decoded JSON response, or ``None`` when the request fails (the
        error is printed in that case).
    """
    daily_fields = (
        "relative_humidity_2m_mean,temperature_2m_max,temperature_2m_min,"
        "temperature_2m_mean,rain_sum,sunshine_duration"
    )
    weather_api_url = (
        "https://historical-forecast-api.open-meteo.com/v1/forecast"
        f"?latitude={coords[0]}&longitude={coords[1]}"
        f"&start_date={start_date}&end_date={end_date}"
        f"&daily={daily_fields}"
    )

    try:
        # The request itself sits inside the try so that connection errors and
        # timeouts are reported the same way as HTTP error statuses. The
        # timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(weather_api_url, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(e)
        return None

In [6]:
# Trial fetch for Dhaka; the next cells use it to prototype the table layout.
dhaka_data = get_city_weather(dhk_coords, start_date, end_date)
# Uncomment to inspect the raw response:
# dhaka_data

In [7]:
# Column layout of the per-city weather table.
# "Date" is kept for joining and must be removed before rows reach the model.
# "loadshed_prev" / "generation_prev" are intentionally not part of this table.
weather_columns = [
    "Date",
    "Rainfall",
    "Sunshine",
    "Humidity",
    "Temp_mean",
    "Temp_max",
    "Temp_min",
    "Year",
    "Month",
]

# Start from an empty frame that only carries the column names.
city_weather_df = pd.DataFrame(columns=weather_columns)
city_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month


In [8]:
daily_data = dhaka_data["daily"]

# Dates arrive as "YYYY-MM-DD" strings: store them, parse them, then derive
# the calendar parts from the parsed column.
city_weather_df["Date"] = daily_data["time"]
city_weather_df["Date"] = pd.to_datetime(city_weather_df["Date"], format="%Y-%m-%d")
parsed_dates = city_weather_df["Date"]
city_weather_df["Year"] = parsed_dates.dt.year
city_weather_df["Month"] = parsed_dates.dt.month

# Copy every remaining API field into its table column.
field_for_column = {
    "Rainfall": "rain_sum",
    "Sunshine": "sunshine_duration",
    "Humidity": "relative_humidity_2m_mean",
    "Temp_mean": "temperature_2m_mean",
    "Temp_max": "temperature_2m_max",
    "Temp_min": "temperature_2m_min",
}
for column_name, api_field in field_for_column.items():
    city_weather_df[column_name] = daily_data[api_field]

# Rescale the sunshine duration by 3600 (seconds to hours).
seconds_per_hour = 60 * 60
city_weather_df["Sunshine"] = city_weather_df["Sunshine"] / seconds_per_hour

city_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month
0,2025-07-29,0.0,4.805611,90,27.6,30.7,26.1,2025,7
1,2025-07-30,0.1,5.285469,89,27.6,30.5,26.0,2025,7
2,2025-07-31,0.7,0.244497,93,27.1,29.4,26.0,2025,7
3,2025-08-01,0.0,4.141239,90,28.0,31.6,26.3,2025,8
4,2025-08-02,0.0,4.494117,89,28.4,31.9,26.7,2025,8
5,2025-08-03,0.0,5.759817,87,28.9,32.9,26.8,2025,8
6,2025-08-04,0.9,3.831717,92,28.0,29.8,26.3,2025,8
7,2025-08-05,0.0,1.178514,93,27.2,30.3,25.7,2025,8
8,2025-08-06,0.0,5.884031,88,27.8,30.9,25.9,2025,8
9,2025-08-07,0.0,2.812619,89,27.7,30.2,25.8,2025,8


In [9]:
def get_city_weather_df(daily_data):
    """Turn the ``daily`` section of a weather response into a tidy table.

    Args:
        daily_data: Mapping with the keys ``time``, ``rain_sum``,
            ``sunshine_duration``, ``relative_humidity_2m_mean``,
            ``temperature_2m_mean``, ``temperature_2m_max`` and
            ``temperature_2m_min``, each holding one value per day.

    Returns:
        A DataFrame with the columns Date, Rainfall, Sunshine, Humidity,
        Temp_mean, Temp_max, Temp_min, Year and Month (in that order).
        "Date" must be removed before the rows are passed to a model.
    """
    seconds_per_hour = 60 * 60

    # Load the raw per-day lists first, already in the final column order.
    raw_df = pd.DataFrame(
        {
            "Date": daily_data["time"],
            "Rainfall": daily_data["rain_sum"],
            "Sunshine": daily_data["sunshine_duration"],
            "Humidity": daily_data["relative_humidity_2m_mean"],
            "Temp_mean": daily_data["temperature_2m_mean"],
            "Temp_max": daily_data["temperature_2m_max"],
            "Temp_min": daily_data["temperature_2m_min"],
        }
    )

    parsed_dates = pd.to_datetime(raw_df["Date"], format="%Y-%m-%d")

    # Date and Sunshine are replaced in place; Year and Month are appended.
    return raw_df.assign(
        Date=parsed_dates,
        Sunshine=raw_df["Sunshine"] / seconds_per_hour,  # seconds -> hours
        Year=parsed_dates.dt.year,
        Month=parsed_dates.dt.month,
    )

## Get weather data of all locations


In [10]:
# Dhaka: fetch the raw response, then tabulate its "daily" section.
# get_city_weather returns None on a failed request, in which case the
# subscript on the next line raises a TypeError.
dhk_data = get_city_weather(dhk_coords, start_date, end_date)
dhk_weather_df = get_city_weather_df(dhk_data["daily"])
dhk_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month
0,2025-07-29,0.0,4.805611,90,27.6,30.7,26.1,2025,7
1,2025-07-30,0.1,5.285469,89,27.6,30.5,26.0,2025,7
2,2025-07-31,0.7,0.244497,93,27.1,29.4,26.0,2025,7
3,2025-08-01,0.0,4.141239,90,28.0,31.6,26.3,2025,8
4,2025-08-02,0.0,4.494117,89,28.4,31.9,26.7,2025,8
5,2025-08-03,0.0,5.759817,87,28.9,32.9,26.8,2025,8
6,2025-08-04,0.9,3.831717,92,28.0,29.8,26.3,2025,8
7,2025-08-05,0.0,1.178514,93,27.2,30.3,25.7,2025,8
8,2025-08-06,0.0,5.884031,88,27.8,30.9,25.9,2025,8
9,2025-08-07,0.0,2.812619,89,27.7,30.2,25.8,2025,8


In [11]:
# Chittagong: fetch the raw response, then tabulate its "daily" section.
# get_city_weather returns None on a failed request, in which case the
# subscript on the next line raises a TypeError.
cht_data = get_city_weather(cht_coords, start_date, end_date)
cht_weather_df = get_city_weather_df(cht_data["daily"])
cht_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month
0,2025-07-29,0.0,0.199089,90,27.3,27.8,26.7,2025,7
1,2025-07-30,2.4,0.0,89,27.3,28.0,26.5,2025,7
2,2025-07-31,0.3,0.115594,91,27.3,28.3,26.5,2025,7
3,2025-08-01,0.0,5.133656,91,27.6,28.2,26.6,2025,8
4,2025-08-02,0.0,0.943383,91,27.8,28.4,27.3,2025,8
5,2025-08-03,0.0,9.1762,89,27.9,28.6,26.9,2025,8
6,2025-08-04,0.0,8.194686,89,27.9,28.5,26.8,2025,8
7,2025-08-05,0.1,7.145914,87,28.2,29.1,27.0,2025,8
8,2025-08-06,0.6,6.204339,89,27.6,28.4,26.8,2025,8
9,2025-08-07,0.0,1.117558,89,27.5,28.0,26.8,2025,8


In [12]:
# Patuakhali: fetch the raw response, then tabulate its "daily" section.
# get_city_weather returns None on a failed request, in which case the
# subscript on the next line raises a TypeError.
pat_data = get_city_weather(pat_coords, start_date, end_date)
pat_weather_df = get_city_weather_df(pat_data["daily"])
pat_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month
0,2025-07-29,0.7,0.0,95,27.0,28.1,26.1,2025,7
1,2025-07-30,12.0,0.0,94,27.0,29.1,26.1,2025,7
2,2025-07-31,1.9,0.0,93,27.3,28.2,26.6,2025,7
3,2025-08-01,0.0,4.315425,91,28.6,31.5,26.9,2025,8
4,2025-08-02,0.0,6.525975,88,29.1,32.5,27.1,2025,8
5,2025-08-03,0.0,6.399994,86,29.2,31.7,27.5,2025,8
6,2025-08-04,0.0,10.695436,87,29.2,31.9,27.3,2025,8
7,2025-08-05,0.0,9.589525,85,29.4,32.6,27.0,2025,8
8,2025-08-06,0.6,2.230311,89,28.1,30.1,26.8,2025,8
9,2025-08-07,0.0,2.007642,90,27.7,30.4,26.1,2025,8


In [13]:
# Stack the three city tables on top of each other (three rows per day).
stacked_weather_df = pd.concat([dhk_weather_df, cht_weather_df, pat_weather_df])

# How each column is collapsed to a single value per day: most fields are
# averaged across the cities, while the temperature extremes keep the highest
# maximum and the lowest minimum seen in any city. Year and Month are the same
# for every row of a given date, so the first value is taken.
daily_aggregations = {
    "Rainfall": "mean",
    "Sunshine": "mean",
    "Humidity": "mean",
    "Temp_mean": "mean",
    "Temp_max": "max",
    "Temp_min": "min",
    "Year": "first",
    "Month": "first",
}

per_day = stacked_weather_df.groupby("Date").agg(daily_aggregations)
all_weather_df = per_day.reset_index()

all_weather_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month
0,2025-07-29,0.233333,1.668233,91.666667,27.3,30.7,26.1,2025,7
1,2025-07-30,4.833333,1.761823,90.666667,27.3,30.5,26.0,2025,7
2,2025-07-31,0.966667,0.120031,92.333333,27.233333,29.4,26.0,2025,7
3,2025-08-01,0.0,4.530106,90.666667,28.066667,31.6,26.3,2025,8
4,2025-08-02,0.0,3.987825,89.333333,28.433333,32.5,26.7,2025,8
5,2025-08-03,0.0,7.112004,87.333333,28.666667,32.9,26.8,2025,8
6,2025-08-04,0.3,7.573946,89.333333,28.366667,31.9,26.3,2025,8
7,2025-08-05,0.033333,5.971318,88.333333,28.266667,32.6,25.7,2025,8
8,2025-08-06,0.4,4.772894,88.666667,27.833333,30.9,25.9,2025,8
9,2025-08-07,0.0,1.979273,89.333333,27.633333,30.4,25.8,2025,8


# Get Last Month's Electricity Data


In [14]:
# from datetime import datetime, timedelta

# yesterday_date = datetime.today() - timedelta(days=1)

# yesterday_date = yesterday_date.strftime("%d-%m-%Y")
# yesterday_date

In [15]:
import io

# One DataFrame per successfully scraped page of the PGCB generation report.
all_dfs = []
# Pages that could not be fetched or parsed, reported after the loop.
failed_pages = []

# NOTE(review): pages 1-16 presumably cover slightly more than the 31-day
# window; confirm, since a shorter listing would silently truncate the history.
start_page = 1
end_page = 16

for page in range(start_page, end_page + 1):
    print(f"Fetching page {page}")

    try:
        url = f"https://erp.pgcb.gov.bd/w/generations/view_generations?page={page}"
        # NOTE(review): verify=False disables TLS certificate verification and
        # is kept only to preserve existing behavior; re-enable verification if
        # the server's certificate chain validates. The timeout keeps a stalled
        # connection from hanging the notebook.
        res = requests.get(url, verify=False, timeout=60)
        res.raise_for_status()

        # Wrap the HTML in StringIO so read_html treats it as a document.
        tables = pd.read_html(io.StringIO(res.text))

        # The first table on the page is taken as the generation report.
        all_dfs.append(tables[0])
    except requests.exceptions.RequestException as e:
        print(e)
        failed_pages.append(page)
    except ValueError as e:
        # pd.read_html raises ValueError when the page contains no table.
        print(f"Page {page}: {e}")
        failed_pages.append(page)

if failed_pages:
    print(f"Skipped pages (data for these is missing): {failed_pages}")

Fetching page 1




Fetching page 2




Fetching page 3
Fetching page 4




Fetching page 5
Fetching page 6




Fetching page 7
Fetching page 8




Fetching page 9
Fetching page 10




Fetching page 11
Fetching page 12




Fetching page 13
Fetching page 14




Fetching page 15
Fetching page 16




In [16]:
# Need one extra day so that final day's shifted values are not Nan
# Begin the power window one day before the weather window: the earliest row
# has no predecessor, so its shifted "previous day" values would be NaN.
weather_window_start = datetime.strptime(start_date, "%Y-%m-%d")
power_start_date = (weather_window_start - timedelta(days=1)).strftime("%Y-%m-%d")

In [26]:
combined_pages = pd.concat(all_dfs, ignore_index=True)
# The scraped tables carry a multi-level header; discard the outermost level.
combined_pages.columns = combined_pages.columns.droplevel(0)

# Convert MW to GW
mw_per_gw = 1000

# NOTE(review): the groupby adds up every row that shares a date rather than
# averaging them; confirm this matches how the models' targets were built.
power_df = (
    combined_pages[["Date", "Generation(MW)", "Demand(MW)", "Loadshed"]]
    .rename(columns={"Generation(MW)": "Generation", "Demand(MW)": "Demand"})
    .assign(
        Generation=lambda frame: frame["Generation"] / mw_per_gw,
        Loadshed=lambda frame: frame["Loadshed"] / mw_per_gw,
        Demand=lambda frame: frame["Demand"] / mw_per_gw,
    )
    .groupby("Date")
    .sum()
    .reset_index()
)

In [27]:
# Parse the day-first "DD-MM-YYYY" date strings once. The result is already a
# datetime column, so no format/re-parse round trip is needed afterwards.
power_df["Date"] = pd.to_datetime(power_df["Date"], format="%d-%m-%Y")

# Keep only the days from power_start_date through end_date (both inclusive);
# pandas compares the "YYYY-MM-DD" bound strings against the datetime column.
in_window = (power_df["Date"] >= power_start_date) & (power_df["Date"] <= end_date)
power_df = power_df[in_window].sort_values(by="Date", ascending=True)

power_df

Unnamed: 0,Date,Generation,Demand,Loadshed
28,2025-07-28,340.058,340.557,0.477
30,2025-07-29,337.235,338.068,0.795
32,2025-07-30,343.849,344.65,0.765
33,2025-07-31,333.455,333.943,0.466
0,2025-08-01,306.883,306.939,0.054
1,2025-08-02,333.336,333.424,0.084
2,2025-08-03,342.033,342.17,0.129
3,2025-08-04,333.394,333.394,0.0
4,2025-08-05,294.485,295.198,0.68
5,2025-08-06,326.322,327.296,0.93


In [29]:
# Previous-day features: every row receives the values of the row before it.
# This relies on the rows being sorted by date; gaps between dates are not
# checked here.
power_df = power_df.assign(
    generation_prev=power_df["Generation"].shift(1),
    loadshed_prev=power_df["Loadshed"].shift(1),
)

# The first row has no predecessor (its shifted values are NaN), so drop it
# and renumber the remaining rows from zero.
power_df = power_df.iloc[1:].reset_index(drop=True)

power_df

Unnamed: 0,Date,Generation,Demand,Loadshed,generation_prev,loadshed_prev
0,2025-07-30,343.849,344.65,0.765,337.235,0.795
1,2025-07-31,333.455,333.943,0.466,343.849,0.765
2,2025-08-01,306.883,306.939,0.054,333.455,0.466
3,2025-08-02,333.336,333.424,0.084,306.883,0.054
4,2025-08-03,342.033,342.17,0.129,333.336,0.084
5,2025-08-04,333.394,333.394,0.0,342.033,0.129
6,2025-08-05,294.485,295.198,0.68,333.394,0.0
7,2025-08-06,326.322,327.296,0.93,294.485,0.68
8,2025-08-07,337.36,337.867,0.485,326.322,0.93
9,2025-08-08,296.527,296.527,0.0,337.36,0.485


In [30]:
# Join weather and power rows on "Date". This is the default inner join, so
# only days present in both tables are kept; the result is a new frame and
# all_weather_df itself is left untouched.
input_df = all_weather_df.merge(power_df, on="Date")
input_df

Unnamed: 0,Date,Rainfall,Sunshine,Humidity,Temp_mean,Temp_max,Temp_min,Year,Month,Generation,Demand,Loadshed,generation_prev,loadshed_prev
0,2025-07-30,4.833333,1.761823,90.666667,27.3,30.5,26.0,2025,7,343.849,344.65,0.765,337.235,0.795
1,2025-07-31,0.966667,0.120031,92.333333,27.233333,29.4,26.0,2025,7,333.455,333.943,0.466,343.849,0.765
2,2025-08-01,0.0,4.530106,90.666667,28.066667,31.6,26.3,2025,8,306.883,306.939,0.054,333.455,0.466
3,2025-08-02,0.0,3.987825,89.333333,28.433333,32.5,26.7,2025,8,333.336,333.424,0.084,306.883,0.054
4,2025-08-03,0.0,7.112004,87.333333,28.666667,32.9,26.8,2025,8,342.033,342.17,0.129,333.336,0.084
5,2025-08-04,0.3,7.573946,89.333333,28.366667,31.9,26.3,2025,8,333.394,333.394,0.0,342.033,0.129
6,2025-08-05,0.033333,5.971318,88.333333,28.266667,32.6,25.7,2025,8,294.485,295.198,0.68,333.394,0.0
7,2025-08-06,0.4,4.772894,88.666667,27.833333,30.9,25.9,2025,8,326.322,327.296,0.93,294.485,0.68
8,2025-08-07,0.0,1.979273,89.333333,27.633333,30.4,25.8,2025,8,337.36,337.867,0.485,326.322,0.93
9,2025-08-08,0.0,1.446719,90.333333,26.833333,28.5,25.3,2025,8,296.527,296.527,0.0,337.36,0.485


# Load Model


In [31]:
import lightgbm as lgb

# Paths of the saved LightGBM model files, one per prediction target.
generation_model_path = "models/generation_lgbm_model.txt"
loadshed_model_path = "models/loadshed_lgbm_model.txt"

# Load each booster from its model file.
generation_model = lgb.Booster(model_file=generation_model_path)
loadshed_model = lgb.Booster(model_file=loadshed_model_path)

In [None]:
def _features_for(model, features):
    """Return ``features`` with its columns in the order ``model`` was trained on.

    Prediction on a DataFrame is positional, so a column order that differs
    from training silently feeds values into the wrong features. When the
    booster's stored feature names are all present in ``features`` they are
    used to select and order the columns; otherwise the frame is passed
    through unchanged and a note is printed.
    """
    trained_names = model.feature_name()
    if set(trained_names) <= set(features.columns):
        return features[trained_names]
    print(
        "Model feature names do not match the input columns; "
        "falling back to positional column order."
    )
    return features


# Columns that are not model inputs: the join key and the observed targets.
non_feature_columns = ["Date", "Generation", "Demand", "Loadshed"]

# NOTE(review): the saved output of this notebook shows predictions far from
# the labels (e.g. generation ~181 vs ~344). A feature-order or unit mismatch
# with training is one possible cause; confirm against the training notebook.
if input_df.empty:
    # Nothing to predict; avoid handing an empty frame to the boosters.
    history = []
else:
    feature_df = input_df.drop(columns=non_feature_columns)

    # Predict all days in one batch instead of building a one-row frame per day.
    generation_preds = generation_model.predict(
        _features_for(generation_model, feature_df)
    )
    loadshed_preds = loadshed_model.predict(
        _features_for(loadshed_model, feature_df)
    )

    # One record per day: the date, both predictions, and the observed values.
    # float() turns the numpy scalars into plain Python floats for JSON output.
    history = [
        {
            "date": date_text,
            "prediction": {
                "generation": float(generation_pred),
                "loadshed": float(loadshed_pred),
            },
            "label": {
                "generation": float(generation_label),
                "loadshed": float(loadshed_label),
            },
        }
        for date_text, generation_pred, loadshed_pred, generation_label, loadshed_label in zip(
            input_df["Date"].astype(str),
            generation_preds,
            loadshed_preds,
            input_df["Generation"],
            input_df["Loadshed"],
        )
    ]

In [37]:
# Inspect the assembled records before they are written to disk.
history

[{'date': '2025-07-30',
  'prediction': {'generation': 181.33953357660496,
   'loadshed': 1.1985977559148289},
  'label': {'generation': 343.849, 'loadshed': 0.765}},
 {'date': '2025-07-31',
  'prediction': {'generation': 182.03576950013633,
   'loadshed': 0.7035800571368294},
  'label': {'generation': 333.455, 'loadshed': 0.46599999999999997}},
 {'date': '2025-08-01',
  'prediction': {'generation': 181.94742097634003,
   'loadshed': 1.2822423997400394},
  'label': {'generation': 306.883, 'loadshed': 0.054000000000000006}},
 {'date': '2025-08-02',
  'prediction': {'generation': 183.05844272529734,
   'loadshed': 4.9359197879883645},
  'label': {'generation': 333.336, 'loadshed': 0.08399999999999999}},
 {'date': '2025-08-03',
  'prediction': {'generation': 185.958510173318,
   'loadshed': 4.930823123351596},
  'label': {'generation': 342.033, 'loadshed': 0.129}},
 {'date': '2025-08-04',
  'prediction': {'generation': 185.58985460544213,
   'loadshed': 1.1503643340536647},
  'label': {'g

## Write to predictions.json file


In [38]:
from pathlib import Path
import json

pred_file = Path("predictions.json")

# Start from the existing file so that its other keys are preserved; only
# "history" is replaced below. A file that exists but holds invalid JSON
# raises here rather than being silently overwritten.
if pred_file.exists():
    with open(pred_file, "r", encoding="utf-8") as f:
        preds = json.load(f)
else:
    preds = {"forecast": [], "history": []}

preds["history"] = history

try:
    # Serialize before touching the file: opening it with "w" truncates it, so
    # a failure in the middle of json.dump would destroy the existing content.
    payload = json.dumps(preds, indent=2)

    # Write to a sibling temp file and swap it into place, so that readers
    # never observe a half-written predictions.json.
    tmp_file = pred_file.with_name(pred_file.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(pred_file)

    print("Successfully saved predictions")
except (OSError, TypeError, ValueError) as e:
    # OSError: file system problems; TypeError/ValueError: unserializable data.
    print(f"Error occurred: {e}")

Successfully saved predictions
