# Srilanka Dengue Outbreak Forecasting using Darts

## Environment Setup

In [1]:
!pip install darts
!pip install dask[dataframe]
!pip install statsmodels



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Imports

In [3]:
import os
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from darts import TimeSeries

%matplotlib inline

## 2. Load Dataset

In [4]:
df = pd.read_csv("/content/drive/MyDrive/Datasets/Sri_Lanka_Dengue_Cases_and_Weather_Data_updated.csv")

In [5]:
df.head()

Unnamed: 0,District,Number_of_Cases,Week_Start_Date,Year,Month,Week,Week_End_Date,Avg Max Temp (°C),Avg Min Temp (°C),Avg Apparent Max Temp (°C),Avg Apparent Min Temp (°C),Total Precipitation (mm),Avg Wind Speed (km/h),Max Wind Gusts (km/h),Avg Daylight Duration (hours)
0,Ampara,0,2007-01-01,2007,1,1,2007-01-08,26.9375,21.8625,28.85,24.1,8.0,18.2,44.3,11.714583
1,Ampara,0,2007-01-08,2007,1,2,2007-01-15,26.35,23.325,30.2625,26.7,86.1,17.025,43.6,11.733333
2,Ampara,0,2007-01-15,2007,1,3,2007-01-22,27.2125,23.1625,29.575,25.9625,3.1,20.1875,48.2,11.754167
3,Ampara,0,2007-01-22,2007,1,4,2007-01-29,26.725,23.0375,29.95,26.1125,53.6,17.2,42.5,11.783333
4,Ampara,0,2007-01-29,2007,1,5,2007-02-05,27.3625,22.9625,30.75,26.125,8.2,17.2625,40.0,11.8125


In [6]:
len(df)

23250

In [7]:
districts = df['District'].unique()
districts

array(['Ampara', 'Anuradhapura', 'Badulla', 'Batticaloa', 'Colombo',
       'Galle', 'Gampaha', 'Hambantota', 'Jaffna', 'Kalutara', 'Kandy',
       'Kegalle', 'Kilinochchi', 'Kurunegala', 'Mannar', 'Matale',
       'Matara', 'Monaragala', 'Mullaitivu', 'NuwaraEliya', 'Polonnaruwa',
       'Puttalam', 'Ratnapura', 'Trincomalee', 'Vavuniya'], dtype=object)

## 3. Small Visualization

In [8]:
# Check Duplicate values
duplicates = df[['District', 'Week_Start_Date', 'Week_End_Date']].duplicated().any()
if duplicates:
    print("There are duplicate values.")
else:
    print("No duplicates found.")

No duplicates found.


## 4. Minor pre-processing
1. Only taking single timestamp for each row. Either `Week_Start_Date` or `Week_Start_Date`. We gonna take `Week_End_Date`.
2. Convert the `Week_End_Date` to Python Datetime format.
3. Eliminate un-used features such as `Week_Start_Date`, `Month`, `Year`, and `Week`

In [9]:
df['Week_End_Date'] = pd.to_datetime(df['Week_End_Date'])

In [10]:
df.head()

Unnamed: 0,District,Number_of_Cases,Week_Start_Date,Year,Month,Week,Week_End_Date,Avg Max Temp (°C),Avg Min Temp (°C),Avg Apparent Max Temp (°C),Avg Apparent Min Temp (°C),Total Precipitation (mm),Avg Wind Speed (km/h),Max Wind Gusts (km/h),Avg Daylight Duration (hours)
0,Ampara,0,2007-01-01,2007,1,1,2007-01-08,26.9375,21.8625,28.85,24.1,8.0,18.2,44.3,11.714583
1,Ampara,0,2007-01-08,2007,1,2,2007-01-15,26.35,23.325,30.2625,26.7,86.1,17.025,43.6,11.733333
2,Ampara,0,2007-01-15,2007,1,3,2007-01-22,27.2125,23.1625,29.575,25.9625,3.1,20.1875,48.2,11.754167
3,Ampara,0,2007-01-22,2007,1,4,2007-01-29,26.725,23.0375,29.95,26.1125,53.6,17.2,42.5,11.783333
4,Ampara,0,2007-01-29,2007,1,5,2007-02-05,27.3625,22.9625,30.75,26.125,8.2,17.2625,40.0,11.8125


** The trend of cases of all 25 districts

In [11]:
import plotly.express as px

# List of all districts
districts = df['District'].unique()

for district in districts:
    # Filter data for the current district
    df_district = df[df['District'] == district]

    # Create an interactive line plot using Plotly without markers
    fig = px.line(
        df_district,
        x="Week_End_Date",
        y="Number_of_Cases",
        title=f"Number of Cases Over Time - {district}",
        labels={'Week_End_Date': 'Week End Date', 'Number_of_Cases': 'Number of Cases'}
    )

    # Customize the layout for a bigger and clearer plot
    fig.update_layout(
        width=1000,  # Width of the figure
        height=600,  # Height of the figure
        title_font_size=18,
        xaxis_title_font_size=14,
        yaxis_title_font_size=14,
        hovermode="x"  # Show hover data for the x-axis
    )

    # Show the interactive plot with zoom and pan features
    fig.show()


## 5. Modelling
List of regression models.
- RegressionModel
- LinearRegressionModel
- RandomForest
- LightGBMModel
- XGBModel
- CatBoostModel



In [12]:
from darts.models import RegressionModel, RandomForest, LinearRegressionModel, XGBModel, LightGBMModel, CatBoostModel
from sklearn.linear_model import Ridge
from darts.metrics import rmse, mae, smape, rmsle
import plotly.graph_objects as go

In [13]:
!pip install "u8darts[all]"



In [24]:
reg = RegressionModel(
    lags=[-1, -2, -3, -13, -26, -52],
    model = Ridge(),
    output_chunk_length=12,  # Number of time steps predicted at once (per chunk) by the internal model.
)
lr  = LinearRegressionModel(
    lags=[-1, -2, -3, -13, -26, -52],
    output_chunk_length=12, # Number of time steps predicted at once (per chunk) by the internal model.
    random_state=42
)

rf = RandomForest(
    lags=[-1, -2, -3, -13, -26, -52, -104],
    output_chunk_length=12, # Number of time steps predicted at once (per chunk) by the internal model.
    random_state=42
)

lgbm = LightGBMModel(
    lags=[-1, -2, -3, -13, -26, -52, -104],
    output_chunk_length=12, # Number of time steps predicted at once (per chunk) by the internal model.
    random_state=42
)
xgb = XGBModel(
    lags=[-1, -2, -3, -13, -26, -52, -104],
    output_chunk_length=12, # Number of time steps predicted at once (per chunk) by the internal model.
    random_state=42
)
cat = CatBoostModel(
    lags=[-1, -2, -3, -13, -26, -52, -104],
    output_chunk_length=12, # Number of time steps predicted at once (per chunk) by the internal model.
    random_state=42
)

In [25]:
models = [reg, lr, rf, lgbm, xgb, cat]

In [26]:
def train_and_forecast(train_series, val_series, model, district_name):
    model.fit(train_series)

    # Forecast for the next 12 weeks
    forecast = model.predict(n=len(val_series))

    # Calculate metrics
    val_values = val_series.values()
    forecast_values = forecast.values()

    # Ensure the forecast and validation values are the same length
    assert len(val_values) == len(forecast_values), "Forecast and validation series must have the same length for metrics calculation."

    # Compute metrics
    error_rmse = rmse(val_series, forecast)
    error_mae = mae(val_series, forecast)
    error_smape = smape(val_series, forecast)
    error_rmsle = rmsle(val_series, forecast)

    errors = {
        "RMSE": error_rmse,
        "MAE": error_mae,
        "SMAPE": error_smape,
        "RMSLE": error_rmsle
    }
    results = [forecast, errors]
    return results

In [27]:
def main(data, models):
    districts = data['District'].unique()
    final_output = []
    for district in districts:
      # Filter data for the specified district
      district_data = data[data['District'] == district]

      # Ensure the 'Week_End_Date' column is in datetime format
      district_data['Week_End_Date'] = pd.to_datetime(district_data['Week_End_Date'])

      # Prepare the data for ARIMA
      series = TimeSeries.from_dataframe(district_data, time_col='Week_End_Date', value_cols=['Number_of_Cases'])

      # Split series
      # Set aside the last 12 weeks as a validation series
      train_data, val_data = series[:-12], series[-12:]

      print(f"Training length: {len(train_data)}")
      print(f"Validation length: {len(val_data)}")
      district_output = []
      for m in models:
        result = train_and_forecast(train_data, val_data, m, district)
        district_output.append(result)

      final_output.append(district_output)

    return final_output


In [28]:
results = main(df, models)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 494
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 24.158157
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 494
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 24.176837
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 494
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 345
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 12.996264
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 345
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 13.004981
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 345
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 449
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 18.938979
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 449
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 18.941469
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 449
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 662
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 38.607721
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 662
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 38.630137
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 662
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1784
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 217.386052
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1784
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 217.493151
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1784
[LightGBM] [Info] Number of data points in the train set: 803, number of used f



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 638
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 37.540473
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 638
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 37.574097
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 638
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1584
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 135.127024
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1584
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 135.174346
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1584
[LightGBM] [Info] Number of data points in the train set: 803, number of used f



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 438
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 17.784558
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 438
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 17.779577
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 438
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 712
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 52.343711
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 712
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 52.352428
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 712
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur


invalid value encountered in log


invalid value encountered in log



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 836
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 59.100872
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 836
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 59.130760
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 836
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 944
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 79.201743
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 944
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 79.256538
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 944
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 623
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 40.774595
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 623
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 40.768369
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 623
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000513 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 148
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 2.651308
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 148
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 2.657534
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 148
[LightGBM] [Info] Number of data points in the train set: 803, number of used features



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 48.769614
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 48.799502
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 212
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 4.265255
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 212
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 4.276463
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 212
[LightGBM] [Info] Number of data points in the train set: 803, number of used features


invalid value encountered in log



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 434
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 19.046077
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 434
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 19.026152
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 434
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur


invalid value encountered in log



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 555
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 28.409714
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 555
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 28.394770
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 555
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000047 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 303
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 10.569116
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 303
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 10.580324
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 303
[LightGBM] [Info] Nu



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 1.953923
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 1.955168
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Numb



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 196
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 5.316314
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 196
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 5.315068
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 196
[LightGBM] [Info] Numb



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 221
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 7.052304
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 221
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 7.058531
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 221
[LightGBM] [Info] Number of data points in the train set: 803, number of used features



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 587
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 33.678705
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 587
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 33.687422
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000069 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 587
[LightGBM] [Info] Nu



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 720
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 43.942715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 720
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 44.057285
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 720
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 425
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 21.382316
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 425
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 21.387298
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 425
[LightGBM] [Info] Number of data points in the train set: 803, number of used featur



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Training length: 918
Validation length: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 252
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 6.640100
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 252
[LightGBM] [Info] Number of data points in the train set: 803, number of used features: 7
[LightGBM] [Info] Start training from score 6.641345
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 252
[LightGBM] [Info] Number of data points in the train set: 803, number of used features

## 6. Result

In [29]:
# Initialize a list to store metrics
metrics_list = []
districts = df['District'].unique()

# Iterate over results and extract metrics
for district_index, district_results in enumerate(results):
    for model_index, (forecast, metrics) in enumerate(district_results):
        metrics_list.append({
            'District': f'{districts[district_index]}',
            'Model': type(models[model_index]).__name__,
            'RMSE': metrics['RMSE'],
            'MAE': metrics['MAE'],
            'SMAPE': metrics['SMAPE'],
            'RMSLE': metrics['RMSLE']
        })

# Create a DataFrame from the metrics list
df_metrics = pd.DataFrame(metrics_list)
df_metrics.set_index(['District', 'Model'], inplace=True)

In [None]:
from IPython.display import display
from IPython.display import HTML

def display_district_metrics(df_metrics):
    districts = df_metrics.index.get_level_values('District').unique()

    for district in districts:
        print(f"Metrics for {district}:")
        district_df = df_metrics.loc[district]
        styled_df = district_df.style \
         .highlight_min(color="lightgreen", axis=0)

        display(styled_df)

# Example usage
display_district_metrics(df_metrics)



Metrics for Ampara:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,11.155262,10.166828,129.742382,1.673854
LinearRegressionModel,11.155248,10.166815,129.742377,1.673853
RandomForest,10.741804,10.373333,135.461435,1.673553
LightGBMModel,12.17343,9.685199,121.442563,1.576265
XGBModel,13.456584,12.035868,135.494692,1.758306
CatBoostModel,15.806149,14.233781,140.432449,1.878595


Metrics for Anuradhapura:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,6.018147,5.723136,76.557051,0.782487
LinearRegressionModel,6.018145,5.723134,76.557035,0.782487
RandomForest,6.098905,5.7875,76.760592,0.785005
LightGBMModel,6.864115,5.579514,71.10992,0.787804
XGBModel,6.335084,5.600231,74.605468,0.774099
CatBoostModel,6.692566,5.760256,73.88218,0.806778


Metrics for Badulla:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,6.874704,5.931438,52.268033,0.609943
LinearRegressionModel,6.874698,5.931434,52.268015,0.609942
RandomForest,7.170343,6.515,55.864869,0.620495
LightGBMModel,6.285598,5.442504,49.540655,0.553622
XGBModel,7.046039,6.012258,53.156885,0.617106
CatBoostModel,4.202087,3.361645,34.324905,0.397317


Metrics for Batticaloa:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,21.068897,17.272434,75.407743,1.062149
LinearRegressionModel,21.068905,17.272441,75.407757,1.06215
RandomForest,20.306487,15.685,71.215602,1.035913
LightGBMModel,20.173823,15.136849,69.178518,0.982208
XGBModel,32.428957,22.935401,76.774283,1.248689
CatBoostModel,30.871951,25.973268,90.112733,1.278938


Metrics for Colombo:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,44.495426,40.028072,17.320017,0.194852
LinearRegressionModel,44.495428,40.028074,17.320018,0.194852
RandomForest,30.596024,25.619167,11.422328,0.13512
LightGBMModel,54.719429,49.484218,24.274492,0.281507
XGBModel,79.74946,68.518133,28.914219,0.3357
CatBoostModel,46.369629,40.040224,16.606445,0.190576


Metrics for Galle:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,13.402245,11.64801,42.744085,0.482429
LinearRegressionModel,13.402262,11.648028,42.744171,0.48243
RandomForest,15.284616,10.933333,28.149315,0.390998
LightGBMModel,16.750664,13.651336,52.136473,0.742588
XGBModel,18.984678,15.713537,42.245152,0.494191
CatBoostModel,11.541911,9.735203,31.551145,0.361956


Metrics for Gampaha:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,34.070832,30.892122,25.375797,0.288422
LinearRegressionModel,34.070826,30.892115,25.375792,0.288422
RandomForest,19.72418,16.119167,14.102224,0.17415
LightGBMModel,78.875311,60.586682,39.669668,0.509937
XGBModel,52.506839,36.060448,28.238674,0.376874
CatBoostModel,35.276634,26.722812,21.361174,0.263167


Metrics for Hambantota:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,3.734744,3.317126,32.416472,0.342278
LinearRegressionModel,3.734743,3.317125,32.416462,0.342278
RandomForest,7.761666,6.916667,55.637511,0.597429
LightGBMModel,4.817713,4.203478,37.902057,0.396789
XGBModel,5.215127,4.295068,39.158781,0.41389
CatBoostModel,4.21735,3.706644,34.598946,0.363959


Metrics for Jaffna:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,31.227895,27.924966,111.302041,1.351492
LinearRegressionModel,31.227891,27.92496,111.302025,1.351492
RandomForest,5.867675,4.9725,45.012661,0.541653
LightGBMModel,11.819747,8.648344,77.989874,0.564921
XGBModel,9.720397,7.654922,79.477097,0.929228
CatBoostModel,15.371426,11.329096,121.941614,1.238842


Metrics for Kalutara:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,10.23276,9.026111,21.762116,0.243781
LinearRegressionModel,10.232764,9.026115,21.762126,0.243781
RandomForest,7.930108,6.841667,16.801483,0.193231
LightGBMModel,14.967694,10.373745,22.669574,0.328891
XGBModel,20.382811,16.070277,36.779619,0.452634
CatBoostModel,8.280853,6.920549,16.781283,0.203776


Metrics for Kandy:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,16.195586,13.880567,19.594931,0.228203
LinearRegressionModel,16.195573,13.880556,19.594918,0.228203
RandomForest,22.256019,20.963333,27.92576,0.302008
LightGBMModel,30.073048,27.053701,34.054269,0.379677
XGBModel,56.12254,44.024324,46.131851,0.593635
CatBoostModel,42.950498,39.585059,45.269015,0.498827


Metrics for Kegalle:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,12.587744,10.984253,42.14985,0.496314
LinearRegressionModel,12.587747,10.984256,42.149858,0.496314
RandomForest,11.713838,10.046667,39.36264,0.469812
LightGBMModel,23.864817,20.400464,62.726489,0.756989
XGBModel,19.701776,18.363166,62.245862,0.66671
CatBoostModel,26.120705,23.204165,67.828444,0.807735


Metrics for Kilinochchi:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,3.214951,2.771449,126.725022,1.222843
LinearRegressionModel,3.215065,2.771539,126.725219,1.222864
RandomForest,2.692158,2.386667,127.441521,1.1074
LightGBMModel,2.07968,1.886678,129.702362,0.899805
XGBModel,2.502756,2.017674,129.130899,1.011386
CatBoostModel,2.440634,1.945375,119.366417,1.026069


Metrics for Kurunegala:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,16.574288,15.256736,59.16655,0.726961
LinearRegressionModel,16.574283,15.256731,59.166537,0.726961
RandomForest,14.731631,12.856667,52.780556,0.680847
LightGBMModel,16.511793,13.317106,51.521659,0.69513
XGBModel,13.46835,10.506868,43.502396,0.630125
CatBoostModel,19.811717,18.432615,66.453422,0.80638


Metrics for Mannar:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,4.745783,3.725278,68.639467,0.690777
LinearRegressionModel,4.745796,3.725284,68.63955,0.690779
RandomForest,4.518147,3.606667,68.747114,0.629908
LightGBMModel,5.648317,4.443881,94.327306,0.967823
XGBModel,5.676298,4.53786,105.793938,1.111274
CatBoostModel,5.694821,4.424326,93.705646,1.274672


Metrics for Matale:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,12.049943,7.290248,34.959698,0.508137
LinearRegressionModel,12.049951,7.290249,34.959694,0.508137
RandomForest,10.914865,7.505833,37.731242,0.473031
LightGBMModel,14.310404,11.420739,66.316763,0.772549
XGBModel,16.371367,10.89991,68.292582,0.814754
CatBoostModel,13.754708,9.443185,52.123978,0.656922


Metrics for Matara:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,9.455898,7.4656,32.007473,0.438637
LinearRegressionModel,9.455899,7.465601,32.007475,0.438637
RandomForest,9.505558,6.891667,29.403621,0.451534
LightGBMModel,17.673794,14.267345,48.395338,0.668326
XGBModel,13.317216,11.425872,43.943162,0.566471
CatBoostModel,13.569231,10.991015,41.602878,0.570654


Metrics for Monaragala:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,16.329703,13.081334,95.983083,1.106963
LinearRegressionModel,16.329749,13.081388,95.98386,1.106971
RandomForest,11.540117,8.483333,50.010217,0.57422
LightGBMModel,10.710429,7.541735,42.946352,0.50789
XGBModel,10.712424,7.159084,43.403861,0.542137
CatBoostModel,11.276619,8.460095,49.89139,0.557951


Metrics for Mullaitivu:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,1.26773,1.15145,102.082539,0.634846
LinearRegressionModel,1.267726,1.151443,102.082784,0.63484
RandomForest,5.584219,4.293028,131.696765,1.30774
LightGBMModel,4.098581,3.308081,128.261051,1.137013
XGBModel,5.661476,4.476077,139.96151,1.290945
CatBoostModel,4.527033,3.466085,126.586513,1.180963


Metrics for NuwaraEliya:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,2.198803,1.729589,51.519792,0.568262
LinearRegressionModel,2.198797,1.729584,51.519799,0.56826
RandomForest,4.084886,3.509167,75.418577,0.872484
LightGBMModel,3.325226,2.772227,64.64345,0.766272
XGBModel,3.548718,2.643753,58.022157,0.799734
CatBoostModel,2.788711,2.363676,60.580864,0.691356


Metrics for Polonnaruwa:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,3.773305,3.430435,60.025465,0.616034
LinearRegressionModel,3.773315,3.430442,60.025521,0.616036
RandomForest,6.682985,5.6275,75.417569,0.869394
LightGBMModel,6.94683,5.787379,75.992923,0.863023
XGBModel,8.1814,6.261157,75.781475,0.875899
CatBoostModel,8.41139,6.938629,82.480247,0.958843


Metrics for Puttalam:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,5.303756,4.731115,28.111983,0.307744
LinearRegressionModel,5.303751,4.731112,28.111965,0.307744
RandomForest,6.166489,4.893333,29.143664,0.343932
LightGBMModel,8.918317,7.570195,41.68275,0.477876
XGBModel,9.018227,7.075513,48.025943,0.611834
CatBoostModel,8.86603,7.326373,42.585179,0.500475


Metrics for Ratnapura:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,13.431205,11.404443,25.437712,0.283472
LinearRegressionModel,13.431217,11.40445,25.437729,0.283472
RandomForest,18.834385,14.576667,29.238905,0.390366
LightGBMModel,20.275871,16.108393,33.178576,0.419012
XGBModel,20.247841,16.144883,31.92342,0.408427
CatBoostModel,26.433626,21.769032,39.160014,0.494028


Metrics for Trincomalee:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,13.006898,12.128539,119.624338,1.441113
LinearRegressionModel,13.006909,12.12855,119.624381,1.441114
RandomForest,16.47627,14.170833,120.756973,1.542185
LightGBMModel,20.017653,17.09182,125.742729,1.678313
XGBModel,17.378768,13.840279,112.863823,1.555937
CatBoostModel,19.252336,16.699407,125.054361,1.679708


Metrics for Vavuniya:


Unnamed: 0_level_0,RMSE,MAE,SMAPE,RMSLE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RegressionModel,4.383647,3.955142,150.36392,1.512663
LinearRegressionModel,4.383639,3.955136,150.363974,1.512662
RandomForest,3.830438,3.269167,148.36029,1.368019
LightGBMModel,3.780097,3.082462,144.6728,1.327971
XGBModel,2.739586,2.153837,145.043979,1.103131
CatBoostModel,3.588796,3.03519,148.434679,1.309723


## 7. Conclusion
To assess each models we need to see the error metrics of each model and then we have to decide which model outperformed.

Note: `We also need to do backtesting to make sure model is performing as intended`