In [1]:
import numpy as np
import pandas as pd
from scipy.special import boxcox, inv_boxcox
from scipy.stats import boxcox_normmax
import matplotlib.pyplot as plt
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MAE, DistributionLoss
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
df_up = pd.read_csv("../data/5_yr_data/UP5_years.csv")
df_up['datetime'] = pd.to_datetime(df_up['date'])
df_up.drop(columns=["Unnamed: 0"], axis=1, inplace=True)
df_up.sort_values(by="datetime", ascending=True, inplace=True)
df_up.head()

Unnamed: 0,district_name,market_name,commodity,variety,grade,min_rs_quintal,max_rs_quintal,modal_rs_quintal,date,year,month,day_of_month,datetime
714742,Bijnor,Bijnaur,Onion,Red,FAQ,2950.0,3040.0,3000.0,01 Jan 2018,2018,Jan,1,2018-01-01
433767,Mau(Maunathbhanjan),Kopaganj,Wheat,Dara,FAQ,1525.0,1625.0,1575.0,01 Jan 2018,2018,Jan,1,2018-01-01
439485,Gorakhpur,Gorakhpur,Wheat,Dara,FAQ,1560.0,1590.0,1575.0,01 Jan 2018,2018,Jan,1,2018-01-01
83163,Shahjahanpur,Tilhar,Potato,Potato,FAQ,490.0,510.0,500.0,01 Jan 2018,2018,Jan,1,2018-01-01
730321,Bulandshahar,Divai,Onion,Red,FAQ,2800.0,3000.0,2900.0,01 Jan 2018,2018,Jan,1,2018-01-01


In [3]:
TRAIN_LEN = int(0.8 * len(df_up))
up_train, up_test = (df_up[:TRAIN_LEN],df_up[TRAIN_LEN:])
up_train.set_index('datetime', inplace=True)
up_train.sort_index(inplace=True)
up_test.set_index('datetime', inplace=True)
up_test.sort_index(inplace=True)

In [4]:
for item in df_up.commodity.unique():
    print(item)

Onion
Wheat
Potato
Rice


In [5]:
# commodity = "Rice"
# df_train_commodity = up_train[up_train['commodity'] == commodity]
# df_train_commodity_dt = df_train_commodity.groupby("datetime").agg({"modal_rs_quintal":"mean"})
# df_train_commodity_dt

In [6]:
# df_test_commodity = up_test[up_test['commodity'] == commodity]
# df_test_commodity_dt = df_test_commodity.groupby("datetime").agg({"modal_rs_quintal":"mean"})
# df_test_commodity_dt.head()

In [7]:
# df_train_commodity_dt.reset_index(inplace=True)
# df_train_commodity_dt['unique_id'] = commodity
# df_train_commodity_dt.rename(columns={"datetime" : "ds", "modal_rs_quintal" : "y"}, inplace=True)
# df_train_commodity_dt.head()

In [8]:
# df_test_commodity_dt.reset_index(inplace=True)
# df_test_commodity_dt['unique_id'] = commodity
# df_test_commodity_dt.rename(columns={"datetime" : "ds", "modal_rs_quintal" : "y"}, inplace=True)
# df_test_commodity_dt.head()

In [9]:
# df_train_commodity_dt.shape

In [10]:
# df_test_commodity_dt.shape

In [11]:
# Required imports
import pandas as pd
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, DeepAR, TFT, LSTM, RNN, GRU
from neuralforecast.losses.pytorch import DistributionLoss, MAE, MSE, MAPE, SMAPE
import torch

from darts import TimeSeries
from darts.models import (
    NBEATSModel,
    NHiTSModel,
    BlockRNNModel,
    TCNModel,
    TiDEModel,
    TransformerModel,
    RandomForest,
    LightGBMModel,
    XGBModel,
    Prophet,
)


def create_nixtla_models(input_size=120, output_size=368):
    """
    Create a collection of Nixtla models with correct parameters
    """
    # Common parameters
    common_params = {
        "input_size": input_size,
        "h": output_size,
        "max_steps": 100,
        "val_check_steps": 16,
        "early_stop_patience_steps": 4,
    }

    # N-BEATS model
    nbeats = NBEATS(
        **common_params,
        loss=DistributionLoss(distribution="Normal", level=[80, 90]),
        stack_types=["trend", "seasonality"],
        num_blocks=[3, 3],
        num_layers=[4, 4],
        layer_widths=[256, 2048],
        expansion_coefficient_lengths=[5, 7],
        trend_polynomial_degree=2,
    )

    # N-HiTS model
    nhits = NHITS(
        **common_params,
        loss=DistributionLoss(distribution="Normal", level=[80, 90]),
        num_stacks=3,  # Default is 3
        hidden_size=128,  # Units per hidden layer
        n_freq_downsample=[168, 24, 1],  # Pooling factor per stack
        pooling_kernel_sizes=[168, 24, 1],
        interpretation=False,
        activation="ReLU",
    )

    # DeepAR model
    deepar = DeepAR(
        **common_params,
        loss=DistributionLoss(distribution="StudentT", level=[80, 90]),
        hidden_size=128,
        rnn_layers=2,
        dropout=0.1,
        cell_type="LSTM",
    )

    # Temporal Fusion Transformer
    tft = TFT(
        **common_params,
        loss=DistributionLoss(distribution="Normal", level=[80, 90]),
        hidden_size=128,  # Hidden state size
        lstm_hidden_size=64,  # Size of LSTM hidden states
        num_attention_heads=4,  # Number of attention heads
        dropout=0.1,  # Dropout rate
        hidden_continuous_size=64,  # Size for processing continuous variables
    )

    # LSTM model
    lstm = LSTM(
        **common_params,
        loss=MSE(),
        hidden_size=128,
        num_layers=2,
        dropout=0.1,
        batch_normalization=True,
    )

    # RNN model
    rnn = RNN(
        **common_params,
        loss=MAE(),
        hidden_size=128,
        num_layers=2,
        dropout=0.1,
        cell_type="GRU",
    )

    # GRU model
    gru = GRU(**common_params, loss=SMAPE(), hidden_size=128, num_layers=2, dropout=0.1)

    # Create NeuralForecast object with all models
    fcst = NeuralForecast(models=[nbeats, nhits, deepar, tft, lstm, rnn, gru], freq="D")

    return fcst


def create_darts_models(input_chunk_length=120, output_chunk_length=30, n_epochs=100):
    """
    Create a collection of Darts models with correct parameters
    """
    # Common parameters for neural networks
    nn_params = {
        "input_chunk_length": input_chunk_length,
        "output_chunk_length": output_chunk_length,
        "n_epochs": n_epochs,
        "batch_size": 32,
        "force_reset": True,
    }

    models = {
        # # Neural network based models
        # "nbeats": NBEATSModel(
        #     **nn_params,
        #     generic_architecture=False,
        #     num_stacks=2,
        #     num_blocks=3,
        #     num_layers=4,
        #     layer_widths=256,
        #     expansion_coefficient_dim=5,
        #     trend_polynomial_degree=2,
        # ),
        "nhits": NHiTSModel(
            **nn_params,
            num_stacks=3,
            num_blocks=1,
            num_layers=2,
            layer_widths=512,
            pooling_kernel_sizes=None,
            n_freq_downsample=None,
            dropout=0.1,
            activation="ReLU",
            MaxPool1d=True,
        ),
        # "block_rnn": BlockRNNModel(
        #     **nn_params,
        #     model="LSTM",
        #     hidden_dim=128,
        #     n_rnn_layers=2,
        #     dropout=0.1,
        # ),
        # "tcn": TCNModel(
        #     **nn_params,
        #     num_filters=64,
        #     kernel_size=3,
        #     dilation_base=2,
        #     dropout=0.1,
        #     weight_norm=True,
        # ),
        # "tide": TiDEModel(
        #     **nn_params,
        #     num_encoder_layers=2,
        #     num_decoder_layers=2,
        #     temporal_width_past=24,
        #     temporal_width_future=12,
        #     temporal_decoder_hidden=32,
        # ),
        # "transformer": TransformerModel(
        #     **nn_params,
        #     d_model=64,
        #     nhead=4,
        #     num_encoder_layers=3,
        #     num_decoder_layers=3,
        #     dim_feedforward=256,
        #     dropout=0.1,
        #     activation="gelu",
        # ),
        # Traditional ML models
        # "random_forest": RandomForest(
        #     lags=input_chunk_length,
        #     n_estimators=100,
        #     max_depth=None,
        #     min_samples_split=2,
        # ),
        
        # "xgboost": XGBModel(
        #     lags=input_chunk_length, n_estimators=100, max_depth=6, learning_rate=0.1
        # ),
        
    }

    return models


def train_and_forecast(df_train, df_test, use_nixtla=True):
    """
    Train models and generate forecasts using either Nixtla or Darts
    """
    if use_nixtla:
        # Nixtla workflow
        fcst = create_nixtla_models()

        # Ensure df_train has the required columns
        if "unique_id" not in df_train.columns:
            df_train["unique_id"] = "series0"
        if "ds" not in df_train.columns:
            df_train = df_train.rename(columns={"date": "ds"})
        if "y" not in df_train.columns:
            df_train = df_train.rename(columns={"value": "y"})

        # Similarly for test data
        if "unique_id" not in df_test.columns:
            df_test["unique_id"] = "series0"
        if "ds" not in df_test.columns:
            df_test = df_test.rename(columns={"date": "ds"})
        if "y" not in df_test.columns:
            df_test = df_test.rename(columns={"value": "y"})

        fcst.fit(df=df_train, val_size=488)
        forecasts = fcst.predict(futr_df=df_test)
        return forecasts
    else:
        # Darts workflow
        # Convert pandas DataFrame to Darts TimeSeries
        series = TimeSeries.from_dataframe(df_train, "ds", "y")

        # Create and train models
        models = create_darts_models()
        forecasts = {}

        for name, model in models.items():
            print(f"Training {name} model...")
            model.fit(series)
            forecast = model.predict(len(df_test))
            forecasts[name] = forecast

        return forecasts


# Example usage:
"""
# For Nixtla models
nixtla_forecasts = train_and_forecast(df_train_commodity_dt, df_test_commodity_dt, use_nixtla=True)

# For Darts models
darts_forecasts = train_and_forecast(df_train_commodity_dt, df_test_commodity_dt, use_nixtla=False)
"""

'\n# For Nixtla models\nnixtla_forecasts = train_and_forecast(df_train_commodity_dt, df_test_commodity_dt, use_nixtla=True)\n\n# For Darts models\ndarts_forecasts = train_and_forecast(df_train_commodity_dt, df_test_commodity_dt, use_nixtla=False)\n'

In [12]:
# y_train = df_train_commodity_dt['y']


In [13]:
# nixtla_forecasts = train_and_forecast(
#     df_train_commodity_dt, df_test_commodity_dt, use_nixtla=False
# )

In [14]:
# nixtla_forecasts

In [15]:
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import plotly.express as px

In [16]:
scores = dict()


def get_scores(y_test, y_pred):
    r2_ = r2_score(y_test, y_pred)
    rmse_ = root_mean_squared_error(y_test, y_pred)
    mae_ = mean_absolute_error(y_test, y_pred)
    return {"r2": r2_, "mae": mae_, "rmse": rmse_}


In [19]:
import os

In [20]:
ers = dict()
for commodity in df_up.commodity.unique():
    df_train_commodity = up_train[up_train["commodity"] == commodity]
    df_train_commodity_dt = df_train_commodity.groupby("datetime").agg(
        {"modal_rs_quintal": "mean"}
    )
    print(df_train_commodity_dt)
    df_test_commodity = up_test[up_test["commodity"] == commodity]
    df_test_commodity_dt = df_test_commodity.groupby("datetime").agg(
        {"modal_rs_quintal": "mean"}
    )
    print(df_test_commodity)
    df_train_commodity_dt.reset_index(inplace=True)
    df_train_commodity_dt["unique_id"] = commodity
    df_train_commodity_dt.rename(
        columns={"datetime": "ds", "modal_rs_quintal": "y"}, inplace=True
    )
    print(df_train_commodity_dt)
    df_test_commodity_dt.reset_index(inplace=True)
    df_test_commodity_dt["unique_id"] = commodity
    df_test_commodity_dt.rename(
        columns={"datetime": "ds", "modal_rs_quintal": "y"}, inplace=True
    )
    print(df_test_commodity_dt)
    print(df_train_commodity_dt.shape, df_test_commodity_dt.shape)
    nixtla_forecasts = train_and_forecast(
        df_train_commodity_dt, df_test_commodity_dt, use_nixtla=False
    )
    for model, data in nixtla_forecasts.items():
        nixtla_forecasts[model] = pd.DataFrame(nixtla_forecasts[model].values())[0]
    result = pd.DataFrame(nixtla_forecasts)
    result_y = df_test_commodity_dt["y"]
    results = pd.concat([result, result_y], axis=1)
    for column in results.columns:
        scores[column] = get_scores(results["y"], results[column])
    results["commodity"] = commodity
    os.makedirs(f'./model_results/{commodity}/',exist_ok=True)
    results.to_csv(f'./model_results/{commodity}/results.csv')
    error_results = pd.DataFrame(scores)
    error_results.to_csv(f"./model_results/{commodity}/errors.csv")
    error_results["commodity"] = commodity
    ers[commodity] = {'results':results,'error_results':error_results}
    px.line(
        results,
        x=results.index,
        y=[
            "y",
            # "nbeats",
            "nhits",
            # "tcn",
            # "tide",
            # "transformer",
            # "random_forest",
            # "xgboost",
        ],
    )

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode


            modal_rs_quintal
datetime                    
2018-01-01       2747.222222
2018-01-02       2757.333333
2018-01-03       2722.439394
2018-01-04       2798.666667
2018-01-05       2753.147541
...                      ...
2023-04-29       1127.578616
2023-04-30       1115.596154
2023-05-01       1137.429530
2023-05-02       1119.237179
2023-05-03       1122.735632

[1949 rows x 1 columns]
            district_name     market_name commodity variety grade  \
datetime                                                            
2023-05-03      Barabanki       Barabanki     Onion     Red   FAQ   
2023-05-03         Ballia           Rasda     Onion     Red   FAQ   
2023-05-03         Deoria          Barhaj     Onion     Red   FAQ   
2023-05-03         Jhansi      Mauranipur     Onion     Red   FAQ   
2023-05-03    Maharajganj        Nautnava     Onion     Red   FAQ   
...                   ...             ...       ...     ...   ...   
2024-09-01           Agra  Fatehpur Sikri     

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode


            modal_rs_quintal
datetime                    
2018-01-01       1587.087591
2018-01-02       1585.783582
2018-01-03       1594.664179
2018-01-04       1581.481752
2018-01-05       1586.769231
...                      ...
2023-04-29       2136.592105
2023-04-30       2141.430233
2023-05-01       2141.048611
2023-05-02       2142.161972
2023-05-03       2139.000000

[1949 rows x 1 columns]
            district_name market_name commodity      variety grade  \
datetime                                                             
2023-05-03      Ghaziabad   Ghaziabad     Wheat         Dara   FAQ   
2023-05-03  Jalaun (Orai)         Ait     Wheat         Dara   FAQ   
2023-05-03         Mahoba      Mahoba     Wheat         Dara   FAQ   
2023-05-03         Badaun   Wazirganj     Wheat         Dara   FAQ   
2023-05-03        Auraiya     Achalda     Wheat         Dara   FAQ   
...                   ...         ...       ...          ...   ...   
2024-09-01      Firozabad   Firozabad 

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode


            modal_rs_quintal
datetime                    
2018-01-01        479.433333
2018-01-02        482.694805
2018-01-03        480.148148
2018-01-04        482.852761
2018-01-05        482.987261
...                      ...
2023-04-29        839.362069
2023-04-30        830.459459
2023-05-01        838.407643
2023-05-02        836.303571
2023-05-03        842.264151

[1949 rows x 1 columns]
            district_name    market_name commodity  variety grade  \
datetime                                                            
2023-05-03         Badaun      Wazirganj    Potato     Desi   FAQ   
2023-05-03      Lakhimpur  Golagokarnath    Potato  Badshah   FAQ   
2023-05-03      Firozabad      Firozabad    Potato     Desi   FAQ   
2023-05-03          Unnao      Bangarmau    Potato     Desi   FAQ   
2023-05-03       Mainpuri        Ghiraur    Potato    Local   FAQ   
...                   ...            ...       ...      ...   ...   
2024-09-01  Muzaffarnagar  Muzzafarnagar    Po

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode


            modal_rs_quintal
datetime                    
2018-01-01       2244.840426
2018-01-02       2261.681818
2018-01-03       2235.193548
2018-01-04       2258.073684
2018-01-05       2291.752941
...                      ...
2023-04-29       2648.738739
2023-04-30       2665.293103
2023-05-01       2614.902913
2023-05-02       2653.594340
2023-05-03       2596.029412

[1949 rows x 1 columns]
                district_name market_name commodity variety grade  \
datetime                                                            
2023-05-03       Kanpur Dehat        Rura      Rice     III   FAQ   
2023-05-03        Maharajganj    Partaval      Rice     III   FAQ   
2023-05-03           Fatehpur    Fatehpur      Rice     III   FAQ   
2023-05-03              Unnao       Purwa      Rice     III   FAQ   
2023-05-03       Bulandshahar      Khurja      Rice     III   FAQ   
...                       ...         ...       ...     ...   ...   
2024-09-01           Fatehpur   Jahanabad     

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [31]:
results = ers['Potato']['results']

In [32]:
results['y']

0       823.650794
1       844.357143
2       857.364238
3       845.740964
4       860.964286
          ...     
483    2188.571429
484    2197.319277
485    2211.416667
486    2202.714286
487    2132.000000
Name: y, Length: 488, dtype: float64

In [33]:
px.line(
    results,
    x=results.index,
    y=[
        "y",
        # "nbeats",
        "nhits",
        # "tcn",
        # "tide",
        # "random_forest",
        # "xgboost",
    ],
)