In [2]:
DATA_SOURCE = "../../sih_2024_data_source/statewise_results/"

In [3]:
import pandas as pd

In [4]:
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
import plotly.express as px

In [5]:
scores = dict()


def get_scores(y_test, y_pred):
    r2_ = r2_score(y_test, y_pred)
    rmse_ = root_mean_squared_error(y_test, y_pred)
    mae_ = mean_absolute_error(y_test, y_pred)
    return {"r2": r2_, "mae": mae_, "rmse": rmse_}

In [6]:
import pandas as pd
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, DeepAR, TFT, LSTM, RNN, GRU
from neuralforecast.losses.pytorch import DistributionLoss, MAE, MSE, MAPE, SMAPE
import torch

from darts import TimeSeries
from darts.models import (
    NHiTSModel,)

def create_darts_models(input_chunk_length=120, output_chunk_length=30, n_epochs=100):
    """
    Create a collection of Darts models with correct parameters
    """
    # Common parameters for neural networks
    nn_params = {
        "input_chunk_length": input_chunk_length,
        "output_chunk_length": output_chunk_length,
        "n_epochs": n_epochs,
        "batch_size": 32,
        "force_reset": True,
    }

    models = {
       
        "nhits": NHiTSModel(
            **nn_params,
            num_stacks=3,
            num_blocks=1,
            num_layers=2,
            layer_widths=512,
            pooling_kernel_sizes=None,
            n_freq_downsample=None,
            dropout=0.1,
            activation="ReLU",
            MaxPool1d=True,
        ),
               
    }

    return models

def train_and_forecast(df_train, df_test):
    """
    Train models and generate forecasts using either Nixtla or Darts
    """

    # Darts workflow
    # Convert pandas DataFrame to Darts TimeSeries
    series = TimeSeries.from_dataframe(df_train, "ds", "y",fill_missing_dates=True, freq=None)

    # Create and train models
    models = create_darts_models()
    forecasts = {}

    for name, model in models.items():
        print(f"Training {name} model...")
        model.fit(series)
        forecast = model.predict(len(df_test))
        
        forecasts[name] = {"data":forecast,"model":model}

    return forecasts


In [7]:


def create_interpolated_ranges(dataframe,date_col,value_col):
    dataframe[date_col] = pd.to_datetime(dataframe[date_col])
    date_range = pd.date_range(start=dataframe[date_col].min(), end=dataframe[date_col].max())
    full_df = pd.DataFrame({date_col: date_range})
    merged_df = pd.merge(full_df, dataframe, on=date_col, how='left')
    merged_df[value_col] = merged_df[value_col].interpolate()
    merged_df[value_col] = merged_df[value_col].fillna(method='bfill').fillna(method='ffill')
    return merged_df

In [None]:
from ast import mod
import os
from re import I

ers = {}
for commodity in os.listdir(DATA_SOURCE):
    print(commodity)
    ers[commodity] = {}
    path = DATA_SOURCE + commodity
    for state_csv in os.listdir(path):
        sub_path = path + '/'+ state_csv
        state = state_csv.partition("_")[0]
        df = pd.read_csv(sub_path)
        # df['datetime'] = pd.to_datetime(df['date'])
        df.drop(columns=["Unnamed: 0"], axis=1, inplace=True)
        df.sort_values(by="datetime", ascending=True, inplace=True)
        # print(df.head())
        TRAIN_LEN = int(0.8 * len(df))
        df_train, df_test = (df[:TRAIN_LEN],df[TRAIN_LEN:])
        if(df_train.shape[0]<151  or df_test.shape[0]<15):
            continue
        df_train.set_index('datetime', inplace=True)
        df_train.sort_index(inplace=True)
        df_test.set_index('datetime', inplace=True)
        df_test.sort_index(inplace=True)
        df_train_dt = df_train.groupby("datetime").agg( {"modal_rs_quintal": "mean"})
        df_test_dt = df_test.groupby("datetime").agg( {"modal_rs_quintal": "mean"})
        df_train_dt.reset_index(inplace=True)
        df_train_dt.rename(
            columns={"datetime": "ds", "modal_rs_quintal": "y"}, inplace=True
        )
        df_test_dt.reset_index(inplace=True)
        df_test_dt.rename(
            columns={"datetime": "ds", "modal_rs_quintal": "y"}, inplace=True
        )
        ######
        print(df_test_dt.shape,df_train_dt.shape)
        print(df_train_dt['ds'].unique())
        df_train_dt = create_interpolated_ranges(df_train_dt,"ds","y")
        df_test_dt = create_interpolated_ranges(df_test_dt,"ds","y")
        print(df_train_dt.head(20))
        if(df_train_dt.shape[0]<151  or df_test_dt.shape[0]<15):
            continue
        nhits_forecast = train_and_forecast(df_train=df_train_dt,df_test=df_test_dt)
        print(nhits_forecast)
        for name,data_model in nhits_forecast.items():
            os.makedirs(f'./model_results/{commodity}/{state}/',exist_ok=True)
            nhits_forecast[name]["model"].save(f'./model_results/{commodity}/{state}/nhits.pkt')
            nhits_forecast[name] = pd.DataFrame(nhits_forecast[name]['data'].values())[0]
        result = pd.DataFrame(nhits_forecast)
        result_y = df_test_dt['y']
        results = pd.concat([result,result_y],axis= 1)
        for column in results.columns:
            scores[column] = get_scores(results["y"], results[column])
        

        results.to_csv(f'./model_results/{commodity}/results.csv')
        error_results = pd.DataFrame(scores)
        error_results.to_csv(f"./model_results/{commodity}/errors.csv")
        ers[state] = {'results':results,'error_results':error_results}
        px.line(
        results,
        x=results.index,
        y=[
            "y",
            "nhits",
        ],
    )



Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42

masur dal
(472, 2) (2290, 2)
['2014-09-01' '2014-09-02' '2014-09-03' ... '2023-05-15' '2023-05-16'
 '2023-05-17']
           ds            y
0  2014-09-01  6100.000000
1  2014-09-02  5425.000000
2  2014-09-03  5312.500000
3  2014-09-04  5706.250000
4  2014-09-05  6100.000000
5  2014-09-06  5425.000000
6  2014-09-07  4450.000000
7  2014-09-08  6175.000000
8  2014-09-09  5480.000000
9  2014-09-10  5287.500000
10 2014-09-11  6175.000000
11 2014-09-12  5512.500000
12 2014-09-13  4850.000000
13 2014-09-14  4450.000000
14 2014-09-15  6190.000000
15 2014-09-16  6200.000000
16 2014-09-17  5340.000000
17 2014-09-18  6200.000000
18 2014-09-19  5656.666667
19 2014-09-20  5113.333333
Training nhits model...


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

{'nhits': {'data': <TimeSeries (DataArray) (ds: 474, component: 1, sample: 1)> Size: 4kB
array([[[8280.9573568 ]],

       [[8042.49521448]],

       [[8211.87203618]],

       [[8367.67024876]],

       [[8152.1777231 ]],

       [[8244.03373449]],

       [[8179.7940839 ]],

       [[8303.70533694]],

       [[8091.52649214]],

       [[8222.23531568]],

...

       [[6161.91480013]],

       [[6133.79681261]],

       [[6189.91066976]],

       [[6187.05537323]],

       [[6163.62787101]],

       [[6132.5225462 ]],

       [[6160.59801986]],

       [[6105.17460864]],

       [[6050.08444573]],

       [[6089.30653557]]])
Coordinates:
  * ds         (ds) datetime64[ns] 4kB 2023-05-18 2023-05-19 ... 2024-09-02
  * component  (component) object 8B 'y'
Dimensions without coordinates: sample
Attributes:
    static_covariates:  None
    hierarchy:          None, 'model': NHiTSModel(output_chunk_shift=0, num_stacks=3, num_blocks=1, num_layers=2, layer_widths=512, pooling_kernel_sizes=Non


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 968 K  | train
-------------------------------------------------------------
907 K     Trainable params
61.6 K    Non-trainable params
968 K     Total params
3.876     Total estimated model params size (MB)
42

(644, 2) (2007, 2)
['2014-09-01' '2014-09-06' '2014-09-09' ... '2022-01-31' '2022-02-01'
 '2022-02-02']
           ds       y
0  2014-09-01  6350.0
1  2014-09-02  6290.0
2  2014-09-03  6230.0
3  2014-09-04  6170.0
4  2014-09-05  6110.0
5  2014-09-06  6050.0
6  2014-09-07  6050.0
7  2014-09-08  6050.0
8  2014-09-09  6050.0
9  2014-09-10  6100.0
10 2014-09-11  6100.0
11 2014-09-12  6100.0
12 2014-09-13  6100.0
13 2014-09-14  6162.5
14 2014-09-15  6225.0
15 2014-09-16  6350.0
16 2014-09-17  6225.0
17 2014-09-18  5825.0
18 2014-09-19  6300.0
19 2014-09-20  6225.0
Training nhits model...


Training: |          | 0/? [00:00<?, ?it/s]