## Config

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import datetime, timedelta

In [2]:
import os

from nixtlats import TimeGPT

# Read the API token from the environment so the credential is never stored
# in the notebook (or in its saved outputs / version control history).
# Set it before starting Jupyter, e.g. `export TIMEGPT_TOKEN=...`.
token = os.environ.get('TIMEGPT_TOKEN')
if not token:
    raise RuntimeError("Set the TIMEGPT_TOKEN environment variable to your TimeGPT API token.")

timegpt = TimeGPT(token=token)

# Last expression of the cell: shows whether the API accepted the token.
timegpt.validate_token()

INFO:nixtlats.timegpt:Happy Forecasting! :), If you have questions or need support, please email ops@nixtla.io


True

## Read data

In [3]:
# Load the processed monthly M3 file and print a column / dtype summary.
df = pd.read_parquet(
    path='/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M3 Data/M3_month_processed.parquet'
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167562 entries, 0 to 167561
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   Series       167562 non-null  int64         
 1   N            167562 non-null  int64         
 2   NF           167562 non-null  int64         
 3   Category     167562 non-null  object        
 4   Measurement  167562 non-null  int64         
 5   Value        167562 non-null  float64       
 6   Date         167562 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(4), object(1)
memory usage: 8.9+ MB


In [4]:
# Number of distinct series ids in the dataset.
print(df['Series'].nunique(dropna=False))

1428


## Function to perform predictions

### Perform a test to validate

In [5]:
# Pick a single series (id 1402) to validate the forecasting call on.
trimmed_df = df[df['Series'].eq(1402)]
trimmed_df

Unnamed: 0,Series,N,NF,Category,Measurement,Value,Date
0,1402,68,18,MICRO,1,2640.0,1990-01-01
1,1402,68,18,MICRO,2,2640.0,1990-02-01
2,1402,68,18,MICRO,3,2160.0,1990-03-01
3,1402,68,18,MICRO,4,4200.0,1990-04-01
4,1402,68,18,MICRO,5,3360.0,1990-05-01
...,...,...,...,...,...,...,...
63,1402,68,18,MICRO,64,3720.0,1995-04-01
64,1402,68,18,MICRO,65,2160.0,1995-05-01
65,1402,68,18,MICRO,66,480.0,1995-06-01
66,1402,68,18,MICRO,67,2040.0,1995-07-01


In [6]:
# N and NF are repeated on every row of a series, so read them from the first row.
n = int(trimmed_df['N'].iat[0])
nf = int(trimmed_df['NF'].iat[0])

# Keep only the first N - NF observations as model input.
trimmed_df = trimmed_df.iloc[:n - nf]
print(n, nf, len(trimmed_df), sep='\n')

68
18
50


In [7]:
# Confirm both values are plain Python ints before passing nf as the horizon.
print(type(n), type(nf), sep='\n')

<class 'int'>
<class 'int'>


In [8]:
# Forecast nf steps ahead for the single trimmed test series.
forecast_df = timegpt.forecast(
    trimmed_df,
    h=nf,
    target_col='Value',
    time_col='Date',
    id_col='Series',
)
forecast_df

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...


Unnamed: 0,Series,Date,TimeGPT
0,1402,1994-03-01,2861.64502
1,1402,1994-04-01,2963.255127
2,1402,1994-05-01,2896.082031
3,1402,1994-06-01,2988.525635
4,1402,1994-07-01,2976.000488
5,1402,1994-08-01,2703.038574
6,1402,1994-09-01,3515.004395
7,1402,1994-10-01,2896.811523
8,1402,1994-11-01,3228.747803
9,1402,1994-12-01,3026.74585


The monthly forecast horizon is significantly longer than the other ones, so it could be interesting to review the difference between the normal model and the timegpt-long-horizon model.

In [9]:
# Distinct series lengths (N) and forecast horizons (NF) present in the data.
print(df['N'].unique(), df['NF'].unique(), sep='\n')

[ 68  69 126 141 144 134 140 128 133 136 142 139  96 143 122 137  98 110
 121  86 132 108 129  84  66 120 107 104  74 115 100  81  83  99  89  76
  94  79  78  73 114  70 135  72  71 138]
[18]


### Actual function

In [10]:
def simple_forecast_with_timegpt(df, client=None, max_calls_per_minute=200):
    """Forecast every series in ``df`` with TimeGPT, one API call per series.

    For each distinct ``Series`` id (in order of first appearance) the rows are
    sorted by ``Date``, rows with a missing ``Value`` are dropped, and only the
    first ``N - NF`` observations are sent to the model, which is asked for
    ``NF`` future steps. A failed call is reported and skipped so one bad
    series does not abort the whole run.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with the columns ``Series``, ``Date``, ``Value``,
        ``N`` and ``NF``.
    client : object, optional
        Object exposing ``forecast(df, h=..., id_col=..., time_col=...,
        target_col=...)``. Defaults to the notebook-level ``timegpt`` client.
    max_calls_per_minute : int, default 200
        Maximum number of API calls issued within any 60-second interval.

    Returns
    -------
    pandas.DataFrame
        The per-series forecasts concatenated row-wise with a fresh index, or
        an empty DataFrame when no forecast was produced.

    Raises
    ------
    ValueError
        If ``max_calls_per_minute`` is smaller than 1.
    """
    from collections import deque  # local import keeps this cell self-contained

    if max_calls_per_minute < 1:
        raise ValueError("max_calls_per_minute must be at least 1")
    if client is None:
        client = timegpt  # client created in the Config section of the notebook

    window_seconds = 60.0
    call_times = deque()  # monotonic timestamps of the calls made in the last window
    forecasts_list = []   # one forecast DataFrame per successfully forecast series

    # groupby(sort=False) visits the series in order of first appearance and
    # avoids re-filtering the whole frame once per series.
    for series_id, group in df.groupby('Series', sort=False):
        series_df = group.sort_values('Date').dropna(subset=['Value'])
        if series_df.empty:
            print(f"Skipping series {series_id}: no non-missing values.")
            continue

        n = int(series_df['N'].iloc[0])
        nf = int(series_df['NF'].iloc[0])

        # Ensure the dataframe passed does not use more than (N - NF) examples.
        # head() with a non-positive length would silently return the wrong rows.
        train_len = n - nf
        if train_len <= 0:
            print(f"Skipping series {series_id}: N - NF = {train_len} leaves no training data.")
            continue
        trimmed_df = series_df.head(train_len)

        # Sliding-window rate limit: forget calls older than the window, and if
        # the window is still full wait until its oldest call expires.
        now = time.monotonic()
        while call_times and now - call_times[0] >= window_seconds:
            call_times.popleft()
        if len(call_times) >= max_calls_per_minute:
            pause = window_seconds - (now - call_times[0])
            print(f"Pausing for {pause:.2f} seconds to comply with rate limit.")
            time.sleep(pause)
            call_times.popleft()
        call_times.append(time.monotonic())  # counted even if the call below fails

        try:
            # Call the TimeGPT model for an nf-step forecast
            forecast_df = client.forecast(trimmed_df, h=nf, id_col='Series', time_col='Date', target_col='Value')
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next series
            print(f"Error encountered for series {series_id}: {e}")
        else:
            forecasts_list.append(forecast_df)

    if not forecasts_list:
        # pd.concat raises on an empty list; return an empty frame instead
        print("No forecasts were produced.")
        return pd.DataFrame()

    # Concatenate all forecast DataFrames
    return pd.concat(forecasts_list, ignore_index=True)

## Run the function

In [11]:
import os

# The full run took about 90 minutes, so reuse the forecasts saved by the
# "Save to parquet" step when that file exists and only call the API otherwise.
# This keeps `all_forecasts` defined on a fresh kernel (Restart & Run All).
FORECASTS_PATH = 'M3_month_simple_forecasts.parquet'

if os.path.exists(FORECASTS_PATH):
    all_forecasts = pd.read_parquet(FORECASTS_PATH)
else:
    all_forecasts = simple_forecast_with_timegpt(df)

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: MS
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing d

In [12]:
# Rebuild a clean 0..n-1 index; rebinding instead of inplace=True keeps the cell
# chainable and avoids mutating an object another cell may still reference.
all_forecasts = all_forecasts.reset_index(drop=True)

In [13]:
# Display the forecast table (pandas renders a truncated head/tail view).
all_forecasts

Unnamed: 0,Series,Date,TimeGPT
0,1402,1994-03-01,2861.645020
1,1402,1994-04-01,2963.255127
2,1402,1994-05-01,2896.082031
3,1402,1994-06-01,2988.525635
4,1402,1994-07-01,2976.000488
...,...,...,...
25699,2829,1988-07-01,1437.689209
25700,2829,1988-08-01,1442.109741
25701,2829,1988-09-01,1439.912842
25702,2829,1988-10-01,1437.093018


In [15]:
# Number of distinct series that received a forecast; compare with the series count of the input data.
print(all_forecasts['Series'].nunique(dropna=False))

1428


### Save to parquet

In [16]:
# Make sure Date is stored as a datetime column before saving, then show the schema.
all_forecasts['Date'] = all_forecasts['Date'].pipe(pd.to_datetime)
all_forecasts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25704 entries, 0 to 25703
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Series   25704 non-null  int64         
 1   Date     25704 non-null  datetime64[ns]
 2   TimeGPT  25704 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 602.6 KB


In [18]:
# Persist the forecasts next to the notebook as a snappy-compressed parquet file.
all_forecasts.to_parquet(
    'M3_month_simple_forecasts.parquet',
    engine='pyarrow',
    compression='snappy',
)