## Config

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import datetime, timedelta

In [2]:
from nixtlats import TimeGPT
token = '0IEAIZ41DvUuDTmpjyYgPEdOwLsnoUbaW5z4uWZm0yi6yzTU1w2xiKQeuq0nyChGv1TLxG7scd3QqNSOxqUPFVqWizI0GMCpsMMmHmOjyrJRppyYyNeIRCBniD3J1BwsRGuSg3FPrK9hbihF5bMLTp3lpLmRK9QGGzKJwxhqFo3E31P9njLgUDbB5ViGzt45MvLeBExwVhHc5ZBV5aDjhZMd1JThvBp1iNKoMceIt84mes9yH5HxJxOI0t1YlvIi'

timegpt = TimeGPT(token = token)

timegpt.validate_token()

INFO:nixtlats.timegpt:Happy Forecasting! :), If you have questions or need support, please email ops@nixtla.io


True

## Read data

In [7]:
df = pd.read_parquet('M3_quarter_processed.parquet')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37004 entries, 0 to 37003
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Series       37004 non-null  int64         
 1   N            37004 non-null  int64         
 2   NF           37004 non-null  int64         
 3   Category     37004 non-null  object        
 4   Measurement  37004 non-null  int64         
 5   Value        37004 non-null  float64       
 6   Date         37004 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(4), object(1)
memory usage: 2.0+ MB


In [8]:
print(len(df['Series'].unique()))

756


## Function to perform predictions

### Perform a test to validate

In [9]:
trimmed_df = df.loc[df['Series'] == 646]
trimmed_df

Unnamed: 0,Series,N,NF,Category,Measurement,Value,Date
0,646,44,8,MICRO,1,3142.63,1984-01-01
1,646,44,8,MICRO,2,3190.75,1984-04-01
2,646,44,8,MICRO,3,3178.69,1984-07-01
3,646,44,8,MICRO,4,3170.94,1984-10-01
4,646,44,8,MICRO,5,3124.38,1985-01-01
5,646,44,8,MICRO,6,3170.0,1985-04-01
6,646,44,8,MICRO,7,3200.94,1985-07-01
7,646,44,8,MICRO,8,3176.75,1985-10-01
8,646,44,8,MICRO,9,3170.44,1986-01-01
9,646,44,8,MICRO,10,3268.67,1986-04-01


In [10]:
# trimmed_df.drop(columns = 'Category', inplace=True)
n = int(trimmed_df['N'].iloc[0])  # Assuming N is constant within each series
nf = int(trimmed_df['NF'].iloc[0])  # Assuming NF is constant within each series

trimmed_df = trimmed_df.head(n - nf)
print(n)
print(nf)
print(len(trimmed_df))

44
8
36


In [11]:
print(type(n))
print(type(nf))

<class 'int'>
<class 'int'>


In [12]:
forecast_df = timegpt.forecast(trimmed_df, h=nf, id_col='Series', time_col='Date', target_col='Value')
forecast_df

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...


Unnamed: 0,Series,Date,TimeGPT
0,646,1993-01-01,5437.876953
1,646,1993-04-01,5440.134766
2,646,1993-07-01,5454.375
3,646,1993-10-01,5564.521973
4,646,1994-01-01,5614.047852
5,646,1994-04-01,5599.242188
6,646,1994-07-01,5581.577148
7,646,1994-10-01,5516.174805


In [13]:
print(df['N'].unique())
print(df['NF'].unique())

[44 46 43 45 36 40 42 59 64 68 32 71 69 35 51 56 72 52 48 50 24 34 49 53
 66 61 39 63 62 70 47 67 37 58 38 57 54 60 55]
[8]


### Actual function

In [14]:
def simple_forecast_with_timegpt(df):
    forecasts_list = []  # List to hold forecast dataframes for each series
    call_count = 0  # Initialize call count
    start_time = datetime.now()  # Track the start time
    
    for series_id in df['Series'].unique():
        current_time = datetime.now()
        
        # Check if we are approaching the rate limit
        if call_count >= 200:
            # Calculate remaining time to the next minute
            time_to_next_minute = 60 - current_time.second + (1000000 - current_time.microsecond) / 1000000.0
            print(f"Pausing for {time_to_next_minute:.2f} seconds to comply with rate limit.")
            time.sleep(time_to_next_minute)  # Pause execution
            
            # Reset the call count and start time for the new minute
            call_count = 0
            start_time = datetime.now()
        
        series_df = df[df['Series'] == series_id].sort_values('Date')  # Filter and sort by date
        series_df.dropna(subset=['Value'], inplace=True)  # Drop missing values
        
        n = int(series_df['N'].iloc[0])  # Assuming N is constant within each series
        nf = int(series_df['NF'].iloc[0])  # Assuming NF is constant within each series
        
        # Ensure the dataframe passed does not use more than (N - NF) examples
        trimmed_df = series_df.head(n - nf)
        
        try:
            forecast_df = timegpt.forecast(trimmed_df, h=nf, id_col='Series', time_col='Date', target_col='Value')
            forecasts_list.append(forecast_df)  # Collect results
        # Call TimeGPT model with nf steps forecast
        except Exception as e:
            print(f"Error encountered for series {series_id}: {e}")
        
        call_count += 1  # Increment call count after each API call
        
        # Check if a minute has passed; if so, reset the counter
        if (datetime.now() - start_time).seconds >= 60:
            call_count = 0
            start_time = datetime.now()

    # Concatenate all forecast DataFrames
    final_forecasts_df = pd.concat(forecasts_list, ignore_index=True)
    
    return final_forecasts_df

## Run the function

In [15]:
all_forecasts = simple_forecast_with_timegpt(df)

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.tim

Error encountered for series 764: status_code: 502, body: {'data': None, 'message': 'Request failed with status code 502', 'code': 'B30', 'requestID': 'X8P4LNNQFR', 'support': 'If you have questions or need support, please email ops@nixtla.io'}


INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Attempt 1 failed...
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.time

Error encountered for series 1057: status_code: 500, body: {'data': None, 'message': 'Request failed with status code 500', 'code': 'B30', 'requestID': '9TFQ9858JP', 'support': 'If you have questions or need support, please email ops@nixtla.io'}


INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...
INFO:nixt

Error encountered for series 764: status_code: 502, body: {'data': None, 'message': 'Request failed with status code 502', 'code': 'B30', 'requestID': 'X8P4LNNQFR', 'support': 'If you have questions or need support, please email ops@nixtla.io'}

Error encountered for series 1057: status_code: 500, body: {'data': None, 'message': 'Request failed with status code 500', 'code': 'B30', 'requestID': '9TFQ9858JP', 'support': 'If you have questions or need support, please email ops@nixtla.io'}

In [22]:
all_forecasts.loc[all_forecasts['Series'] == 764]

Unnamed: 0,Series,Date,TimeGPT


In [21]:
all_forecasts.loc[all_forecasts['Series'] == 1057]

Unnamed: 0,Series,Date,TimeGPT


### Do failed requests seperately

In [26]:
failed1 = df.loc[df['Series'] == 764]

In [25]:
failed2 = df.loc[df['Series'] == 1057]

In [27]:
# failed1.drop(columns = 'Category', inplace=True)
n = int(failed1['N'].iloc[0])  # Assuming N is constant within each series
nf = int(failed1['NF'].iloc[0])  # Assuming NF is constant within each series

failed1 = failed1.head(n - nf)
print(n)
print(nf)
print(len(failed1))

44
8
36


In [28]:
forecast_df1 = timegpt.forecast(failed1, h=nf, id_col='Series', time_col='Date', target_col='Value')
forecast_df1

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...


Unnamed: 0,Series,Date,TimeGPT
0,764,1993-01-01,6513.61377
1,764,1993-04-01,6576.021484
2,764,1993-07-01,6619.428711
3,764,1993-10-01,6726.428711
4,764,1994-01-01,6830.522461
5,764,1994-04-01,6838.104492
6,764,1994-07-01,6898.68457
7,764,1994-10-01,6921.140625


In [31]:
# failed1.drop(columns = 'Category', inplace=True)
n = int(failed2['N'].iloc[0])  # Assuming N is constant within each series
nf = int(failed2['NF'].iloc[0])  # Assuming NF is constant within each series

failed1 = failed1.head(n - nf)
print(n)
print(nf)
print(len(failed2))

52
8
52


In [32]:
forecast_df2 = timegpt.forecast(failed2, h=nf, id_col='Series', time_col='Date', target_col='Value')
forecast_df2

INFO:nixtlats.timegpt:Validating inputs...
INFO:nixtlats.timegpt:Preprocessing dataframes...
INFO:nixtlats.timegpt:Inferred freq: QS-OCT
INFO:nixtlats.timegpt:Calling Forecast Endpoint...


Unnamed: 0,Series,Date,TimeGPT
0,1057,1993-01-01,7732.729004
1,1057,1993-04-01,7777.152832
2,1057,1993-07-01,7818.040039
3,1057,1993-10-01,8105.858398
4,1057,1994-01-01,8343.125
5,1057,1994-04-01,8437.335938
6,1057,1994-07-01,8519.332031
7,1057,1994-10-01,8771.615234


In [33]:
df_to_file = pd.concat([all_forecasts, forecast_df1, forecast_df2], ignore_index=True)
df_to_file.sort_values(['Series', 'Date'], inplace=True)

In [34]:
df_to_file

Unnamed: 0,Series,Date,TimeGPT
0,646,1993-01-01 00:00:00,5437.876953
1,646,1993-04-01 00:00:00,5440.134766
2,646,1993-07-01 00:00:00,5454.375000
3,646,1993-10-01 00:00:00,5564.521973
4,646,1994-01-01 00:00:00,5614.047852
...,...,...,...
6027,1401,1974-10-01 00:00:00,4579.938477
6028,1401,1975-01-01 00:00:00,4569.582031
6029,1401,1975-04-01 00:00:00,4653.845703
6030,1401,1975-07-01 00:00:00,4854.271484


In [44]:
print(len(df_to_file['Series'].unique()))
print(df_to_file['Series'].min())
print(df_to_file['Series'].max())

756
646
1401


In [37]:
df_to_file.reset_index(drop=True, inplace=True)

### Save to parquet

In [40]:
df_to_file['Date'] = pd.to_datetime(df_to_file['Date'])
df_to_file.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6048 entries, 0 to 6047
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Series   6048 non-null   int64         
 1   Date     6048 non-null   datetime64[ns]
 2   TimeGPT  6048 non-null   float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 141.9 KB


In [41]:
df_to_file.to_parquet('M3_quarter_simple_forecasts.parquet', compression='snappy', engine='pyarrow')