In [1]:
!pip install nnetsauce

Collecting nnetsauce
  Downloading nnetsauce-0.22.4-py2.py3-none-any.whl.metadata (822 bytes)
Downloading nnetsauce-0.22.4-py2.py3-none-any.whl (168 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.4/168.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: nnetsauce
Successfully installed nnetsauce-0.22.4


In [2]:
!pip install git+https://github.com/Techtonique/mlsauce.git --verbose

Using pip 23.3.2 from /opt/conda/lib/python3.10/site-packages/pip (python 3.10)
Collecting git+https://github.com/Techtonique/mlsauce.git
  Cloning https://github.com/Techtonique/mlsauce.git to /tmp/pip-req-build-4jki394q
  Running command git version
  git version 2.25.1
  Running command git clone --filter=blob:none https://github.com/Techtonique/mlsauce.git /tmp/pip-req-build-4jki394q
  Cloning into '/tmp/pip-req-build-4jki394q'...
  Running command git rev-parse HEAD
  186e88974b95a139d0c1cea983b22989e7d7d6df
  Resolved https://github.com/Techtonique/mlsauce.git to commit 186e88974b95a139d0c1cea983b22989e7d7d6df
  Running command git rev-parse HEAD
  186e88974b95a139d0c1cea983b22989e7d7d6df
  Running command python setup.py egg_info
    tree = Parsing.p_module(s, pxd, full_module_name)
    tree = Parsing.p_module(s, pxd, full_module_name)
    tree = Parsing.p_module(s, pxd, full_module_name)
    tree = Parsing.p_module(s, pxd, full_module_name)
    tree = Parsing.p_module(s, pxd, f

In [3]:
import numpy as np
import pandas as pd
import mlsauce as ms 
import nnetsauce as ns
from sklearn.linear_model import Ridge 
from time import time 

In [4]:
def generate_synthetic_mts(n_steps=1000, n_series=3, frequency=24, amplitude=20, base_noise_std=5, volatility_increase_rate=0.01, seed=42):
    """
    Generate a synthetic multivariate time series made of random linear trends,
    a sinusoidal seasonal component, and cross-correlated Gaussian noise whose
    volatility grows over time for the first series only.

    The function does not modify NumPy's global random state.

    Parameters:
    - n_steps: Number of time steps (rows)
    - n_series: Number of series (columns)
    - frequency: Period of the seasonal component, in time steps (the sine
      completes one cycle every `frequency` steps); must be non-zero
    - amplitude: Amplitude of the seasonal component
    - base_noise_std: Base standard deviation of the noise
    - volatility_increase_rate: Per-step rate at which the noise of the first
      series is scaled up (noise is multiplied by 1 + rate * step)
    - seed: Random seed for reproducibility

    Returns:
    - A pandas DataFrame of shape (n_steps, n_series) with columns
      'Series_1', ..., 'Series_<n_series>' and a daily DatetimeIndex
      starting on 2023-01-01

    Raises:
    - ValueError: if `frequency` is zero (the seasonal term would be NaN/inf)
    """
    if frequency == 0:
        raise ValueError("frequency must be non-zero")

    # Private legacy generator: yields the same stream as
    # np.random.seed(seed) followed by np.random.* calls, but leaves the
    # process-wide NumPy random state untouched.
    rng = np.random.RandomState(seed)

    # Correlation between series i and j decays as 0.8 ** |i - j|.
    # The powers are computed as Python floats and then gathered by lag.
    decay = np.array([0.8 ** lag for lag in range(n_series)], dtype=float)
    positions = np.arange(n_series)
    correlation_matrix = decay[np.abs(positions[:, None] - positions[None, :])]

    # Compute the covariance matrix from the correlation matrix
    noise_std = np.full(n_series, base_noise_std)
    covariance_matrix = np.outer(noise_std, noise_std) * correlation_matrix

    # Integer time index 0 .. n_steps - 1 (named `steps` so it does not shadow
    # the `time` function imported at the top of the notebook)
    steps = np.arange(n_steps)

    # Draw order matters for reproducibility: noise first, then slopes, then
    # one seasonal phase per series inside the loop below.
    noise = rng.multivariate_normal(np.zeros(n_series), covariance_matrix, n_steps)
    slopes = rng.uniform(low=-0.3, high=0.3, size=n_series)

    data = np.zeros((n_steps, n_series))
    for i in range(n_series):
        # Trend component with a random slope (positive or negative)
        trend = slopes[i] * steps

        # Seasonal component with a random phase shift
        seasonality = amplitude * np.sin(2 * np.pi * steps / frequency + rng.randn())

        series_noise = noise[:, i]
        if i == 0:
            # For the first series only, volatility increases linearly over time
            series_noise = series_noise * (1 + volatility_increase_rate * steps)

        data[:, i] = trend + seasonality + series_noise

    # Combine into a multivariate time series dataset
    return pd.DataFrame(
        data,
        columns=[f'Series_{i+1}' for i in range(n_series)],
        index=pd.date_range(start='2023-01-01', periods=n_steps),
    )

In [5]:
# Simulate 100 series over 10,000 daily steps with a seasonal amplitude of 40.
df = generate_synthetic_mts(
    n_steps=10000,
    n_series=100,
    amplitude=40,
    seed=14531,
)

# Work on first differences; the first row is NaN after diff() and is dropped.
df_ = df.diff().dropna()

# Display the first few rows of the dataset
print(df_.head())

            Series_1  Series_2  Series_3  Series_4  Series_5  Series_6  \
2023-01-02     22.41     15.56     19.87      6.56     10.82     12.09   
2023-01-03      7.10     -1.53      1.20      1.53     -1.22      7.75   
2023-01-04     -2.44      3.53      8.63      6.06      2.85      7.64   
2023-01-05     20.46     23.78     19.72     14.81      2.72     14.96   
2023-01-06     10.06     11.02      7.73     12.76     -4.89      4.61   

            Series_7  Series_8  Series_9  Series_10  ...  Series_91  \
2023-01-02     10.25     10.39     -3.30       2.23  ...      13.68   
2023-01-03     -0.03      9.96     -3.55      11.30  ...     -10.46   
2023-01-04      0.22      2.45     -4.71      10.50  ...       9.18   
2023-01-05      2.57     18.55      6.58       3.43  ...      -6.99   
2023-01-06     -5.07      2.47      8.14       5.50  ...       7.88   

            Series_92  Series_93  Series_94  Series_95  Series_96  Series_97  \
2023-01-02      13.66       7.86      10.00     

# Example 1: Ridge regression (scikit-learn and mlsauce) with CPU and GPU backends

In [9]:
# Baseline: scikit-learn Ridge wrapped in ns.MTS, with the MTS backend on CPU.
regr = Ridge()
obj_MTS = ns.MTS(
    regr,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="cpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting Ridge to multivariate time series... 
 


100%|██████████| 100/100 [01:01<00:00,  1.64it/s]

Elapsed: 64.46652388572693





In [11]:
# Forecast h=10 steps ahead for every series with the model fitted in the previous cell (Ridge, MTS backend "cpu").
obj_MTS.predict(h=10)

Unnamed: 0_level_0,Series_1,Series_2,Series_3,Series_4,Series_5,Series_6,Series_7,Series_8,Series_9,Series_10,...,Series_91,Series_92,Series_93,Series_94,Series_95,Series_96,Series_97,Series_98,Series_99,Series_100
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2050-05-19,-797.65,-13.81,-9.29,-4.87,2.5,-11.09,2.92,-5.26,2.17,-2.35,...,7.38,-6.08,8.95,-7.32,-2.45,13.57,1.21,2.46,-5.99,-1.13
2050-05-20,88.26,-7.44,-5.85,-8.67,8.85,-4.9,3.93,-6.39,-8.37,-6.6,...,2.75,-12.73,4.06,-12.97,-9.14,6.5,-1.08,2.63,-13.85,-12.23
2050-05-21,-10.57,-11.58,-3.1,-8.14,10.25,1.95,10.3,0.67,-2.69,3.19,...,9.92,-10.34,7.5,-8.28,-2.51,10.83,5.2,9.45,-5.04,-3.51
2050-05-22,186.03,-6.36,-4.62,-10.75,9.38,-4.28,4.69,-6.67,-12.6,-2.92,...,2.95,-13.98,5.65,-11.96,-6.58,2.93,2.96,5.43,-10.36,-6.82
2050-05-23,-100.65,-10.53,-3.16,-12.82,4.83,-3.21,4.9,0.56,-12.42,1.26,...,19.08,-0.73,19.08,6.2,9.01,7.7,11.79,10.6,-2.16,-1.75
2050-05-24,76.41,-4.12,3.56,-6.82,12.27,6.4,15.32,6.56,-7.21,7.37,...,9.88,-5.64,8.56,-3.25,3.11,1.55,7.41,8.39,-3.89,-1.11
2050-05-25,-149.78,-5.34,1.71,-8.29,8.03,5.15,8.61,6.2,-8.67,7.72,...,6.22,-6.31,7.98,2.23,6.48,2.26,13.24,13.69,3.61,8.75
2050-05-26,128.25,2.22,10.43,-1.04,12.37,11.2,10.19,6.58,-9.45,9.95,...,7.31,-0.61,7.22,3.9,8.66,-5.34,7.73,7.96,3.92,4.8
2050-05-27,-28.48,3.37,6.69,-3.19,5.74,8.62,8.46,11.76,-5.1,8.82,...,4.53,-0.26,1.14,4.82,8.12,-8.39,8.09,6.49,4.11,5.14
2050-05-28,47.37,4.66,10.35,2.85,6.07,9.97,4.88,9.06,-4.9,10.49,...,7.3,5.92,2.87,9.31,12.6,-7.18,7.94,5.57,8.74,9.76


In [12]:
# Same scikit-learn Ridge model, but with the MTS backend switched to GPU.
regr = Ridge()
obj_MTS = ns.MTS(
    regr,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="gpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting Ridge to multivariate time series... 
 


100%|██████████| 100/100 [00:37<00:00,  2.68it/s]

Elapsed: 40.53069853782654





In [13]:
# Forecast h=10 steps ahead for every series with the model fitted in the previous cell (Ridge, MTS backend "gpu").
obj_MTS.predict(h=10)

Unnamed: 0_level_0,Series_1,Series_2,Series_3,Series_4,Series_5,Series_6,Series_7,Series_8,Series_9,Series_10,...,Series_91,Series_92,Series_93,Series_94,Series_95,Series_96,Series_97,Series_98,Series_99,Series_100
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2050-05-19,-797.73,-13.81,-9.29,-4.87,2.5,-11.09,2.92,-5.26,2.16,-2.35,...,7.38,-6.08,8.95,-7.32,-2.46,13.57,1.21,2.46,-5.99,-1.13
2050-05-20,88.37,-7.44,-5.85,-8.67,8.85,-4.9,3.93,-6.39,-8.37,-6.6,...,2.75,-12.73,4.06,-12.97,-9.14,6.5,-1.08,2.63,-13.85,-12.23
2050-05-21,-10.64,-11.59,-3.11,-8.14,10.25,1.95,10.3,0.66,-2.69,3.19,...,9.92,-10.34,7.5,-8.28,-2.5,10.83,5.2,9.45,-5.04,-3.51
2050-05-22,186.01,-6.36,-4.62,-10.75,9.38,-4.28,4.69,-6.67,-12.6,-2.92,...,2.95,-13.99,5.65,-11.96,-6.58,2.93,2.96,5.43,-10.36,-6.82
2050-05-23,-100.54,-10.53,-3.16,-12.82,4.83,-3.21,4.9,0.56,-12.42,1.25,...,19.09,-0.73,19.08,6.21,9.01,7.7,11.79,10.61,-2.16,-1.75
2050-05-24,76.34,-4.12,3.56,-6.82,12.27,6.4,15.32,6.56,-7.21,7.36,...,9.88,-5.64,8.56,-3.25,3.11,1.55,7.41,8.39,-3.89,-1.11
2050-05-25,-149.74,-5.34,1.71,-8.29,8.03,5.15,8.61,6.2,-8.67,7.72,...,6.22,-6.31,7.98,2.23,6.48,2.26,13.24,13.69,3.61,8.75
2050-05-26,128.23,2.22,10.43,-1.04,12.37,11.2,10.18,6.58,-9.45,9.95,...,7.31,-0.61,7.22,3.9,8.66,-5.34,7.73,7.96,3.92,4.8
2050-05-27,-28.48,3.37,6.7,-3.19,5.74,8.62,8.46,11.76,-5.1,8.82,...,4.53,-0.26,1.14,4.82,8.12,-8.39,8.09,6.49,4.11,5.14
2050-05-28,47.37,4.66,10.34,2.85,6.07,9.97,4.88,9.06,-4.9,10.49,...,7.3,5.92,2.87,9.31,12.6,-7.18,7.94,5.57,8.74,9.76


In [15]:
# mlsauce RidgeRegressor on its CPU backend, wrapped in ns.MTS with the MTS backend on GPU.
regr = ms.RidgeRegressor(reg_lambda=1.0, backend="cpu")
obj_MTS = ns.MTS(
    regr,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="gpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting RidgeRegressor to multivariate time series... 
 


100%|██████████| 100/100 [01:13<00:00,  1.35it/s]

Elapsed: 77.08472633361816





In [16]:
# Forecast h=10 steps ahead for every series with the model fitted in the previous cell (mlsauce RidgeRegressor, backend "cpu").
obj_MTS.predict(h=10)

Unnamed: 0_level_0,Series_1,Series_2,Series_3,Series_4,Series_5,Series_6,Series_7,Series_8,Series_9,Series_10,...,Series_91,Series_92,Series_93,Series_94,Series_95,Series_96,Series_97,Series_98,Series_99,Series_100
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2050-05-19,-797.65,-13.81,-9.29,-4.87,2.5,-11.09,2.92,-5.26,2.17,-2.35,...,7.38,-6.08,8.95,-7.32,-2.45,13.57,1.21,2.46,-5.99,-1.13
2050-05-20,88.26,-7.44,-5.85,-8.67,8.85,-4.9,3.93,-6.39,-8.37,-6.6,...,2.75,-12.73,4.06,-12.97,-9.14,6.5,-1.08,2.63,-13.85,-12.23
2050-05-21,-10.57,-11.58,-3.1,-8.14,10.25,1.95,10.3,0.67,-2.69,3.19,...,9.92,-10.34,7.5,-8.28,-2.51,10.83,5.2,9.45,-5.04,-3.51
2050-05-22,186.03,-6.36,-4.62,-10.75,9.38,-4.28,4.69,-6.67,-12.6,-2.92,...,2.95,-13.98,5.65,-11.96,-6.58,2.93,2.96,5.43,-10.36,-6.82
2050-05-23,-100.65,-10.53,-3.16,-12.82,4.83,-3.21,4.9,0.56,-12.42,1.26,...,19.08,-0.73,19.08,6.2,9.01,7.7,11.79,10.6,-2.16,-1.75
2050-05-24,76.41,-4.12,3.56,-6.82,12.27,6.4,15.32,6.56,-7.21,7.37,...,9.88,-5.64,8.56,-3.25,3.11,1.55,7.41,8.39,-3.89,-1.11
2050-05-25,-149.78,-5.34,1.71,-8.29,8.03,5.15,8.61,6.2,-8.67,7.72,...,6.22,-6.31,7.98,2.23,6.48,2.26,13.24,13.69,3.61,8.75
2050-05-26,128.25,2.22,10.43,-1.04,12.37,11.2,10.19,6.58,-9.45,9.95,...,7.31,-0.61,7.22,3.9,8.66,-5.34,7.73,7.96,3.92,4.8
2050-05-27,-28.48,3.37,6.69,-3.19,5.74,8.62,8.46,11.76,-5.1,8.82,...,4.53,-0.26,1.14,4.82,8.12,-8.39,8.09,6.49,4.11,5.14
2050-05-28,47.37,4.66,10.35,2.85,6.07,9.97,4.88,9.06,-4.9,10.49,...,7.3,5.92,2.87,9.31,12.6,-7.18,7.94,5.57,8.74,9.76


In [17]:
# mlsauce RidgeRegressor on its GPU backend, wrapped in ns.MTS with the MTS backend on GPU as well.
regr = ms.RidgeRegressor(reg_lambda=1.0, backend="gpu")
obj_MTS = ns.MTS(
    regr,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="gpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting RidgeRegressor to multivariate time series... 
 


100%|██████████| 100/100 [00:20<00:00,  4.89it/s]

Elapsed: 23.551459312438965





In [18]:
# Forecast h=10 steps ahead for every series with the model fitted in the previous cell (mlsauce RidgeRegressor, backend "gpu").
obj_MTS.predict(h=10)

Unnamed: 0_level_0,Series_1,Series_2,Series_3,Series_4,Series_5,Series_6,Series_7,Series_8,Series_9,Series_10,...,Series_91,Series_92,Series_93,Series_94,Series_95,Series_96,Series_97,Series_98,Series_99,Series_100
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2050-05-19,-797.68,-13.82,-9.29,-4.88,2.51,-11.09,2.92,-5.26,2.16,-2.35,...,7.39,-6.08,8.95,-7.32,-2.45,13.57,1.21,2.47,-5.99,-1.13
2050-05-20,88.32,-7.44,-5.86,-8.67,8.84,-4.9,3.93,-6.4,-8.36,-6.61,...,2.75,-12.73,4.05,-12.97,-9.15,6.5,-1.09,2.62,-13.85,-12.23
2050-05-21,-10.57,-11.59,-3.1,-8.15,10.26,1.96,10.31,0.67,-2.69,3.19,...,9.93,-10.34,7.5,-8.28,-2.5,10.83,5.21,9.46,-5.04,-3.5
2050-05-22,186.01,-6.36,-4.62,-10.74,9.37,-4.29,4.68,-6.68,-12.59,-2.93,...,2.94,-13.98,5.64,-11.96,-6.58,2.93,2.95,5.42,-10.37,-6.83
2050-05-23,-100.73,-10.54,-3.16,-12.83,4.84,-3.2,4.9,0.56,-12.43,1.26,...,19.09,-0.73,19.09,6.21,9.01,7.7,11.8,10.62,-2.16,-1.75
2050-05-24,76.47,-4.11,3.56,-6.81,12.27,6.4,15.32,6.56,-7.21,7.37,...,9.87,-5.63,8.56,-3.24,3.11,1.55,7.41,8.38,-3.89,-1.11
2050-05-25,-149.91,-5.35,1.71,-8.29,8.02,5.14,8.6,6.2,-8.67,7.71,...,6.22,-6.31,7.98,2.23,6.48,2.26,13.23,13.69,3.62,8.75
2050-05-26,128.29,2.22,10.43,-1.04,12.37,11.2,10.19,6.58,-9.45,9.94,...,7.31,-0.61,7.22,3.9,8.66,-5.34,7.73,7.95,3.92,4.8
2050-05-27,-28.41,3.37,6.7,-3.18,5.74,8.62,8.46,11.76,-5.1,8.82,...,4.53,-0.26,1.13,4.83,8.12,-8.4,8.09,6.49,4.11,5.14
2050-05-28,47.62,4.66,10.35,2.85,6.08,9.98,4.89,9.06,-4.9,10.49,...,7.3,5.92,2.87,9.32,12.61,-7.18,7.94,5.57,8.74,9.76


# Example 2: XGBoost regressors (default vs. `gpu_hist` tree method) with CPU and GPU backends

In [7]:
import xgboost as xgb

# xgb1: XGBRegressor constructed with no arguments (library defaults).
xgb1 = xgb.XGBRegressor()

# xgb2: XGBRegressor using the "gpu_hist" tree method.
# NOTE(review): XGBoost >= 2.0 deprecates tree_method="gpu_hist" in favour of
# tree_method="hist" with device="cuda" -- confirm against the installed version.
xgb2 = xgb.XGBRegressor(tree_method="gpu_hist")

In [8]:
# xgb1 (default XGBRegressor) wrapped in ns.MTS, with the MTS backend on CPU.
obj_MTS = ns.MTS(
    xgb1,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="cpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting XGBRegressor to multivariate time series... 
 


100%|██████████| 100/100 [54:42<00:00, 32.82s/it]

Elapsed: 3285.469851732254





In [9]:
# xgb1 (default XGBRegressor) wrapped in ns.MTS, with the MTS backend on GPU.
obj_MTS = ns.MTS(
    xgb1,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="gpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting XGBRegressor to multivariate time series... 
 


100%|██████████| 100/100 [54:23<00:00, 32.64s/it]

Elapsed: 3266.556359767914





In [10]:
# xgb2 (XGBRegressor with tree_method="gpu_hist") wrapped in ns.MTS, with the MTS backend on CPU.
obj_MTS = ns.MTS(
    xgb2,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="cpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting XGBRegressor to multivariate time series... 
 


100%|██████████| 100/100 [04:06<00:00,  2.46s/it]

Elapsed: 249.40361714363098





In [11]:
# xgb2 (XGBRegressor with tree_method="gpu_hist") wrapped in ns.MTS, with the MTS backend on GPU.
obj_MTS = ns.MTS(
    xgb2,
    lags=15,
    n_hidden_features=5,
    nodes_sim="uniform",
    backend="gpu",
    verbose=1,
)

# Wall-clock timing of the fit on the differenced data.
start = time()
obj_MTS.fit(df_)
print(f"Elapsed: {time()-start}")


 Adjusting XGBRegressor to multivariate time series... 
 


100%|██████████| 100/100 [04:06<00:00,  2.46s/it]

Elapsed: 249.10436272621155



