In [2]:
import pandas as pd
import numpy as np
import sys
import os
# Make modules that live under /workspaces/flask_server importable from this notebook.
# NOTE(review): the absolute path is environment-specific, and every re-run of this
# cell appends the same entry to sys.path again.
sys.path.append(os.path.abspath('/workspaces/flask_server'))


In [4]:
# Load the univariate example series. The first CSV column becomes the index
# (index_col=0) and pandas is asked to parse that index as dates (parse_dates=True).
# NOTE(review): hard-coded absolute path — only resolves inside this workspace layout.
df = pd.read_csv('/workspaces/flask_server/test/data/candy_production.csv', index_col=0, parse_dates=True)
# Preview the first rows to confirm the index and the single value column.
df.head()

Unnamed: 0_level_0,IPG3113N
observation_date,Unnamed: 1_level_1
1972-01-01,85.6945
1972-02-01,71.82
1972-03-01,66.0229
1972-04-01,64.5645
1972-05-01,65.01


In [5]:
# Column-less frame that reuses df's index. No later cell visible in this notebook
# reads df_new — NOTE(review): looks like leftover scaffolding; confirm before removing.
df_new = pd.DataFrame(index=df.index)
df_new

1972-01-01
1972-02-01
1972-03-01
1972-04-01
1972-05-01
...
2017-04-01
2017-05-01
2017-06-01
2017-07-01
2017-08-01


In [3]:

def compute_sma(df_arg, ts_type="univariate", window_sizes=(5, 10, 20)):
    """Return a copy of ``df_arg`` with simple-moving-average (SMA) columns appended.

    Parameters
    ----------
    df_arg : pandas.DataFrame
        Input time series. It is never modified; all new columns are written
        to a deep copy.
    ts_type : str, default "univariate"
        ``"univariate"`` averages only the last column of ``df_arg`` and names
        the results ``target_sma_<window>``. Any other value is treated as the
        multivariate case: every column is averaged and the results are named
        ``<column>_sma_<window>``.
    window_sizes : iterable of int, or a single int, default (5, 10, 20)
        Rolling-window lengths. One set of SMA columns is produced per window,
        in the order given. A bare integer is treated as a single window.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the SMA columns. With pandas' default
        ``min_periods`` the first ``window - 1`` rows of each SMA column are NaN.

    Raises
    ------
    IndexError
        In the univariate case when ``df_arg`` has no columns (and at least
        one window size is given).
    """
    # Local import keeps this cell self-contained; numpy integer scalars are
    # registered as Integral, so np.int64 windows are recognised as well.
    from numbers import Integral

    if isinstance(window_sizes, Integral):
        window_sizes = (window_sizes,)

    # df_arg is only read below, so a single deep copy (the result) is enough.
    sma_df = df_arg.copy(deep=True)
    univariate = ts_type == "univariate"

    for window_size in window_sizes:
        if univariate:
            # Always read from df_arg: the last column of sma_df changes as
            # SMA columns are appended.
            target = df_arg.iloc[:, -1]
            sma_df[f"target_sma_{window_size}"] = target.rolling(window=window_size).mean()
        else:
            # Positional access keeps this correct even with duplicate column labels.
            for position, column_name in enumerate(df_arg.columns):
                series = df_arg.iloc[:, position]
                sma_df[f"{column_name}_sma_{window_size}"] = series.rolling(window=window_size).mean()

    return sma_df

In [6]:
# Step 2 (univariate): run compute_sma on the candy-production frame loaded above.
# In univariate mode only the last column is averaged, giving one target_sma_<window>
# column per window size.
print("Univariate SMA Calculation:")
univariate_result = compute_sma(df, ts_type="univariate", window_sizes=[5, 10, 20])
# The first (window - 1) rows of each SMA column are NaN, so head() shows mostly NaN.
print(univariate_result.head())


Univariate SMA Calculation:
                  IPG3113N  target_sma_5  target_sma_10  target_sma_20
observation_date                                                      
1972-01-01         85.6945           NaN            NaN            NaN
1972-02-01         71.8200           NaN            NaN            NaN
1972-03-01         66.0229           NaN            NaN            NaN
1972-04-01         64.5645           NaN            NaN            NaN
1972-05-01         65.0100      70.62238            NaN            NaN


In [8]:
# Load the multivariate example (Open/High/Low/Close/Adj Close/Volume per the preview
# below). The first CSV column becomes the index and is parsed as dates.
# NOTE(review): hard-coded absolute path — only resolves inside this workspace layout.
df_multi = pd.read_csv('/workspaces/flask_server/test/data/apple.csv', index_col = 0, parse_dates=True)
df_multi.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-20,184.410004,186.100006,184.410004,185.009995,184.282257,49799100
2023-06-21,184.899994,185.410004,182.589996,183.960007,183.236404,49515700
2023-06-22,183.740005,187.050003,183.669998,187.0,186.264435,51245300
2023-06-23,185.550003,187.559998,185.009995,186.679993,185.945679,53079300
2023-06-26,186.830002,188.050003,185.229996,185.270004,184.54126,48088700


In [10]:
# Step 2 (multivariate): any ts_type other than "univariate" makes compute_sma average
# every column, producing <column>_sma_<window> for each column/window pair.
print("\nMultivariate SMA Calculation:")
multivariate_result = compute_sma(df_multi, ts_type="multivariate", window_sizes=[5, 10, 20])
# Rich display of the first rows; SMA columns stay NaN until a full window is available.
multivariate_result.head()


Multivariate SMA Calculation:


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Open_sma_5,High_sma_5,Low_sma_5,Close_sma_5,...,Low_sma_10,Close_sma_10,Adj Close_sma_10,Volume_sma_10,Open_sma_20,High_sma_20,Low_sma_20,Close_sma_20,Adj Close_sma_20,Volume_sma_20
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-20,184.410004,186.100006,184.410004,185.009995,184.282257,49799100,,,,,...,,,,,,,,,,
2023-06-21,184.899994,185.410004,182.589996,183.960007,183.236404,49515700,,,,,...,,,,,,,,,,
2023-06-22,183.740005,187.050003,183.669998,187.0,186.264435,51245300,,,,,...,,,,,,,,,,
2023-06-23,185.550003,187.559998,185.009995,186.679993,185.945679,53079300,,,,,...,,,,,,,,,,
2023-06-26,186.830002,188.050003,185.229996,185.270004,184.54126,48088700,185.086002,186.834003,184.181998,185.584,...,,,,,,,,,,
