# Imports

In [0]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
import math
import warnings
warnings.simplefilter('ignore', np.RankWarning)
from logic import *
import ast

# Widgets

In [0]:
# Boolean job switches are exposed as "True"/"False" dropdowns, all defaulting to "False".
for _flag_widget in ("TS_FeatureEng", "TS_store_split_raw", "TS_store_feature_eng"):
    dbutils.widgets.dropdown(_flag_widget, "False", ["True", "False"], label=None)

# Free-text parameters: the series identifier and the optional modified-series literal.
dbutils.widgets.text("SerieNumber", defaultValue="0")
dbutils.widgets.text("Modified_serie", defaultValue="False")

In [0]:
# Widget values arrive as strings; a flag is on only when the text is exactly 'True'.
TS_FeatureEng = dbutils.widgets.get("TS_FeatureEng") == 'True'
TS_store_split_raw = dbutils.widgets.get("TS_store_split_raw") == 'True'
TS_store_feature_eng = dbutils.widgets.get("TS_store_feature_eng") == 'True'

SerieNumber = dbutils.widgets.get('SerieNumber')

# Modified_serie is parsed as a Python literal: a falsy value (e.g. False) disables it,
# otherwise its first two items are used as column names further down the notebook.
_modified_serie_text = dbutils.widgets.get('Modified_serie')
Modified_serie = ast.literal_eval(_modified_serie_text)

# Notebook validation

When running the job (workflow) this block checks if the rest of the notebook should be executed

In [0]:
# Create a random identifier for this workflow run, make its experiment folder
# and publish the identifier to downstream tasks under the key "job_reference".
from logic import generate_random_string

_random_string_ = generate_random_string(7)
print(_random_string_)

_experiment_dir = f'/mnt/automated_mounts_sas/0juliostoragetest/julio/master_data/Experiments_info/{_random_string_}'
dbutils.fs.mkdirs(_experiment_dir)
dbutils.jobs.taskValues.set(key="job_reference", value=_random_string_)

In [0]:
# Stop the notebook here (exit value "Skipping") unless the TS_FeatureEng widget is "True".
if not TS_FeatureEng:
    dbutils.notebook.exit("Skipping")

# Verifying setup

TS_store_split_raw -> tells if we are storing the split time-series (without engineered features) in the container

TS_store_feature_eng -> tells if we are storing the split time series (with engineered features) in the container

Subsequent tasks (like the regression run) read the stored data, so setting TS_store_feature_eng to false is recommended only when troubleshooting in the notebook.

In [0]:
# Echo the storage switches so the run log shows what will be written to the container.
for _flag_label, _flag_state in (
    ("TS_store_feature_eng", TS_store_feature_eng),
    ("TS_store_split_raw", TS_store_split_raw),
):
    print(f"{_flag_label}: {_flag_state}")

# Feature engineering

In [0]:
# Folder-name suffix: empty for the plain series, "-<col_a>-<col_b>" when two columns are combined.
decorator = f"-{Modified_serie[0]}-{Modified_serie[1]}" if Modified_serie else ""

In [0]:
# Show the folder-name suffix derived from Modified_serie (empty string when it is falsy).
print(decorator)

## Definition

In [0]:
class timeseries_solver:
    """Expanding-window splitter and feature engineer for one time series.

    Typical use (as done further down in this notebook)::

        solver = timeseries_solver(df, serie_number)
        solver.expanding_split(test_size, train_size)
        solver.feature_eng()
        solver.feature_eng_no_season()

    The input frame must contain an ``observation`` column. The class reads the
    notebook-level globals ``TS_store_split_raw``, ``TS_store_feature_eng``,
    ``decorator`` and ``dbutils`` and the helpers imported from ``logic``
    (``add_repeating_sequence_column``, ``add_columns_dummy``,
    ``compute_rolling_avg/std/min/max``, ``reorder_df``).
    """

    # Root folder under which every per-series artefact is stored.
    STORAGE_ROOT = '/mnt/automated_mounts_sas/0juliostoragetest/julio/master_data'

    # Length of the rolling windows; the same number of leading rows is discarded
    # afterwards because their rolling features are undefined.
    ROLLING_WINDOW = 20

    # (column name, smoothing factor) of the exponential moving averages.
    EMA_ALPHAS = (("EMA-1", 0.1), ("EMA-2", 0.45), ("EMA-3", 0.9))

    # Thresholds handed to scipy.signal.find_peaks on the FFT magnitude.
    FFT_PEAK_MIN_HEIGHT = 100
    FFT_PEAK_MIN_DISTANCE = 10

    # Number of dummy columns requested from add_columns_dummy per sequence column.
    N_DUMMY_COLS_SEQ_1 = 5
    N_DUMMY_COLS_SEQ_2 = 5

    # Columns kept by feature_eng_no_season.
    NO_SEASON_COLUMNS = [
        "observation", "index", "Last-observation", "SMA-1",
        "EMA-1", "EMA-2", "EMA-3", "std-1", "last_max", "last_min",
    ]

    def __init__(self, _df_, _SerieNumber_):
        """Store the raw series and initialise every result holder.

        Args:
            _df_: DataFrame holding the series (an ``observation`` column is required
                by the other methods).
            _SerieNumber_: identifier used to build the storage folder name.
        """
        self.df = _df_
        self.df_engineered = 0
        self.list_train = 0
        self.list_test = 0

        self.y_train_list_expanding = 0
        self.x_train_list_expanding = 0
        self.y_test_list_expanding = 0
        self.x_test_list_expanding = 0

        self.regressor_list = []
        self.y_pred_list = []

        # One list of detected peak frequencies per window (filled by feature_eng).
        self.frequencies_calculated = []

        self.SerieNumber = _SerieNumber_

        # No season -> for comparison
        self.regressor_list_no_season = []
        self.y_pred_list_no_season = []

        self.list_train_no_season = []
        self.list_test_no_season = []

        self.y_train_list_no_season_expanding = []
        self.x_train_list_no_season_expanding = []
        self.y_test_list_no_season_expanding = []
        self.x_test_list_no_season_expanding = []

        self.list_of_models = []

        # Error metrics
        self.no_season_mae = 0
        self.no_season_mse = 0
        self.no_season_rmse = 0
        self.no_season_r2 = 0
        self.no_season_smape = 0

        self.naive_mae = 0
        self.naive_mse = 0
        self.naive_rmse = 0
        self.naive_r2 = 0
        self.naive_smape = 0

        self.regression_mae = 0
        self.regression_mse = 0
        self.regression_rmse = 0
        self.regression_r2 = 0
        self.regression_smape = 0

        # NOTE(review): not read anywhere in this class and, unlike STORAGE_ROOT,
        # it has no '/julio' segment -- kept unchanged for external readers.
        self.s_path = '/mnt/automated_mounts_sas/0juliostoragetest/master_data'

    # ------------------------------------------------------------------ storage

    def _series_dir(self):
        """Return the storage folder of this series (includes the global ``decorator``)."""
        return f'{self.STORAGE_ROOT}/SerieNumber_{self.SerieNumber}{decorator}'

    def _reset_storage_dirs(self, *subdirs):
        """Recursively delete, then recreate, each sub-folder of the series folder."""
        base_dir = self._series_dir()
        for subdir in subdirs:
            dbutils.fs.rm(f'{base_dir}/{subdir}/', True)
        for subdir in subdirs:
            dbutils.fs.mkdirs(f'{base_dir}/{subdir}')

    def _csv_path(self, subdir, window):
        """Return the local (``/dbfs``-prefixed) CSV path of one window in ``subdir``."""
        return f"/dbfs{self._series_dir()}/{subdir}/window_{window}.csv"

    # -------------------------------------------------------------------- split

    def expanding_split(self, test_size, train_size):
        """Split ``self.df`` into expanding train windows and their test windows.

        Window ``w`` trains on positions ``0 .. w*test_size + train_size - 1`` and
        tests on the next ``test_size`` positions. Rows are selected by matching
        those numbers against the frame's index values, which are copied into an
        ``index`` column. Results go to ``self.list_train`` / ``self.list_test``;
        when ``TS_store_split_raw`` is true each window is also written as CSV.

        Args:
            test_size: forecast horizon, i.e. number of points in each test window.
            train_size: number of points in the first (smallest) train window.
        """
        df_length = self.df.shape[0]
        n_windows = math.floor((df_length - train_size) / test_size)

        self.df = self.df.copy()
        self.df['index'] = self.df.index

        if TS_store_split_raw:
            self._reset_storage_dirs('train_set', 'test_set')

        train_frames = []
        test_frames = []
        # range() is empty when n_windows <= 0, so a too-short series yields no windows.
        for window in range(n_windows):
            train_end = window * test_size + train_size   # exclusive
            test_end = train_end + test_size              # exclusive

            train_frame = self.df[self.df['index'].isin(np.arange(0, train_end))]
            test_frame = self.df[self.df['index'].isin(np.arange(train_end, test_end))]

            train_frames.append(train_frame)
            if TS_store_split_raw:
                train_frame.to_csv(self._csv_path('train_set', window))

            test_frames.append(test_frame)
            if TS_store_split_raw:
                test_frame.to_csv(self._csv_path('test_set', window))

        self.list_train = train_frames
        self.list_test = test_frames

    # ----------------------------------------------------------------- features

    def _add_lag_features(self, frame):
        """Add the non-seasonal features to ``frame`` and drop the warm-up rows.

        Every feature is shifted by one row so a row only sees earlier
        observations. ``frame`` is modified in place before being trimmed; the
        trimmed copy, extended with the two detrended helper columns, is returned.
        """
        window = self.ROLLING_WINDOW
        observed = frame['observation']

        frame['Last-observation'] = observed.shift(1)
        frame['SMA-1'] = observed.rolling(window=window).mean().shift(1)
        for column, alpha in self.EMA_ALPHAS:
            frame[column] = observed.ewm(alpha=alpha, adjust=False).mean().shift(1)
        frame['std-1'] = observed.rolling(window=window).std().shift(1)
        frame['last_max'] = observed.rolling(window=window).max().shift(1)
        frame['last_min'] = observed.rolling(window=window).min().shift(1)

        # Truncating here may remove the rows holding the window's max/min.
        frame = frame.iloc[window:].copy()

        frame['observation_detrended'] = frame['Last-observation'] - frame['SMA-1']
        # Shift the detrended signal so that its minimum sits at +10.
        frame['observation_detrended_non_neg'] = (
            frame['observation_detrended'] - min(frame.observation_detrended) + 10
        )
        return frame

    def _detect_peak_frequencies(self, signal):
        """Return the positive FFT peak frequencies of ``signal``.

        Frequencies are expressed in cycles per 1000 samples, so
        ``round(1000 / f)`` is the matching period in samples.
        """
        n_points = len(signal)
        spectrum = np.abs(np.fft.fft(signal))
        freq = np.fft.fftfreq(n_points, 1.0 / n_points) * 1000 / n_points

        peaks, _ = find_peaks(
            spectrum,
            height=self.FFT_PEAK_MIN_HEIGHT,
            distance=self.FFT_PEAK_MIN_DISTANCE,
        )
        return [f for f in freq[peaks] if f > 0]

    def _add_sequence_features(self, frame, peak_frequencies):
        """Add the repeating-sequence, dummy and seasonal columns for one window.

        The first detected frequency drives ``Repeating_Sequence`` and the second
        one ``Repeating_Sequence_2``; with no detected frequency nothing is added.
        Returns the resulting frame (a new object once dummies are added).
        """
        has_first = len(peak_frequencies) >= 1
        has_second = len(peak_frequencies) >= 2

        # The return value is ignored, so these helpers presumably add the column
        # in place -- confirm against logic.add_repeating_sequence_column.
        if has_first:
            add_repeating_sequence_column(round(1000 / peak_frequencies[0]), frame)
        if has_second:
            add_repeating_sequence_column(
                round(1000 / peak_frequencies[1]), frame, column_name="Repeating_Sequence_2"
            )

        if has_first:
            frame = add_columns_dummy(frame.copy(), self.N_DUMMY_COLS_SEQ_1, "Repeating_Sequence")
        if has_second:
            # Uses the second sequence's own count (the test side used to pass the first one).
            frame = add_columns_dummy(frame.copy(), self.N_DUMMY_COLS_SEQ_2, "Repeating_Sequence_2")

        self._add_seasonal_features(frame)
        return frame

    @staticmethod
    def _fill_seasonal_columns(frame, group_col):
        """Compute the per-group rolling statistics of ``observation`` into ``frame``."""

        def rolled(column, func, window):
            # Assign first (aligned on the index), then shift in row order, so a
            # row never sees its own observation.
            frame[column] = frame.groupby(group_col)['observation'].apply(
                lambda group: func(group, window)
            )
            frame[column] = frame[column].shift(1)

        rolled('seasonal_avg_2', compute_rolling_avg, 2)
        rolled('seasonal_avg_3', compute_rolling_avg, 3)
        frame['seasonal_avg_diff'] = frame['seasonal_avg_2'] - frame['seasonal_avg_3']

        rolled('seasonal_std_2', compute_rolling_std, 2)
        rolled('seasonal_std_3', compute_rolling_std, 3)
        frame['seasonal_std_diff'] = frame['seasonal_std_2'] - frame['seasonal_std_3']

        rolled('seasonal_min', compute_rolling_min, 3)
        rolled('seasonal_max', compute_rolling_max, 3)

    def _add_seasonal_features(self, frame):
        """Add seasonal statistics in place, preferring ``Repeating_Sequence_2``.

        The second sequence is tried first to keep the shorter period; the first
        one is the fallback. A missing column is skipped silently; any other
        failure is reported as a warning instead of being swallowed, and the
        next candidate is tried (best effort, as before).
        """
        for group_col in ("Repeating_Sequence_2", "Repeating_Sequence"):
            if group_col not in frame.columns:
                continue
            try:
                self._fill_seasonal_columns(frame, group_col)
                return
            except Exception as exc:
                warnings.warn(
                    f"Seasonal features grouped by '{group_col}' could not be computed: {exc!r}"
                )

    def feature_eng(self):
        """Engineer the features of every train/test window produced by expanding_split.

        For each window the train frame receives lagged, rolling and (when FFT
        peaks are found) seasonal features; rows containing nulls are dropped
        and the columns are reordered by ``reorder_df``. The test frame is built
        from train + test concatenated, so its features only use earlier
        observations. ``self.list_train`` / ``self.list_test`` are replaced
        window by window and ``self.frequencies_calculated`` holds one frequency
        list per window. When ``TS_store_feature_eng`` is true the results are
        written as CSV.
        """
        # Start from a clean list so that frequencies_calculated[i] always
        # belongs to window i, even when this method is called more than once.
        self.frequencies_calculated = []

        helper_columns = ['observation_detrended', 'observation_detrended_non_neg']

        for i in range(len(self.list_train)):
            raw_train = self.list_train[i]
            raw_test = self.list_test[i]

            # ---- train window
            train_partial = self._add_lag_features(raw_train.copy())

            peak_frequencies = self._detect_peak_frequencies(
                train_partial["observation_detrended_non_neg"]
            )
            self.frequencies_calculated.append(peak_frequencies)

            train_partial = self._add_sequence_features(train_partial, peak_frequencies)

            # Remove the rows containing nulls (from the rolling seasonal calculations).
            train_partial = train_partial.dropna()
            train_partial = reorder_df(train_partial)

            # ---- test window: same pipeline on train + test, using the train frequencies
            result = pd.concat([raw_train, raw_test], ignore_index=True, sort=False)
            result = self._add_lag_features(result)

            # Drop every column with 'SI' in its name.
            result = result.drop(columns=result.filter(like='SI').columns)

            result = self._add_sequence_features(result, peak_frequencies)

            # NOTE(review): only the last row is kept, which is the whole test
            # window only when test_size == 1 -- confirm for larger horizons.
            test_row = result.tail(1)

            # Move the 'Repeating_Sequence*' columns to the end.
            sequence_columns = [col for col in test_row.columns if 'Repeating_Sequence' in col]
            other_columns = [col for col in test_row.columns if 'Repeating_Sequence' not in col]
            test_row = test_row[other_columns + sequence_columns]

            # Remove the helper columns that were only needed for the FFT.
            for column in helper_columns:
                train_partial = train_partial.drop(column, axis=1)
                test_row = test_row.drop(column, axis=1)

            self.list_train[i] = train_partial
            self.list_test[i] = test_row

        test_list = self.list_test.copy()
        train_list = self.list_train.copy()

        if TS_store_feature_eng:
            self._reset_storage_dirs('train_set_featured', 'test_set_featured')

            for i in range(len(test_list)):
                test_list[i].to_csv(self._csv_path('test_set_featured', i))
                train_list[i].to_csv(self._csv_path('train_set_featured', i))

    def feature_eng_no_season(self):
        """Build the non-seasonal view of every engineered window.

        Keeps only ``NO_SEASON_COLUMNS`` of each frame in ``self.list_train`` /
        ``self.list_test`` (so ``feature_eng`` must have run before) and stores
        the results in ``self.list_train_no_season`` / ``self.list_test_no_season``.
        When ``TS_store_feature_eng`` is true the frames are also written as CSV.
        """
        if TS_store_feature_eng:
            self._reset_storage_dirs('train_set_featured_no_season', 'test_set_featured_no_season')

        # Rebuild from scratch so a repeated call does not leave stale windows
        # at the front of the lists.
        self.list_train_no_season = []
        self.list_test_no_season = []

        for i in range(len(self.list_train)):
            self.list_train_no_season.append(self.list_train[i][self.NO_SEASON_COLUMNS])
            self.list_test_no_season.append(self.list_test[i][self.NO_SEASON_COLUMNS])

            if TS_store_feature_eng:
                self.list_train_no_season[i].to_csv(self._csv_path('train_set_featured_no_season', i))
                self.list_test_no_season[i].to_csv(self._csv_path('test_set_featured_no_season', i))

In [0]:
# Load the input series. The same CSV is read whether or not a modified series is
# requested, so it is read once; with Modified_serie set, the observation becomes
# the sum of the two named columns.
_input_csv_path = f"/dbfs/mnt/automated_mounts_sas/0juliostoragetest/julio/master_data/Input/input_{SerieNumber}.csv"
traintest_df = pd.read_csv(_input_csv_path, index_col=0)
if Modified_serie:
    traintest_df["observation"] = traintest_df[Modified_serie[0]] + traintest_df[Modified_serie[1]]

# Feature engineer just the 1st Serie
traintest_df = traintest_df[["observation"]]

# Expanding-window setup: forecast one step ahead, starting from 40 training points.
_forecast_horizon = 1
_initial_train_size = 40

serie_solv = timeseries_solver(traintest_df, SerieNumber)
serie_solv.expanding_split(_forecast_horizon, _initial_train_size)
serie_solv.feature_eng()
serie_solv.feature_eng_no_season()

In [0]:
# One row per window, one column (f1, f2, ...) per detected peak frequency.
# Windows with fewer peaks than the widest one are padded with missing values.
# default=0 keeps this cell working when no window was produced at all
# (max() over an empty sequence would otherwise raise ValueError).
max_length = max((len(lst) for lst in serie_solv.frequencies_calculated), default=0)

columns_freq = [f"f{position + 1}" for position in range(max_length)]
freq_calculated = pd.DataFrame(serie_solv.frequencies_calculated, columns=columns_freq)
# freq_calculated

In [0]:
# Store the detected frequencies: the Feature_eng folder is wiped and recreated
# before the CSV is written through the /dbfs local mount.
_feature_eng_dir = f'/mnt/automated_mounts_sas/0juliostoragetest/julio/master_data/SerieNumber_{SerieNumber}{decorator}/Feature_eng'
dbutils.fs.rm(f'{_feature_eng_dir}/', True)
dbutils.fs.mkdirs(_feature_eng_dir)
freq_calculated.to_csv(f"/dbfs{_feature_eng_dir}/freq_calculated.csv")
