# Packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import pandas as pd
import pickle
import tensorflow as tf
import seaborn as sns
import scipy as sp

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, TimeSeriesSplit, train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from tsfresh.feature_selection.selection import select_features

from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh import extract_features, select_features,extract_relevant_features
from tsfresh.utilities.dataframe_functions import impute,make_forecasting_frame
from tsfresh.feature_extraction import settings, EfficientFCParameters,MinimalFCParameters 

from tqdm import tqdm

In [3]:
from utils.model_summary_functions import feature_importance, metrics, summary_plot,multivariate_importance
from utils.plotting import plot_autocorrelation

In [4]:
import warnings # Used to silence warning messages that would otherwise show up in the outputs
# Ignore every FutureWarning raised from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# Start from matplotlib's default style, then override the settings below.
plt.style.use('default')

# Fonts and text colours.
plt.rcParams.update({
    'font.size': 16,
    'font.family': 'Arial',
    'text.color': 'black',
})

# Axes frame, titles, labels and grid.
plt.rcParams.update({
    'axes.linewidth': 2,
    'axes.titlesize': 20,
    'axes.edgecolor': 'black',
    'axes.labelsize': 20,
    'axes.labelcolor': 'black',
    'axes.grid': True,
})

# Lines, figure size and tick labels.
plt.rcParams.update({
    'lines.linewidth': 1.5,
    'lines.markersize': 6,
    'figure.figsize': (15, 6),
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

# Legend box.
plt.rcParams.update({
    'legend.fontsize': 13,
    'legend.framealpha': 1,
    'legend.edgecolor': 'black',
    'legend.shadow': False,
    'legend.fancybox': True,
    'legend.frameon': True,
})


In [6]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
# Iterating over the list (rather than indexing element 0) makes this a no-op on
# machines without a GPU and applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. the cell is re-run
    # after TensorFlow has been used); report it instead of aborting the notebook.
    print(err)

# Importing the reduced dataset

In [7]:
# Load the reduced dataset from a zipped CSV; the path is relative to the
# notebook's working directory. Each column is later treated as one eigenvector
# time series by the dataset-generation functions below.
df = pd.read_csv('Processed_datasets/reduced_dataset.csv.zip')
# Bare expression: renders the frame as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,83,84,85,86,87,88,89,90,91,92
0,5.472871,1.217982,6.230086,-0.950462,0.863161,-1.352218,4.725135,-4.429206,1.363932,-2.402084,...,0.042324,-0.628976,0.303736,1.050924,-0.319772,0.452098,-0.454401,-0.395371,-0.434333,-0.886681
1,5.456736,1.030217,6.275587,-0.838791,0.975329,-1.566040,4.830630,-4.338888,1.385022,-2.258257,...,0.149707,-0.568629,0.224101,0.962598,-0.370399,0.411446,-0.487105,-0.412622,-0.438104,-0.947290
2,5.456897,0.833749,6.308572,-0.730643,1.086719,-1.799117,4.927997,-4.225563,1.400620,-2.127380,...,0.244525,-0.515302,0.145613,0.862979,-0.416856,0.365906,-0.515017,-0.417493,-0.444786,-1.012555
3,5.472629,0.629589,6.327787,-0.628478,1.196396,-2.050700,5.015677,-4.089995,1.407393,-2.009958,...,0.322812,-0.469722,0.069574,0.750927,-0.458162,0.317323,-0.536380,-0.407113,-0.455174,-1.079685
4,5.501610,0.419481,6.331751,-0.534753,1.303558,-2.319721,5.092144,-3.933463,1.401821,-1.906113,...,0.380772,-0.431966,-0.002469,0.625161,-0.493436,0.267659,-0.549715,-0.378990,-0.469812,-1.145273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,-4.149586,1.311779,-1.733645,2.995828,-9.173312,2.279188,-0.460554,2.574104,1.066336,-3.507364,...,-0.038440,-0.516034,-0.068059,-1.804518,0.456283,0.363398,0.108329,-0.124854,-0.227933,0.457089
5996,-3.869611,1.194403,-1.635748,3.000731,-9.144402,2.275107,-0.266543,2.343572,1.135160,-3.420423,...,-0.124407,-0.503931,-0.087439,-1.899951,0.439299,0.353236,0.017841,-0.117174,-0.337113,0.505647
5997,-3.593313,1.100201,-1.565852,3.003578,-9.102322,2.267642,-0.078799,2.113092,1.207276,-3.346010,...,-0.207155,-0.505051,-0.082344,-1.931616,0.433744,0.329172,-0.068791,-0.108082,-0.390954,0.545995
5998,-3.312633,1.021631,-1.519267,3.001866,-9.045092,2.256996,0.095221,1.893864,1.292517,-3.293154,...,-0.283526,-0.518721,-0.052140,-1.902229,0.440545,0.296793,-0.149900,-0.107087,-0.395280,0.572288


In [8]:
# Pick one eigenvector time series to experiment with. The header of the frame
# displayed above lists columns 0-92, so this is one of 93 columns.
eigenvector = 90
series = df.iloc[:, eigenvector].to_frame()  # keep it as a single-column DataFrame
series

Unnamed: 0,90
0,-0.395371
1,-0.412622
2,-0.417493
3,-0.407113
4,-0.378990
...,...
5995,-0.124854
5996,-0.117174
5997,-0.108082
5998,-0.107087


# Feature extraction - Tsfresh

In [9]:
def correlation(dataset, threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The pairwise correlation matrix of ``dataset`` is computed with
    ``DataFrame.corr()``. A column is reported when the absolute value of its
    correlation with at least one column positioned before it is strictly
    greater than ``threshold``. The earlier column of such a pair is not
    reported on account of that pair, so dropping the returned names keeps the
    first column of each correlated pair. Nothing is removed here; the caller
    drops the returned names.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table whose columns are compared pairwise.
    threshold : float
        Absolute correlation above which a column is reported.

    Returns
    -------
    set
        Labels of the reported columns.
    """
    abs_corr = dataset.corr().abs()
    labels = abs_corr.columns
    # Row ``pos`` restricted to its first ``pos`` entries is the strictly lower
    # triangle, i.e. the correlations with every column that comes earlier.
    return {
        labels[pos]
        for pos in range(len(labels))
        if (abs_corr.iloc[pos, :pos] > threshold).any()
    }

In [10]:
def create_input_dataset(eigenvector,MEMORY=20,HORIZON=1):
    """Build a tsfresh feature matrix and a shifted target for one time series.

    Steps: roll the series into windows, extract tsfresh features, drop
    constant and highly correlated features, fit a gradient-boosting model on
    the remaining features, then refit it on the subset kept by a
    model-based selector. The test-set MSE of both fits is recorded.

    Parameters
    ----------
    eigenvector : pandas.DataFrame
        Only its first column is used as the time series.
    MEMORY : int, default 20
        Passed to ``make_forecasting_frame`` as ``max_timeshift``.
    HORIZON : int, default 1
        The target is the series shifted by ``-HORIZON``.

    Returns
    -------
    tuple
        ``(X, y)``: the feature matrix after the constant/correlation filters
        (NOT restricted to the model-selected columns) and the target as a
        1-D numpy array.
        NOTE(review): returning ``X`` rather than ``X_final`` is kept from the
        original code - confirm this is intended.

    Side effects
    ------------
    Prints progress and shapes, calls the project helper ``metrics`` on the
    final predictions, and appends the two MSE values to the module-level
    lists ``memory_list_before_fs`` / ``memory_list_after_fs``. The lists are
    created on first use if the driver cell has not defined them, so the
    function also works on a fresh kernel.
    """
    # Extract the time series from the single-column dataframe
    time_series = eigenvector[eigenvector.columns[0]]

    # Create the rolled input windows (the one-step target returned by tsfresh is not used)
    df_shift, _ = make_forecasting_frame(time_series,
                                          kind='Eigenvector',
                                          max_timeshift=MEMORY,
                                          rolling_direction=1)

    print('Feature extraction started')

    # Feature extraction using tsfresh
    X = extract_features(df_shift,
                             column_id="id",
                             column_sort="time",
                             column_value="value",
                             impute_function=impute,
                             n_jobs=6,
                             show_warnings=False,
                             disable_progressbar=True)

    print('Feature extraction completed')

    # Remove constant features
    X = X.loc[:, X.apply(pd.Series.nunique) != 1]

    # Remove redundant features using univariate correlation
    corr_features = correlation(X, 0.95)
    X_filtered = X.drop(columns=list(corr_features))
    # Drop the first index level so the rows can be matched against the series index
    X_selected = X_filtered.reset_index(level=0, drop=True)

    # Create dataset - Multistep ahead forecasting instead of one step ahead
    y = time_series.shift(-HORIZON).dropna()

    # Keep only the rows present in both the features and the shifted target
    X = X_selected[X_selected.index.isin(y.index)]
    y = y[y.index.isin(X_selected.index)]

    y = np.ravel(y)
    print(X.shape)

    # Remove features from statistical sets
    # X_selected = select_features(X,y,n_jobs=6)

    # Chronological split (no shuffling): the last 15% of the rows form the test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0,shuffle=False,test_size=0.15)
    # multi_select = multivariate_importance(X_train, X_test, y_train, y_test)
    # multi_select.plot(relative=False,topn=4)
    # plt.show()

    # Train model on every remaining feature and record its test error
    mdl = GradientBoostingRegressor(n_estimators=100, random_state=0)
    mdl.fit(X_train, y_train)
    yhat = mdl.predict(X_test)
    # setdefault: the result lists are only defined in an optional driver cell
    globals().setdefault('memory_list_before_fs', []).append(mean_squared_error(y_test, yhat))

    # Multivariate feature selection: keep features whose importance exceeds 6x the mean
    selector = SelectFromModel(estimator=mdl, threshold='6*mean')
    selector.fit(X_train, y_train)

    selected_feat = X_train.columns[(selector.get_support())]
    X_final = X[selected_feat]
    print(X_final.shape)

    # Refit on the selected features only and record the new test error
    X_train, X_test, y_train, y_test = train_test_split(X_final, y, random_state=0, shuffle=False, test_size=0.15)
    mdl.fit(X_train, y_train)
    yhat = mdl.predict(X_test)
    globals().setdefault('memory_list_after_fs', []).append(mean_squared_error(y_test, yhat))

    print('Feature selection completed')

    # Project helper from utils.model_summary_functions; reports on the final predictions
    metrics(yhat,y_test)
    print('Function has finished')

    return X,y


In [11]:
# memory_list_before_fs = []
# memory_list_after_fs = []


# rabe = np.arange(2, 14, 1)
# for memory in tqdm(rabe):
#     X, y = create_input_dataset(series, MEMORY=memory,HORIZON=15)

In [12]:
# plt.plot(rabe,memory_list_before_fs,label='Before')
# plt.plot(rabe, memory_list_after_fs,label='After')
# plt.legend()

In [13]:
# Express a forecast horizon in Lyapunov times: steps * step size * exponent.
# NOTE(review): the dataset-generation call further down passes HORIZON=12
# directly, so the figure printed here does not describe the exported datasets.
HORIZON = 16  # number of steps ahead
MLE = 0.094   # presumably the maximal Lyapunov exponent - TODO confirm
dt = 0.25     # presumably the time between consecutive samples - TODO confirm

lyapunov_times = HORIZON*dt*MLE
print('This dataset forecasts {} Lyapunov times into the future'.format(lyapunov_times))

This dataset forecasts 0.376 Lyapunov times into the future


# Generating dataset for all time series

In [14]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh import extract_features
from sklearn.model_selection import cross_val_score

In [15]:
def create_dataset_for_each_eigenvector(df,MEMORY,HORIZON):
    """Build and export a feature/target dataset for every column of ``df``.

    For each column: roll the series into windows, extract tsfresh features,
    drop constant and highly correlated features, keep the features chosen by
    a gradient-boosting based ``SelectFromModel`` (fitted on the first 80% of
    the rows), and write the selected features together with the target to
    ``dataframe_time_series/df_eigenvector_<i>.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        One time series per column; every column is processed.
    MEMORY : int
        Passed to ``make_forecasting_frame`` as ``max_timeshift``.
    HORIZON : int
        The target is the series shifted by ``-HORIZON``.

    Returns
    -------
    None
        The datasets are written to disk; progress and shapes are printed.

    Notes
    -----
    The target column is attached using the feature matrix's own index, so
    every exported row holds the same feature/target pair the selector was
    trained on. The target column is labelled ``0`` in the CSV.
    """
    import os  # local import: only needed to make sure the export folder exists

    # Create the export folder up front so a long run cannot fail at the first write
    os.makedirs('dataframe_time_series', exist_ok=True)

    # Process every column instead of assuming a fixed number of eigenvectors
    for i in tqdm(range(df.shape[1])):

        # Extract one time series from the dataframe (kept as a single-column frame)
        time_series = pd.DataFrame(df[df.columns[i]])

        # Create the rolled input windows (the one-step target returned by tsfresh is not used)
        df_shift, _ = make_forecasting_frame(time_series[time_series.columns[0]],
                                             kind='Eigenvector',
                                             max_timeshift=MEMORY,
                                             rolling_direction=1)

        print('Feature extraction started')

        # Feature extraction using tsfresh
        X = extract_features(df_shift,
                             column_id="id",
                             column_sort="time",
                             column_value="value",
                             impute_function=impute,
                             n_jobs=6,
                             show_warnings=False,
                             disable_progressbar=True)

        print('Feature extraction completed')

        # Remove constant features
        X = X.loc[:, X.apply(pd.Series.nunique) != 1]

        # Remove redundant features using univariate correlation
        corr_features = correlation(X, 0.95)
        X_filtered = X.drop(columns=list(corr_features))
        # Drop the first index level so the rows can be matched against the series index
        X_filtered = X_filtered.reset_index(level=0, drop=True)

        # Create dataset - Multistep ahead forecasting instead of one step ahead
        y = time_series.shift(-HORIZON).dropna(axis=0)

        # Keep only the rows present in both the features and the shifted target
        X = X_filtered[X_filtered.index.isin(y.index)]
        y = y[y.index.isin(X_filtered.index)]

        y = np.ravel(y)

        print(X.shape)

        # Chronological split (no shuffling); only the training part drives the selection
        X_train, _, y_train, _ = train_test_split(X, y, random_state=0, shuffle=False, test_size=0.2)

        # Multivariate feature selection: SelectFromModel fits its own clone of the
        # estimator, so no separate fit of the model is needed beforehand.
        gbr = GradientBoostingRegressor(n_estimators=100, random_state=0)
        selector = SelectFromModel(estimator=gbr, threshold='8*mean')
        selector.fit(X_train, y_train)

        print('Feature selection completed')

        selected_feat = X_train.columns[(selector.get_support())]
        X_final = X[selected_feat]

        # Give the target the same index as the features before joining; a default
        # RangeIndex would be aligned by label and pair rows with the wrong target.
        target = pd.Series(y, index=X_final.index, name=0)
        df_final = pd.concat([X_final, target], axis=1).dropna(axis=0)

        print(X_final.shape)

        # Print results from cross validation
        # cv = TimeSeriesSplit(n_splits=3)
        # scores = cross_val_score(gbr, X_final, y, scoring='r2', cv=cv)
        # print('Eigenvector '+str(i), 'R2 score: ',str(scores.mean()), '+-', str(scores.std()))

        # Export dataset
        #df_final.to_csv('dataframe_time_series/df_eigenvector_{0}__r2_cv_score_{1}.csv'.format(i, round(scores.mean(),4)))
        df_final.to_csv('dataframe_time_series/df_eigenvector_{0}.csv'.format(i))


In [17]:
# Build and export one dataset per eigenvector with MEMORY=4 and HORIZON=12.
# NOTE(review): HORIZON is passed as the literal 12 here; the HORIZON = 16 constant
# defined earlier (used for the printed Lyapunov-time figure) is not used, and
# 12 * 0.25 * 0.094 = 0.282 Lyapunov times - confirm which horizon is intended.
create_dataset_for_each_eigenvector(df,MEMORY=4,HORIZON=12)

Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.90it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


  1%|          | 1/93 [01:22<2:06:22, 82.41s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:09<00:00,  3.23it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


  2%|▏         | 2/93 [02:52<2:11:26, 86.66s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:09<00:00,  3.12it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


  3%|▎         | 3/93 [04:22<2:12:32, 88.36s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.87it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


  4%|▍         | 4/93 [05:49<2:10:35, 88.04s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.30it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


  5%|▌         | 5/93 [07:28<2:14:34, 91.76s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.93it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


  6%|▋         | 6/93 [09:00<2:13:11, 91.85s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.73it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


  8%|▊         | 7/93 [10:31<2:11:12, 91.54s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.85it/s]


Feature extraction started
Feature extraction completed
(5987, 79)


  9%|▊         | 8/93 [12:00<2:08:36, 90.79s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.83it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 10%|▉         | 9/93 [13:33<2:08:13, 91.59s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 11%|█         | 10/93 [15:09<2:08:24, 92.82s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.79it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 12%|█▏        | 11/93 [16:44<2:07:47, 93.51s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.81it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 13%|█▎        | 12/93 [18:24<2:08:55, 95.50s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.74it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 14%|█▍        | 13/93 [20:13<2:12:58, 99.73s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.32it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 15%|█▌        | 14/93 [22:02<2:14:46, 102.36s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.26it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 16%|█▌        | 15/93 [23:51<2:15:37, 104.33s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.29it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 17%|█▋        | 16/93 [25:33<2:13:12, 103.80s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.70it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 18%|█▊        | 17/93 [27:12<2:09:20, 102.12s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.64it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 19%|█▉        | 18/93 [28:52<2:06:53, 101.52s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 20%|██        | 19/93 [30:29<2:03:37, 100.23s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.32it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 22%|██▏       | 20/93 [32:16<2:04:25, 102.27s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.29it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 23%|██▎       | 21/93 [34:12<2:07:50, 106.53s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.24it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 24%|██▎       | 22/93 [36:11<2:10:15, 110.08s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 25%|██▍       | 23/93 [38:09<2:11:25, 112.65s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s]


Feature extraction started
Feature extraction completed
(5987, 82)


 26%|██▌       | 24/93 [40:16<2:14:22, 116.85s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.21it/s]


Feature extraction started
Feature extraction completed
(5987, 80)


 27%|██▋       | 25/93 [42:16<2:13:33, 117.85s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 28%|██▊       | 26/93 [44:15<2:11:54, 118.12s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 29%|██▉       | 27/93 [46:14<2:10:12, 118.37s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s]


Feature extraction started
Feature extraction completed
(5987, 82)


 30%|███       | 28/93 [48:12<2:08:14, 118.37s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.22it/s]


Feature extraction started
Feature extraction completed
(5987, 82)


 31%|███       | 29/93 [50:10<2:06:02, 118.16s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s]


Feature extraction started
Feature extraction completed
(5987, 81)


 32%|███▏      | 30/93 [52:12<2:05:16, 119.30s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:14<00:00,  2.14it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 33%|███▎      | 31/93 [54:32<2:09:50, 125.65s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.40it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 34%|███▍      | 32/93 [56:28<2:04:38, 122.60s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 35%|███▌      | 33/93 [58:18<1:58:51, 118.86s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.23it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 37%|███▋      | 34/93 [1:00:11<1:55:14, 117.20s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.24it/s]


Feature extraction started
Feature extraction completed
(5987, 82)


 38%|███▊      | 35/93 [1:01:58<1:50:05, 113.88s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.55it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 39%|███▊      | 36/93 [1:03:50<1:47:52, 113.55s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 40%|███▉      | 37/93 [1:05:37<1:44:07, 111.56s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.31it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 41%|████      | 38/93 [1:07:25<1:41:14, 110.44s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.62it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 42%|████▏     | 39/93 [1:09:05<1:36:41, 107.44s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 43%|████▎     | 40/93 [1:10:45<1:32:48, 105.06s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 44%|████▍     | 41/93 [1:12:26<1:29:57, 103.79s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.56it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 45%|████▌     | 42/93 [1:14:07<1:27:40, 103.15s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.59it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 46%|████▌     | 43/93 [1:15:52<1:26:11, 103.43s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.62it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 47%|████▋     | 44/93 [1:17:31<1:23:30, 102.25s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.66it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 48%|████▊     | 45/93 [1:19:11<1:21:17, 101.62s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.59it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 49%|████▉     | 46/93 [1:20:53<1:19:43, 101.78s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 51%|█████     | 47/93 [1:22:33<1:17:31, 101.12s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.26it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 52%|█████▏    | 48/93 [1:24:18<1:16:43, 102.30s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 53%|█████▎    | 49/93 [1:26:03<1:15:35, 103.09s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.36it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 54%|█████▍    | 50/93 [1:27:54<1:15:35, 105.48s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.44it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 55%|█████▍    | 51/93 [1:29:41<1:14:10, 105.97s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.36it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 56%|█████▌    | 52/93 [1:31:29<1:12:54, 106.69s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.43it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 57%|█████▋    | 53/93 [1:33:19<1:11:44, 107.62s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.34it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 58%|█████▊    | 54/93 [1:35:05<1:09:38, 107.14s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.49it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 59%|█████▉    | 55/93 [1:36:50<1:07:25, 106.45s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.56it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 60%|██████    | 56/93 [1:38:36<1:05:30, 106.22s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.53it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 61%|██████▏   | 57/93 [1:40:21<1:03:30, 105.86s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.54it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 62%|██████▏   | 58/93 [1:42:02<1:00:56, 104.48s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.60it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 63%|██████▎   | 59/93 [1:43:43<58:36, 103.43s/it]  

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.50it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 65%|██████▍   | 60/93 [1:45:24<56:31, 102.78s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 66%|██████▌   | 61/93 [1:47:06<54:36, 102.40s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.40it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 67%|██████▋   | 62/93 [1:48:50<53:07, 102.82s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.50it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 68%|██████▊   | 63/93 [1:50:29<50:54, 101.83s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.59it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 69%|██████▉   | 64/93 [1:52:16<49:54, 103.26s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.54it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 70%|██████▉   | 65/93 [1:54:04<48:54, 104.80s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 71%|███████   | 66/93 [1:55:46<46:43, 103.85s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.58it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 72%|███████▏  | 67/93 [1:57:26<44:33, 102.85s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.66it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 73%|███████▎  | 68/93 [1:59:08<42:43, 102.54s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 74%|███████▍  | 69/93 [2:00:45<40:22, 100.93s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.63it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 75%|███████▌  | 70/93 [2:02:21<38:07, 99.46s/it] 

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:10<00:00,  2.74it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 76%|███████▋  | 71/93 [2:04:01<36:32, 99.68s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.25it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 77%|███████▋  | 72/93 [2:05:58<36:37, 104.64s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.36it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 78%|███████▊  | 73/93 [2:07:53<35:58, 107.92s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.26it/s]


Feature extraction started
Feature extraction completed
(5987, 83)


 80%|███████▉  | 74/93 [2:09:52<35:12, 111.19s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:14<00:00,  2.06it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 81%|████████  | 75/93 [2:11:55<34:22, 114.57s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.16it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 82%|████████▏ | 76/93 [2:13:56<33:02, 116.59s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:14<00:00,  2.06it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 83%|████████▎ | 77/93 [2:16:04<32:00, 120.04s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:14<00:00,  2.13it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 84%|████████▍ | 78/93 [2:18:08<30:17, 121.16s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.35it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 85%|████████▍ | 79/93 [2:20:05<28:00, 120.01s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:14<00:00,  2.10it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 86%|████████▌ | 80/93 [2:22:10<26:18, 121.45s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.39it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 87%|████████▋ | 81/93 [2:24:03<23:45, 118.83s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.54it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 88%|████████▊ | 82/93 [2:25:50<21:10, 115.47s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.43it/s]


Feature extraction started
Feature extraction completed
(5987, 84)


 89%|████████▉ | 83/93 [2:27:35<18:43, 112.35s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.47it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 90%|█████████ | 84/93 [2:29:20<16:29, 110.00s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.41it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 91%|█████████▏| 85/93 [2:31:05<14:28, 108.54s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.50it/s]


Feature extraction started
Feature extraction completed
(5987, 85)


 92%|█████████▏| 86/93 [2:32:49<12:29, 107.11s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.48it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 94%|█████████▎| 87/93 [2:34:32<10:36, 106.06s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.39it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 95%|█████████▍| 88/93 [2:36:18<08:49, 105.88s/it]

Feature selection completed
(5987, 4)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.43it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


 96%|█████████▌| 89/93 [2:38:02<07:01, 105.41s/it]

Feature selection completed
(5987, 5)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.47it/s]


Feature extraction started
Feature extraction completed
(5987, 82)


 97%|█████████▋| 90/93 [2:39:47<05:15, 105.30s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:12<00:00,  2.39it/s]


Feature extraction started
Feature extraction completed
(5987, 86)


 98%|█████████▊| 91/93 [2:41:36<03:33, 106.50s/it]

Feature selection completed
(5987, 2)


Rolling: 100%|██████████| 30/30 [00:13<00:00,  2.20it/s]


Feature extraction started
Feature extraction completed
(5987, 88)


 99%|█████████▉| 92/93 [2:43:28<01:47, 107.90s/it]

Feature selection completed
(5987, 3)


Rolling: 100%|██████████| 30/30 [00:11<00:00,  2.55it/s]


Feature extraction started
Feature extraction completed
(5987, 87)


100%|██████████| 93/93 [2:45:14<00:00, 106.61s/it]

Feature selection completed
(5987, 2)





Now that we have saved the dataframes for all eigenvectors, the next notebook will involve finding the best model and tuning its hyperparameters.