In [1]:
# !pip install tsfeatures

In [23]:
import pandas as pd
import tsfeatures as tf
from tqdm.notebook import tqdm
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Input and output paths, kept in matching order so they can be zipped
# together when iterating over the datasets.
# Forward slashes are used for every entry: they are accepted on Windows,
# Linux and macOS alike, whereas a backslash in a normal string literal starts
# an escape sequence and is not a path separator outside Windows.
input_list = ['data/danish_atm_daily.csv',
              'data/mipt_alpha.csv',
              'data/nn5.csv']
output_list = ['manual_features/danish_manual_features.csv',
               'manual_features/mipt_manual_features.csv',
               'manual_features/nn_manual_features.csv']

In [4]:
def features_extr(df, column):
    '''
    Run every selected tsfeatures extractor on one column of a DataFrame.

    Args:
        df (pd.DataFrame): DataFrame that contains the time series
        column (str): Name of the column holding the series to process

    Returns:
        list: One extractor result per entry, in the order the extractors
            are listed below
    '''

    # Extractors are applied in exactly this order.
    extractors = (
        tf.holt_parameters,
        tf.count_entropy,
        tf.crossing_points,
        tf.entropy,
        tf.flat_spots,
        tf.frequency,
        tf.heterogeneity,
        tf.guerrero,
        tf.hurst,
        tf.hw_parameters,
        tf.intervals,
        tf.lumpiness,
        tf.acf_features,
        tf.arch_stat,
        tf.pacf_features,
        tf.sparsity,
        tf.stability,
        tf.stl_features,
        tf.unitroot_kpss,
        tf.unitroot_pp,
    )

    # Every extractor receives the raw values of the chosen column.
    series_values = df[column].values
    return [extract(series_values) for extract in extractors]

In [7]:
# Build one feature table per dataset and write it to the matching output path.
# zip() has no length, so the total is passed explicitly; without it tqdm
# cannot show progress for the outer loop.
for inp, out in tqdm(zip(input_list, output_list),
                     total=min(len(input_list), len(output_list))):

    # Read data; every column of the file is processed as one time series.
    df = pd.read_csv(inp)

    # One Series of features per input column. They are joined once after the
    # loop instead of concatenating onto a growing DataFrame on each iteration,
    # which would copy all previously accumulated data every time.
    feature_columns = []

    # Iterate over each column in the original DataFrame.
    for column in tqdm(df.columns):

        # Extract features for the current column.
        extracted_features = features_extr(df, column)

        # Combine all extracted feature dictionaries into a single dictionary.
        combined_features = {}
        for feature_dict in extracted_features:
            combined_features.update(feature_dict)

        # Feature names become the index, the source column becomes the name.
        feature_columns.append(pd.Series(combined_features, name=column))

    # Join all columns horizontally; the outer join keeps a feature even when
    # it is missing for some columns (it shows up as NaN there).
    if feature_columns:
        df_out = pd.concat(feature_columns, axis=1, join='outer')
    else:
        # pd.concat raises on an empty list; keep writing an empty table.
        df_out = pd.DataFrame()

    # Drop every feature (row) that is NaN for at least one column.
    df_out = df_out.dropna()

    # Save to file
    df_out.to_csv(out)


0it [00:00, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

  0%|          | 0/111 [00:00<?, ?it/s]

In [20]:
# List the feature names stored in the index of every saved table
for features_path in output_list:
    saved_features = pd.read_csv(features_path, index_col=0)
    print(features_path)
    print(saved_features.index.tolist())
    print('\n')

manual_features\danish_manual_features.csv
['alpha', 'beta', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'intervals_mean', 'lumpiness', 'arch_lm', 'sparsity', 'stability', 'nperiods', 'seasonal_period', 'trend', 'spike', 'linearity', 'curvature']


manual_features\mipt_manual_features.csv
['alpha', 'beta', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'hurst', 'intervals_mean', 'lumpiness', 'arch_lm', 'sparsity', 'stability', 'nperiods', 'seasonal_period', 'trend', 'spike', 'linearity', 'curvature']


manual_features/nn_manual_features.csv
['alpha', 'beta', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'hurst', 'intervals_mean', 'intervals_sd', 'lumpiness', 'x_acf1', 'x_acf10', 'diff1_acf1', 'diff1_acf10', 'diff2_acf1', 'diff2_acf10', 'arch_lm', 'x_pacf5', 'diff1x_pacf5', 'diff2x_pacf5', 'sparsity', 'stability', 'nperiods', 'seasonal_period', 'trend', 'spike', 'linearity', 'curvature', 'e_acf1', 'e_ac

In [22]:
# Swap rows and columns of every saved table and write it back to the same file.
# NOTE: the files are overwritten in place, so running this cell a second time
# swaps them back to the previous orientation.
for features_path in output_list:
    pd.read_csv(features_path, index_col=0).T.to_csv(features_path)

In [26]:
# Preview the first two rows of every saved table
for features_path in output_list:
    preview = pd.read_csv(features_path, index_col=0).head(2)
    display(preview)

Unnamed: 0,alpha,beta,count_entropy,crossing_points,entropy,flat_spots,frequency,intervals_mean,lumpiness,arch_lm,sparsity,stability,nperiods,seasonal_period,trend,spike,linearity,curvature
danish_atm_daily_0,0.141939,0.005905,-171289.625528,115.0,0.832942,6.0,1.0,59.4,0.003311,0.271016,0.02623,0.00892,0.0,1.0,0.04893,2.149307e-07,-0.132583,0.238538
danish_atm_daily_1,0.138975,0.017725,-186506.124217,67.0,0.838796,108.0,1.0,100.666667,0.001346,0.000178,0.009836,0.001621,0.0,1.0,0.050821,8.532189e-08,0.174946,-0.168956


Unnamed: 0,alpha,beta,count_entropy,crossing_points,entropy,flat_spots,frequency,hurst,intervals_mean,lumpiness,arch_lm,sparsity,stability,nperiods,seasonal_period,trend,spike,linearity,curvature
mipt_alpha_0,0.146429,0.036607,-33443390000.0,255.0,0.750951,5.0,1.0,0.787643,671.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,2.206568e-15,1.637579e-15
mipt_alpha_1,0.217143,0.032571,-15727630000.0,162.0,0.578434,176.0,1.0,1.088826,120.75,0.002706,0.950701,0.280179,0.190769,0.0,1.0,0.877022,2.298643e-08,5.339229,0.6874725


Unnamed: 0,alpha,beta,count_entropy,crossing_points,entropy,flat_spots,frequency,hurst,intervals_mean,intervals_sd,...,nperiods,seasonal_period,trend,spike,linearity,curvature,e_acf1,e_acf10,unitroot_kpss,unitroot_pp
nn5_0,1.490116e-08,5.120002e-12,-68475.870483,229.0,0.705705,5.0,1.0,0.865866,37.368421,37.987148,...,0.0,1.0,0.008541,4.548068e-08,0.069356,0.092808,0.109844,0.042532,0.073708,-635.596768
nn5_1,1.490116e-08,2.202271e-11,-36241.351149,269.0,0.772642,7.0,1.0,0.634561,44.4375,36.02956,...,0.0,1.0,0.013273,4.319773e-08,0.012371,0.073767,0.218461,0.063091,0.128528,-543.997661


In [27]:
# Count the missing values remaining in every saved table
for features_path in output_list:
    nan_per_column = pd.read_csv(features_path).isna().sum()
    nan_total = np.sum(nan_per_column)
    print(f'NaNs quantity in {features_path}: {nan_total}')
    print('\n')

NaNs quantity in manual_features\danish_manual_features.csv: 0


NaNs quantity in manual_features\mipt_manual_features.csv: 0


NaNs quantity in manual_features/nn_manual_features.csv: 0


