In [1]:
# !pip install tsfresh

In [2]:
# Imports
import pandas as pd
from tsfresh import extract_features
import numpy as np

In [3]:
# Reading Libraries
danish_df = pd.read_csv('data\danish_atm_daily.csv')
mipt_df = pd.read_csv('data\mipt_alpha.csv')
nn5_df = pd.read_csv('data/nn5.csv')

In [4]:
# Feature extraction function
def feat_extr(df, column):
    
    '''
    Extracts features from data, then data presented as one row
    with each column presented single feature

    Args:
        df (Pandas Dataframe): Data from where extract feature
        column (str): Column from where extract feature
    '''
    # Create data for extraction
    df_extract = df.copy()
    # Add ID column for tsfresh correct work
    df_extract['id'] = 1
    # Exctract features
    extract_data = extract_features(pd.concat([df_extract['id'], df_extract[column]], axis=1), column_id="id", column_sort=None)
    return extract_data

In [5]:
def idx(df, df_path):
    
    '''
    Creating indexes for file

    Args:
        df (Pandas Dataframe): Data from where create index
        df_path (str): Path to output
    '''
    # Select for which data create index
    if df_path == 'tsfresh_fetures\With NaNs\danish_tsfresh.csv':
        column='danish_atm_daily_0'
    elif df_path == 'tsfresh_fetures\With NaNs\mipt_tsfresh.csv':
        column='mipt_alpha_0'
    elif df_path == 'tsfresh_fetures/With NaNs/nn5_tsfresh.csv':
        column='nn5_0'
    # Get features list
    extract_data = feat_extr(df, column)
    # Delete data name from feature names
    extract_data.columns = [col.replace(column + '__', '') for col in extract_data.columns] 
    return extract_data.columns

In [6]:
def transpose(df):
    '''
    Making column from the row

    Args:
        df (Pandas Dataframe): Data from where make column
    '''

    df_transposed = df.T
    df_transposed = df_transposed.reset_index()
    return df_transposed.drop('index', axis=1)

In [7]:
# Creating lists for cycles
datasets_list = [danish_df, mipt_df, nn5_df]
outputs_list_NaNs = ['tsfresh_fetures/With NaNs/danish_tsfresh.csv', 
                'tsfresh_fetures\With NaNs\mipt_tsfresh.csv',
                'tsfresh_fetures/With NaNs/nn5_tsfresh.csv']

In [8]:
# Getting features and saving them to files
for data, output in zip(datasets_list, outputs_list_NaNs):
    
    features_df = pd.DataFrame()
    for column in data.columns:
        
        features = feat_extr(data, column)
        transposed = transpose(features)
        features_df = pd.concat([features_df, transposed], ignore_index=True, axis=1)

    features_df.set_index(idx(data, output), inplace=True)
    features_df.columns = data.columns
    features_df.to_csv(output)

Feature Extraction:   0%|          | 0/1 [00:00<?, ?it/s]

Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.58s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.58s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.70s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.65s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.60s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.64s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.75s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.74s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.90s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.68s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.73s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.70s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]
Feature Extraction: 100%|██████████| 1/1 [00:02<00:00,  2.65s/it]
Feature Ex

In [8]:
outputs_list_without_NaNs = ['tsfresh_fetures\Without NaNs\danish_tsfresh_no_NaNs.csv',
                             'tsfresh_fetures\Without NaNs\mipt_tsfresh_no_NaNs.csv',
                             'tsfresh_fetures/Without NaNs/nn_tsfresh_no_NaNs.csv']

In [9]:
# Deleting NaNs from tsfeatures
for name_NaN, name_no_NaN in zip(outputs_list_NaNs, outputs_list_without_NaNs):
    df_NaN = pd.read_csv(name_NaN)
    df_no_NaN = df_NaN.dropna()
    if np.sum(df_no_NaN.isna().sum()) != 0:
        raise OSError(f"NaNs weren't deleted in {name_NaN}")
    else:
        print(f"There are no NaNs in {name_NaN}")
        df_no_NaN.to_csv(name_no_NaN)


There are no NaNs in tsfresh_fetures/With NaNs/danish_tsfresh.csv
There are no NaNs in tsfresh_fetures\With NaNs\mipt_tsfresh.csv
There are no NaNs in tsfresh_fetures/With NaNs/nn5_tsfresh.csv


In [11]:
# Comparing NaNs and no NaNs
for i, j in zip(outputs_list_NaNs, outputs_list_without_NaNs):
    print(f'NaNs quantity before preprocess in {i}: {np.sum(pd.read_csv(i).isna().sum())}')
    print(f'Tsfreash features number before preprocessing in {i}: {pd.read_csv(i).shape[0]}')
    print(f'NaNs quantity after preprocess in {j}: {np.sum(pd.read_csv(j).isna().sum())}')
    print(f'Tsfreash features number before preprocessing in {j}: {pd.read_csv(j).shape[0]}')
    print('\n')


NaNs quantity before preprocess in tsfresh_fetures/With NaNs/danish_tsfresh.csv: 438
Tsfreash features number before preprocessing in tsfresh_fetures/With NaNs/danish_tsfresh.csv: 783
NaNs quantity after preprocess in tsfresh_fetures\Without NaNs\danish_tsfresh_no_NaNs.csv: 0
Tsfreash features number before preprocessing in tsfresh_fetures\Without NaNs\danish_tsfresh_no_NaNs.csv: 777


NaNs quantity before preprocess in tsfresh_fetures\With NaNs\mipt_tsfresh.csv: 2090
Tsfreash features number before preprocessing in tsfresh_fetures\With NaNs\mipt_tsfresh.csv: 783
NaNs quantity after preprocess in tsfresh_fetures\Without NaNs\mipt_tsfresh_no_NaNs.csv: 0
Tsfreash features number before preprocessing in tsfresh_fetures\Without NaNs\mipt_tsfresh_no_NaNs.csv: 777


NaNs quantity before preprocess in tsfresh_fetures/With NaNs/nn5_tsfresh.csv: 171
Tsfreash features number before preprocessing in tsfresh_fetures/With NaNs/nn5_tsfresh.csv: 783
NaNs quantity after preprocess in tsfresh_fetures/W