# Automatic extraction of time series features


Extract time series features from the groundwater level observations using the libraries [Tsfeatures](http://pkg.robjhyndman.com/tsfeatures/index.html) and [Tsfresh](https://tsfresh.readthedocs.io/en/latest/)
to rapidly obtain parameters that help us understand and interpret the modeled results.

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import pandas as pd
import geopandas as gpd
from tsfeatures import tsfeatures
from tsfeatures import acf_features
import numpy as np
from tsfresh import extract_features
from pathlib import Path

In [2]:
# Project locations. These are absolute paths on the author's machine;
# adjust them here when running the notebook elsewhere.
patht="D:/Erasmus/Thesis/"                     # project root (ends with "/")
rpath="D:/Erasmus/Thesis/data"                 # data root (no trailing "/")
pathg=r'D:\Erasmus\Thesis\data\resultsCNN/'    # folder holding the CNN result summaries
# patht already ends with "/", so no extra separator is prepended
# (the previous value was "D:/Erasmus/Thesis//Figures/").
pathfig=patht+"Figures/"

In [3]:
# Inputs: the pickled groundwater table and the shapefile of the selected wells.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# that were produced by this project.
GWfilldatamod = pd.read_pickle(f"{rpath}/Pickle/GWfilldatamod2.pkl")
gw_sel_int = gpd.read_file(f"{rpath}/SHP/SHP2/gw_sel_intmod3.shp")

In [4]:
# Collect every CNN summary file of the "wihtoutRH" run into a single table.
# (The folder name is a runtime path and is kept exactly as spelled.)
folder2 = Path(pathg+"wihtoutRH/")
df = pd.concat([pd.read_csv(fname) for fname in folder2.glob("summary_CNN_*.txt")])
# Score columns only, on a fresh 0..n-1 index.
dfcomb = df.reset_index().loc[:, ["NSE", "r2", "Bias"]]

## Extract the available features

In [5]:
# tsfeatures columns to store: the available features, except those with constant values.
# The order of the names is the order in which the columns are later added.
ltsfeatures = [
    'hurst', 'series_length', 'unitroot_pp', 'unitroot_kpss',
    'hw_alpha', 'hw_beta', 'hw_gamma',
    'stability', 'nperiods', 'seasonal_period',
    'trend', 'spike', 'linearity', 'curvature', 'e_acf1', 'e_acf10',
    'seasonal_strength', 'peak', 'trough',
    'x_pacf5', 'diff1x_pacf5', 'diff2x_pacf5', 'seas_pacf',
    'nonlinearity', 'lumpiness',
    'alpha', 'beta',
    'arch_acf', 'garch_acf', 'arch_r2', 'garch_r2',
    'flat_spots', 'entropy', 'crossing_points', 'arch_lm',
    'x_acf1', 'x_acf10', 'diff1_acf1', 'diff1_acf10', 'diff2_acf1', 'diff2_acf10',
    'seas_acf1',
]

# tsfresh columns to store: features with high correlation and potential GWL influence.
# Every name carries the "y__" prefix of the value column handed to tsfresh.
ltsfresh = [
    'y__longest_strike_below_mean',
    'y__longest_strike_above_mean',
    'y__fourier_entropy__bins_3',
    'y__sample_entropy',
    'y__cid_ce__normalize_True',
    'y__autocorrelation__lag_6',
    'y__lempel_ziv_complexity__bins_3',
    'y__partial_autocorrelation__lag_3',
    'y__number_peaks__n_1',
    'y__number_peaks__n_3',
    'y__number_peaks__n_5',
    'y__number_peaks__n_10',
    'y__approximate_entropy__m_2__r_0.5',
]


In [6]:
# `freq` argument handed to tsfeatures (value kept from the original analysis).
# NOTE(review): presumably chosen to match the sampling of the GW series -- confirm.
TSFEATURES_FREQ = 30

# For every well: compute the tsfeatures and tsfresh features of its groundwater
# series and store the selected ones in the matching row of gw_sel_int.
# Loop-local frames get their own names so that the `df` summary table loaded
# earlier in the notebook is not overwritten.
cou = 0  # number of wells processed so far
for dfraw in GWfilldatamod.GW_NN:

    time_name = dfraw.columns[0]      # first column: time stamps
    series_name = dfraw.columns[-1]   # last column: observed series, named "..._<well id>"

    # Well id as an integer, used to find the well's row in gw_sel_int
    # (shapefile with the well locations).
    wellid = int(series_name.split("_")[-1])
    cou += 1
    indv = gw_sel_int[gw_sel_int.MEST_ID == wellid].index[0]

    dfg = dfraw[[time_name, series_name]]

    # tsfeatures input: columns "ds", "y" plus the series code as "unique_id".
    tsfeat_input = dfg.rename(columns={time_name: "ds", series_name: "y"}).assign(unique_id=series_name)
    feat = tsfeatures(tsfeat_input, freq=TSFEATURES_FREQ)

    # tsfresh input: columns "time", "y" plus the series code as "id".
    tsfresh_input = dfg.rename(columns={time_name: "time", series_name: "y"}).assign(id=series_name)
    feat2 = extract_features(tsfresh_input, column_id="id", column_sort="time")

    # Each result holds one row (a single series was passed in). Take it by position:
    # the tsfresh result is indexed by the series code, so `[0]` would rely on the
    # deprecated positional fallback of Series.__getitem__ (its warning is silenced
    # by the warnings filter at the top of the notebook).
    for c in ltsfeatures:
        gw_sel_int.at[indv, c] = feat[c].iloc[0]
    for c2 in ltsfresh:
        gw_sel_int.at[indv, c2] = feat2[c2].iloc[0]

    

Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.50s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.31s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.77s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.03s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.63s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.40s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.01s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.93s/it]
Feature Extraction: 100%|███████████████

In [7]:
# Save the wells together with the extracted features.
# ESRI Shapefile field names are limited to 10 characters, so the long feature
# names are truncated on write and several of them collide (e.g. 'diff1_acf1' and
# 'diff1_acf10', or all 'y__number_peaks__n_*'). The shapefile output is kept
# unchanged for backward compatibility; the GeoPackage copy preserves the full
# column names.
gw_sel_int.to_file(rpath+"/SHP/SHP2/gw_sel_TS.shp")
gw_sel_int.to_file(rpath+"/SHP/SHP2/gw_sel_TS.gpkg", driver="GPKG")