In [1]:
import numpy as np
import gdal
import pandas as pd
import pickle
import os

from tsfresh import extract_features
from tsfresh.utilities.distribution import MultiprocessingDistributor
from tsfresh.feature_selection.relevance import calculate_relevance_table as crt
from tsraster.prep import sRead


#from tsraster.calculate import calculateFeatures

In [51]:
#
from tsfresh.feature_extraction.feature_calculators import quantile
from tsfresh.feature_extraction import ComprehensiveFCParameters

# Instantiate tsfresh's ComprehensiveFCParameters and display it; the cell
# output lists feature-calculator names mapped to their parameter settings
# (None where the output shows no parameters for that calculator).
settings=ComprehensiveFCParameters()
settings

{'variance_larger_than_standard_deviation': None,
 'has_duplicate_max': None,
 'has_duplicate_min': None,
 'has_duplicate': None,
 'sum_values': None,
 'abs_energy': None,
 'mean_abs_change': None,
 'mean_change': None,
 'mean_second_derivative_central': None,
 'median': None,
 'mean': None,
 'length': None,
 'standard_deviation': None,
 'variance': None,
 'skewness': None,
 'kurtosis': None,
 'absolute_sum_of_changes': None,
 'longest_strike_below_mean': None,
 'longest_strike_above_mean': None,
 'count_above_mean': None,
 'count_below_mean': None,
 'last_location_of_maximum': None,
 'first_location_of_maximum': None,
 'last_location_of_minimum': None,
 'first_location_of_minimum': None,
 'percentage_of_reoccurring_datapoints_to_all_datapoints': None,
 'percentage_of_reoccurring_values_to_all_values': None,
 'sum_of_reoccurring_values': None,
 'sum_of_reoccurring_data_points': None,
 'ratio_value_number_to_time_series_length': None,
 'sample_entropy': None,
 'maximum': None,
 'minimum

In [16]:
# Absolute, machine-specific directory of the temperature inputs.
# NOTE(review): the same literal is assigned to `path` further down and
# `temp_path` is not referenced in the visible cells -- confirm it is still needed.
temp_path = "/Users/adbe/mmann/img/inputs/temperature/"

In [30]:

def calculateFeatures(path, parameters, reset_df):
    '''
    Extract tsfresh features for the time series found under ``path``.

    :param path: directory handed to ``sRead.image2series``; the resulting
        dataframe is cached in that same directory as ``my_df.pkl``
    :param parameters: dictionary containing the features to be extracted,
        passed to tsfresh as ``default_fc_parameters``
    :param reset_df: boolean. If True, build a new version of my_df with
        ``sRead.image2series`` and overwrite the pickle; if False, read
        my_df from the previously saved pickle
    :return: a dataframe with features

    Side effect: the names of the extracted feature columns are written to
    ``features_names.csv`` in the current working directory.
    '''
    pickle_path = os.path.join(path, 'my_df.pkl')

    if not reset_df:
        # Reuse the saved version of my_df.
        # NOTE: pickle.load can execute arbitrary code -- only load pickles
        # that this notebook wrote itself.
        with open(pickle_path, 'rb') as pickle_file:
            my_df = pickle.load(pickle_file)
    else:
        # Build the series exactly once, and do it *before* opening the
        # output file so a failure here cannot leave a truncated pickle.
        my_df = sRead.image2series(path)

        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(my_df, pickle_file, pickle.HIGHEST_PROTOCOL)
        print(pickle_path)

    # Distributor is the tsfresh helper used for parallel processing.
    Distributor = MultiprocessingDistributor(n_workers=10,
                                             disable_progressbar=False,
                                             progressbar_title="Feature Extraction")

    # Select the features to be extracted through `parameters`.
    # Example, no arguments:        "maximum": None
    # Example, one set of args:     "agg_linear_trend": [{"attr": 'slope', "chunk_len": 3, "f_agg": "min"}]
    # Example, two sets of args:    "large_standard_deviation": [{"r": 0.05}, {"r": 0.1}]
    # Available calculators: https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html
    extracted_features = extract_features(my_df,
                                          default_fc_parameters=parameters,
                                          column_sort="time",
                                          column_value="value",
                                          column_id="id",
                                          distributor=Distributor)

    # Save the feature names with a 1-based index for later lookup.
    feature_names = pd.DataFrame(list(extracted_features.columns))
    feature_names.index += 1
    feature_names.to_csv("features_names.csv")
    return extracted_features

In [52]:
# Feature set 1: basic summary statistics plus tail quantiles and outlier ratios.
# A value of None means the calculator takes no arguments; a list holds one
# argument dict per parameterisation to run.
parameters_group_1 = {
    "mean": None,
    "maximum": None,
    "median": None,
    "minimum": None,
    "quantile": [{"q": level} for level in (0.15, 0.05, 0.85, 0.95)],
    "ratio_beyond_r_sigma": [{"r": sigmas} for sigmas in (2, 3)],
    "skewness": None,
    "sum_values": None,
}

In [44]:
# Feature set 2: summary statistics plus trend, location, run-length, change,
# peak-count, quantile and outlier-ratio features.
# Keys must match tsfresh feature-calculator names exactly; the stray trailing
# colons that were inside the key strings have been removed.
# NOTE(review): "last_location_of_maximum" was listed twice in the original;
# confirm whether "first_location_of_maximum" was intended for one of them.
parameters_group_2 = {
    "mean": None,
    "maximum": None,
    "median": None,
    "minimum": None,
    "agg_linear_trend": [{"attr": 'slope', "chunk_len": 6, "f_agg": "min"},
                         {"attr": 'slope', "chunk_len": 6, "f_agg": "max"}],
    "last_location_of_maximum": None,
    "last_location_of_minimum": None,
    "longest_strike_above_mean": None,
    "longest_strike_below_mean": None,
    "mean_abs_change": None,
    "mean_change": None,
    "number_cwt_peaks": [{"n": 6}, {"n": 12}],
    "quantile": [{"q": 0.15}, {"q": 0.05}, {"q": 0.85}, {"q": 0.95}],
    "ratio_beyond_r_sigma": [{"r": 2}, {"r": 3}],
    "skewness": None,
    "sum_values": None,
}



In [45]:
# Directory passed to calculateFeatures below; my_df.pkl is read from / written to it.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
path = "/Users/adbe/mmann/img/inputs/temperature/"

In [53]:
# Extract the group-1 features; reset_df=True rebuilds the series from `path`
# and overwrites my_df.pkl before extraction.
group_1_results = calculateFeatures(path, parameters_group_1, reset_df=True)

/Users/adbe/mmann/img/inputs/temperature/my_df.pkl





Feature Extraction:   0%|          | 0/50 [00:00<?, ?it/s][A[A[A


Feature Extraction:   2%|▏         | 1/50 [02:10<1:46:11, 130.03s/it][A[A[A


Feature Extraction:   4%|▍         | 2/50 [02:14<1:13:50, 92.30s/it] [A[A[A


Feature Extraction:   6%|▌         | 3/50 [02:15<50:50, 64.91s/it]  [A[A[A


Feature Extraction:   8%|▊         | 4/50 [02:18<35:33, 46.38s/it][A[A[A


Feature Extraction:  10%|█         | 5/50 [02:20<24:46, 33.04s/it][A[A[A


Feature Extraction:  12%|█▏        | 6/50 [02:23<17:33, 23.94s/it][A[A[A


Feature Extraction:  14%|█▍        | 7/50 [02:25<12:28, 17.40s/it][A[A[A


Feature Extraction:  16%|█▌        | 8/50 [02:27<09:00, 12.87s/it][A[A[A


Feature Extraction:  18%|█▊        | 9/50 [02:29<06:33,  9.60s/it][A[A[A


Feature Extraction:  20%|██        | 10/50 [02:32<05:01,  7.55s/it][A[A[A


Feature Extraction:  22%|██▏       | 11/50 [03:48<18:23, 28.29s/it][A[A[A


Feature Extraction:  24%|██▍       | 12/50 [03:51<13:06, 20

In [55]:
# Summary statistics for each extracted group-1 feature column.
group_1_results.describe()

variable,value__maximum,value__mean,value__median,value__minimum,value__quantile__q_0.05,value__quantile__q_0.15,value__quantile__q_0.85,value__quantile__q_0.95,value__ratio_beyond_r_sigma__r_2,value__ratio_beyond_r_sigma__r_3,value__skewness,value__sum_values
count,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0,976640.0
mean,12.073717,6.963103,5.669012,3.600137,3.758972,4.068339,10.877911,11.664085,9.7e-05,0.0,0.227081,62.667931
std,16.483459,9.731706,8.129277,5.542686,5.71736,6.08946,14.903262,15.943853,0.003282,0.0,0.311813,87.585352
min,0.0,-0.333333,-4.0,-7.0,-6.6,-6.0,0.0,0.0,0.0,0.0,-0.801639,-3.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,31.0,17.222222,14.0,8.0,8.4,9.2,27.6,29.8,0.0,0.0,0.595123,155.0
max,49.0,30.666667,27.0,20.0,20.0,20.2,45.0,47.8,0.111111,0.0,1.734735,276.0


In [None]:
# Extract the group-2 features, reusing the cached my_df.pkl (reset_df=False).
# Positional arguments must come before keyword arguments, so the parameter
# dictionary is passed second and reset_df last.
group_2_results = calculateFeatures(path, parameters_group_2, reset_df=False)