In [1]:
import os
import numpy as np
import pandas as pd
from tsfresh import select_features, extract_features, feature_extraction
from tsfresh.utilities.dataframe_functions import impute
from sklearn import preprocessing
from parameters import fc_parameters

In [2]:
# Directory holding the training CSV files loaded by tsfresh_extract_features().
train_path = '../data/hy_round2_train_20200225'

def tsfresh_extract_features(data_dir=None, n_jobs=8):
    """Extract and select tsfresh features for the training trajectories.

    Every CSV file in ``data_dir`` is read and concatenated. Features are
    computed per vessel (column ``'渔船ID'``), ordered by the ``'time'``
    column, using the ``fc_parameters`` settings imported by this notebook.
    Missing/infinite feature values are imputed, and only the features that
    tsfresh judges relevant for the ``'type'`` label are kept.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the training CSV files. Defaults to the
        module-level ``train_path``.
    n_jobs : int, optional
        Number of worker processes passed to ``extract_features``.

    Returns
    -------
    pandas.DataFrame
        One row per vessel id with the selected feature columns plus a
        ``'type'`` column holding the original (un-encoded) label.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no CSV files.
    KeyError
        If a vessel id in the extracted features has no label.
    """
    if data_dir is None:
        data_dir = train_path

    # Sort for a reproducible row order and skip stray non-CSV entries
    # (hidden files, checkpoint folders) that would make read_csv fail.
    csv_names = sorted(
        name for name in os.listdir(data_dir) if name.lower().endswith('.csv')
    )
    if not csv_names:
        raise ValueError('No CSV files found in {!r}'.format(data_dir))

    raw_df = pd.concat(
        [pd.read_csv(os.path.join(data_dir, name)) for name in csv_names],
        ignore_index=True,
    )
    raw_df['time'] = pd.to_datetime(raw_df['time'], format='%m%d %H:%M:%S')

    # One label per vessel, taken from that vessel's first row and keyed by
    # id so it can be aligned with the feature table by index, not position.
    label_by_id = raw_df.drop_duplicates(subset='渔船ID').set_index('渔船ID')['type']

    extracted_df = extract_features(
        raw_df.drop(columns=['type']),
        column_id='渔船ID',
        column_sort='time',
        n_jobs=n_jobs,
        kind_to_fc_parameters=fc_parameters,
    )

    # impute() works in place; give it a frame we own rather than a view.
    features = extracted_df.copy()
    impute(features)

    # .loc raises KeyError if any feature row lacks a label, instead of
    # silently pairing rows with the wrong label.
    labels = label_by_id.loc[features.index]
    le = preprocessing.LabelEncoder()
    y_train = le.fit_transform(labels.to_numpy())

    # Copy before adding a column so we never write into a slice of `features`.
    filtered_train_df = select_features(features, y_train).copy()
    filtered_train_df['type'] = le.inverse_transform(y_train)

    return filtered_train_df

In [3]:
# Keep a handle on the result: the extraction takes minutes, so later cells
# should reuse this table instead of recomputing it.
train_features = tsfresh_extract_features()
train_features

Feature Extraction: 100%|██████████| 40/40 [02:16<00:00,  3.41s/it]


variable,lon__quantile__q_0.1,lon__minimum,lon__quantile__q_0.3,lon__quantile__q_0.4,lat__quantile__q_0.9,lat__quantile__q_0.8,lon__quantile__q_0.2,lat__quantile__q_0.3,lat__quantile__q_0.2,lat__quantile__q_0.6,...,"lat__fft_coefficient__coeff_74__attr_""angle""","lon__fft_coefficient__coeff_2__attr_""angle""","方向__fft_coefficient__coeff_5__attr_""real""","速度__fft_coefficient__coeff_9__attr_""angle""","lon__fft_coefficient__coeff_64__attr_""angle""","lat__fft_coefficient__coeff_5__attr_""real""","lon__fft_coefficient__coeff_58__attr_""angle""",速度__ratio_beyond_r_sigma__r_0.5,"速度__change_quantiles__f_agg_""var""__isabs_True__qh_0.8__ql_0.4",type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20000,115.3020,115.246,115.3470,115.3630,21.1967,21.1424,115.3240,20.9480,20.9340,21.0220,...,133.909819,66.962958,-2348.999538,163.839350,-156.323820,6.127692,-66.208345,0.478916,0.039621,拖网
20001,119.1310,119.130,119.1310,119.1370,25.1720,25.1720,119.1310,25.0450,24.9878,25.1700,...,98.340809,-39.357719,12792.803691,-20.228126,-90.323732,-13.612014,-64.652113,0.721481,0.321328,围网
20002,120.2610,120.258,120.2610,120.2610,27.2870,27.2870,120.2610,27.2870,27.2870,27.2870,...,124.363636,-29.058475,519.703362,137.947507,102.827335,-0.000239,75.728385,0.716364,0.003024,刺网
20003,117.6552,117.361,117.8886,117.9464,23.1569,23.1188,117.8092,22.8800,22.8730,22.9540,...,111.052929,142.077786,-7904.761565,172.445005,98.772858,1.463794,93.866518,0.372832,0.024884,拖网
20004,119.5090,119.509,119.5100,119.5100,26.0680,26.0680,119.5100,26.0680,26.0680,26.0680,...,83.283532,-27.826723,-5008.367586,-171.867113,-125.179676,-0.001334,-5.789860,0.352256,0.003010,刺网
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28161,119.2552,119.199,119.2660,119.2660,25.1800,25.1790,119.2610,25.1306,25.0900,25.1780,...,-34.767121,-42.210235,27998.044882,0.861185,-98.221277,-14.822121,-68.968543,0.950966,0.105894,围网
28162,119.1290,119.129,119.1300,119.1310,25.3470,25.3330,119.1300,25.1720,25.1720,25.2372,...,13.200029,137.305213,-1203.823420,-159.138606,136.531889,-5.855217,77.363321,0.858929,0.988164,围网
28163,116.8172,116.639,116.9552,117.0166,23.5770,23.5770,116.8840,22.7302,22.6336,23.0044,...,-70.138385,-110.155527,-5906.985318,176.318176,-61.821442,5.782164,-41.868871,0.669065,0.027511,拖网
28164,117.8230,117.822,117.8230,117.8230,24.4460,24.4450,117.8230,24.4450,24.2546,24.4450,...,-108.065795,-53.093806,-12509.036847,-51.456969,-151.699314,3.399353,-160.098968,0.664075,0.084627,刺网
