In [1]:
import os
import numpy as np
import pandas as pd
from tsfresh import select_features, extract_features
from tsfresh.utilities.dataframe_functions import impute
from sklearn import preprocessing

In [2]:
# Directory containing the raw training CSV files, relative to the notebook's
# working directory. Read by tsfresh_extract_features().
train_path = "../data/hy_round2_train_20200225"

def tsfresh_extract_features(data_dir=None, output_dir='./feature', n_jobs=8):
    """Extract tsfresh features per vessel, keep the relevant ones, and save them.

    Every regular, non-hidden file in ``data_dir`` is read as a CSV and the
    frames are concatenated. The ``time`` column is parsed, tsfresh features
    are extracted per ``渔船ID`` (sorted by ``time``), missing values are
    imputed, and ``select_features`` keeps the columns it judges relevant to
    the label-encoded ``type`` target. The original ``type`` label is appended
    as the last column and the result is written to ``<output_dir>/train.csv``.

    Parameters
    ----------
    data_dir : str, optional
        Directory with the raw CSV files. Defaults to the module-level
        ``train_path``.
    output_dir : str, optional
        Directory the ``train.csv`` feature file is written to; created if it
        does not exist.
    n_jobs : int, optional
        Number of worker processes passed to ``extract_features``.

    Returns
    -------
    pandas.DataFrame
        Selected features indexed by vessel ID, plus a ``type`` label column.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no readable files, or if a vessel present in
        the extracted features has no ``type`` label.
    """
    if data_dir is None:
        data_dir = train_path

    # Skip hidden entries and sub-directories (e.g. .DS_Store,
    # .ipynb_checkpoints) so they are not handed to read_csv.
    file_names = sorted(
        name for name in os.listdir(data_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(data_dir, name))
    )
    if not file_names:
        raise ValueError('no input files found in {!r}'.format(data_dir))

    raw_df = pd.concat(
        (pd.read_csv(os.path.join(data_dir, name)) for name in file_names),
        ignore_index=True,
        sort=False,
    )
    raw_df['time'] = pd.to_datetime(raw_df['time'], format='%m%d %H:%M:%S')

    # The label column must not be treated as a time series by tsfresh.
    extracted_df = extract_features(
        raw_df.drop(columns=['type']),
        column_id='渔船ID',
        column_sort='time',
        n_jobs=n_jobs,
    )

    # One label per vessel (the first row seen for that ID), aligned to the
    # feature rows by vessel ID rather than by position or file count.
    labels = (
        raw_df.drop_duplicates(subset='渔船ID')
        .set_index('渔船ID')['type']
        .reindex(extracted_df.index)
    )
    if labels.isna().any():
        raise ValueError('some vessel IDs in the extracted features have no type label')

    le = preprocessing.LabelEncoder()
    y_train = pd.Series(le.fit_transform(labels), index=extracted_df.index)

    # impute() works in place; extracted_df is a full frame, not a slice.
    impute(extracted_df)

    # Copy so adding the label column never writes into a view of extracted_df.
    filtered_train_df = select_features(extracted_df, y_train).copy()
    filtered_train_df['type'] = le.inverse_transform(y_train)

    os.makedirs(output_dir, exist_ok=True)
    filtered_train_df.to_csv(os.path.join(output_dir, 'train.csv'))

    return filtered_train_df

In [3]:
# Run the whole extraction/selection pipeline and display the resulting
# feature table as this cell's output.
train_features = tsfresh_extract_features()
train_features

Feature Extraction: 100%|██████████| 40/40 [1:54:04<00:00, 171.12s/it]  


variable,"lon__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_5__w_2","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_11__w_20","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_11__w_5","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_10","lon__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_5","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_5","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_10","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_20","lat__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5",...,"速度__fft_coefficient__coeff_79__attr_""abs""",lon__number_cwt_peaks__n_5,"速度__fft_coefficient__coeff_82__attr_""abs""","速度__fft_coefficient__coeff_74__attr_""abs""","方向__fft_coefficient__coeff_5__attr_""imag""","速度__fft_coefficient__coeff_87__attr_""abs""","速度__fft_coefficient__coeff_73__attr_""abs""","方向__fft_coefficient__coeff_20__attr_""real""",lat__symmetry_looking__r_0.35000000000000003,type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20000,15.041422,39.086500,8.002773,33.991337,30.066673,41.396290,5.494960,32.484090,43.433685,3.595712,...,22.843813,45.0,19.766003,73.241138,-992.511057,26.937354,62.719212,6708.335041,1.0,拖网
20001,15.555658,45.787256,9.413717,40.046062,31.086398,48.561202,6.479546,38.317047,51.014413,4.263204,...,60.723283,40.0,14.144836,39.208915,-832.201707,10.420878,42.154673,7686.128966,1.0,围网
20002,15.646361,50.057259,10.321680,43.744373,31.281147,53.059871,7.097635,41.816981,55.712556,4.656219,...,1.624750,6.0,3.165306,2.712678,-76.997668,1.789029,1.697228,-626.520463,1.0,刺网
20003,15.329394,41.980946,8.657685,36.675710,30.493956,44.500222,5.955826,35.060232,46.726224,3.909379,...,14.019591,47.0,35.499179,14.815519,4992.039880,41.621580,36.930767,528.637311,1.0,拖网
20004,15.548528,47.820621,9.861144,41.790447,31.085651,50.689021,6.781237,39.949011,53.223129,4.448959,...,4.533309,22.0,16.310496,7.121245,-820.528246,6.919305,3.913123,-1242.016415,1.0,刺网
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28161,15.537333,45.836438,9.488184,40.122831,30.959063,48.587373,6.481232,38.302420,51.017661,4.196428,...,30.932981,28.0,102.915530,40.110297,1567.237664,31.425718,40.045110,1669.260586,1.0,围网
28162,15.499214,46.178734,9.521201,40.354675,30.986109,48.948737,6.546866,38.576620,51.395931,4.294614,...,9.487354,30.0,59.414658,61.754050,3026.158089,42.029938,47.234707,-678.640743,1.0,围网
28163,15.269452,43.251405,8.918322,37.796860,30.527607,45.845767,6.132625,36.131530,48.137779,4.023149,...,16.192627,42.0,40.618786,19.006193,-5919.693946,8.946803,5.202907,2847.785713,1.0,拖网
28164,15.329169,44.844764,9.246412,39.188411,30.646730,47.534671,6.358182,37.461899,49.911088,4.171075,...,36.856627,23.0,46.997504,46.051485,-891.505956,22.292978,12.777289,-423.592220,1.0,刺网
