In [1]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from obspy import read
from tqdm.auto import tqdm
from pathlib import Path
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "darkgrid" style to every figure drawn later in the notebook.
sns.set(style="darkgrid")

# Menentukan direktori kerja dan file input awal

In [2]:
# All paths are resolved relative to the directory the notebook is started from.
cwd = os.getcwd()
# Sibling folders: `input` holds the source tables, `output` is for results.
input_dir, output_dir = (os.path.join(cwd, folder) for folder in ('input', 'output'))
# CSV listing every picked waveform with its label and mseed file location.
waveform_list = os.path.join(input_dir, 'event_with_label_and_filename.csv')

# Membaca daftar *waveform* event (CSV) yang berisi lokasi file mseed

In [3]:
# Load the event/waveform listing; each row describes one picked waveform.
waveform_csv = pd.read_csv(filepath_or_buffer=waveform_list)

# Preview the first five rows only.
waveform_csv.head(n=5)

Unnamed: 0,id,datetime,station,channel,event_mseed,filename,label
0,1,2017-10-18 02:04:52.222,ABNG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 52.222000.mseed,1
1,1,2017-10-18 02:04:53.063,TMKS,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 53.063000.mseed,0
2,1,2017-10-18 02:04:53.534,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 53.534000.mseed,1
3,1,2017-10-18 02:04:53.782,YHKR,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 53.782000.mseed,0
4,2,2017-10-18 02:17:45.904,ABNG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0217 45.904000.mseed,0


Membuang *waveform* yang belum memiliki label (label = 0), sehingga hanya baris dengan label > 0 yang dipakai

In [4]:
# Keep only the rows whose label is greater than 0 and renumber them 0..n-1.
# Built as one chained expression (no `inplace=True` on a filtered slice), so
# the cell is idempotent and `waveform_csv` is left untouched.
# The fresh 0-based index matters: get_data_and_waveform_id derives each
# waveform id from the row's index label (`row.name + 1`).
df = waveform_csv.loc[waveform_csv['label'] > 0].reset_index(drop=True)
df

Unnamed: 0,id,datetime,station,channel,event_mseed,filename,label
0,1,2017-10-18 02:04:52.222,ABNG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 52.222000.mseed,1
1,1,2017-10-18 02:04:53.534,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0204 53.534000.mseed,1
2,2,2017-10-18 02:17:46.889,TMKS,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0217 46.889000.mseed,1
3,2,2017-10-18 02:17:47.032,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0217 47.032000.mseed,1
4,3,2017-10-18 02:19:37.872,TMKS,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171018 0219 37.872000.mseed,1
...,...,...,...,...,...,...,...
2109,2721,2017-11-30 15:30:26.545,CEGI,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171130 1530 26.545000.mseed,2
2110,2723,2017-11-30 16:01:28.241,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171130 1601 28.241000.mseed,2
2111,2724,2017-11-30 17:42:49.544,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171130 1742 49.544000.mseed,2
2112,2725,2017-11-30 18:44:40.631,PSAG,EHZ,D:\Project\pick-agung\join_output\filtering_0-...,20171130 1844 40.631000.mseed,2


---

## Mengambil data label untuk setiap *waveform*

In [5]:
# Work on an independent copy of the label column: assigning to `.index` on the
# Series handed back by `df['label']` can, depending on the pandas version,
# also change what later `df['label']` lookups return.
y = df['label'].copy()
# 1-based index so that each label lines up with the waveform id
# (`row.name + 1`) assigned in get_data_and_waveform_id.
y.index = range(1, len(y) + 1)

y

1       1
2       1
3       1
4       1
5       1
       ..
2110    2
2111    2
2112    2
2113    2
2114    2
Name: label, Length: 2114, dtype: int64

In [6]:
# Store the label vector (index = waveform id) next to the other input tables.
label_csv_path = os.path.join(input_dir, 'label.csv')
y.to_csv(label_csv_path)

## Membuat fungsi ekstrak, transform, dan load data *waveform* dari file *mseed* menjadi bentuk *dataframe* baru 

In [7]:
def create_new_dataframe(waveform_id, time, amplitude, event_id, label):
    """Assemble the long-format table for a single waveform.

    Parameters
    ----------
    waveform_id : scalar
        Identifier stored in the ``id`` column of every row.
    time : sequence
        Per-sample values for the ``time`` column.
    amplitude : sequence
        Per-sample values for the ``amplitude`` column.
    event_id : scalar
        Identifier stored in the ``event_id`` column of every row.
    label : scalar
        Value stored in the ``label`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``label``, ``time``, ``amplitude``, ``event_id`` (in
        that order); scalar arguments are broadcast by pandas to the length
        of the sequence arguments.
    """
    # Insertion order of the mapping fixes the column order of the frame.
    columns = dict(
        id=waveform_id,
        label=label,
        time=time,
        amplitude=amplitude,
        event_id=event_id,
    )
    return pd.DataFrame(columns)

In [8]:
def get_data_and_waveform_id(row):
    """Read one waveform file and return its samples as a long-format frame.

    Parameters
    ----------
    row : pandas.Series
        One row of the labelled event table. The function reads
        ``row['event_mseed']`` (file path passed to ``read``), ``row['id']``,
        ``row['label']`` and ``row.name``.
        NOTE(review): ``row.name`` is assumed to be the 0-based integer row
        position, so that ids start at 1 -- confirm the table index is reset
        before calling.

    Returns
    -------
    pandas.DataFrame
        The result of ``create_new_dataframe`` with one row per sample;
        ``time`` is the sample position 0..n-1.
    """
    # Only the first trace of the file is used.
    samples = read(row['event_mseed'])[0].data
    return create_new_dataframe(
        waveform_id=row.name + 1,
        time=list(range(len(samples))),
        amplitude=samples,
        event_id=row['id'],
        label=row['label'],
    )

In [9]:
# Build one long-format frame per labelled waveform, in table order.
# The index label yielded by iterrows() is not needed here (the row carries it
# as `row.name`), hence the throwaway `_`.
amplitudes = [get_data_and_waveform_id(row) for _, row in df.iterrows()]

In [10]:
# Number of per-waveform frames built above (one per row of `df`).
len(amplitudes)

2114

## Menggabungkan keseluruhan hasil transformasi data per *waveform* menjadi satu *dataframe* baru

In [11]:
# Stack the per-waveform frames row-wise into a single table. Each frame keeps
# its own 0..n-1 index, so index labels repeat across waveforms; rows are
# told apart by the `id` and `time` columns.
df_amplitudes = pd.concat(amplitudes, axis=0, ignore_index=False)

In [12]:
# Show the combined table (the notebook renders a truncated head/tail view).
df_amplitudes

Unnamed: 0,id,label,time,amplitude,event_id
0,1,1,0,-0.032211,1
1,1,1,1,-0.010978,1
2,1,1,2,-0.007734,1
3,1,1,3,-0.003970,1
4,1,1,4,0.004136,1
...,...,...,...,...,...
995,2114,2,995,-0.220835,2725
996,2114,2,996,-0.300042,2725
997,2114,2,997,-0.379990,2725
998,2114,2,998,-0.407046,2725


In [13]:
# Persist the combined table; the repeating per-waveform index is dropped
# because `id` and `time` already identify every row.
transformed_csv_path = os.path.join(input_dir, 'result_of_transformation.csv')
df_amplitudes.to_csv(transformed_csv_path, index=False)