In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Quake periods held out for validation; the data has 16 periods, numbered 0-15.
period_valid = [1, 8, 13]
# Row limit handed to pd.read_csv when loading the training data (None = no limit).
nrows = None

In [3]:
# Lookup table built from period2idx.csv:
#   key   -> quake period number
#   value -> last row index that still belongs to that period
df_period = pd.read_csv('period2idx.csv')
period2idx = {
    period: last_idx
    for period, last_idx in zip(df_period['period'], df_period['last_idx'])
}
period2idx

{0: 5656573,
 1: 50085877,
 2: 104677355,
 3: 138772452,
 4: 187641819,
 5: 218652629,
 6: 245829584,
 7: 307838916,
 8: 338276286,
 9: 375377847,
 10: 419368879,
 11: 461811622,
 12: 495800224,
 13: 528777114,
 14: 585568143,
 15: 621985672}

In [4]:
# Directory that holds train.csv and receives the generated split files.
# Set the LANL_INPUT_DIR environment variable to point the notebook at another
# location; when it is unset or empty, the original local path is used.
input_dir = os.environ.get('LANL_INPUT_DIR') or '/run/media/hoosiki/WareHouse3/mtb/datasets/LANL'

In [5]:
# Load the raw training table; nrows is forwarded to read_csv to cap the rows read.
# The signal column is parsed straight to float64, so no separate full-column
# astype pass (with its temporary copy) is needed after loading.
df_train = pd.read_csv(
    input_dir + '/train.csv',
    nrows=nrows,
    dtype={'acoustic_data': 'float64'},
)
# Switch to the column names used by the rest of the notebook. The rename is done
# in place on the freshly loaded frame, as in the original cell.
df_train.rename(columns={'acoustic_data': 'amplitude', 'time_to_failure': 'quake_time'}, inplace=True)

In [6]:
# Label every row with the quake period it falls in.
# A row belongs to period k when exactly k of the period-end indices are strictly
# smaller than its row index. A left-sided binary search over the ascending
# period ends returns exactly that count, so one vectorised lookup replaces
# one full pass over the frame per period.
period_last_idx = np.array(
    [period2idx[iperiod] for iperiod in range(len(period2idx))],
    dtype=np.int64,
)
# searchsorted needs ascending boundaries; fail loudly rather than mislabel rows.
assert np.all(np.diff(period_last_idx) >= 0), 'period2idx last indices must be non-decreasing'
df_train['quake_period'] = np.searchsorted(period_last_idx, df_train.index.to_numpy(), side='left')
df_train

Unnamed: 0,amplitude,quake_time,quake_period
0,12.0,1.469100,0
1,6.0,1.469100,0
2,8.0,1.469100,0
3,5.0,1.469100,0
4,8.0,1.469100,0
5,8.0,1.469100,0
6,9.0,1.469100,0
7,7.0,1.469100,0
8,-5.0,1.469100,0
9,3.0,1.469100,0


In [7]:
# Training split: keep every row whose quake period is NOT one of the held-out
# validation periods, then renumber the rows from zero.
df_train_split = (
    df_train
    .loc[lambda frame: ~frame['quake_period'].isin(period_valid)]
    .reset_index(drop=True)
)
df_train_split

Unnamed: 0,amplitude,quake_time,quake_period
0,12.0,1.469100,0
1,6.0,1.469100,0
2,8.0,1.469100,0
3,5.0,1.469100,0
4,8.0,1.469100,0
5,8.0,1.469100,0
6,9.0,1.469100,0
7,7.0,1.469100,0
8,-5.0,1.469100,0
9,3.0,1.469100,0


In [8]:
# Validation split: keep only the rows whose quake period is listed in
# period_valid, then renumber the rows from zero.
df_valid_split = (
    df_train
    .loc[lambda frame: frame['quake_period'].isin(period_valid)]
    .reset_index(drop=True)
)
df_valid_split

Unnamed: 0,amplitude,quake_time,quake_period
0,4.0,11.540800,1
1,5.0,11.540800,1
2,6.0,11.540800,1
3,3.0,11.540800,1
4,4.0,11.540800,1
5,3.0,11.540800,1
6,8.0,11.540800,1
7,8.0,11.540800,1
8,10.0,11.540800,1
9,7.0,11.540800,1


In [9]:
# Write both splits next to the source data, without the row index column.
for split_frame, split_filename in (
    (df_train_split, '/train_split.csv'),
    (df_valid_split, '/valid_split.csv'),
):
    split_frame.to_csv(input_dir + split_filename, index=False)