In [1]:
import pandas as pd
import numpy as np
import torch
from scipy.signal import spectrogram
from models import QuakeModel

In [2]:
# Model architecture -- these six values are passed to QuakeModel(...) when
# the network is rebuilt for inference.
n_layers = 3
n_heads = 8
freq_size = 128
d_model = 32   # other values noted by the author: 512, 128
d_ff = 128     # other values noted by the author: 2048, 512
dropout = 0.1

# Input framing. NOTE(review): in the visible cells these are referenced only
# from a commented-out reshape -- confirm whether they are still needed.
time_length = 500
time_size = 300

# Optimisation / schedule settings. Presumably carried over from the training
# script; none of the visible inference cells read them -- TODO confirm.
n_epochs = 100
save_step = 1
learning_rate = 1e-4
weight_decay = 1e-5
step_size = 10
gamma = 0.1

# Dataset / loader sizing (same caveat: not read by the visible cells).
n_samples_train = 100000
n_samples_valid = 10000
batch_size = 32
num_workers = 4

In [3]:
# File name of the checkpoint to evaluate; it is appended to './models/' when
# the checkpoint is loaded.
model_name = 'model-epoch-02.ckpt'

In [4]:
# Root directory of the LANL dataset; the sample submission and the test
# segments are read from beneath it.
# NOTE(review): this is a machine-specific absolute path -- adjust it before
# running the notebook elsewhere.
input_dir = '/run/media/hoosiki/WareHouse3/mtb/datasets/LANL'

# map_location='cpu' remaps every stored tensor onto the CPU, so a checkpoint
# written from a CUDA device still loads on a machine without a GPU. The
# notebook never moves the model or its inputs to another device, so CPU is
# where the weights are needed.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load('./models/' + model_name, map_location='cpu')

In [5]:
# Load the sample submission as the output template: rows are keyed by
# seg_id and the time_to_failure column is stored as float32.
df_submission = pd.read_csv(
    input_dir + '/sample_submission.csv',
    index_col='seg_id',
    dtype={'time_to_failure': np.float32},
)

In [6]:
# Rebuild the network from the configured hyperparameters. With the default
# strict load_state_dict, the checkpoint's parameter names and shapes must
# match this architecture.
model = QuakeModel(
    n_layers=n_layers, n_heads=n_heads,
    freq_size=freq_size, d_model=d_model, d_ff=d_ff,
    dropout=dropout,
)

# Restore the trained weights stored under the checkpoint's 'state_dict' key.
model.load_state_dict(checkpoint['state_dict'])

# Switch to evaluation mode (dropout disabled). eval() returns the module,
# so as the last expression it also displays the model summary.
model.eval()

QuakeModel(
  (embedding): Embeddings(
    (fc_embed): Linear(in_features=128, out_features=32, bias=True)
  )
  (position): PositionalEncoding(
    (dropout): Dropout(p=0.1)
  )
  (attn): MultiHeadedAttention(
    (linears): ModuleList(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): Linear(in_features=32, out_features=32, bias=True)
      (2): Linear(in_features=32, out_features=32, bias=True)
      (3): Linear(in_features=32, out_features=32, bias=True)
    )
    (dropout): Dropout(p=0.1)
  )
  (pff): PositionwiseFeedForward(
    (w_1): Linear(in_features=32, out_features=128, bias=True)
    (w_2): Linear(in_features=128, out_features=32, bias=True)
    (dropout): Dropout(p=0.1)
  )
  (encoder_layer): EncoderLayer(
    (self_attn): MultiHeadedAttention(
      (linears): ModuleList(
        (0): Linear(in_features=32, out_features=32, bias=True)
        (1): Linear(in_features=32, out_features=32, bias=True)
        (2): Linear(in_features=32, out_features=32

In [7]:
# Settings forwarded to scipy.signal.spectrogram for every test segment.
fs = 4000000              # raw signal sample rate: 4 MHz
nperseg = 256             # samples per segment (same as SciPy's default)
noverlap = nperseg // 4   # SciPy's default would be nperseg // 8
window = 'triang'
detrend = 'linear'        # accepted values: 'linear', 'constant', False
scaling = 'density'       # accepted values: 'density', 'spectrum'

# Small offset added to the spectrogram before np.log is taken.
eps = 1e-11

In [8]:
# Predict time_to_failure for every test segment listed in the submission
# index. The forward passes run under torch.no_grad() so that no autograd
# graph is recorded during inference.
predictions = []
with torch.no_grad():
    for seg_id in df_submission.index:
        seg = pd.read_csv(input_dir + '/test/' + seg_id + '.csv')
        amplitude = seg['acoustic_data'].values

        # Spectrogram of the raw signal; Sxx is laid out as (frequency, time).
        f, t, Sxx = spectrogram(amplitude,
                                nperseg=nperseg,
                                noverlap=noverlap,
                                fs=fs,
                                window=window,
                                scaling=scaling,
                                detrend=detrend)
        Sxx = np.log(Sxx + eps)    # eps keeps the log finite where Sxx is zero
        # Drop the last frequency row. nperseg=256 yields 129 one-sided bins;
        # presumably trimmed to match freq_size=128 -- TODO confirm.
        Sxx = Sxx[:-1, :]
        Sxx = Sxx.transpose(1, 0)  # -> (time, frequency)

        # float32 tensor with a leading batch dimension of 1.
        x = torch.from_numpy(Sxx).float().unsqueeze(0)
        # .item() requires the model to return a single-element tensor.
        predictions.append(model(x).item())

# Assign the whole column in one step. The previous per-row chained form
# `df_submission.time_to_failure[i] = y` used a positional integer on a
# string-labelled index and wrote through an intermediate Series, which newer
# pandas versions deprecate and which does not update the frame under
# Copy-on-Write. The predictions are in index order, so a direct column
# assignment is equivalent; float32 matches the dtype the column was read with.
df_submission['time_to_failure'] = np.asarray(predictions, dtype=np.float32)

df_submission.head(20)

Unnamed: 0_level_0,time_to_failure
seg_id,Unnamed: 1_level_1
seg_00030f,3.809649
seg_0012b5,4.894618
seg_00184e,5.480223
seg_003339,8.695795
seg_0042cc,5.882518
seg_004314,0.894612
seg_004cd2,6.676472
seg_004ee5,4.021156
seg_004f1f,3.517986
seg_00648a,2.528921


In [9]:
# Write the predictions to 'submission.csv' in the working directory; the
# seg_id index is written as the first column (to_csv's default index=True).
df_submission.to_csv('submission.csv')