In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd

import os
from obspy.core import read

### EVENTS

In [21]:
main_path = os.path.abspath("")

file_path = os.path.join(main_path, 'earthquakes_filtered.txt') #all events
all_events = pd.read_csv(file_path, sep=',')

file_list = os.listdir(os.path.join(main_path, "geofon_waveforms"))
file_list = [int(file[:-6]) for file in file_list] #remove the '.mseed' ending and convert to int to get event_id
filtered_events = pd.DataFrame(data=file_list, columns=['event_id']) #events for which files exist

events = pd.merge(left = filtered_events, right = all_events, on='event_id', how= 'inner')


try:
    events.drop(columns=['Unnamed: 0'], inplace=True) #automatically created column (idk why)
except:
    pass

events.head()

Unnamed: 0,event_id,event_ID,year,month,day,hour,minute,second,lat,lng,depth,mag_ML,std_dev_ML,mag_MA,std_dev_MA,category
0,10000,10253,2007,10,30,9,44,54.44,-21.54454,-68.41121,120.18,2.083,0.027,2.144,0.029,0
1,10001,10254,2007,10,30,10,23,12.05,-21.06589,-68.84076,103.35,1.476,0.02,1.633,0.023,0
2,10002,10255,2007,10,30,10,32,18.81,-22.27305,-68.59028,101.18,2.539,0.02,2.67,0.016,0
3,10003,10256,2007,10,30,10,47,34.14,-21.75952,-68.46277,110.75,1.776,0.009,1.864,0.016,0
4,10004,10257,2007,10,30,11,3,32.12,-19.48856,-70.1743,26.0,1.307,0.028,1.463,0.026,0


### CUSTOM DATASET

In [22]:
class event_dataset(Dataset):
    def __init__(self, split_procentage: float, dataset_type: str) -> None:
        """Returns a dataset fit for our CNN model. Arguments:
        split procentage: the procentage where we want to split our entire dataset.
        dataset_type: either 'train' or 'test'. The train dataset will take the data up
        to the specified split procentage, the test from that oint to the end.
        """
        if dataset_type not in ['train', 'test']:
            raise KeyError("dataset_type has to be one of the follwoing: 'train', 'test' ")
        
        split_idx = int(len(events) * split_procentage)

        if dataset_type == "train":
            self.dataframe = events.iloc[:split_idx, :]
        elif dataset_type == "test":
            self.dataframe = events.iloc[split_idx:, :]

        self.data_direcotry = "geofon_waveforms"
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        #CLASSIFICATION
        event_type = torch.tensor(data= row['category'], dtype= torch.int64)

        #WAVEFORM FETCH
        file_name = f"{int(row['event_id'])}.mseed"
        waveform = read(os.path.join(main_path, self.data_direcotry, file_name))
        waveform = [trace.data for trace in waveform]
        waveform = np.stack(waveform, axis = 0, dtype=np.float32)
        waveform = torch.from_numpy(waveform)

        #create sample
        sample = {'labels': event_type,
                  'data': waveform}

        return sample


In [23]:
def collate_fn(batch):
    labels = [sample['labels'] for sample in batch]
    target = [sample['data'] for sample in batch]
    if len(target) > 3:
        target = target[:3]
    target = torch.LongTensor(target)
    return {'data': target, 'labels': labels}

### CNN MODEL

In [24]:
class seismic_CNN(nn.Module):
    def __init__(self) -> None:
        super(seismic_CNN, self).__init__()
        self.max_pool = nn.MaxPool1d(5, 2)

        self.conv1 = nn.Conv1d(in_channels = 3, out_channels = 18, kernel_size = 5)
        self.conv2 = nn.Conv1d(in_channels = 18, out_channels = 36, kernel_size = 3)
        self.conv3 = nn.Conv1d(in_channels = 36, out_channels = 68, kernel_size = 3)
        self.conv4 = nn.Conv1d(in_channels = 68, out_channels = 68, kernel_size = 2)

      
        self.fc1 = nn.Linear(in_features= 295 , out_features=80)
        self.fc2 = nn.Linear(in_features= 80, out_features=80)
        self.fc3 = nn.Linear(in_features=80, out_features= 2)
        

        self.dropout = nn.Dropout1d(p=0.5)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.max_pool(x)

        x = F.relu(self.conv2(x))
        x = self.max_pool(x)

        x = F.relu(self.conv3(x))
        x = self.max_pool(x)

        x = F.relu(self.conv4(x))
        x = self.max_pool(x)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim = -1)

        x = torch.flatten(input = x, start_dim=1, end_dim=-1)

        return x

### BASIC CNN  AND OTHER PARAMETERS

In [29]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 100
num_epochs = 11

model = seismic_CNN().to(device=device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),
                             lr = 0.01,
                               momentum= 0.9)

category_convert = {0: 'natural', 1: 'mining'} #int DataFrame category convert

### CNN TRAINING

In [30]:
train_loader = DataLoader(dataset = event_dataset(split_procentage=0.8, dataset_type='train'),
                          batch_size = batch_size,
                          shuffle=True,
                          num_workers=0)

n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    
    running_loss = 0.0
    for idx, sample in enumerate(train_loader):
        labels = sample['labels'].to(device)
        data = sample['data'].to(device)

        optimizer.zero_grad()

        prediction = model(data)
        loss = loss_fn(prediction, labels)
        loss.backward()
        optimizer.step()
    
        running_loss += loss.item()
        
        if (idx+1) % 10 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            running_loss = 0.0

print('Finished Training')
PATH = './seismic_cnn.pth'
torch.save(model.state_dict(), PATH)

Epoch [1/11], Step [10/400], Loss: 4.9762
Epoch [1/11], Step [20/400], Loss: 4.8788
Epoch [1/11], Step [30/400], Loss: 4.8164
Epoch [1/11], Step [40/400], Loss: 4.7522
Epoch [1/11], Step [50/400], Loss: 4.6919
Epoch [1/11], Step [60/400], Loss: 4.6593
Epoch [1/11], Step [70/400], Loss: 4.6595
Epoch [1/11], Step [80/400], Loss: 4.6319
Epoch [1/11], Step [90/400], Loss: 4.6331
Epoch [1/11], Step [100/400], Loss: 4.7007
Epoch [1/11], Step [110/400], Loss: 4.6614
Epoch [1/11], Step [120/400], Loss: 4.6589
Epoch [1/11], Step [130/400], Loss: 4.6571
Epoch [1/11], Step [140/400], Loss: 4.6366
Epoch [1/11], Step [150/400], Loss: 4.6446
Epoch [1/11], Step [160/400], Loss: 4.6244
Epoch [1/11], Step [170/400], Loss: 4.6233
Epoch [1/11], Step [180/400], Loss: 4.6320
Epoch [1/11], Step [190/400], Loss: 4.6216
Epoch [1/11], Step [200/400], Loss: 4.6112
Epoch [1/11], Step [210/400], Loss: 4.6008
Epoch [1/11], Step [220/400], Loss: 4.6586
Epoch [1/11], Step [230/400], Loss: 4.6095
Epoch [1/11], Step [

### CNN TESTING

In [None]:
model = seismic_CNN()
model.load_state_dict(torch.load(PATH))

test_loader = torch.utils.data.DataLoader(dataset = event_dataset(split_procentage=0.8, dataset_type='test'),
                                          batch_size=batch_size,
                                            shuffle=False)

n_correct = 0
n_samples = 0

with torch.no_grad():
    for sample in test_loader:
        data = sample['data'].to(device)
        labels = sample['labels'].to(device)
        
        outputs = model(data)

        _, predicted = torch.max(outputs.data, 1)
        print(f"Predicted = {predicted}")
        n_samples += labels.size(0)
        print(f"N samples = {n_samples}, label size {labels.size(0)}")
        n_correct += (predicted == labels).sum().item()
        

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc:.2f} %')

In [None]:
# test_loader = torch.utils.data.DataLoader(dataset = event_dataset(split_procentage=0.8, dataset_type='test'),
#                                           batch_size=batch_size,
#                                             shuffle=False)

# with torch.no_grad():
#     n_correct = 0
#     n_samples = 0
#     n_class_correct = [0 for i in range(len(category_convert))]
#     n_class_samples = [0 for i in range(len(category_convert))]
    
#     for sample in test_loader:
#         data = sample['data'].to(device)
#         labels = sample['labels'].to(device)
#         outputs = model(data)

#         # max returns (value ,index)
#         _, predicted = torch.max(outputs.data, 1)
#         n_samples += labels.size(0)
#         n_correct += (predicted == labels).sum().item()
        
#         for i in range(batch_size):
#             try:
#                 label = labels[i]
#                 pred = predicted[i]
#                 if (label == pred):
#                     n_class_correct[label] += 1
#                 n_class_samples[label] += 1
#             except IndexError: #in case we have a few batches at the end that dont have len == batchsize we skip them
#                 pass

#     acc = 100.0 * n_correct / n_samples
#     print(f'Accuracy of the network: {acc:.2f} %')

Accuracy of the network: 0.00 %
