In [50]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import os
from obspy.core import read

### EVENTS

In [59]:
main_path = os.path.abspath("")
file_path = os.path.join(main_path, 'earthquakes_geofon_filtered.txt')
events = pd.read_csv(file_path, sep=',')

try:
    events.drop(columns=['Unnamed: 0'], inplace=True) #automatically created column (idk why)
except:
    pass

events.head()

Unnamed: 0,event_id,year,month,day,hour,minute,second,lat,lng,depth,mag_ML,std_dev_ML,mag_MA,std_dev_MA,category
0,0,2007,1,1,2,41,13.28,-21.65559,-68.41471,121.33,2.345,0.02,2.394,0.029,0
1,1,2007,1,1,2,47,7.83,-20.54848,-69.05857,102.79,1.114,0.033,1.305,0.031,0
2,2,2007,1,1,3,50,29.15,-21.86299,-68.53639,110.95,2.779,0.031,2.917,0.031,0
3,3,2007,1,1,4,19,27.82,-20.29515,-69.13106,95.79,1.401,0.017,1.571,0.023,0
4,4,2007,1,1,5,40,2.58,-21.23847,-70.05151,34.64,1.995,0.022,2.222,0.018,0


### CUSTOM DATASET

In [53]:
class event_dataset(Dataset):
    def __init__(self, split_procentage: float, dataset_type: str) -> None:
        """Returns a dataset fit for our CNN model. Arguments:
        split procentage: the procentage where we want to split our entire dataset.
        dataset_type: either 'train' or 'test'. The train dataset will take the data up
        to the specified split procentage, the test from that oint to the end.
        """
        if dataset_type not in ['train', 'test']:
            raise KeyError("dataset_type has to be one of the follwoing: 'train', 'test' ")
        
        split_idx = int(len(events) * split_procentage)

        if dataset_type == "train":
            self.dataframe = events.iloc[:split_idx, :]
        elif dataset_type == "test":
            self.dataframe = events.iloc[split_idx:, :]

        self.data_direcotry = "waveforms"
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        #CLASSIFICATION
        event_type = torch.tensor(data= row['category'], dtype= torch.int64)

        #WAVEFORM FETCH
        file_name = f"{int(row['event_id'])}.mseed"
        waveform = read(os.path.join(main_path, self.data_direcotry, file_name))
        waveform = [trace.data for trace in waveform]
        waveform = np.stack(waveform, axis = 0, dtype=np.float32)
        waveform = torch.from_numpy(waveform)

        #create sample
        sample = {'labels': event_type,
                  'data': waveform}

        return sample


### CNN MODEL

In [54]:
class seismic_CNN(nn.Module):
    def __init__(self) -> None:
        super(seismic_CNN, self).__init__()
        self.max_pool = nn.MaxPool1d(5, 2)

        self.conv1 = nn.Conv1d(in_channels = 3, out_channels = 18, kernel_size = 5)
        self.conv2 = nn.Conv1d(in_channels = 18, out_channels = 36, kernel_size = 3)
        self.conv3 = nn.Conv1d(in_channels = 36, out_channels = 68, kernel_size = 3)
        self.conv4 = nn.Conv1d(in_channels = 68, out_channels = 68, kernel_size = 2)

      
        self.fc1 = nn.Linear(in_features= 295 , out_features=80)
        self.fc2 = nn.Linear(in_features= 80, out_features=80)
        self.fc3 = nn.Linear(in_features=80, out_features= 2)
        

        self.dropout = nn.Dropout1d(p=0.5)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.max_pool(x)

        x = F.relu(self.conv2(x))
        x = self.max_pool(x)

        x = F.relu(self.conv3(x))
        x = self.max_pool(x)

        x = F.relu(self.conv4(x))
        x = self.max_pool(x)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim = -1)

        x = torch.flatten(input = x, start_dim=1, end_dim=-1)

        return x

### BASIC CNN  AND OTHER PARAMETERS

In [55]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 4
num_epochs = 4
learning_rate = 0.001

model = seismic_CNN().to(device=device)
crieterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

category_convert = {0: 'natural', 1: 'mining'} #int DataFrame category convert

### CNN TRAINING

In [56]:
train_loader = DataLoader(dataset = event_dataset(split_procentage=0.6, dataset_type='train'),
                          batch_size = batch_size,
                          shuffle=True,
                          num_workers=0)

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for idx, sample in enumerate(train_loader):
        labels = sample['labels'].to(device)
        data = sample['data'].to(device)

        outputs = model(data)
        loss = crieterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (idx+1) % 10 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')
PATH = './seismic_cnn.pth'
torch.save(model.state_dict(), PATH)

Finished Training


### CNN TESTING

In [57]:
test_loader = torch.utils.data.DataLoader(dataset = event_dataset(split_procentage=0.6, dataset_type='test'),
                                          batch_size=batch_size,
                                            shuffle=False)

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(len(category_convert))]
    n_class_samples = [0 for i in range(len(category_convert))]
    
    for sample in test_loader:
        data = sample['data'].to(device)
        labels = sample['labels'].to(device)
        outputs = model(data)

        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        
        for i in range(batch_size):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

Accuracy of the network: 100.0 %
