Import packages.

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import confusion_matrix
from  sklearn.naive_bayes import GaussianNB

Load dataloader for event metadata.

In [2]:
# Load the saved DataLoader to recover the list of signal files.
# NOTE: torch.load unpickles arbitrary Python objects; only load files from a trusted source.
dataloader = torch.load('../data/dataloader.pth')

# One entry per signal file: the text before the first underscore of the file name
# (presumably files are named '<eventID>_...' — confirm against the dataset).
event_ids = np.array([f.split('_')[0] for f in dataloader.dataset.files])
df_event = pd.read_csv('../data/events.csv')

# Resolve every signal's event type with a single indexed lookup instead of
# scanning the whole events table once per signal.
signal_event_ids = pd.Index([int(i) for i in event_ids], dtype='int64')
df_event_used = df_event[df_event['eventID'].isin(signal_event_ids)]

# Each referenced event must appear exactly once, otherwise its type is undefined.
if df_event_used['eventID'].duplicated().any():
    raise ValueError('events.csv lists a referenced eventID more than once')
missing_ids = signal_event_ids[~signal_event_ids.isin(df_event_used['eventID'])].unique()
if len(missing_ids) > 0:
    raise ValueError(f'eventIDs not found in events.csv: {missing_ids.tolist()[:10]}')

# Same order and length as event_ids; repeated ids (several signals per event) are fine.
event_type_by_id = df_event_used.set_index('eventID')['event_type']
event_types = np.array(event_type_by_id.loc[signal_event_ids].tolist())

There are two types of event: earthquake and nuclear explosion.

Let's predict the event type from its signal embeddings.

In [3]:
# Count how many signals belong to each event type. event_ids and event_types
# are index-aligned, so a boolean mask on the types selects the matching ids.
is_earthquake = event_types == 'earthquake'
is_explosion = event_types == 'nuclear explosion'
n_earthquake = len(event_ids[is_earthquake])
n_explosion = len(event_ids[is_explosion])

print('Number of signals by event type:')
print('- earthquake:', n_earthquake)
print('- nuclear explosion:', n_explosion)

Number of signals by event type:
- earthquake: 48638
- nuclear explosion: 31


The data is highly imbalanced: only 31 of the 48,669 signals come from nuclear explosions.

Load embeddings.

In [4]:
# Load the saved embeddings onto the CPU regardless of the device they were saved from.
cpu = torch.device('cpu')
embeddings_tensor = torch.load('../data/embeddings.pt', map_location=cpu)

# Drop singleton dimensions and convert to a NumPy array.
embeddings = embeddings_tensor.squeeze().numpy()

print('Shape of embeddings:', embeddings.shape)

Shape of embeddings: (48669, 256)


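Train a Gaussian naive Bayes classifier to predict the event type from the signal embeddings. Note that the confusion matrix below is computed on the same data the classifier was trained on, so it does not measure performance on unseen signals.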

In [5]:
# Fit a Gaussian naive Bayes model on all embeddings (fit returns the estimator itself).
clf = GaussianNB().fit(embeddings, event_types)

# NOTE: predictions are made on the same samples used for fitting, so the matrix
# below reflects training-set performance only.
print('Confusion matrix:')
predicted_types = clf.predict(embeddings)
print(confusion_matrix(y_true=event_types, y_pred=predicted_types))

Confusion matrix:
[[41477  7161]
 [   25     6]]
