In [None]:
%matplotlib inline

import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import sklearn
import matplotlib.pyplot as plt

# 1. Defining the CNN architecture

In [None]:
class CNN(nn.Module):
    """1-D convolutional classifier that returns log-probabilities for two classes.

    Three Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are followed by
    three fully connected layers. Every convolution is padded to preserve the
    sequence length and every pool halves it, so the 4736 input features of
    ``fc1`` (128 channels x 37 samples) correspond to inputs of 296-303 samples.

    Input:  float tensor of shape (batch, 1, length)
    Output: tensor of shape (batch, 2) holding log-softmax scores
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=21, padding=10)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=15, padding=7)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=11, padding=5)

        self.batchnorm32 = nn.BatchNorm1d(num_features=32)
        self.batchnorm64 = nn.BatchNorm1d(num_features=64)
        self.batchnorm128 = nn.BatchNorm1d(num_features=128)
        # Each dense layer gets its own normalisation layer: a single shared
        # BatchNorm1d would mix the running statistics (and affine parameters)
        # of two different activations.
        self.batchnorm512 = nn.BatchNorm1d(num_features=512)
        self.batchnorm512_fc2 = nn.BatchNorm1d(num_features=512)

        self.fc1 = nn.Linear(4736, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 2)

        # Stateless, so one instance is safely reused after every conv stage.
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        """Run the network on ``x`` and return per-class log-probabilities."""
        x = self.conv1(x)
        x = F.relu(self.batchnorm32(x))
        x = self.maxpool(x)

        x = self.conv2(x)
        x = F.relu(self.batchnorm64(x))
        x = self.maxpool(x)

        x = self.conv3(x)
        x = F.relu(self.batchnorm128(x))
        x = self.maxpool(x)

        # Flatten (batch, channels, length) to (batch, features) for the dense layers
        x = x.view(x.shape[0], -1)

        x = self.fc1(x)
        x = F.relu(self.batchnorm512(x))

        x = self.fc2(x)
        x = F.relu(self.batchnorm512_fc2(x))

        x = F.log_softmax(self.fc3(x), dim=1)
        return x

# 2. Loading the Data

In [None]:
import h5py

# Open the training and test HDF5 files read-only. The handles are kept open
# because later cells slice the "X" and "pwave" datasets directly from them.
mixdata = h5py.File("../train/scsn_p_2000_2017_6sec_0.5r_pick_train_mix.hdf5", "r")
testdata = h5py.File("../test/scsn_p_2000_2017_6sec_0.5r_pick_test_mix.hdf5", "r")

In [None]:
def split_trainset(train_val_data, train_val_labels, ratio):
    """Split samples and labels into a leading training part and a trailing validation part.

    The split is positional (no shuffling): the first ``int(len(data) * ratio)``
    items go to training and the remainder to validation.

    Arguments:
        train_val_data -- sliceable sequence of samples
        train_val_labels -- sliceable sequence of labels aligned with the samples
        ratio -- fraction of the samples, in [0, 1], assigned to training
    Returns:
        ((trainset, trainlabels), (valset, val_labels))
    Raises:
        ValueError -- if ``ratio`` is outside [0, 1] or the inputs differ in length
    """
    # A ratio outside [0, 1] would otherwise slice silently: a negative value
    # counts from the end and a value above 1 leaves the validation set empty.
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got {}".format(ratio))
    if len(train_val_data) != len(train_val_labels):
        raise ValueError("data and labels must have the same length ({} != {})".format(
            len(train_val_data), len(train_val_labels)))

    trainsize = int(len(train_val_data) * ratio)

    trainset = train_val_data[:trainsize]
    trainlabels = train_val_labels[:trainsize]

    valset = train_val_data[trainsize:]
    val_labels = train_val_labels[trainsize:]

    return (trainset, trainlabels), (valset, val_labels)

In [None]:
%%time

from torch.utils.data import DataLoader

# Number of samples per mini-batch used by the DataLoaders
batch_size = 250

# Number of rows read from the training file, the fraction of those rows used
# for training (the rest becomes validation), and the number of test rows
train_size = 1 * 10 ** 6
train_ratio = 0.7
test_size = 1 * 10 ** 5

# Load the combined training/validation data and labels from the training file
train_val_data = mixdata["X"][:train_size]
train_val_labels = mixdata["pwave"][:train_size]

# Positional split: the first train_ratio of the rows train, the remainder validates
(trainset, trainlabels), (valset, val_labels) = split_trainset(train_val_data, train_val_labels, train_ratio)

In [None]:
# Pair every sample with its label so each dataset is a list of
# (sample, label) tuples that a DataLoader can batch.
# NOTE: trainset/valset/testset are rebound to the zipped lists, so re-running
# this cell without first re-running the split cell zips already-zipped pairs.
trainset = list(zip(trainset, trainlabels))

valset = list(zip(valset, val_labels))

# Read the first test_size rows of the test file and pair them the same way
testset = testdata["X"][:test_size]
testlabels = testdata["pwave"][:test_size]

testset = list(zip(testset, testlabels))

In [None]:
# Only the training loader shuffles: the order of samples matters for SGD but
# not for evaluation, so validation and test iterate in a fixed order.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Instantiate the network defined in section 1
model = CNN()

# 3. Add Multi-GPU Support to Model 
To run the model on multiple GPUs, we can use the `nn.DataParallel` wrapper. It requires that we move all tensors to `cuda:0` (the default GPU) before we pass them through the network.

In [None]:
def parallelize(model):
    """Wrap ``model`` in ``nn.DataParallel`` across every visible CUDA device.

    DataParallel requires the wrapped module's parameters and buffers to be on
    the first device in ``device_ids`` before the forward pass, so the model is
    moved there before it is wrapped.

    Arguments:
        model -- nn.Module to replicate across GPUs
    Returns:
        nn.DataParallel wrapping ``model``. When no CUDA device is visible the
        model stays where it is and ``device_ids`` is empty.
    """
    device_ids = list(range(torch.cuda.device_count()))
    if device_ids:
        # Module.to moves parameters in place and returns the same module
        model = model.to(torch.device("cuda", device_ids[0]))
    return torch.nn.DataParallel(model, device_ids=device_ids)

In [None]:
# Wrap the model exactly once: the isinstance guard keeps a re-run of this cell
# from nesting one DataParallel inside another.
if not isinstance(model, torch.nn.DataParallel):
    model = parallelize(model)
model

# 4. Define Loss Function and Optimizer
Here we define the loss function and optimizer. For the loss function (criterion), we use the negative log-likelihood loss (`NLLLoss`). The network's final layer already applies `log_softmax`, so applying `NLLLoss` to its output is equivalent to computing the cross entropy. The optimizer is Adam with a learning rate of 0.001.

In [None]:
from torch import optim

# NLLLoss takes log-probabilities as input, which is what CNN.forward returns
# through its final log_softmax.
criterion = nn.NLLLoss()
# Adam over every model parameter with a fixed learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 5. Training and Validation

## TODO -- Save model on validation improvement. Then load model when testing and evaluating. 

In [None]:
%%time

epochs = 10

# Average loss per epoch, collected for the learning-curve plot
train_losses = []
val_losses = []

for epoch in range(epochs):
    # ============================================
    #            TRAINING
    # ============================================
    model.train()
    train_loss = 0

    for batch, labels in trainloader:
        batch, labels = batch.to(device), labels.to(device)
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass; unsqueeze(1) adds the channel dimension Conv1d expects
        output = model(batch.unsqueeze(1))
        # NLLLoss needs a 1-D int64 tensor of class indices on the same device
        # as the output; .long() keeps the tensor on whatever device it is on.
        loss = criterion(output, labels.long().reshape(-1))
        train_loss += loss.item()
        # Backpropagation
        loss.backward()
        # Update weights
        optimizer.step()

    # ============================================
    #            VALIDATION
    # ============================================
    # eval() switches BatchNorm to its running statistics; no_grad() skips
    # building the autograd graph since no backward pass is run here.
    model.eval()
    val_loss = 0

    with torch.no_grad():
        for batch, labels in val_loader:
            batch, labels = batch.to(device), labels.to(device)
            # Forward pass on the validation batch
            output = model(batch.unsqueeze(1))
            # Calculate loss
            loss = criterion(output, labels.long().reshape(-1))
            val_loss += loss.item()

    # Epoch summary: mean of the per-batch losses
    t_loss_avg = train_loss / len(trainloader)
    v_loss_avg = val_loss / len(val_loader)

    train_losses.append(t_loss_avg)
    val_losses.append(v_loss_avg)

    print('Epoch [{:5d}/{:5d}] | train loss: {:6.4f} | validation loss: {:6.4f}'.format(
            epoch+1, epochs, t_loss_avg, v_loss_avg))

In [None]:
# Learning curves: average training and validation loss for each epoch
fig, ax = plt.subplots()
ax.plot(train_losses, label="Training")
ax.plot(val_losses, label="Validation")
ax.set_title("Training and Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Average loss")
ax.legend();

# 6. Testing Model Performance

For testing model performance, we'll be using scikit-learn's metrics library. Scikit-learn provides a handful of built-in classification metrics which we can take advantage of. In order to use them with PyTorch, we'll have to move the tensors from GPU to CPU and convert them to numpy arrays. 

1. Classification Report  
    a. Precision  
    b. Recall  
    c. F1 Score
2. Accuracy
3. AUC-ROC
4. AUC

In [None]:
def cuda_to_numpy(tensor):
    """Return the contents of a tensor as a numpy array in host memory.

    Positional argument:
        tensor -- Tensor to convert to a numpy array; it may require grad
    """
    # detach() drops the autograd history when there is one and leaves the
    # values of other tensors untouched, so a single path covers both cases.
    return tensor.detach().cpu().numpy()

In [None]:
def test_model(model, testloader, device="cpu"):
    """Evaluate a two-class classifier and print its classification metrics.

    Prints scikit-learn's classification report, the accuracy and the ROC-AUC
    score computed over every batch of ``testloader``.

    Arguments:
        model -- Model returning per-class log-probabilities of shape (N, 2)
        testloader -- DataLoader yielding (batch, labels) pairs
        device -- Device the batches are moved to before the forward pass
    Returns:
        None
    """
    from sklearn.metrics import classification_report
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import roc_auc_score

    pred_chunks = []
    true_chunks = []
    score_chunks = []

    # Evaluate with BatchNorm in inference mode and without autograd
    # bookkeeping, then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch, labels in testloader:
                batch, labels = batch.to(device), labels.to(device)
                # forward pass
                log_probs = model(batch.unsqueeze(1))
                probs = torch.exp(log_probs)
                # Predicted class = index of the largest probability
                top_class = probs.argmax(dim=1)

                pred_chunks.append(cuda_to_numpy(top_class).reshape(-1))
                true_chunks.append(cuda_to_numpy(labels).reshape(-1))
                # ROC-AUC needs the score of the positive class (index 1) for
                # every sample, not the probability of the predicted class.
                score_chunks.append(cuda_to_numpy(probs[:, 1]).reshape(-1))
    finally:
        model.train(was_training)

    # The leading empty float array keeps the result defined (and float64)
    # even when the loader yields no batches.
    y_pred = np.concatenate([np.empty(0)] + pred_chunks)
    y_true = np.concatenate([np.empty(0)] + true_chunks)
    y_probs = np.concatenate([np.empty(0)] + score_chunks)

    # NOTE(review): target_names are matched to the sorted labels, so class 0 is
    # reported as "p_wave" and class 1 as "noise" -- confirm this matches the
    # encoding of the "pwave" labels.
    target_names = ["p_wave", "noise"]
    print(classification_report(y_true, y_pred, target_names=target_names))
    print("Accuracy: {}%".format(accuracy_score(y_true, y_pred) * 100))
    print("AUC-ROC Score: {}".format(roc_auc_score(y_true, y_probs)))

In [None]:
%%time

# Evaluate on the device chosen earlier instead of assuming a GPU is present
test_model(model, testloader, device=device)

# References

1. https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2017JB015251
2. http://scedc.caltech.edu/research-tools/deeplearning.html#picking_polarity