In [19]:
import glob
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np
import pandas as pd
from torch.utils.data import Dataset

# Custom FOG dataset
class FOGDataset(Dataset):
    def __init__(self, data_files, transform=None, target_transform=None):
        self.data_files = data_files
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.data_files)

    def __getitem__(self, idx):
        sample = pd.read_csv(self.data_files[idx])
        label = sample["labels"].to_numpy()
        label[label > 0] = 1.0
        label[label < 0] = 0.0
        sample = sample.drop(["labels", "timestamp"], axis = 1).to_numpy()
        if self.transform:
            sample = self.transform(sample)
        if self.target_transform:
            label = self.target_transform(label)
        return sample, label


In [20]:
train_dir = "D:/SYDE599/train_fog_data/"
test_dir = "D:/SYDE599/test_fog_data/"
train_files = glob.glob(train_dir + "*.csv")
test_files = glob.glob(train_dir + "*.csv")

train_data = pd.read_csv(train_files[0])
classes = train_data["labels"].unique()

print(train_data["labels"].unique())
print(train_data["labels"].shape)
print(train_data["timestamp"].shape)
print(train_data.drop(["labels","timestamp"], axis = 1).shape)

[-0.75469007  1.3250399 ]
(180501,)
(180501,)
(180501, 30)


In [21]:
# Set up datasets and dataloaders
train_data = FOGDataset(train_files)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True)

test_data = FOGDataset(test_files)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True)

In [22]:
# FOG Model
class Network(nn.Module):
    def __init__(self, input_features=30, input_shape=10, nc=10, do=0.5, kernel=3, padding='valid', bn2d=True, mp=3, pool='max', bs=1):
        super(Network, self).__init__()
        
        # MODEL ARCHITECTURE
        
        # Enable or disable batchnorm2d layers
        self.bn2d = True if bn2d==1 else False
        
        # Batch size
        self.bs = bs

        self.h1 = nn.Conv1d(input_features, nc, kernel_size=1, padding=padding)
        self.mp1 = nn.MaxPool2d(mp)
        self.h2 = nn.Conv1d(nc, int(nc/2), kernel_size=kernel, padding=padding)
        #self.h1 = nn.Linear(window*input_features, 256)

        self.h3 = nn.Linear(2490, 256)

        self.h4 = nn.Linear(256, 64)
        # self.h2 = nn.Linear(256, 128)
        # self.h3 = nn.Linear(128, 64)

        self.output = nn.Linear(64, 1)

        # Dropout with dropout rate set by parameters
        self.dropout = nn.Dropout(p=do)

        
        
    # Forward pass of model
    def forward(self, x):

        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.relu(x)

        x = torch.flatten(x, 1)

        x = self.h3(x)
        x = F.relu(x)
        x = self.h4(x)
        x = F.relu(x)
        x = self.dropout(x)

        return self.output(x)


In [23]:
# Train the model
def train(model, train_loader, optimizer, epoch, window, batch_size=28):
    model.train()
    total_loss = 0
    correct = 0
    acc = 0
    
    dataset_size = 0

    update_steps = 28

    optimizer.zero_grad()

    # Iterate through patient trials
    for patient_idx, (inputs, targets) in enumerate(train_loader):
            target_len = targets.shape[1]
            num_samples  = target_len - window

            num_batches = np.floor(num_samples/batch_size).astype(int)
            batch_idx = np.arange(num_batches)
            np.random.shuffle(batch_idx)

            # Split trials into separate samples for the given kernel size
            for b in np.arange(num_batches):
                    batch = batch_idx[b]
                    i = batch*batch_size
                    optimizer.zero_grad()

                    # For batch
                    sample_list = [inputs[:,i+k:i+k+window,:] for k in np.arange(batch_size)]
                    target_list = [targets[:, i+k+window-1] for k in np.arange(batch_size)]
                    sample = torch.Tensor(batch_size, window, inputs.shape[2])
                    target = torch.Tensor(batch_size, 1)
                    torch.cat(sample_list, out=sample)
                    torch.cat(target_list, out=target)
                    
                    sample = np.transpose(sample, (0,2,1))

                    # Run input through model
                    output = model(sample.float())

                    loss = nn.BCELoss()(nn.Sigmoid()(output[:,0].float()), target.float())
                    total_loss += loss
                    loss.backward()
                    optimizer.step()
                    predictions = output.argmax(dim=1, keepdim=True)
                    correct += predictions.eq(target.view_as(predictions)).sum()


                    # if b % 1000 == 0:
                    #     print('Epoch: {} Patient: {}/{} Batch: {}/{} Training loss: {:.6f}'.format(
                    #             epoch,
                    #             patient_idx + 1,
                    #             len(train_loader.dataset),
                    #             b + 1,
                    #             num_batches,
                    #             loss))

                    
            dataset_size += target_len

            print('Epoch: {} {}/{} Training loss: {:.6f}; Training accuracy: {:.1f}%'.format(
                    epoch,
                    patient_idx * len(inputs) + 1,
                    len(train_loader.dataset),
                    loss,
                    100.* correct/dataset_size))

            
            
    print("DATASET_SIZE: " + str(dataset_size))
    print('Training loss: {:.6f}; Training accuracy: {}/{} ({:.1f}%)\n'.format(
        total_loss / dataset_size,
        correct,
        dataset_size,
        100. * correct/dataset_size))

    return total_loss / dataset_size, 100. * acc

# Test the model
def test(model, test_loader, window):
    model.eval()
    loss = 0
    correct = 0
    acc = 0
    dataset_size = 0
    with torch.no_grad():
        for patient_idx, (inputs, targets) in test_loader:
            target_len = targets.shape[1]
            num_samples  = target_len - window
            idx = np.arange(num_samples)
            np.random.shuffle(idx)
            for batch_idx in np.arange(num_samples):
                i = idx[batch_idx]
                sample = inputs[0, i:i+window, :]
                sample = np.transpose(sample, (0,2,1))
                target = targets[0, i+window]
                output = model(sample.float())
                loss += nn.BCELoss()(nn.Sigmoid()(output[:,0].float()), target.float())
                predictions = output.argmax(dim=1, keepdim=True)
                correct += predictions.eq(targets.view_as(predictions)).sum()

            dataset_size += target_len  
            acc = acc*(patient_idx/(patient_idx+1)) + (correct/num_samples)/(patient_idx + 1)

    loss = loss / dataset_size
    print("TEST SET SIZE: " + str(dataset_size))
    print('Test loss: {:.6f}; Test accuracy: {}/{} ({:.1f}%)\n'.format(
        loss,
        correct,
        len(test_loader.dataset),
        100. * acc))
    return loss, 100. * acc

In [24]:
# Run the model
def run(window=100, batch_size=28):
    # Set up datasets and dataloaders
    train_data = FOGDataset(train_files)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True)

    test_data = FOGDataset(test_files)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True)
    
    #model = Network(params['nodes'])
    model = Network(input_shape=window)
    model = model.float()
    
    optimizer = optim.Adam(model.parameters())

    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []

    EPOCHS = 10

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    for epoch in range(0, EPOCHS):
        tr_loss, tr_acc = train(model, train_loader, optimizer, epoch, window, batch_size)
        train_loss.append(tr_loss.detach().numpy())
        train_acc.append(tr_acc)

        test_loss, test_acc = test(model, test_loader, window)
        val_loss.append(test_loss.detach().numpy())
        val_acc.append(test_acc.detach().numpy())

    return val_acc[-1]

window = 500
batch_size = 28
model_acc = run(window=window, batch_size=batch_size)

Epoch: 0 1/48 Training loss: 0.000727; Training accuracy: 48.4%
Epoch: 0 2/48 Training loss: 0.007105; Training accuracy: 67.5%
Epoch: 0 3/48 Training loss: 0.002506; Training accuracy: 72.4%
Epoch: 0 4/48 Training loss: 0.788487; Training accuracy: 68.7%
Epoch: 0 5/48 Training loss: 0.000003; Training accuracy: 69.6%
Epoch: 0 6/48 Training loss: 0.000000; Training accuracy: 70.5%
Epoch: 0 7/48 Training loss: 0.000006; Training accuracy: 69.3%
Epoch: 0 8/48 Training loss: 0.000000; Training accuracy: 69.6%
Epoch: 0 9/48 Training loss: 0.009251; Training accuracy: 69.1%
Epoch: 0 10/48 Training loss: 0.363805; Training accuracy: 56.4%
Epoch: 0 11/48 Training loss: 0.568654; Training accuracy: 56.5%
Epoch: 0 12/48 Training loss: 0.000000; Training accuracy: 56.8%
Epoch: 0 13/48 Training loss: 0.000125; Training accuracy: 58.1%
Epoch: 0 14/48 Training loss: 0.000000; Training accuracy: 58.4%
Epoch: 0 15/48 Training loss: 0.036476; Training accuracy: 57.9%
Epoch: 0 16/48 Training loss: 0.03

TypeError: test() takes 3 positional arguments but 4 were given

In [None]:
from torchsummary import summary
model = Network(input_shape=window)

# Print summary for model with optimized parameters
summary(model, (1,100,30))