In [55]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from pathlib import Path
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import os

# Pre-processing

In [61]:
class SHM_Dataset(Dataset):
    """ Prepare dataset for pytorch
        Ref: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
    """

    def __init__(self, case, data_file):
        self.case = case
        self.data_file = Path(data_file)
        self.data_df = pd.read_json(self.data_file, dtype=np.array)
        # self.data = self.data_df.values.reshape(-1) # @Chen here I ONLY take a column of data as an example
        self.data = pd.DataFrame(np.array([np.array(i) for i in self.data_df.values.flatten()]))
        self.labels = pd.DataFrame([self.case,]*self.data_df.shape[0]*self.data_df.shape[1]) # ONLY take a column as an example
    
    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, index):
        label = np.array(self.labels.iloc[index])
        feature = np.array(self.data.iloc[index])
        return feature, label

shmDS = SHM_Dataset(1, "~/Codes/homework/data/SHM/shm01s.json")

train_loader = DataLoader(shmDS, batch_size=32, shuffle=True) # split samples into mini-batches and reshuffle the data to reduce overfitting
test_loader = DataLoader(shmDS, batch_size=32, shuffle=False)

ValueError: Must pass 2-d input. shape=(2992, 8, 16)

In [57]:
print(type(shmDS.data_df.iloc[0,1]))
print(shmDS.data_df.iloc[0,1])
print(type(shmDS.data[0]))
print(shmDS.data[1])
print(type(shmDS.data))

<class 'list'>
[[-0.0020818, -0.0021144, -0.0021016, -0.0024254, -0.002283, -0.0022006, -0.002391, -0.0022262, -0.0022094, -0.0022658, -0.0022518, -0.0022848, -0.0022878, -0.002281, -0.0023396, -0.002293], [-0.0022887999999999997, -0.0021839999999999997, -0.0022286, -0.0022114, -0.0022716, -0.0022635999999999997, -0.0023653999999999997, -0.0022175999999999997, -0.0023834, -0.0022221999999999997, -0.0022911999999999997, -0.00231, -0.0022459999999999997, -0.0023255999999999997, -0.0021584, -0.0024373999999999997], [-0.0020506, -0.0023041999999999997, -0.0023179999999999997, -0.002169, -0.0022854, -0.0022351999999999997, -0.0023103999999999998, -0.0020488, -0.0024089999999999997, -0.0021774, -0.0022605999999999998, -0.002348, -0.0021966, -0.0023967999999999997, -0.0021992, -0.0023384], [-0.0021379999999999997, -0.002241, -0.0021444, -0.0021601999999999997, -0.002333, -0.0022448, -0.0023312, -0.0021953999999999997, -0.0022166, -0.0023022, -0.0022408, -0.0021742, -0.0022258, -0.00238, -0.00

Let's have a look on a single sample:

In [58]:
train_features, train_labels = next(iter(train_loader))
train_features = train_features.unsqueeze(dim=1)
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

# img = train_features[0].squeeze()
# label = train_labels[0]
# fig, axis = plt.subplots()
# axis.imshow(img, cmap="gray")
# axis.set(title=f"Label: {label}", xticks=range(16), yticks=range(8))
# plt.show()


AttributeError: 'numpy.ndarray' object has no attribute 'iloc'

# Build the neural network

## CNN Model

In [None]:
# Two-layer convolution
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Quick build with sequence tools
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=2, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(2 * 4 * 32, 10)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = out.view(out.size(0), -1)  # reshape
        out = self.fc(out)
        return out

cnn = CNN()

## Loss and Optimizer

In [None]:
# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

# Train the model

In [None]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        images = images.unsqueeze(dim=1)
        images = images.float()
        labels = Variable(labels)
        print(type(images), images)
        print(type(labels), labels)
        print("[ OK ] at this step") 

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        # loss = loss_func(outputs, labels)
        loss = loss_func(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.item()))

# Test the model

In [None]:
cnn.eval()  # Change to test form, application scenarios such as: dropout
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    images = images.unsqueeze(dim=1)
    images = images.float()
    labels = Variable(labels)

    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.data).sum()

print(' Test Accuracy: %d %%' % (100 * correct / total))

# Save the Trained Model
torch.save(cnn.state_dict(), 'cnn.pkl')