In [1]:
import os
import numpy as np
import json
import scipy.io as sio
from sklearn.preprocessing import MultiLabelBinarizer
import pickle

In [2]:
import pandas as pd
from tqdm import tqdm

# for reading and displaying images
import matplotlib.pyplot as plt

# for creating validation set
from sklearn.model_selection import train_test_split
# for evaluating the model
from sklearn.metrics import accuracy_score


# PyTorch libraries and modules
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import *
import h5py

Import data

In [None]:
def importDataFromMatFiles(rootDir):
    """
    Param: rootDir: The parent directory to the directories that start with Copy ....
                    For example, in this case, rootDir = "/Users/jameshe/Documents/radar_ura/FirstBatch"
    """
    xList = list()
    yList = list()
    mlb = MultiLabelBinarizer()
    for f in os.scandir(rootDir):
        if f.is_dir():
            with open(os.path.join(f.path, "test_data.json")) as labelFile:
                labels = json.load(labelFile)
                occupancyLabel = labels["Occupied_Seats"]
            for file in os.scandir(os.path.join(f.path, "SavedVars_RF_Data")):
                frame = sio.loadmat(file)
                image = frame["Image"]
                mask = frame["Mask"]
                image[mask == False] = 0
                image = image.reshape((29,29,64,1))
                xList.append(image)
                yList.append(occupancyLabel)
    yList = mlb.fit_transform(yList)
    xList = np.array(xList)
    xList = np.absolute(xList)
    return (xList, yList)

In [None]:
def saveData(data, path, fileName):
    """
    open file mode w: write, if the file exist, erase it
    open file mode b: open the file as a binary file
    """
    filePath = os.path.join(path, fileName)
    with open(filePath,'wb') as pickleFileHandle:
        pickle.dump(data, pickleFileHandle)
        pickleFileHandle.close()

In [3]:
def loadData(path, fileName):
    """
    open file mode b: open the file as a binary file
    open file mode r: read file
    """
    filePath = os.path.join(path, fileName)
    with open(filePath, 'rb') as pickleFileHandle:
        data = pickle.load(pickleFileHandle)
        return data

In [6]:
x, y = importDataFromMatFiles("/Users/jameshe/Documents/radar_ura/FirstBatch")

In [7]:
saveData(x,"./","xData.pickle")
saveData(y,"./","yData.pickle")

In [4]:
x = loadData("./","xData.pickle")
y = loadData("./","yData.pickle")

In [5]:
print(x.shape)
print(y.shape)

(5989, 29, 29, 64, 1)
(5989, 5)


In [6]:
#https://towardsdatascience.com/pytorch-step-by-step-implementation-3d-convolution-neural-network-8bf38c70e8b3
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

x_train = torch.from_numpy(x_train).float()
y_train = torch.from_numpy(y_train).float()
x_test = torch.from_numpy(x_test).float()
y_test = torch.from_numpy(y_test).float()
#x_train.shape == 4192, 131 * 2^5 = 4192
batch_size = 131 #We pick beforehand a batch_size that we will use for the training


# Pytorch train and test sets
train = torch.utils.data.TensorDataset(x_train,y_train)
test = torch.utils.data.TensorDataset(x_test,y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = True)


In [7]:
x_test.shape

torch.Size([1797, 29, 29, 64, 1])

In [8]:
num_classes = 5

# Create CNN Model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        self.conv_layer1 = self._conv_layer_set(1, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64)
        self.fc1 = nn.Linear(22400, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.LeakyReLU()
        self.batch=nn.BatchNorm1d(128)
        self.drop=nn.Dropout(p=0.15) 
        self.sigmoid = nn.Sigmoid()               
        
    def _conv_layer_set(self, in_c, out_c):
        conv_layer = nn.Sequential(
        nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
        nn.LeakyReLU(),
        nn.MaxPool3d((2, 2, 2)),
        )
        return conv_layer
    

    def forward(self, x):
        # Set 1
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = out.view(out.size(0), -1) #Flatten it out
        out = self.fc1(out)
        out = self.relu(out)
        out = self.batch(out)
        out = self.drop(out)
        out = self.fc2(out)
        #Apply sigmoid activation for the output layer
        out = self.sigmoid(out)

        return out

#Definition of hyperparameters
n_iters = 4500
num_epochs = n_iters / (len(x_train) / batch_size)
num_epochs = int(num_epochs)

# Create CNN
model = CNNModel()
#model.cuda()
print(model)

# Binary Cross Entropy Loss for MultiLabel Classfication
error = nn.BCELoss()

# SGD Optimizer
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

CNNModel(
  (conv_layer1): Sequential(
    (0): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (conv_layer2): Sequential(
    (0): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=22400, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=5, bias=True)
  (relu): LeakyReLU(negative_slope=0.01)
  (batch): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop): Dropout(p=0.15, inplace=False)
  (sigmoid): Sigmoid()
)


In [9]:
# CNN model training
count = 0
loss_list = []
iteration_list = []
accuracy_list = []
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        train = Variable(images.view(batch_size,1, 29 ,29 ,64))
        labels = Variable(labels)
        # Clear gradients
        optimizer.zero_grad()
        # Forward propagation
        outputs = model(train)
        # Calculate softmax and ross entropy loss
        loss = error(outputs, labels)
        # Calculating gradients
        loss.backward()
        # Update parameters
        optimizer.step()
        
        count += 1
        #32 batches
        if count % 32 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                
                test = Variable(images.view(len(images), 1, 29, 29 ,64))
                # Forward propagation
                outputs = model(test)
                # Get predictions from the maximum value
                predicted = outputs
                predicted[predicted < 0.5] = 0
                predicted[predicted > 0.5] = 1
               
                # Total number of labels
                total += (len(labels) * num_classes)
                correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / float(total)
            # store loss and iteration
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
            # Print Loss
            print('Iteration: {}  Loss: {}  Accuracy: {} %'.format(count, loss.data, accuracy))

Iteration: 32  Loss: 0.676860511302948  Accuracy: 74.10127991096272 %
Iteration: 64  Loss: 0.6713660955429077  Accuracy: 80.82359488035615 %
Iteration: 96  Loss: 0.6612071990966797  Accuracy: 84.00667779632721 %
Iteration: 128  Loss: 0.6583976745605469  Accuracy: 85.66499721758487 %
Iteration: 160  Loss: 0.6525956988334656  Accuracy: 86.56649972175849 %
Iteration: 192  Loss: 0.637985110282898  Accuracy: 87.69059543683917 %
Iteration: 224  Loss: 0.6326025128364563  Accuracy: 88.4028937117418 %
Iteration: 256  Loss: 0.6159455180168152  Accuracy: 89.04841402337229 %
Iteration: 288  Loss: 0.6092703342437744  Accuracy: 89.18196994991652 %
Iteration: 320  Loss: 0.5927468538284302  Accuracy: 89.13745130773512 %
Iteration: 352  Loss: 0.5840119123458862  Accuracy: 88.72565386755704 %
Iteration: 384  Loss: 0.5827195048332214  Accuracy: 88.70339454646633 %
Iteration: 416  Loss: 0.5613179802894592  Accuracy: 88.97050639955481 %
Iteration: 448  Loss: 0.5364841222763062  Accuracy: 88.52531997774068 

KeyboardInterrupt: 

In [None]:
arr = np.array([[0,0,0],
                [1,1,0]])
x = torch.tensor(arr)

arr_label = np.array([[1,0,0],
                [0,1,0]])
x_label = torch.tensor(arr_label)

In [12]:
print(x)
print(x_label)

tensor([[0, 0, 0],
        [1, 1, 0]])
tensor([[1, 0, 0],
        [0, 1, 0]])


In [14]:
(x == x_label).sum()

tensor(4)

In [15]:
x.

ValueError: only one element tensors can be converted to Python scalars