In [1]:
import torch, json
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Load the sample dataset into `d`.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so the read does not
# depend on the platform's locale default.
with open('Sample_Data.json', encoding='utf-8') as f:
    d = json.load(f)

In [3]:
# Pull the ground-truth positions and RSS readings out of the parsed JSON.
# (Each record also carries a "timestamp" field; it is not needed for now.)

# Ground-truth positions: one [loc_x, loc_y] row per data point.
gt_locations = np.asarray([
    [float(sample["location"][axis]) for axis in ("loc_x", "loc_y")]
    for sample in d["data"]
])

# Model inputs: one [RSS_1, RSS_2, RSS_3] row per data point.
inp_rss_vals = np.asarray([
    [float(sample["signal_strength"][key]) for key in ("RSS_1", "RSS_2", "RSS_3")]
    for sample in d["data"]
])

In [4]:
# Sanity check: (rows, columns) of the location array, then of the RSS array.
print(gt_locations.shape, inp_rss_vals.shape, sep="\n")

(2, 2)
(2, 3)


In [5]:
# Training hyper-parameters.
number_of_training_iters = 401

# Only a single training sample is available, so each batch holds one sample;
# larger batch sizes make sense once there are hundreds of samples.
batch_size = 1

In [6]:
# By convention, inputs are called x and targets are called y.
# There are 2 data points: row 0 is used for training, row 1 is held out for testing.
x_train, x_test = inp_rss_vals[:1, :], inp_rss_vals[1:2, :]
y_train, y_test = gt_locations[:1, :], gt_locations[1:2, :]

In [7]:
# Show the shape of every split on one line: x_train, y_train, x_test, y_test.
print(*(split.shape for split in (x_train, y_train, x_test, y_test)))

(1, 3) (1, 2) (1, 3) (1, 2)


In [8]:
# Convert the numpy splits into float32 torch tensors.
tensor_x_train, tensor_y_train, tensor_x_test, tensor_y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (x_train, y_train, x_test, y_test)
)

# Wrap the tensors in DataLoaders. A DataLoader takes the whole dataset, randomizes the
# sample order when shuffle=True, and serves batches efficiently during training (picking
# random samples in a hand-written Python loop would be slow).
datasets = torch.utils.data.TensorDataset(tensor_x_train, tensor_y_train)
train_iter = torch.utils.data.DataLoader(datasets, shuffle=True, batch_size=batch_size)

# The test loader serves the entire test split as a single, unshuffled batch.
datasets = torch.utils.data.TensorDataset(tensor_x_test, tensor_y_test)
test_iter = torch.utils.data.DataLoader(datasets, shuffle=False, batch_size=len(tensor_y_test))

In [9]:
# this defines the model (it's a simple MLP)
class mdl(nn.Module):
    """Simple MLP: in_features -> 16 -> 32 -> 20 -> out_features.

    A ReLU follows every layer except the output layer, so the outputs are
    unbounded real values.

    Args:
        in_features: number of input values per sample. Defaults to 3, the
            number of RSS readings used in this notebook.
        out_features: number of predicted values per sample. Defaults to 2,
            the (x, y) location used in this notebook.
    """

    def __init__(self, in_features=3, out_features=2):
        super().__init__()
        # Layers are created in the same order as before so that, for a given RNG
        # state, the default model gets the same initial weights.
        self.input_layer = nn.Linear(in_features, 16)
        self.hidden_layer1 = nn.Linear(16, 32)
        self.hidden_layer2 = nn.Linear(32, 20)
        self.output_layer = nn.Linear(20, out_features)
        self.activation_fcn = nn.ReLU()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``out_features``.
        """
        for layer in (self.input_layer, self.hidden_layer1, self.hidden_layer2):
            x = self.activation_fcn(layer(x))
        # No activation on the output layer: this is a regression head.
        return self.output_layer(x)

In [10]:
# Seed torch's global random number generator so that the weight initialisation (and any
# later draws from that generator) are identical on every Restart & Run All.
torch.manual_seed(0)

model = mdl()
model.train()  # put the module in training mode

# Minimise the mean-squared error with an Adam optimizer (a flavor of gradient descent).
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [11]:
# Training loop: one pass over train_iter per iteration, one optimizer step per batch.
for epoch in range(number_of_training_iters):
    running_loss = 0.0
    for batch_x, batch_y in train_iter:
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss = criterion(model(batch_x), batch_y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Log only every 20th pass to keep the output short.
    if epoch % 20:
        continue
    print('Epoch [%d]/[%d] running accumulative loss across all batches: %.3f' %
          (epoch + 1, number_of_training_iters, running_loss))

Epoch [1]/[401] running accumulative loss across all batches: 110813.562
Epoch [21]/[401] running accumulative loss across all batches: 105717.000
Epoch [41]/[401] running accumulative loss across all batches: 94335.500
Epoch [61]/[401] running accumulative loss across all batches: 72227.641
Epoch [81]/[401] running accumulative loss across all batches: 38858.031
Epoch [101]/[401] running accumulative loss across all batches: 11136.074
Epoch [121]/[401] running accumulative loss across all batches: 1157.345
Epoch [141]/[401] running accumulative loss across all batches: 29.860
Epoch [161]/[401] running accumulative loss across all batches: 17.387
Epoch [181]/[401] running accumulative loss across all batches: 0.929
Epoch [201]/[401] running accumulative loss across all batches: 0.213
Epoch [221]/[401] running accumulative loss across all batches: 0.020
Epoch [241]/[401] running accumulative loss across all batches: 0.001
Epoch [261]/[401] running accumulative loss across all batches: 0

The loss quickly went to 0 because we optimized the model on a single data point, so it simply memorized that data point.

In [12]:
# Inference only: run the forward pass with autograd disabled so no computation graph is
# built and the printed tensor carries no grad_fn.
# (If dropout or batch-norm layers are ever added to the model, call model.eval() first.)
with torch.no_grad():
    predicted_locations_trainset = model(tensor_x_train)
print(predicted_locations_trainset)

tensor([[123.0000, 456.0000]], grad_fn=<AddmmBackward0>)


In [13]:
# Ground-truth locations for all data points, for comparison with the predictions
# (row 0 was used for training, row 1 for testing).
print(gt_locations)

[[123. 456.]
 [234. 567.]]


Perfect output! However, since the model has memorized this one data point, it will probably not do as well on the test set. Let's check:

In [14]:
# Inference only: run the forward pass with autograd disabled so no computation graph is
# built and the printed tensor carries no grad_fn.
# (If dropout or batch-norm layers are ever added to the model, call model.eval() first.)
with torch.no_grad():
    predicted_locations_test = model(tensor_x_test)
print(predicted_locations_test)

tensor([[126.7971, 470.1260]], grad_fn=<AddmmBackward0>)


See how it still thinks the location is something like (123, 456), even though the true test location is (234, 567)? This is overfitting, and it's expected because we trained on a single data point!

This will (hopefully) not happen with our larger dataset.