In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

# Locate the project root by stripping the last two path components off the
# current working directory, then register it on the import path (only once)
# so that project packages can be imported from this notebook.
project_dir = os.path.split(os.path.split(os.getcwd())[0])[0]
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

from utils.data import load_data

# Prepare Data

In [4]:
# load_data() is called from the project root (presumably it resolves its data
# files relative to the working directory -- TODO confirm against utils.data).
notebook_dir = os.getcwd()
os.chdir(project_dir)
try:
    train_loader, val_loader, test_loader = load_data()
finally:
    # change dir back -- even if load_data() raises. The first cell derives
    # project_dir from os.getcwd(), so a working directory left at the project
    # root would make a re-run of that cell compute the wrong path. Restoring
    # the saved directory also avoids hard-coding this notebook's location.
    os.chdir(notebook_dir)

File1: 3BAGEmnnQ2K4zF49Dkkoxg.csv contains missing hours
File4: 4XEJFVFOS761cvyEjOYf0g.csv contains outliers
File5: 6kzhfU9xTKCUVJMz492l2g.csv contains outliers
File6: 6nBLCf6WT06TOuUExPkBtA.csv contains missing hours
File17: JQ1px-xqQx-xKh3Oa5h9nA.csv contains missing hours
File21: OfAvTbS1SiOjQo4WKSAP9g.csv contains missing hours
File24: R2ebpAblQHylOjteA-2hlQ.csv contains missing hours
File37: jDYxIP2JQL2br5aTIAR7JQ.csv contains outliers
File38: kyRUtBOTTaK7V_-dxOJTwg.csv contains outliers
File45: wSo2iRgjT36eWC4a2joWZg.csv contains outliers


# Interpolation Algorithm

In [6]:
class IDW:
    """Inverse Distance Weighting (IDW) interpolator.

    A prediction at a query point is the weighted average of the known values,
    where the weight of each known point is ``1 / distance ** p`` (normalised
    so that the weights of a query point sum to one).
    """

    def __init__(self, X, Y):
        """
        X: (n, d), n is the number of samples, d is the dimension of feature vectors
        Y: (n, ) or (n, k), the known value(s) at each of the n samples
        """
        self.X = X
        self.Y = Y

    @staticmethod
    def _as_points(A):
        """Return A as a float ndarray of shape (num_points, num_features)."""
        A = np.asarray(A, dtype=float)
        if A.ndim != 2:
            # Flatten everything but the leading (sample) axis so that the
            # row-wise 2-norm below matches np.linalg.norm of each sample.
            A = A.reshape(A.shape[0], -1)
        return A

    def predict(self, X_test, p=2):
        """
        X_test: (m, d), m is the number of test samples, d is the dimension of feature vectors
        p: the power of distance

        Returns a NumPy array of shape (m, ) -- or (m, k) when Y is (n, k).
        The normalised (m, n) weights are also stored in ``self.weight_matrix``.

        A test sample that coincides with one or more known samples is assigned
        the (mean of the) value(s) at those samples instead of NaN.
        """
        known = self._as_points(self.X)
        query = self._as_points(X_test)

        # construct distance matrix: pairwise Euclidean distances, shape (m, n)
        dist_matrix = np.linalg.norm(query[:, None, :] - known[None, :, :], axis=-1)

        # construct weight matrix; a zero distance yields an infinite weight,
        # which is resolved right below, so silence the divide-by-zero warning
        with np.errstate(divide='ignore'):
            weight_matrix = 1.0 / np.power(dist_matrix, p)

        # Rows containing an infinite weight would normalise to inf / inf = NaN.
        # Put all of the weight on the coincident known sample(s) instead.
        exact = np.isinf(weight_matrix)
        hit_rows = exact.any(axis=1)
        if hit_rows.any():
            weight_matrix[hit_rows] = exact[hit_rows]

        # normalize weight matrix
        weight_matrix = weight_matrix / np.sum(weight_matrix, axis=1, keepdims=True)
        self.weight_matrix = weight_matrix

        # predict
        Y_pred = np.matmul(weight_matrix, np.asarray(self.Y, dtype=float))
        return Y_pred

In [8]:
# Shape of the full readings tensor of the test dataset. The cells below index
# its columns with the same indices as the rows of `locations`, and treat its
# rows as the steps to iterate over.
test_loader.dataset.readings.shape

torch.Size([2928, 41])

In [18]:
# Known ("measured") points: the rows of `locations` and the matching columns
# of `readings` selected by the dataset's `train_idx`.
test_set = test_loader.dataset
X_m = test_set.locations[test_set.train_idx, :]
Y_m = test_set.readings[:, test_set.train_idx]
print(X_m.shape, Y_m.shape)

torch.Size([28, 2]) torch.Size([2928, 28])


In [19]:
# Held-out ("unmeasured") points: the rows of `locations` and the matching
# columns of `readings` selected by the dataset's `test_idx`.
test_set = test_loader.dataset
X_u = test_set.locations[test_set.test_idx, :]
Y_u = test_set.readings[:, test_set.test_idx]
print(X_u.shape, Y_u.shape)

torch.Size([9, 2]) torch.Size([2928, 9])


In [26]:
n_steps = Y_m.shape[0]

# The IDW weights depend only on the point coordinates (X_m, X_u), which are
# the same for every step. Instead of rebuilding the distance and weight
# matrices once per step, interpolate all steps in a single call by passing the
# readings as a (n_known_points, n_steps) matrix.
idw = IDW(X_m, Y_m.T)
pred_all = np.asarray(idw.predict(X_u))  # (n_query_points, n_steps)

# Flatten step by step, i.e. in the same order as concatenating the per-step
# target / prediction vectors.
Y_true = np.asarray(Y_u).reshape(-1)
Y_pred = pred_all.T.reshape(-1)
assert Y_true.size == n_steps * X_u.shape[0], "unexpected number of targets"
assert Y_true.shape == Y_pred.shape, "prediction/target size mismatch"

# Error metrics over all (step, query point) pairs.
residual = Y_true - Y_pred
rmse = np.sqrt(np.mean(residual ** 2))
cvrmse = rmse / np.mean(Y_true)  # RMSE normalised by the mean target value
mae = np.mean(np.abs(residual))
r2 = 1 - np.sum(residual ** 2) / np.sum((Y_true - np.mean(Y_true)) ** 2)
print('RMSE: {:.4f}'.format(rmse))
print('CVRMSE: {:.4f}'.format(cvrmse))
print('MAE: {:.4f}'.format(mae))
print('R2: {:.4f}'.format(r2))

RMSE: 5.8276
CVRMSE: 0.3688
MAE: 3.0519
R2: 0.7285
