In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_percentage_error
from tqdm import tqdm

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
# Notebook-wide configuration.
BATCH_SIZE = 64   # number of training rows predicted per optimisation step
EPOCHS = 3        # number of full passes over the training rows
VAL_PCT = 0.3     # fraction of all rows held out (taken from the end) for testing

In [4]:
# Fetch the California housing dataset; `data_load` exposes `.data`,
# `.target` and `.feature_names`, all of which are used by later cells.
data_load = fetch_california_housing()

# Preview the first rows of the feature matrix with labelled columns.
pd.DataFrame(
    data=data_load.data,
    columns=data_load.feature_names,
).head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [5]:
# Convert features / targets to float32 tensors (same dtype the legacy
# `torch.Tensor(...)` constructor produced).
X = torch.tensor(data_load.data, dtype=torch.float32)
y = torch.tensor(data_load.target, dtype=torch.float32)

# Positional hold-out: the LAST `val_size` rows become the test set.
# No shuffling is performed.
val_size = int(len(X)*VAL_PCT)

# Use an explicit split index instead of negative slicing: with
# `val_size == 0`, `X[:-0]` would be empty and `X[-0:]` would be everything,
# i.e. the split would be silently inverted.
split_at = len(X) - val_size

train_X, test_X = X[:split_at], X[split_at:]
train_y, test_y = y[:split_at], y[split_at:]

print('Train Records: ', len(train_X), ', Test Records: ', len(test_X) )

Train Records:  14448 , Test Records:  6192


In [6]:
class W_KNN(nn.Module):
    '''Distance-weighted k-nearest-neighbour regressor with learnable feature scales.

    Every feature column is multiplied by a learnable weight before Euclidean
    distances are computed, so optimising the weights changes which candidates
    count as "near". A prediction is the average of the targets of the
    `N_NEIGHBORS` closest candidates, weighted proportionally to
    1 / (distance + EPSILON).

    Args:
        EPSILON: small constant added to every distance to avoid division by 0.
        N_NEIGHBORS: number of nearest candidates averaged per query row.
        n_features: number of feature columns (length of the weight vector).
            Defaults to 8, the value that was previously hard-coded.
    '''

    def __init__(self, EPSILON = .0001, N_NEIGHBORS = 5, n_features = 8):
        # Zero-argument super(): the previous `super(NET, self)` referenced an
        # undefined name and raised NameError on construction.
        super().__init__()
        self.N_NEIGHBORS = N_NEIGHBORS
        self.EPSILON = EPSILON                         # to avoid division by 0
        self.weights = nn.Parameter(torch.ones(n_features))

    def _weighted_vote(self, queries, candidates, candidate_targets):
        '''Shared core of `forward` and `predict`.

        Args:
            queries: 2-D tensor, one row per point to predict.
            candidates: 2-D tensor, one row per potential neighbour.
            candidate_targets: 1-D tensor of targets aligned with `candidates`.

        Returns:
            (predictions, neighbors): a 1-D tensor with one prediction per query
            row, and an integer tensor holding, per query row, the indices into
            `candidates` of its nearest neighbours in ascending distance order.
        '''
        # Column-wise scaling; same result as multiplying by diag(weights) but
        # without building an identity matrix.
        scaled_queries = queries * self.weights
        scaled_candidates = candidates * self.weights

        # pairwise distance
        dist_mat = torch.cdist(scaled_queries, scaled_candidates)

        # Only the k smallest distances are needed, so a full sort is avoided.
        # k is clamped so fewer candidates than N_NEIGHBORS does not raise.
        k = min(self.N_NEIGHBORS, dist_mat.shape[1])
        nearest_dist, neighbors = torch.topk(dist_mat, k, dim=1, largest=False)

        # to calculate weighted average. proportional to inverse of the distance
        dist_inv = 1 / (nearest_dist + self.EPSILON)
        weightages = dist_inv / dist_inv.sum(dim=1, keepdim=True)
        predictions = (candidate_targets[neighbors] * weightages).sum(1)

        return predictions, neighbors

    def forward(self, x1, x2, y2 ):
        '''Making predictions for x1.
        Choosing Neighbors from x2.
        Assigning target values from the neighbors using weighted average.

        Returns only the predictions; they stay attached to the autograd graph
        so a loss computed on them can update `self.weights`.'''
        predictions, _ = self._weighted_vote(x1, x2, y2)
        return predictions

    def predict(self, test_X, train_X, train_Y ):
        '''Similar to forward pass.
        In addition to predictions this also returns nearest neighbor indexes.

        Runs without gradient tracking, so the returned predictions are
        detached from the autograd graph.'''
        with torch.no_grad():
            predictions, neighbors = self._weighted_vote(test_X, train_X, train_Y)

        return predictions, neighbors

In [7]:
# Baseline: a freshly constructed model has all feature weights equal to 1.
model = W_KNN()
predictions, comparables = model.predict(
    test_X=test_X,
    train_X=train_X,
    train_Y=train_y,
)

# MAPE without optimal weights
mean_absolute_percentage_error(y_true=test_y, y_pred=predictions)

0.4784127

In [8]:
# Fit the per-feature weights of `model` by minimising the squared error of
# its neighbour-based predictions on the training rows.
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=.01)

n_train = len(train_X)

for epoch in range(EPOCHS):
    for start in tqdm(range(0, n_train, BATCH_SIZE)):
        stop = start + BATCH_SIZE

        # Rows of the current batch must not be their own neighbours, so the
        # candidate pool is every training row OUTSIDE [start, stop).
        # A boolean mask selects the same rows, in the same order, as the
        # previous per-element `not in list` scan, without the O(N * B) cost.
        is_candidate = torch.ones(n_train, dtype=torch.bool)
        is_candidate[start:stop] = False

        batch_x1 = train_X[start:stop]
        batch_y1 = train_y[start:stop]

        batch_x2 = train_X[is_candidate]
        batch_y2 = train_y[is_candidate]

        # Clear gradients before the forward/backward pass so a re-run of this
        # cell can never accumulate stale gradients.
        optimizer.zero_grad()

        pred = model(batch_x1, batch_x2, batch_y2)

        loss = loss_func(pred, batch_y1)
        loss.backward()
        optimizer.step()


100%|█████████████████████████████████████████| 226/226 [00:25<00:00,  8.75it/s]
100%|█████████████████████████████████████████| 226/226 [00:26<00:00,  8.47it/s]
100%|█████████████████████████████████████████| 226/226 [00:26<00:00,  8.50it/s]


In [9]:
# Show the learned weight of every feature, one row per feature name.
pd.DataFrame(
    data=model.weights.detach().numpy(),
    index=data_load.feature_names,
    columns=['weight'],
)

Unnamed: 0,weight
MedInc,1.420884
HouseAge,0.038708
AveRooms,0.485311
AveBedrms,2.004348
Population,0.025391
AveOccup,2.27451
Latitude,1.697351
Longitude,2.41542


In [10]:
# Re-evaluate on the held-out rows using the weights learned during training.
predictions, comparables = model.predict(
    test_X=test_X,
    train_X=train_X,
    train_Y=train_y,
)

# MAPE with improved weights
mean_absolute_percentage_error(y_true=test_y, y_pred=predictions)

0.28552243