In [1]:
import pandas as pd
import torch
import numpy as np 
from sklearn.base import BaseEstimator, RegressorMixin
from random import randrange 
from sklearn.model_selection import KFold

In [2]:
df = pd.read_csv("dataset.csv", names=['x1', 'x2', 'x3', 'x4', 'x5', 'y'], index_col=False)
data = df[['x1', 'x2', 'x3', 'x4', 'x5']]
target = df[['y']]

X = torch.from_numpy(data.values).cpu()
Y = torch.from_numpy(target.values).cpu()

X /= X.norm(dim=0)
Y /= Y.norm()

print(X.shape, Y.shape)
print(X[0], Y[0])

torch.Size([1503, 5]) torch.Size([1503, 1])
tensor([-0.0258, -0.0108,  0.0169,  0.0122, -0.0061], dtype=torch.float64) tensor([0.0260], dtype=torch.float64)


In [5]:
class CrossValidationRegressor():
    
    def __init__(self, X, Y, iterations=1000, K=5):
        self.X = X
        self.Y = Y
        self.I = iterations
        self.K = K
        
    def fit(self, X, Y):
        W = torch.randn((X.shape[1], 1), dtype=torch.double, requires_grad=True)
        b = torch.randn(1, dtype=torch.double, requires_grad=True)
        optimizer = torch.optim.SGD([W], lr=0.001)
        for i in range(self.I):
          optimizer.zero_grad()
          term_1 = torch.sum(torch.pow((torch.matmul(X, W) + b) - Y, 2))
          term_2 = torch.sum(torch.pow(W, 2))
          cost = 0.5 * (term_1 + term_2)
          cost.backward()
          optimizer.step()
#           if i % 50 == 0:
#             print("Iteration: {}, Cost: {:.5f}".format(i, cost))
        
        return [W,b]
    
    def mse(self, X, Y, W, bias):
        pred = torch.matmul(X, W) + bias
        diff = pred - Y 
        return torch.sum(diff * diff) / diff.numel()
    
    def cross_validation_error(self):
        kf = KFold(n_splits=self.K)
        error_list = []
        for train_index, test_index in kf.split(self.X):
            X_train, X_test = self.X[train_index], X[test_index]
            Y_train, Y_test = self.Y[train_index], Y[test_index]
            W,b = self.fit(X_train,Y_train)
            error_list.append(self.mse(X_test, Y_test, W, b))
        return sum(error_list)/len(error_list)

In [6]:
model = CrossValidationRegressor(X,Y)
print("The Cross validation error for K=5 is : {} ".format(model.cross_validation_error()))

model = CrossValidationRegressor(X,Y, K=len(Y))
print("The Cross validation error for K=len(dataset) is : {} ".format(model.cross_validation_error()))

The Cross validation error for K=5 is : 1.0406776207084394 
The Cross validation error for K=len(dataset) is : 0.9745352007170768 
