In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
#Data loading

class AirMeteoroPatientDataset(Dataset):

    def __init__(self):
        xy = pd.read_csv('Sangsad All year(generated, processed).csv')
        self.len = xy.shape[0]
        self.X_data = torch.from_numpy(xy.iloc[:,1:12].values)
        self.y_data = torch.from_numpy(xy.iloc[:,12:13].values)

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [4]:
# Dataset creation and normalization

dataset = AirMeteoroPatientDataset()
dataset_inp = dataset.X_data
means = torch.mean(dataset_inp, dim=0, keepdim=True) # Computing mean across the data points. This results in a ndarray of size [1, 10]
std = torch.std(dataset_inp, dim=0, keepdim=True)   # Same as before

dataset.X_data = (dataset_inp - means) / std

In [5]:
#Dataset split

train_size = round(len(dataset) * 0.8)
test_size = len(dataset) - train_size

train_set, test_set = torch.utils.data.random_split(dataset,[train_size, test_size])

In [6]:
X_train = torch.empty(0, 11).float().cuda()
y_train = torch.empty(0,1).float().cuda()
for i, (X_data, y_data) in enumerate(train_set):
    X_data = X_data.float().reshape(1,11).cuda()
    X_train = torch.cat((X_train, X_data), 0)
    
    y_data = y_data.float().reshape(1,1).cuda()
    y_train = torch.cat((y_train, y_data), 0)

In [7]:
X_test = torch.empty(0, 11).float().cuda()
y_test = torch.empty(0,1).float().cuda()
for i, (X_data, y_data) in enumerate(test_set):
    X_data = X_data.float().reshape(1,11).cuda()
    X_test = torch.cat((X_test, X_data), 0)
    
    y_data = y_data.float().reshape(1,1).cuda()
    y_test = torch.cat((y_test, y_data), 0)

In [8]:
X_train = np.array(X_train.cpu())
y_train = np.array(y_train.cpu())
xy = np.column_stack((X_train, y_train)) 
df_train = pd.DataFrame(xy)
df_train.columns = ["0", "1", "2", "3","4","5","6","7","8","9","10","11"]

In [9]:
X_test = np.array(X_test.cpu())
y_test = np.array(y_test.cpu())
xy = np.column_stack((X_test, y_test)) 
df_test = pd.DataFrame(xy)
df_test.columns = ["0", "1", "2", "3","4","5","6","7","8","9","10","11"]

In [10]:
from sklearn import linear_model

In [11]:
reg = linear_model.LinearRegression()

In [12]:
reg.fit(df_train[["0", "1", "2", "3","4","5","6","7","8","9","10"]], df_train[["11"]])

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [13]:
outputs = reg.predict(df_test[["0", "1", "2", "3","4","5","6","7","8","9","10"]])

In [14]:
outputs = torch.from_numpy(outputs)
y_test = torch.from_numpy(y_test)

In [15]:
loss_MSE = nn.MSELoss()
loss_MAE = nn.L1Loss()
loss_huber = nn.SmoothL1Loss()
MSEloss = loss_MSE(outputs, y_test)
MAEloss = loss_MAE(outputs, y_test)
Huberloss = loss_huber(outputs, y_test)
print('Total MSE loss', MSEloss.item())
print('Total MAE loss', MAEloss.item())
print('Total Huber loss', Huberloss.item())

Total MSE loss 185.2227783203125
Total MAE loss 10.567689895629883
Total Huber loss 10.073370933532715
