In [77]:
import pandas as pd
import numpy as np
import torch
if torch.__version__[0] == "2":
    from torch import mps
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
from sklearn.metrics import mean_squared_error


data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
targets = raw_df.values[1::2, 2]
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
                 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
print(data.shape)
print(targets.shape)
print(data[0])
print(targets[0])



(506, 13)
(506,)
[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00]
24.0


In [78]:
class BostonDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        data = self.data[idx]
        target = self.targets[idx]
        return data, target


In [79]:
alpha = 0.05
class Regression(torch.nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.zeros(in_features, 1))
        self.bias = torch.nn.Parameter(torch.zeros(1))
        torch.nn.init.xavier_uniform_(self.weight)
        torch.nn.init.constant_(self.bias, 0.1)

    def forward(self, x):
        x=x.float()
        return torch.matmul(x, self.weight) + self.bias

    def getWeight(self):
        return torch.cat([self.weight, self.bias.unsqueeze(0)], dim=0)


def simpleRegressionLoss(target, predict, matrix):
    return torch.mean(0.5*(target-predict)**2)


def lassoRegressionLoss(target, predict, matrix):
    return torch.mean(0.5*(target-predict)**2 + alpha*torch.abs(torch.linalg.norm(matrix)))


def ridgeRegressionLoss(target, predict, matrix):
    return torch.mean(0.5*(target-predict)**2 + alpha*torch.linalg.norm(matrix)**2)


In [80]:
def train(model, train_loader, optimizer, epoch, lossFunc, device=None):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if device is not None:
            data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = lossFunc(output, target, model.getWeight())
        loss.backward()
        optimizer.step()
        if(batch_idx+1) % 5 == 0 or batch_idx == 13:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(model, test_loader, lossFunc, device=None):
    model.eval()
    test_loss = 0

    y_pred=[]
    y_true=[]
    with torch.no_grad():
        for data, target in test_loader:
            if device is not None:
                data, target = data.to(device), target.to(device)
            output = model(data)
            print(output)
            y_pred.append(output.item())
            y_true.append(target)
            test_loss += lossFunc(output, target, model.getWeight()).item()
            pred = output.max(1, keepdim=True)[1]  # 找到概率最大的下标


    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}'.format(test_loss))
    print(mean_squared_error(y_true, y_pred))



# Main

In [81]:
bostonDataset=BostonDataset(data,targets)
train_size = int(0.8 * len(bostonDataset))
test_size = len(bostonDataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(bostonDataset, [train_size, test_size])
train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=64,shuffle=True)
test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=64,shuffle=True)

Normal regression

In [82]:
model=Regression(13)
optimizer=torch.optim.Adam(model.parameters(),lr=0.01)
for epoch in range(1, 100):
    train(model, train_loader, optimizer, epoch, simpleRegressionLoss)
test(model, test_loader, simpleRegressionLoss)




RuntimeError: a Tensor with 64 elements cannot be converted to Scalar