In [24]:
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

## Подготовка данных

In [25]:
# Load the header-less, whitespace-delimited housing table and cast every column to float.
df = pd.read_csv('./housing.csv', sep=r'\s+', header=None).astype(float)

# Standardise every column; the last column, used as the label below, is scaled too.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test split
# done in a later cell, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)
df = pd.DataFrame(scaled_values, columns=df.columns)

# All columns except the last are features; the last column is the label.
np_features, np_labels = df.values[:, :-1], df.values[:, -1]

# Convert to float32 tensors for use with the torch model.
features = torch.from_numpy(np_features).to(torch.float32)
labels = torch.from_numpy(np_labels).to(torch.float32)

In [26]:
# Pair each feature row with its label.
dataset = TensorDataset(features, labels)
dataset_size = len(dataset)

# 90% of the samples are used for training; whatever remains is the test set.
train_size = int(0.9 * dataset_size)
test_size = dataset_size - train_size
lengths = [train_size, test_size]

# Seed the global torch RNG immediately before splitting so the partition is reproducible.
torch.manual_seed(42)
train_dataset, test_dataset = random_split(dataset, lengths)

# Mini-batches of 10; only the training loader reshuffles between epochs.
batch_size = 10
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

print(f"Training set size: {len(train_dataset)}")
print(f"Test set size: {len(test_dataset)}")

Training set size: 455
Test set size: 51


## Обучение

In [27]:
# Linear regression: a single affine layer mapping 13 input features to 1 output.
linear_layer = torch.nn.Linear(in_features=13, out_features=1)
model = torch.nn.Sequential(linear_layer)

# Squared error averaged over all elements of the batch.
loss = torch.nn.MSELoss(reduction='mean')

# Plain stochastic gradient descent over every model parameter.
trainer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [28]:
num_epochs = 100
log_every = 5  # print progress once every `log_every` epochs

for epoch in range(1, num_epochs + 1):
    # One pass of mini-batch SGD over the training loader.
    for X, y in train_loader:
        trainer.zero_grad()
        # The model emits shape (batch, 1); flatten it to match the 1-D targets
        # so that MSELoss compares element-wise instead of broadcasting.
        y_hat = model(X).reshape(-1)
        # Reuse the prediction computed above rather than running a second forward pass.
        batch_loss = loss(y_hat, y)
        batch_loss.backward()
        trainer.step()

    if epoch % log_every == 0:
        # Monitoring only, so skip building an autograd graph.
        # NOTE: this loss is measured on the full `features`/`labels` tensors,
        # i.e. on training and held-out rows together, not on the training split alone.
        with torch.no_grad():
            epoch_loss = loss(model(features).reshape(-1), labels)
        print('epoch %d, loss: %f' % (epoch, epoch_loss.item()),'|\tw', model[0].weight.data, '|\tb', model[0].bias.data)

epoch 5, loss: 0.466708 |	w tensor([[ 0.1357, -0.1787, -0.0847,  0.0171,  0.0131,  0.1426, -0.1699, -0.0134,
         -0.1008, -0.0807, -0.2347,  0.1779, -0.2859]]) |	b tensor([0.1293])
epoch 10, loss: 0.370720 |	w tensor([[ 0.1093, -0.1351, -0.0740,  0.0562,  0.0142,  0.2271, -0.1389, -0.0538,
         -0.0565, -0.0511, -0.2438,  0.1559, -0.3220]]) |	b tensor([0.0774])
epoch 15, loss: 0.329483 |	w tensor([[ 0.0824, -0.0990, -0.0701,  0.0754,  0.0044,  0.2798, -0.1177, -0.0827,
         -0.0282, -0.0359, -0.2453,  0.1437, -0.3452]]) |	b tensor([0.0453])
epoch 20, loss: 0.309661 |	w tensor([[ 0.0593, -0.0725, -0.0656,  0.0865, -0.0074,  0.3100, -0.0999, -0.1067,
         -0.0071, -0.0262, -0.2407,  0.1359, -0.3564]]) |	b tensor([0.0241])
epoch 25, loss: 0.298437 |	w tensor([[ 0.0391, -0.0504, -0.0633,  0.0903, -0.0211,  0.3277, -0.0862, -0.1267,
          0.0089, -0.0211, -0.2348,  0.1307, -0.3618]]) |	b tensor([0.0109])
epoch 30, loss: 0.291096 |	w tensor([[ 0.0218, -0.0307, -0.0622,  

## Проверка качества

In [48]:
# Gather the squared error of every test sample, batch by batch.
# Gradients are disabled: this is pure inference, and it lets the
# prediction residuals be converted straight to NumPy arrays.
with torch.no_grad():
    errors = [
        squared_error
        for X, y in test_loader
        for squared_error in np.square((model(X).reshape(-1) - y).numpy())
    ]

# Mean squared error over the whole test set.
sum_squared_errors = np.sum(errors)
mse = sum_squared_errors / len(errors)
mse

np.float32(0.30626726)