# Training and validating a Linear Regression Model using PyTorch

In [None]:
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [70]:
class LinearRegressionDataset:
    """Index-addressable dataset that serves samples as tensor dictionaries.

    ``data`` must expose ``.shape`` and support ``data[idx, :]`` indexing
    (e.g. a 2-D NumPy array); ``targets`` must support ``targets[idx]``.
    Both are stored as given and converted to tensors one sample at a time.
    """

    def __init__(self, data, targets):
        self.targets = targets
        self.data = data

    def __len__(self):
        """Return the number of samples, i.e. the size of ``data``'s first axis."""
        n_samples, *_ = self.data.shape
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{"x": float tensor, "y": long tensor}``."""
        row, label = self.data[idx, :], self.targets[idx]
        features = torch.tensor(row, dtype=torch.float)
        target = torch.tensor(label, dtype=torch.long)
        return dict(x=features, y=target)

In [71]:
# Draw a synthetic 1000-sample classification problem from scikit-learn, then
# split it into train/test partitions stratified on the class labels.
# NOTE: neither call is seeded, so every run produces a different dataset.
data, targets = make_classification(n_samples=1000)
train_data, test_data, train_targets, test_targets = train_test_split(
    data, targets, stratify=targets
)

In [72]:
# Wrap each split so individual samples can be fetched by index as
# {"x": ..., "y": ...} tensor dictionaries.
train_dataset = LinearRegressionDataset(data=train_data, targets=train_targets)
test_dataset = LinearRegressionDataset(data=test_data, targets=test_targets)


In [74]:
# One DataLoader per split, both yielding batches of four samples.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=4)
    for split in (train_dataset, test_dataset)
)

In [75]:
# model
def model(x, w, b):
    """Apply the affine map ``x @ w + b``.

    Args:
        x: Input tensor; its last dimension must match ``w``'s first.
        w: Weight tensor.
        b: Bias tensor, broadcast-added to the matrix product.

    Returns:
        The tensor ``torch.matmul(x, w) + b``.
    """
    return torch.matmul(x, w) + b

In [76]:
# Trainable parameters, drawn from a standard normal: a (20, 1) weight column
# and a single bias term. Both track gradients for the manual update loop.
W = torch.randn((20, 1), requires_grad=True)
b = torch.randn((1,), requires_grad=True)
# Step size used by the gradient-descent update.
learning_rate = 1e-3

In [77]:
# Manual mini-batch gradient descent on a mean-squared-error objective.
for epoch in range(100):
    epoch_loss = 0.0
    counter = 0
    # The loop variable is `batch`, not `data`, so it does not overwrite the
    # module-level feature matrix `data` produced by the data-generation cell.
    for batch in train_loader:
        xtrain, ytrain = batch["x"], batch["y"]

        output = model(xtrain, W, b)
        # Flatten both sides so predictions and targets pair up elementwise
        # instead of broadcasting against each other.
        loss = torch.mean((output.view(-1) - ytrain.view(-1)) ** 2)
        epoch_loss += loss.item()
        loss.backward()

        # Update the existing leaf tensors in place; no_grad keeps the update
        # itself out of the autograd graph and preserves requires_grad=True.
        with torch.no_grad():
            W -= learning_rate * W.grad
            b -= learning_rate * b.grad

        # Drop the gradients so the next backward() starts from scratch
        # rather than accumulating onto this step's values.
        W.grad = None
        b.grad = None
        counter += 1

    print(f"Epoch {epoch} \t Loss: {epoch_loss/counter}")

Epoch 0 	 Loss: 21.627449687808117
Epoch 1 	 Loss: 7.610697101246803
Epoch 2 	 Loss: 3.330258601681983
Epoch 3 	 Loss: 1.5504278341268605
Epoch 4 	 Loss: 0.7634357835384126
Epoch 5 	 Loss: 0.4071569365568142
Epoch 6 	 Loss: 0.24314670641460714
Epoch 7 	 Loss: 0.1665539096017923
Epoch 8 	 Loss: 0.13031198887124737
Epoch 9 	 Loss: 0.11294939030104867
Epoch 10 	 Loss: 0.10453257885169574
Epoch 11 	 Loss: 0.10040629567935112
Epoch 12 	 Loss: 0.09836203621779667
Epoch 13 	 Loss: 0.09733950343240608
Epoch 14 	 Loss: 0.09682374483499517
Epoch 15 	 Loss: 0.09656184586012696
Epoch 16 	 Loss: 0.09642826302591315
Epoch 17 	 Loss: 0.0963600208546887
Epoch 18 	 Loss: 0.09632525010594226
Epoch 19 	 Loss: 0.09630766951731663
Epoch 20 	 Loss: 0.09629895197886458
Epoch 21 	 Loss: 0.0962947747989637
Epoch 22 	 Loss: 0.09629289332673231
Epoch 23 	 Loss: 0.09629216769145445
Epoch 24 	 Loss: 0.09629198676222896
Epoch 25 	 Loss: 0.09629205060984067
Epoch 26 	 Loss: 0.09629222313734762
Epoch 27 	 Loss: 0.096

In [78]:
# Collect per-batch predictions and ground-truth labels over the test split.
outputs, labels = [], []

# Inference only: no gradients are needed, so skip graph construction.
with torch.no_grad():
    for data in test_loader:
        xtest = data["x"]
        ytest = data["y"]

        output = model(xtest, W, b)
        outputs.append(output)
        labels.append(ytest)


In [79]:
# Score the model: ROC AUC of the raw (flattened) outputs against the labels.
metrics.roc_auc_score(
    y_true=torch.cat(labels).view(-1),
    y_score=torch.cat(outputs).view(-1),
)

0.968192

# Training and Validation in PyTorch

In [2]:
def train_one_step(model, data, optimizer, device="cuda"):
    """Run a single optimization step on one batch and return its loss.

    Args:
        model: Callable invoked as ``model(**batch)``; it must return the loss
            tensor for the batch (e.g. ``model(x=..., y=...)`` for batches
            keyed by ``"x"`` and ``"y"``). The model is assumed to already
            live on ``device``.
        data: Mapping from the model's keyword-argument names to tensors.
            It is not modified; the tensors are moved into a new dict.
        optimizer: Optimizer whose gradients are cleared before the forward
            pass and which is stepped after the backward pass.
        device: Device the batch tensors are moved to. Defaults to ``"cuda"``,
            the value that was previously hard-coded.

    Returns:
        The loss tensor returned by the model for this batch.
    """
    optimizer.zero_grad()
    # Build a fresh dict rather than reassigning entries of the caller's batch.
    batch = {name: tensor.to(device) for name, tensor in data.items()}
    loss = model(**batch)
    loss.backward()
    optimizer.step()
    return loss


In [3]:
def train_one_epoch(model, data_loader, optimizer, scheduler):
    """Train ``model`` for one pass over ``data_loader``.

    Puts the model in training mode, runs ``train_one_step`` on every batch
    and steps ``scheduler`` once per batch.

    Args:
        model: Model to train; must provide ``train()``.
        data_loader: Iterable yielding the batches passed to
            ``train_one_step``.
        optimizer: Optimizer forwarded to ``train_one_step``.
        scheduler: Learning-rate scheduler; ``step()`` is called after each
            batch.

    Returns:
        The mean per-batch loss as a Python float, or ``0.0`` when
        ``data_loader`` yields no batches. (Previously the accumulated loss
        was computed and then discarded.)
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for data in data_loader:
        loss = train_one_step(model, data, optimizer)
        scheduler.step()
        # Accumulate a plain float: summing the loss tensors themselves would
        # keep every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        num_batches += 1
    return total_loss / num_batches if num_batches else 0.0


In [6]:
def validate_one_step(model, data, device="cuda"):
    """Compute the loss for one validation batch.

    Args:
        model: Callable invoked as ``model(**batch)``; it must return the loss
            for the batch. The model is assumed to already live on ``device``.
        data: Mapping from the model's keyword-argument names to tensors.
            It is not modified; the tensors are moved into a new dict.
        device: Device the batch tensors are moved to. Defaults to ``"cuda"``,
            the value that was previously hard-coded.

    Returns:
        Whatever ``model`` returns for the batch (the loss).
    """
    # Build a fresh dict rather than reassigning entries of the caller's batch.
    batch = {name: tensor.to(device) for name, tensor in data.items()}
    return model(**batch)

In [7]:
def validate_one_epoch(model, data_loader):
    """Evaluate ``model`` over one pass of ``data_loader``.

    Puts the model in evaluation mode and runs ``validate_one_step`` on every
    batch with gradient tracking disabled.

    Args:
        model: Model to evaluate; must provide ``eval()``.
        data_loader: Iterable yielding the batches passed to
            ``validate_one_step``.

    Returns:
        The mean per-batch loss as a Python float, or ``0.0`` when
        ``data_loader`` yields no batches. (Previously the accumulated loss
        was computed and then discarded.)
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    # A single no_grad context around the loop covers every batch.
    with torch.no_grad():
        for data in data_loader:
            loss = validate_one_step(model, data)
            total_loss += loss.item()
            num_batches += 1
    return total_loss / num_batches if num_batches else 0.0