In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# --- Synthetic data: y = 1 + 2x + Gaussian noise, split 80/20 into train/val ---
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fixed seed so the generated samples and the shuffled split are reproducible.
np.random.seed(42)
sz = 100
x = np.random.rand(sz, 1)
y = 1 + 2 * x + 0.1 * np.random.randn(sz, 1)

# Shuffle the row indices in place, then cut them at the 80% mark.
idx = np.arange(sz)
np.random.shuffle(idx)
sz80 = int(sz * 0.8)
train_idx, val_idx = idx[:sz80], idx[sz80:]

x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# Convert each NumPy split to a float32 tensor placed on the selected device.
x_train_tensor, y_train_tensor, x_val_tensor, y_val_tensor = (
    torch.from_numpy(arr).float().to(device)
    for arr in (x_train, y_train, x_val, y_val)
)

class PolynomialModel(nn.Module):
    """Polynomial in ``x`` with one learnable scalar coefficient per power.

    The model computes ``sum(c_i * x ** i for i in 0..degree)`` element-wise,
    where every ``c_i`` is a 1-element parameter drawn from a standard normal
    distribution at construction time.

    Args:
        degree: Highest power of ``x``; ``degree + 1`` coefficients are created.
        device: Optional device on which to allocate the coefficients. When
            omitted they are created on PyTorch's default device and can be
            moved afterwards with ``.to(...)``.

    Raises:
        ValueError: If ``degree`` is negative.
    """

    def __init__(self, degree, device=None):
        super().__init__()
        if degree < 0:
            raise ValueError(f"degree must be non-negative, got {degree}")
        # nn.Parameter turns on requires_grad by default, so it is not passed
        # to randn. The device comes from the argument instead of a module
        # global so the class can be constructed without any outside state.
        self.coefficients = nn.ParameterList(
            [nn.Parameter(torch.randn(1, device=device)) for _ in range(degree + 1)]
        )

    def forward(self, x):
        """Evaluate the polynomial at ``x``; each coefficient broadcasts over ``x``."""
        return sum(
            coeff * (x ** power)
            for power, coeff in enumerate(self.coefficients)
        )

# --- Hyper-parameters ---
degree = 10       # highest power of x in the fitted polynomial
lr = 0.01         # SGD step size
n_epochs = 10000  # number of full passes over the training set

# --- Model, objective and optimiser ---
model = PolynomialModel(degree)
model = model.to(device)
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(params=model.parameters(), lr=lr)

# --- Mini-batch loaders: training batches are reshuffled, validation is not ---
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

def make_train_step(model, loss_fn, optimizer):
    """Build a closure that runs one optimisation step on a mini-batch.

    Args:
        model: Module called as ``model(x)`` to produce predictions.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimiser whose ``zero_grad()`` and ``step()`` are invoked.

    Returns:
        A function ``train_step(x, y)`` that puts the model in training mode,
        performs forward pass, backward pass and parameter update, and returns
        the batch loss as a Python float.
    """
    def train_step(x, y):
        model.train()
        # Clear gradients before the backward pass rather than after the
        # update, so any gradient already stored on the parameters when this
        # is called cannot leak into the step.
        optimizer.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        optimizer.step()
        return loss.item()
    return train_step

train_step = make_train_step(model, loss_fn, optimizer)

for epoch in range(n_epochs):
    # One pass over the training mini-batches. `loss` ends up holding the loss
    # of the last mini-batch of the epoch, which is what gets reported below.
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)

    # Report progress every 1000th epoch.
    if epoch % 1000 == 0:
        # Switching to eval mode is loop-invariant, so do it once per
        # evaluation instead of once per validation batch.
        model.eval()
        val_loss = 0.0
        n_val = 0
        with torch.no_grad():
            for x_val_batch, y_val_batch in val_loader:
                x_val_batch = x_val_batch.to(device)
                y_val_batch = y_val_batch.to(device)
                yhat = model(x_val_batch)
                # loss_fn is built with reduction='mean', so it returns a
                # per-batch mean. Weight it by the batch size so a smaller
                # final batch does not bias the overall average.
                batch_n = x_val_batch.size(0)
                val_loss += loss_fn(yhat, y_val_batch).item() * batch_n
                n_val += batch_n
        # An empty validation loader yields NaN instead of ZeroDivisionError.
        val_loss = val_loss / n_val if n_val else float('nan')
        print(f'Epoch {epoch}/{n_epochs}, Training Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}')

# Dump the learned coefficient for every power of x.
for i, coeff in enumerate(model.coefficients):
    print(f'Coefficient for x^{i}: {coeff.item()}')

Epoch 0/10000, Training Loss: 2.8670, Validation Loss: 2.5025
Epoch 1000/10000, Training Loss: 0.0044, Validation Loss: 0.0079
Epoch 2000/10000, Training Loss: 0.0097, Validation Loss: 0.0082
Epoch 3000/10000, Training Loss: 0.0054, Validation Loss: 0.0081
Epoch 4000/10000, Training Loss: 0.0088, Validation Loss: 0.0084
Epoch 5000/10000, Training Loss: 0.0107, Validation Loss: 0.0084
Epoch 6000/10000, Training Loss: 0.0034, Validation Loss: 0.0083
Epoch 7000/10000, Training Loss: 0.0085, Validation Loss: 0.0085
Epoch 8000/10000, Training Loss: 0.0117, Validation Loss: 0.0085
Epoch 9000/10000, Training Loss: 0.0070, Validation Loss: 0.0085
Coefficient for x^0: 1.1052765846252441
Coefficient for x^1: 1.2511405944824219
Coefficient for x^2: 0.8106275200843811
Coefficient for x^3: 1.0923269987106323
Coefficient for x^4: -1.5596163272857666
Coefficient for x^5: 1.229148268699646
Coefficient for x^6: -0.5366079807281494
Coefficient for x^7: -1.5857013463974
Coefficient for x^8: -0.6227945685