In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Generate data
np.random.seed(42)
n_samples = 1000
X1 = np.random.randn(n_samples)
X2 = np.random.randn(n_samples) * 2
# Irrelevant features
X_noise1 = np.random.rand(n_samples) * 3
X_noise2 = np.random.rand(n_samples) * 2
X_noise3 = np.random.rand(n_samples) * 5
Y = 3 * X1 + 1.5 * X2 + np.random.randn(n_samples) * 0.5

In [3]:
data = pd.DataFrame({
    'X1': X1,
    'X2': X2,
    'Noise1': X_noise1,
    'Noise2': X_noise2,
    'Noise3': X_noise3,
    'Target': Y
})

In [4]:
# Prepare data for training and testing
features = data.drop('Target', axis=1).values
target = data['Target'].values

# Standardize features
scaler = StandardScaler()
features = scaler.fit_transform(features)

In [5]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42)

# Convert to tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [6]:
import torch.nn as nn
import torch.optim as optim

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(5, 3)  # 5 inputs to 3 hidden nodes
        self.fc2 = nn.Linear(3, 1)  # 3 hidden nodes to 1 output
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def train_model(model, train_loader, 
                l1_strength=0.01, epochs=100):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    
    for epoch in range(epochs):
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            l1_penalty = sum(p.abs().sum() for p in model.parameters())
            total_loss = loss + l1_strength * l1_penalty
            total_loss.backward()
            optimizer.step()
    
    return model


In [7]:
# Create and train the model

# Model with L1 regularization
model = SimpleNN()
trained_model = train_model(
    model, train_loader, l1_strength=0.1)

# Model with no regularization (l1_strength is zero)
model2 = SimpleNN()
trained_model_noRegu = train_model(
    model2, train_loader, l1_strength=0.0)


In [8]:
# Check the weights
for name, param in trained_model.named_parameters():
    if "weight" in name:
        print(f"{name} weights: {param.data}")

fc1.weight weights: tensor([[-9.7806e-01, -6.1022e-01, -4.4671e-04,  6.5395e-04,  6.8184e-04],
        [ 1.2252e+00,  1.2414e+00, -1.8125e-03, -5.0232e-06,  1.4645e-03],
        [-6.6168e-01, -1.0257e+00, -8.8860e-04,  1.0574e-03,  6.5078e-04]])
fc2.weight weights: tensor([[-1.2673,  1.6169, -1.4445]])


In [9]:
# Check the weights
for name, param in trained_model_noRegu.named_parameters():
    if "weight" in name:
        print(f"{name} weights: {param.data}")

fc1.weight weights: tensor([[ 0.6880,  1.1571, -0.3001,  0.2020, -0.0301],
        [-1.0862, -1.2773, -0.0989,  0.0703,  0.0471],
        [ 1.1677,  0.6130,  0.1170, -0.0979,  0.0830]])
fc2.weight weights: tensor([[ 0.9468, -1.4518,  1.1074]])


In [10]:
########## Model evaluation using Test data #######

import torch.nn.functional as F

def evaluate_model(model, X_test, y_test):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Turn off gradients
        predictions = model(X_test)
        loss = F.mse_loss(predictions, y_test)  # Compute the mean squared error loss
    return loss.item()

In [11]:
# Evaluate both models
mse_l1 = evaluate_model(trained_model, X_test, y_test)
mse_no_regu = evaluate_model(trained_model_noRegu, X_test, y_test)

print(f"Mean Squared Error with L1 Regularization: {mse_l1}")
print(f"Mean Squared Error with No Regularization: {mse_no_regu}")

Mean Squared Error with L1 Regularization: 0.6119637489318848
Mean Squared Error with No Regularization: 0.5939650535583496
