# Problem: Write a custom Dataset and Dataloader to load from a CSV file

### Problem Statement
You are tasked with creating a **custom Dataset** and **Dataloader** in PyTorch to load data from a given `data.csv` file. The loaded data will be used to run a pre-implemented linear regression model.

### Requirements
1. **Dataset Class**:
   - Implement a class `CustomDataset` that:
     - Reads data from a provided `data.csv` file.
     - Stores the features (X) and target values (Y) separately.
     - Implements PyTorch's `__len__` and `__getitem__` methods for indexing.

2. **Dataloader**:
   - Use PyTorch's `DataLoader` to create an iterable for batch loading the dataset.
   - Support user-defined batch sizes and shuffling of the data.

In [1]:
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torch

In [2]:
torch.manual_seed(42)
X = torch.rand(100, 1) * 10  # 100 data points between 0 and 10
y = 2 * X + 3 + torch.randn(100, 1)  # Linear relationship with noise

# Save the generated data to data.csv
data = torch.cat((X, y), dim=1)
df = pd.DataFrame(data.numpy(), columns=["X", "y"])
df.to_csv("data.csv", index=False)

In [None]:
class LinearRegressionDataset(Dataset):
    """A dataset for linear regression task."""

    def __init__(self, file_path: str):
        """Initializes a LinearRegressionDataset instance.

        Args:
            file_path (str): File path of the dataset.
        """
        self.data = pd.read_csv(file_path)
        self.features = torch.tensor(
            self.data["X"].values, dtype=torch.float32
        ).unsqueeze(1)
        self.values = torch.tensor(
            self.data["y"].values, dtype=torch.float32
        ).unsqueeze(1)

    def __len__(self):
        """Returns the number of data points.

        Returns:
            Number of data points.
        """
        return len(self.values)

    def __getitem__(self, idx: int):
        """Gets an item from dataset located at a given index.

        Args:
            idx (int): Index of the data point.

        Returns:
            A tuple containing features and values at the given idx.
        """
        return self.features[idx], self.values[idx]


# Example usage of the DataLoader
dataset = LinearRegressionDataset("data.csv")
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [4]:
# Define the Linear Regression Model
class LinearRegressionModel(nn.Module):
    """A simple linear regression model."""

    def __init__(self):
        """Initializes a LinearRegressionModel instance."""
        super(LinearRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Forward pass.

        Args:
            x: Input value.
        """
        return self.linear(x)


In [5]:
# Initialize the model, loss function, and optimizer
model = LinearRegressionModel()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [6]:
# Training loop
epochs = 1000
for epoch in range(epochs):
    for batch_X, batch_y in dataloader:
        # Forward pass
        predictions = model(batch_X)
        loss = criterion(predictions, batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Log progress every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [100/1000], Loss: 1.5655
Epoch [200/1000], Loss: 0.4624
Epoch [300/1000], Loss: 1.4614
Epoch [400/1000], Loss: 0.3983
Epoch [500/1000], Loss: 0.5415
Epoch [600/1000], Loss: 1.6767
Epoch [700/1000], Loss: 1.0075
Epoch [800/1000], Loss: 0.3245
Epoch [900/1000], Loss: 0.6541
Epoch [1000/1000], Loss: 1.6099


In [7]:
# Display the learned parameters
[w, b] = model.linear.parameters()
print(f"Learned weight: {w.item():.4f}, Learned bias: {b.item():.4f}")

# Testing on new data
X_test = torch.tensor([[4.0], [7.0]])
with torch.no_grad():
    predictions = model(X_test)
    print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")

Learned weight: 1.9207, Learned bias: 3.2333
Predictions for [[4.0], [7.0]]: [[10.91616153717041], [16.678312301635742]]
