**The torch.utils.data.DataLoader** is an essential tool in PyTorch used for efficient loading of datasets, especially when training machine learning models. It simplifies the process of:

Handling large datasets by breaking them into manageable mini-batches.
Shuffling data for randomness during training.
Parallelizing data loading using multiple worker threads.

In [6]:
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


torch.manual_seed(42)
X = torch.rand(100, 1) * 10  
y = 2 * X + 3 + torch.randn(100, 1) 

data = torch.cat((X, y), dim=1)
df = pd.DataFrame(data.numpy(), columns=['X', 'y'])
df.to_csv('data.csv', index=False)

In [7]:
print(df)

           X          y
0   8.822693  21.286140
1   9.150040  21.883327
2   3.828638  11.724202
3   9.593057  21.735960
4   3.904482  10.130211
..       ...        ...
95  2.330659   6.131865
96  9.578310  22.561489
97  3.312838  10.257551
98  3.227418   9.767368
99  0.162027   3.290552

[100 rows x 2 columns]


In [13]:
class LinearRegressionDataset(Dataset):
    def __init__(self,data):
        super().__init__()
        self.data = pd.read_csv(data)
        self.X = torch.tensor(self.data['X'].values, dtype=torch.float32).view(-1, 1)
        self.y = torch.tensor(self.data['y'].values, dtype=torch.float32).view(-1, 1)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
        return self.X[index],self.y[index]

dataset = LinearRegressionDataset('data.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [14]:
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(1, 1) 

    def forward(self, x):
        return self.linear(x)

model = LinearRegressionModel()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 1000
for epoch in range(epochs):
    for batch_X, batch_y in dataloader:
        predictions = model(batch_X)
        loss = criterion(predictions, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [100/1000], Loss: 1.5655
Epoch [200/1000], Loss: 0.4624
Epoch [300/1000], Loss: 1.4614
Epoch [400/1000], Loss: 0.3983
Epoch [500/1000], Loss: 0.5415
Epoch [600/1000], Loss: 1.6767
Epoch [700/1000], Loss: 1.0075
Epoch [800/1000], Loss: 0.3245
Epoch [900/1000], Loss: 0.6541
Epoch [1000/1000], Loss: 1.6099


In [15]:
[w, b] = model.linear.parameters()
print(f"Learned weight: {w.item():.4f}, Learned bias: {b.item():.4f}")

X_test = torch.tensor([[4.0], [7.0]])
with torch.no_grad():
    predictions = model(X_test)
    print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")

Learned weight: 1.9207, Learned bias: 3.2333
Predictions for [[4.0], [7.0]]: [[10.91616153717041], [16.678312301635742]]
