# LInear Regression

In [39]:
import pandas as pd

readData = pd.read_csv("./0_datasets/Realestate.csv")
# print(readData)
readData.drop(columns=["No"], inplace=True)
readData.info()
# readData

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   X1 transaction date                     414 non-null    float64
 1   X2 house age                            414 non-null    float64
 2   X3 distance to the nearest MRT station  414 non-null    float64
 3   X4 number of convenience stores         414 non-null    int64  
 4   X5 latitude                             414 non-null    float64
 5   X6 longitude                            414 non-null    float64
 6   Y house price of unit area              414 non-null    float64
dtypes: float64(6), int64(1)
memory usage: 22.8 KB


In [40]:
inputs = readData.iloc[:, :-1].to_numpy()
inputs.shape

(414, 6)

In [41]:
targets =  readData.iloc[:, -1].to_numpy()
targets.shape

(414,)

In [42]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

inputs = scaler.fit_transform(inputs)

In [43]:
import torch

inputs = torch.tensor(inputs, dtype=torch.float32)
targets = torch.tensor(targets, dtype=torch.float32)

inputs.shape, targets.shape

(torch.Size([414, 6]), torch.Size([414]))

In [44]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split


# Create Dataset and DataLoader
dataset = TensorDataset(inputs, targets)

# Split dataset into train (80%) and test (20%)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_ds, test_ds = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)  # Mini-batch training
test_loader = DataLoader(test_ds, batch_size=16)

len(train_loader), len(test_loader)

(21, 6)

In [45]:
for x,label in train_loader:
    print(x.shape)
    print(label.shape)
    break

torch.Size([16, 6])
torch.Size([16])


In [46]:
# ================== 2. Define Model ==================
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Sequential(
            nn.Linear(6,1024),
            nn.Tanh(),
            nn.Linear(1024,512),
            nn.Tanh(),
            nn.Linear(512,256),
            nn.Tanh(),
            nn.Linear(256,128),
            nn.Tanh(),
            nn.Linear(128,64),
            nn.ReLU(),
            nn.Linear(64,32),
            nn.ReLU(),
            nn.Linear(32,16),
            nn.ReLU(),
            nn.Linear(16,1),
            nn.ReLU()
        )

    def forward(self, x):
        return self.linear(x)
    
torch.manual_seed(4)

model = LinearRegressionModel()


In [47]:
dummy = torch.randn((10,6))
# model(dummy)

In [48]:
from torchinfo import summary

summary(model, input_size=(10,6))

Layer (type:depth-idx)                   Output Shape              Param #
LinearRegressionModel                    [10, 1]                   --
├─Sequential: 1-1                        [10, 1]                   --
│    └─Linear: 2-1                       [10, 1024]                7,168
│    └─Tanh: 2-2                         [10, 1024]                --
│    └─Linear: 2-3                       [10, 512]                 524,800
│    └─Tanh: 2-4                         [10, 512]                 --
│    └─Linear: 2-5                       [10, 256]                 131,328
│    └─Tanh: 2-6                         [10, 256]                 --
│    └─Linear: 2-7                       [10, 128]                 32,896
│    └─Tanh: 2-8                         [10, 128]                 --
│    └─Linear: 2-9                       [10, 64]                  8,256
│    └─ReLU: 2-10                        [10, 64]                  --
│    └─Linear: 2-11                      [10, 32]                

In [49]:
# ================== 3. Define Loss and Optimizer ==================
loss_fn = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Small learning rate for stability

In [50]:
# ================== 4. Training Loop ==================
from tqdm.auto import tqdm 
epochs = 100  # Number of training iterations

for epoch in tqdm(range(epochs)):
    model.train()  # Ensure model is in training mode
    epoch_loss = 0  # Track total loss

    for batch_inputs, batch_targets in train_loader:
        optimizer.zero_grad()  # Reset gradients
        predictions = model(batch_inputs)  # Forward pass
        loss = loss_fn(predictions, batch_targets.unsqueeze(1))
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters

        epoch_loss += loss  # Accumulate loss
    # Print average loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        avg_loss = epoch_loss / len(train_loader)  # Compute average loss
        print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}")


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 10/100, Avg Loss: 183.6805
Epoch 20/100, Avg Loss: 183.6093
Epoch 30/100, Avg Loss: 100.0897
Epoch 40/100, Avg Loss: 63.4814
Epoch 50/100, Avg Loss: 49.8473
Epoch 60/100, Avg Loss: 39.0966
Epoch 70/100, Avg Loss: 32.2105
Epoch 80/100, Avg Loss: 34.0986
Epoch 90/100, Avg Loss: 20.8174
Epoch 100/100, Avg Loss: 16.7403


In [51]:

model.eval()  # Set model to evaluation mode
test_loss = 0.0

with torch.no_grad():  # No need to compute gradients for testing
    for test_inputs, test_targets in test_loader:
        test_predictions = model(test_inputs)
        test_loss += loss_fn(test_predictions, test_targets.unsqueeze(1)).item()

print(f"\nTest Loss: {test_loss / len(test_loader):.4f}")




Test Loss: 46.8682
