# Creating the dataset


In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the synthetic data is identical on every run.
np.random.seed(42)

num_samples = 1000

# Synthetic predictors. The draws are made in this exact order so the random
# stream (and therefore the generated dataset) does not change.
square_footage = np.random.normal(loc=2000, scale=500, size=num_samples)
num_bedrooms = np.random.randint(low=1, high=6, size=num_samples)  # integers 1..5
# NOTE(review): a normal draw with mean 10 and std 4 can occasionally be negative.
distance_to_city = np.random.normal(loc=10, scale=4, size=num_samples)

# Target: linear in the three features plus Gaussian noise,
#   price = 100*sqft + 20000*bedrooms - 5000*distance + noise
price_noise = np.random.normal(loc=0, scale=25000, size=num_samples)
price = 100 * square_footage + 20000 * num_bedrooms - 5000 * distance_to_city + price_noise

# Assemble the table column by column (insertion order is the column order).
df = pd.DataFrame(dict(
    square_footage=square_footage,
    num_bedrooms=num_bedrooms,
    distance_to_city=distance_to_city,
    house_price=price,
))

# Persist without the index column so later cells can reload the same data.
df.to_csv('data.csv', index=False)
print("Housing dataset created!")
print(df.head())

Housing dataset created!
   square_footage  num_bedrooms  distance_to_city    house_price
0     2248.357077             4          9.491474  274017.935657
1     1930.867849             1          6.491259  198063.314927
2     2323.844269             3          7.890856  236893.676654
3     2761.514928             5          7.950681  347773.437843
4     1882.923313             3          8.969839  198708.257146


# Import libraries and setup

In [2]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [3]:
# Reload the CSV written earlier in the notebook. Columns are selected by
# position: the first three are the input features, the fourth is the target.
df = pd.read_csv('data.csv')
x = df.iloc[:, 0:3].to_numpy()
y = df.iloc[:, [3]].to_numpy()  # list indexer keeps the target as an (n_samples, 1) column

In [4]:
# Hold out 20% of the rows for testing. The split is pinned with an explicit
# random_state so it no longer depends on how much of NumPy's global random
# stream earlier cells happened to consume.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Standardise features and target. Both scalers are fitted on the training
# split only and then applied to the test split, so no test-set statistics
# leak into training.
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

# Convert data to tensors

In [5]:
# Convert the scaled arrays to float32 tensors on the selected device.
# torch.as_tensor accepts both NumPy arrays and existing tensors, so running
# this cell a second time (when y_train / y_test are already tensors) is a
# no-op instead of a copy-construct warning.
x_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
x_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)

# The target names are reused: from here on y_train / y_test are tensors.
y_train = torch.as_tensor(y_train, dtype=torch.float32, device=device)
y_test = torch.as_tensor(y_test, dtype=torch.float32, device=device)

# Dataset and DataLoader

In [6]:
class CustomDataset(Dataset):
    """
    Map-style dataset that pairs each feature row with its label.

    Args:
        features: indexable container of inputs (the notebook passes tensors).
        labels: indexable container of targets, one entry per feature row.

    Raises:
        ValueError: if ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # Fail fast: a mismatch would otherwise surface only as an IndexError
        # part-way through an epoch, or silently ignore the extra labels.
        if len(features) != len(labels):
            raise ValueError(
                "features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

In [7]:
# Wrap the train and test tensors so a DataLoader can index them sample by sample.
training_data = CustomDataset(features=x_train, labels=y_train)
testing_data = CustomDataset(features=x_test, labels=y_test)

In [8]:
# Pinned (page-locked) host memory only speeds up CPU -> GPU copies, and
# PyTorch can only pin CPU tensors. The tensors in this notebook are moved to
# `device` before they reach the loaders, so an unconditional pin_memory=True
# fails on a CUDA machine. Enable pinning only when the data is still on the
# CPU and a GPU is actually available.
pin_memory = torch.cuda.is_available() and not x_train.is_cuda

# Shuffle only the training batches; keep the test order fixed.
train_loader = DataLoader(training_data, batch_size=32, shuffle=True, pin_memory=pin_memory)

test_loader = DataLoader(testing_data, batch_size=32, shuffle=False, pin_memory=pin_memory)

# Define the model

In [9]:
class MyNN(nn.Module):
    """
    Fully connected neural network for regression.

    Each hidden stage is Linear -> ReLU -> Dropout, followed by a final
    Linear layer with a single output unit.

    Args:
        input_dim: number of input features.
        hidden_dims: widths of the hidden layers, in order. The default
            ``(128, 64)`` reproduces the original fixed architecture.
        dropout: dropout probability applied after every hidden activation.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64), dropout=0.2):
        super().__init__()

        # Layers are created in network order, so the default configuration
        # yields the same Sequential indices (0..6) as the hand-written stack.
        layers = []
        in_features = input_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_features = width

        # Single output unit: the network predicts one continuous value.
        layers.append(nn.Linear(in_features, 1))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return one prediction per row."""
        return self.model(x)


# Training loop

In [None]:
# Seed PyTorch's global generator so weight initialisation is repeatable
# from one fresh run to the next.
torch.manual_seed(42)

# One input unit per feature column. The model must live on the same device
# as the data tensors; without .to(device) the forward pass fails as soon as
# a GPU is selected.
model = MyNN(x_train.shape[1]).to(device)

# Mean squared error: the regression loss being minimised.
criterion = nn.MSELoss()

# Adam optimiser over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of full passes over the training set.
epochs = 100


In [11]:
for epoch in range(epochs):
    model.train()  # training mode: dropout layers are active
    total_loss = 0.0  # sum of per-sample squared errors seen this epoch
    samples_seen = 0

    for batch_features, batch_labels in train_loader:
        # No-op when the tensors already live on `device`.
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_features)

        # Loss computation (mean over the batch)
        loss = criterion(outputs, batch_labels)

        # Backpropagation and parameter update
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size so a shorter final batch does
        # not count as much as a full one in the epoch average.
        batch_size = batch_features.size(0)
        total_loss += loss.item() * batch_size
        samples_seen += batch_size

    avg_loss = total_loss / samples_seen
    print(f"Epoch [{epoch+1}/{epochs}] | Train MSE: {avg_loss:.6f}")




Epoch [1/100] | Train MSE: 0.500086
Epoch [2/100] | Train MSE: 0.193165
Epoch [3/100] | Train MSE: 0.181105
Epoch [4/100] | Train MSE: 0.180940
Epoch [5/100] | Train MSE: 0.169154
Epoch [6/100] | Train MSE: 0.172827
Epoch [7/100] | Train MSE: 0.166920
Epoch [8/100] | Train MSE: 0.156167
Epoch [9/100] | Train MSE: 0.156578
Epoch [10/100] | Train MSE: 0.157328
Epoch [11/100] | Train MSE: 0.162078
Epoch [12/100] | Train MSE: 0.155469
Epoch [13/100] | Train MSE: 0.151118
Epoch [14/100] | Train MSE: 0.149792
Epoch [15/100] | Train MSE: 0.163074
Epoch [16/100] | Train MSE: 0.150848
Epoch [17/100] | Train MSE: 0.150735
Epoch [18/100] | Train MSE: 0.164481
Epoch [19/100] | Train MSE: 0.155820
Epoch [20/100] | Train MSE: 0.152044
Epoch [21/100] | Train MSE: 0.151851
Epoch [22/100] | Train MSE: 0.155951
Epoch [23/100] | Train MSE: 0.154433
Epoch [24/100] | Train MSE: 0.154482
Epoch [25/100] | Train MSE: 0.154917
Epoch [26/100] | Train MSE: 0.143555
Epoch [27/100] | Train MSE: 0.148001
Epoch [28/

# Evaluation

In [12]:
# Switch to evaluation mode (dropout layers become pass-through).
model.eval()

# Scoring needs no gradients, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    test_predictions = model(x_test)
    test_mse = criterion(test_predictions, y_test)
test_rmse = test_mse.sqrt()

# The targets were standardised earlier in the notebook, so both numbers are
# in scaled units, not in the original price units.
print("\nTest Results:")
print(f"MSE  : {test_mse.item():.6f}")
print(f"RMSE : {test_rmse.item():.6f}")



Test Results:
MSE  : 0.153577
RMSE : 0.391890


In [14]:
from sklearn.metrics import r2_score

# Evaluation mode: dropout layers are disabled.
model.eval()

with torch.no_grad():
    # Predictions are in the standardised target space.
    y_pred_scaled = model(x_test)

# Move tensors to the CPU and convert to NumPy for scikit-learn.
y_pred_scaled = y_pred_scaled.cpu().numpy()
y_test_scaled = y_test.cpu().numpy()

# Undo the target scaling so both arrays are back in the original units.
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test_scaled)

# Coefficient of determination on the held-out set.
r2 = r2_score(y_true, y_pred)

# The label previously contained mis-encoded bytes in place of the
# superscript two; it is restored here.
print(f"R² Score (test set): {r2:.4f}")


R² Score (test set): 0.8262
