## NN time

### For this dataset we are going with a DNN


In [9]:
import torch
print(torch.cuda.is_available())

devices = [d for d in range(torch.cuda.device_count())]
device_names = [torch.cuda.get_device_name(d) for d in devices]
print(device_names)



True
['NVIDIA GeForce RTX 4070']


### Dataset prep

In [10]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

data = pd.read_csv('../data/final_dataset_1.csv')

X = data.drop('price', axis=1).values
y = data['price'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

Defining the model and its losing and optimizing functions

In [11]:
import torch.nn as nn

class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.layer1 = nn.Linear(X_train.shape[1], 128)
        self.dropout = nn.Dropout(0.5)  # Dropout layer
        self.layer2 = nn.Linear(128, 256)
        self.output_layer = nn.Linear(256, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)  # Applying dropout
        x = self.relu(self.layer2(x))
        x = self.output_layer(x)
        return x


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = DNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Train

In [12]:
num_epochs = 100
model.train()  
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 173298188288.0000
Epoch [2/100], Loss: 330616373248.0000
Epoch [3/100], Loss: 109847543808.0000
Epoch [4/100], Loss: 18622097408.0000
Epoch [5/100], Loss: 17073433600.0000
Epoch [6/100], Loss: 15711789056.0000
Epoch [7/100], Loss: 75779039232.0000
Epoch [8/100], Loss: 11137985536.0000
Epoch [9/100], Loss: 32038959104.0000
Epoch [10/100], Loss: 89955262464.0000
Epoch [11/100], Loss: 101630795776.0000
Epoch [12/100], Loss: 5411389440.0000
Epoch [13/100], Loss: 15290548224.0000
Epoch [14/100], Loss: 35948462080.0000
Epoch [15/100], Loss: 16064856064.0000
Epoch [16/100], Loss: 8164881920.0000
Epoch [17/100], Loss: 21355747328.0000
Epoch [18/100], Loss: 14851836928.0000
Epoch [19/100], Loss: 44966719488.0000
Epoch [20/100], Loss: 9203366912.0000
Epoch [21/100], Loss: 58826772480.0000
Epoch [22/100], Loss: 20981432320.0000
Epoch [23/100], Loss: 64444858368.0000
Epoch [24/100], Loss: 5206033408.0000
Epoch [25/100], Loss: 146199838720.0000
Epoch [26/100], Loss: 25174421504

Test

In [13]:
from sklearn.metrics import r2_score
# Set the model to evaluation mode
model.eval()

# Disable gradient calculation for evaluation
with torch.no_grad():
    inputs, targets = X_test.to(device), y_test.to(device)
    predictions = model(inputs)

    # Calculating different metrics
    mse_loss = criterion(predictions, targets)  # MSE
    rmse_loss = torch.sqrt(mse_loss)            # RMSE
    mae_loss = torch.mean(torch.abs(predictions - targets))  # MAE
    r2 = r2_score(targets.cpu().numpy(), predictions.cpu().numpy())  # R2 score

    # Printing the metrics
    print(f'Test MSE Loss: {mse_loss.item():.4f}')
    print(f'Test RMSE Loss: {rmse_loss.item():.4f}')
    print(f'Test MAE Loss: {mae_loss.item():.4f}')
    print(f'R-squared Score: {r2:.4f}')


Test MSE Loss: 31791507456.0000
Test RMSE Loss: 178301.7344
Test MAE Loss: 114020.2109
R-squared Score: 0.6714


In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

df = pd.read_csv('../data/final_dataset_1.csv')

# Assuming df is your DataFrame and 'price' is the target variable
X = df.drop('price', axis=1)
y = df['price']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Initialize the model
clf = GradientBoostingRegressor(n_estimators = 700, max_depth = 7, min_samples_split = 3, learning_rate = 0.1)

# Train the model
clf.fit(X_train, y_train)

# Make predictions
Y_pred = clf.predict(X_test)


In [42]:
# Add new metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score

mse = mean_squared_error(y_test, Y_pred)
mae = mean_absolute_error(y_test, Y_pred)
r2 = r2_score(y_test, Y_pred)
#accuracy = accuracy_score(y_test, Y_pred)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R-squared: {r2:.4f}')
#print("Accuracy: %.2f%%" % (accuracy * 100.0))


Mean Squared Error: 30194992032.6178
Mean Absolute Error: 103340.4322
R-squared: 0.7019


In [37]:
from xgboost import XGBRegressor

# Create an XGBoost regressor
model = XGBRegressor(objective='reg:squarederror', random_state=2)

# Train the model
model.fit(X_train, y_train)

Y_pred = model.predict(X_test)
