## NN time

### For this dataset we are going with a DNN


In [1]:
import torch
print(torch.cuda.is_available())

devices = [d for d in range(torch.cuda.device_count())]
device_names = [torch.cuda.get_device_name(d) for d in devices]
print(device_names)



False
[]


### Dataset prep

In [15]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

data = pd.read_csv('../data/final_dataset_1.csv')

X = data.drop('price', axis=1).values
y = data['price'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

Defining the model and its losing and optimizing functions

In [11]:
#do not use this model, use the next cell

import torch.nn as nn

class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.layer1 = nn.Linear(X_train.shape[1], 128)
        self.dropout = nn.Dropout(0.5)  # Dropout layer
        self.layer2 = nn.Linear(128, 256)
        self.output_layer = nn.Linear(256, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)  # Applying dropout
        x = self.relu(self.layer2(x))
        x = self.output_layer(x)
        return x


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = DNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

# Definirea modelului DNN


class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        # Inițializarea layerelor
        # Primul layer primeste 13 caracteristici (după PCA)
        self.fc1 = nn.Linear(16, 30)
        self.fc2 = nn.Linear(30, 30)  # Al doilea layer
        self.fc3 = nn.Linear(30, 30)  # Al treilea layer
        self.fc4 = nn.Linear(30, 20)  # Al patrulea layer
        # Layerul de output cu o singură valoare (predictie)
        self.output = nn.Linear(20, 1)

    def forward(self, x):
        # Aplică ReLU pe fiecare layer, exceptând layerul de output
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = self.output(x)  # Nicio funcție de activare (liniară)
        return x
    

model = DNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Train

In [17]:
num_epochs = 300
model.train()  
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        inputs, targets = inputs, targets

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/300], Loss: 248009277440.0000
Epoch [2/300], Loss: 44898463744.0000
Epoch [3/300], Loss: 54407749632.0000
Epoch [4/300], Loss: 23964289024.0000
Epoch [5/300], Loss: 147291291648.0000
Epoch [6/300], Loss: 17637040128.0000
Epoch [7/300], Loss: 396567347200.0000
Epoch [8/300], Loss: 79004721152.0000
Epoch [9/300], Loss: 14989769728.0000
Epoch [10/300], Loss: 64486006784.0000
Epoch [11/300], Loss: 33806104576.0000
Epoch [12/300], Loss: 11338874880.0000
Epoch [13/300], Loss: 23598143488.0000
Epoch [14/300], Loss: 22119321600.0000
Epoch [15/300], Loss: 53801377792.0000
Epoch [16/300], Loss: 35688972288.0000
Epoch [17/300], Loss: 32804685824.0000
Epoch [18/300], Loss: 29121413120.0000
Epoch [19/300], Loss: 18499639296.0000
Epoch [20/300], Loss: 29887412224.0000
Epoch [21/300], Loss: 24052111360.0000
Epoch [22/300], Loss: 65485209600.0000
Epoch [23/300], Loss: 24495118336.0000
Epoch [24/300], Loss: 10590191616.0000
Epoch [25/300], Loss: 49799364608.0000
Epoch [26/300], Loss: 807142195

Test

In [18]:
from sklearn.metrics import r2_score
# Set the model to evaluation mode
model.eval()

# Disable gradient calculation for evaluation
with torch.no_grad():
    inputs, targets = X_test, y_test
    predictions = model(inputs)

    # Calculating different metrics
    mse_loss = criterion(predictions, targets)  # MSE
    rmse_loss = torch.sqrt(mse_loss)            # RMSE
    mae_loss = torch.mean(torch.abs(predictions - targets))  # MAE
    r2 = r2_score(targets.cpu().numpy(), predictions.cpu().numpy())  # R2 score

    # Printing the metrics
    print(f'Test MSE Loss: {mse_loss.item():.4f}')
    print(f'Test RMSE Loss: {rmse_loss.item():.4f}')
    print(f'Test MAE Loss: {mae_loss.item():.4f}')
    print(f'R-squared Score: {r2:.4f}')


Test MSE Loss: 29288804352.0000
Test RMSE Loss: 171139.7188
Test MAE Loss: 110256.2031
R-squared Score: 0.7416


In [19]:
def regression_accuracy(model, data_loader, tolerance=0.1):
    total_samples = 0
    accurate_predictions = 0
    model.eval()

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs, targets
            outputs = model(inputs)
            # Calculăm acuratețea în funcție de toleranță
            accurate_predictions += ((outputs.squeeze() -
                                     targets).abs() / targets <= tolerance).sum().item()
            total_samples += targets.size(0)

    accuracy = (accurate_predictions / total_samples) * 100
    return accuracy


# Calculul acurateței
train_accuracy = regression_accuracy(model, train_loader, tolerance=0.1)
val_accuracy = regression_accuracy(model, train_loader, tolerance=0.1)
print(f"Train Accuracy (±10%): {train_accuracy:.2f}%")
print(f"Validation Accuracy (±10%): {val_accuracy:.2f}%")

Train Accuracy (±10%): 789.29%
Validation Accuracy (±10%): 786.94%


In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

df = pd.read_csv('../data/final_dataset_1.csv')

# Assuming df is your DataFrame and 'price' is the target variable
X = df.drop('price', axis=1)
y = df['price']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Initialize the model
clf = GradientBoostingRegressor(n_estimators = 700, max_depth = 7, min_samples_split = 3, learning_rate = 0.1)

# Train the model
clf.fit(X_train, y_train)

# Make predictions
Y_pred = clf.predict(X_test)


In [42]:
# Add new metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score

mse = mean_squared_error(y_test, Y_pred)
mae = mean_absolute_error(y_test, Y_pred)
r2 = r2_score(y_test, Y_pred)
#accuracy = accuracy_score(y_test, Y_pred)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R-squared: {r2:.4f}')
#print("Accuracy: %.2f%%" % (accuracy * 100.0))


Mean Squared Error: 30194992032.6178
Mean Absolute Error: 103340.4322
R-squared: 0.7019


In [37]:
from xgboost import XGBRegressor

# Create an XGBoost regressor
model = XGBRegressor(objective='reg:squarederror', random_state=2)

# Train the model
model.fit(X_train, y_train)

Y_pred = model.predict(X_test)
