In [32]:
import pandas as pd

# Load the CSV dataset into a DataFrame
df = pd.read_csv("../data/final_dataset_1.csv")

In [33]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Separate the features from the target variable:
# every column except 'price' is a feature, 'price' is the target.
X = df.drop(columns=['price'])
y = df['price']  # Target variable

# Standardize the features.
# NOTE(review): the scaler and the PCA below are fitted on every row, before the
# train/validation split made later in the notebook, so validation rows
# influence the transform. Consider fitting on the training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA, keeping 13 components. A fixed random_state keeps the projection
# reproducible when scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=13, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Check how many components are left
print("Numărul de componente după PCA:", X_pca.shape[1])

Numărul de componente după PCA: 13


In [34]:
import torch

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

------


In [35]:
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

# Convert the NumPy arrays into float32 PyTorch tensors
X_tensor = torch.tensor(X_pca, dtype=torch.float32)
# `.values` hands torch the array underlying `y`
y_tensor = torch.tensor(y.values, dtype=torch.float32)

# Pair every feature row with its target in a TensorDataset
dataset = TensorDataset(X_tensor, y_tensor)

In [36]:
# 80 % of the samples go to training; validation takes whatever is left,
# so the two sizes always add up to len(dataset).
n_samples = len(dataset)
train_size = int(0.8 * n_samples)
val_size = n_samples - train_size

# A generator seeded with 42 makes the split identical on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, lengths=[train_size, val_size], generator=split_generator)

In [37]:
# Mini-batch size shared by both loaders
batch_size = 64

# Both loaders use the same options; only the training loader reshuffles
# its samples on every pass.
loader_options = {"batch_size": batch_size}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [38]:
# Peek at a single batch to confirm the tensor shapes:
# expected (batch_size, 13) for the inputs and (batch_size,) for the targets.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    inputs, targets = first_batch
    print(inputs.shape, targets.shape)

torch.Size([64, 13]) torch.Size([64])


In [39]:
import torch
import torch.nn as nn
import torch.optim as optim

# Definirea modelului DNN


class DNN(nn.Module):
    """Regression network with one hidden ReLU layer and a single linear output.

    Args:
        in_features: number of input features per sample. Defaults to 13, the
            number of PCA components used in this notebook.
        hidden_features: width of the hidden layer. Defaults to 20.

    The sub-module names ``fc1`` and ``output`` are part of the ``state_dict``
    keys and must not be renamed.
    """

    def __init__(self, in_features=13, hidden_features=20):
        super().__init__()
        # Hidden layer: in_features -> hidden_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        # Output layer: one predicted value per sample
        self.output = nn.Linear(hidden_features, 1)

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)`` for ``x`` of shape ``(batch, in_features)``."""
        x = torch.relu(self.fc1(x))
        # No activation on the output: the regression target is unbounded
        return self.output(x)

In [40]:
# Build the network, then place it on the selected device
net = DNN()
net = net.to(device)

# Mean-squared-error loss, optimized with Adam at a learning rate of 1e-3
loss_function = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

In [41]:
def train_model(model, train_loader, val_loader, optimizer, loss_function, epochs, device=None):
    """Train ``model`` and print the mean training and validation loss of every epoch.

    Args:
        model: ``nn.Module`` to train; its output is compared with the targets
            reshaped to a column ``(-1, 1)``.
        train_loader: sized iterable of ``(inputs, targets)`` batches used for
            the parameter updates.
        val_loader: sized iterable of ``(inputs, targets)`` batches used for
            evaluation only (no gradients, ``model.eval()``).
        optimizer: optimizer already bound to ``model.parameters()``.
        loss_function: callable ``loss_function(outputs, targets)`` returning a
            scalar tensor.
        epochs: number of full passes over ``train_loader``.
        device: device to run on. ``None`` (default) uses the device the
            model's parameters already live on (CPU for a parameter-free
            model), so the function does not depend on a notebook-level
            ``device`` variable.

    Returns:
        None. Progress is reported with ``print``; the printed loss is the
        average of the per-batch losses.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    model.to(device)  # Make sure the model is on the device the batches are sent to

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            # Targets become a column so they match the (batch, 1) output
            # instead of broadcasting against it.
            loss = loss_function(outputs, targets.view(-1, 1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(
            f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss/len(train_loader):.4f}")

        # Evaluate on the validation set
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                val_loss += loss_function(outputs, targets.view(-1, 1)).item()

        print(f"Validation Loss: {val_loss/len(val_loader):.4f}")



# Example training run.
# NOTE(review): in the recorded output the training loss stays around 3.6e11 for
# the first twelve epochs, i.e. the network barely moves. The target tensor is
# built from the raw 'price' values; consider standardizing it (TODO confirm).
train_model(net, train_loader, val_loader, optimizer, loss_function, epochs=100)

Epoch 1/100, Training Loss: 359225456895.0339
Validation Loss: 347081937690.7463
Epoch 2/100, Training Loss: 359858191147.4717
Validation Loss: 347060828007.1642
Epoch 3/100, Training Loss: 360132294725.5547
Validation Loss: 347022187199.0448
Epoch 4/100, Training Loss: 359306092389.4340
Validation Loss: 346967294930.1492
Epoch 5/100, Training Loss: 358867613301.8566
Validation Loss: 346898483597.3731
Epoch 6/100, Training Loss: 363053157457.1472
Validation Loss: 346816646250.9850
Epoch 7/100, Training Loss: 360319879964.0151
Validation Loss: 346722689390.8060
Epoch 8/100, Training Loss: 359213797113.2377
Validation Loss: 346617973133.3731
Epoch 9/100, Training Loss: 360138125327.4566
Validation Loss: 346502497600.9552
Epoch 10/100, Training Loss: 359790730985.7811
Validation Loss: 346378275580.1791
Epoch 11/100, Training Loss: 359402005500.1359
Validation Loss: 346243923478.9254
Epoch 12/100, Training Loss: 359227215732.8906
Validation Loss: 346100282505.5522
Epoch 13/100, Training Lo

In [42]:
def regression_accuracy(model, data_loader, tolerance=0.1, device=None):
    """Percentage of predictions within a relative ``tolerance`` of their target.

    A prediction is a hit when ``|prediction - target| <= tolerance * |target|``.
    Written this way, the check does not divide by the target, so negative
    targets are compared by magnitude and a zero target is a hit only for an
    exact prediction.

    Args:
        model: ``nn.Module`` producing one value per sample.
        data_loader: iterable of ``(inputs, targets)`` batches.
        tolerance: allowed relative error, e.g. ``0.1`` for ±10 %.
        device: device to run on. ``None`` (default) uses the device the
            model's parameters already live on (CPU for a parameter-free
            model).

    Returns:
        Accuracy in percent as a float; ``0.0`` when ``data_loader`` yields no
        samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    total_samples = 0
    accurate_predictions = 0
    model.to(device)
    model.eval()

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            # Flatten both sides so (batch, 1) outputs and (batch,) targets are
            # compared element by element, including for a batch of one.
            predictions = model(inputs).reshape(-1)
            targets = targets.reshape(-1)
            within = (predictions - targets).abs() <= tolerance * targets.abs()
            accurate_predictions += within.sum().item()
            total_samples += targets.numel()

    if total_samples == 0:
        return 0.0
    return (accurate_predictions / total_samples) * 100



# Share of predictions within ±10 % of the target, for the training and the
# validation loader (evaluated in that order).
# NOTE(review): the recorded run printed 0.00% for both.
train_accuracy, val_accuracy = (
    regression_accuracy(net, loader, tolerance=0.1)
    for loader in (train_loader, val_loader)
)
print(f"Train Accuracy (±10%): {train_accuracy:.2f}%")
print(f"Validation Accuracy (±10%): {val_accuracy:.2f}%")

Train Accuracy (±10%): 0.00%
Validation Accuracy (±10%): 0.00%


----


In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Pick the compute device: CUDA (GPU) when available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Dispozitiv utilizat:", device)

# Definim modelul rețelei neurale


class NeuralNetwork(nn.Module):
    """Fully connected regressor: 13 inputs, four 30-unit ReLU layers, one linear output."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc1..fc5 and keep those attribute
        # names, so state_dict keys are unchanged.
        self.fc1 = nn.Linear(in_features=13, out_features=30)
        self.fc2 = nn.Linear(in_features=30, out_features=30)
        self.fc3 = nn.Linear(in_features=30, out_features=30)
        self.fc4 = nn.Linear(in_features=30, out_features=30)
        self.fc5 = nn.Linear(in_features=30, out_features=1)

    def forward(self, x):
        hidden = x
        # Every hidden layer is followed by a ReLU
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        # The output layer is linear (no activation)
        return self.fc5(hidden)


model = NeuralNetwork().to(device)  # Move the model to the selected device (GPU when available)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

# Convert the training data to tensors on the selected device.
# NOTE(review): X_train, y_train, X_test and y_test have to be defined before
# this cell runs; the recorded execution stopped here with
# "NameError: name 'X_train' is not defined".
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
# Column vector (N, 1) so the targets line up with the model output
y_train_tensor = torch.tensor(
    y_train.values, dtype=torch.float32).view(-1, 1).to(device)

# TensorDataset and DataLoader for the training set
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Train the model
num_epochs = 50
model.train()
for epoch in range(num_epochs):
    # Sum of per-sample losses, so the logged value is the mean over the whole
    # epoch rather than the loss of whichever mini-batch happened to come last.
    epoch_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # nn.MSELoss() averages over the batch; weight by the batch size
        epoch_loss += loss.item() * inputs.size(0)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(train_dataset)}')

# Convert the test data to tensors on the selected device
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
# Column vector (N, 1), same layout as the training targets
y_test_tensor = torch.tensor(
    y_test.values, dtype=torch.float32).view(-1, 1).to(device)

# Evaluate the model on the test set (evaluation mode, no gradients)
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_pred, y_test_tensor)
print("Loss pe setul de testare:", test_loss.item())

Dispozitiv utilizat: cuda


NameError: name 'X_train' is not defined

In [None]:
# Coefficient of determination (R^2)
def r_squared(y_true, y_pred):
    """Return R^2 = 1 - SS_res / SS_tot as a Python float.

    Args:
        y_true: tensor of observed values.
        y_pred: tensor of predictions with the same number of elements as
            ``y_true``. It is reshaped to ``y_true``'s shape first, so a
            ``(N, 1)`` model output and ``(N,)`` targets are compared element
            by element instead of being broadcast to ``(N, N)``.

    Raises:
        RuntimeError: if ``y_pred`` cannot be reshaped to ``y_true.shape``.

    Note:
        When every value in ``y_true`` is identical, SS_tot is zero and the
        result is ``nan`` or ``-inf``.
    """
    y_pred = y_pred.reshape(y_true.shape)
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2.item()


# Compute the coefficient of determination for the test set
r2 = r_squared(y_test_tensor, y_pred)
print("Coeficientul de determinare (R^2) pe setul de testare:", r2)

---

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the CSV dataset into a DataFrame
df = pd.read_csv("../data/final_dataset_1.csv")


# Separate the features from the target variable:
# every column except 'price' is a feature, 'price' is the target.
X = df.drop(columns=['price'])
y = df['price']  # Target variable

# Standardize the features.
# NOTE(review): the scaler and the PCA below are fitted on every row, before
# the train/test split made in a later cell, so test rows influence the
# transform. Consider fitting on the training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA, keeping 13 components. A fixed random_state keeps the projection
# reproducible when scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=13, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Check how many components are left
print("Numărul de componente după PCA:", X_pca.shape[1])

In [None]:

# Split features and target in one call so every row keeps its own price.
# With the same test_size and random_state, X_train / X_test contain exactly
# the rows they contained when only X_pca was split.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.3, random_state=42)

print("Dimensiunea setului de date de antrenament:", X_train.shape)
print("Dimensiunea setului de date de testare:", X_test.shape)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms

In [None]:
# 13 input features -> two hidden layers of 30 ReLU units -> one linear output
layers = [
    nn.Flatten(),
    nn.Linear(13, 30),
    nn.ReLU(),
    nn.Linear(30, 30),
    nn.ReLU(),
    nn.Linear(30, 1),
]
net = nn.Sequential(*layers)


In [None]:
batch_size = 32

# NOTE(review): this torchvision image transform is not applied to the tabular
# tensors below; it is kept only so the `trans` name stays defined.
trans = [transforms.ToTensor()]
trans = transforms.Compose(trans)

# Targets for the rows held in X_train / X_test. Splitting `y` with the same
# test_size and random_state that produced X_train / X_test assigns the rows
# to the two sides in the same way, so features and targets stay aligned.
y_train, y_test = train_test_split(y, test_size=0.3, random_state=42)
assert len(y_train) == len(X_train) and len(y_test) == len(X_test)

# (features, target) datasets in float32. The training functions iterate
# `for X, y in data_iter`, which a dataset made of feature rows alone cannot
# satisfy. Targets are kept 1-D, one value per row.
train_full = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train.values, dtype=torch.float32))
test_set = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test.values, dtype=torch.float32))

# Hold out one tenth of the training rows for validation (seeded split)
n_val = len(train_full) // 10
train, val = torch.utils.data.random_split(
    train_full, [len(train_full) - n_val, n_val],
    generator=torch.Generator().manual_seed(42))

train_iter = DataLoader(train, batch_size, shuffle=True)
val_iter = DataLoader(val, batch_size, shuffle=False)
test_iter = DataLoader(test_set, batch_size, shuffle=False)

# Number of samples left for training after the validation hold-out
print(len(train))
len(X_train),len(X_test)

In [None]:
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy for a model on a dataset."""
    net.eval()  # Set the model to evaluation mode

    total_loss = 0
    total_hits = 0
    total_samples = 0
    with torch.no_grad():
        for X, y in data_iter:
            y_hat = net(X)
            l = loss(y_hat, y)
            total_loss += float(l)
            total_hits += sum(net(X).argmax(axis=1).type(y.dtype) == y)
            total_samples += y.numel()
    return float(total_loss) / len(data_iter), float(total_hits) / total_samples * 100

def train_epoch(net, train_iter, loss, optimizer):
    # Set the model to training mode
    net.train()
    # Sum of training loss, sum of training correct predictions, no. of examples
    total_loss = 0
    total_hits = 0
    total_samples = 0
    for X, y in train_iter:
        # Compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        # Using PyTorch built-in optimizer & loss criterion
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        total_loss += float(l)
        total_hits += sum(y_hat.argmax(axis=1).type(y.dtype) == y)
        total_samples += y.numel()
    # Return training loss and training accuracy
    return float(total_loss) / len(train_iter), float(total_hits) / total_samples * 100


def train(net, train_iter, val_iter, loss, num_epochs, optimizer):
    """Train a model."""
    train_loss_all = []
    train_acc_all = []
    val_loss_all = []
    val_acc_all = []
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(net, train_iter, loss, optimizer)
        train_loss_all.append(train_loss)
        train_acc_all.append(train_acc)
        val_loss, val_acc = evaluate_accuracy(net, val_iter)
        val_loss_all.append(val_loss)
        val_acc_all.append(val_acc)
        print(f'Epoch {epoch + 1}, Train loss {train_loss:.2f}, Train accuracy {train_acc:.2f}, Validation loss {val_loss:.2f}, Validation accuracy {val_acc:.2f}')

    return train_loss_all, train_acc_all, val_loss_all, val_acc_all

In [None]:
# Show the first training row only; slicing to one row replaces the loop-and-break
for data in X_train[:1]:
    print(data)

In [None]:
# Training setup: 50 epochs, mean-squared-error loss, Adam at a learning rate of 1e-3
num_epochs = 50
loss = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# One list per metric, one entry per epoch
train_loss_all, train_acc_all, val_loss_all, val_acc_all = train(
    net=net, train_iter=train_iter, val_iter=val_iter,
    loss=loss, num_epochs=num_epochs, optimizer=optimizer)