## Auto_MPG_data

In [15]:
# 1.Import các thư viện cần thiết
import pandas as pd # type: ignore
import numpy as np # type: ignore
import matplotlib.pyplot as plt # type: ignore
import torch # type: ignore
import torch.nn as nn # type: ignore
from torch.utils.data import Dataset, DataLoader # type: ignore
import torch.nn.functional as F # type: ignore

from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler # type: ignore

In [None]:
# 2.Tải bộ dữ liệu
# !gdown --id 1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu

In [16]:
# 3. Fix the random seeds so that repeated runs are reproducible
random_state = 59
np.random.seed(random_state)
torch.manual_seed(random_state)


# 4. Select the compute device (the CUDA generator is seeded only when a GPU exists)
if torch.cuda.is_available():
    torch.cuda.manual_seed(random_state)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


# 5. Load the dataset from the CSV file; malformed rows are skipped
dataset_path = 'Auto_MPG_data.csv'
dataset = pd.read_csv(dataset_path, sep=',', on_bad_lines='skip')


# 6. Preprocess the dataset

# Separate the target column ('MPG') from the remaining feature columns
y = dataset['MPG'].values
X = dataset.drop(columns='MPG').values

# Two successive splits: the validation set is held out from the full data
# first, then the test set is held out from what remains of the training part
val_size = 0.2
test_size = 0.125
is_shuffle = True
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=val_size,
    random_state=random_state,
    shuffle=is_shuffle,
)
X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train,
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle,
)

# Standardize the input features; the scaler is fitted on the training split
# only and then applied unchanged to every split
normalizer = StandardScaler()
normalizer.fit(X_train)
X_train, X_val, X_test = (
    normalizer.transform(split) for split in (X_train, X_val, X_test)
)

# Convert every split to float32 torch tensors
X_train, X_val, X_test = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(targets, dtype=torch.float32)
    for targets in (y_train, y_val, y_test)
)

# 7. Build the DataLoader
class CustomDataset(Dataset):
    """Map-style dataset pairing each feature row with its target value.

    Args:
        X: Indexable collection of input features, one entry per sample.
        y: Indexable collection of targets, aligned with ``X`` by position.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, y):
        # __len__ is driven by y alone, so a length mismatch would otherwise
        # either silently drop samples or fail with an IndexError mid-epoch.
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same number of samples, '
                f'got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Batch size shared by both loaders
batch_size = 32

# Wrap the tensors in datasets
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

# Training batches are reshuffled every epoch; validation keeps a fixed order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


# 8. Build the MLP network
class MLP(nn.Module):
    """Fully connected network: two ReLU-activated hidden layers plus a linear head.

    Args:
        input_dims: Size of each input feature vector.
        hidden_dims: Width of both hidden layers.
        output_dims: Size of the output layer.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        # Layers are created in forward order so seeded initialization is stable
        self.linear1 = nn.Linear(in_features=input_dims, out_features=hidden_dims)
        self.linear2 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.output = nn.Linear(in_features=hidden_dims, out_features=output_dims)

    def forward(self, x):
        """Run the network on ``x`` and drop dimension 1 of the result if it has size 1."""
        hidden = F.relu(self.linear1(x))
        hidden = F.relu(self.linear2(hidden))
        prediction = self.output(hidden)
        return prediction.squeeze(1)

# Instantiate the network; the input width is the number of feature columns
input_dims = X_train.size(1)
output_dims, hidden_dims = 1, 64
model = MLP(input_dims, hidden_dims, output_dims)
model = model.to(device)


# 9. Declare the loss function and the optimizer
lr = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)


# 10. Build the R2 score function
def r_squared(y_true, y_pred):
    """Compute the coefficient of determination R^2 = 1 - SS_res / SS_tot.

    Args:
        y_true: Ground-truth targets (list, array or tensor).
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        A zero-dimensional float32 tensor on the notebook-level ``device``.

    Note:
        There is no guard for constant targets: when every value in
        ``y_true`` is identical, ``SS_tot`` is zero and the result is not
        finite.
    """
    # torch.as_tensor accepts lists, arrays and tensors on any device or
    # dtype; the legacy torch.Tensor(...) constructor rejects tensors that
    # are not already CPU float32 (e.g. CUDA predictions).
    y_true = torch.as_tensor(y_true, dtype=torch.float32, device=device)
    y_pred = torch.as_tensor(y_pred, dtype=torch.float32, device=device)
    mean_true = torch.mean(y_true)
    ss_tot = torch.sum((y_true - mean_true) ** 2)  # total sum of squares
    ss_res = torch.sum((y_true - y_pred) ** 2)     # residual sum of squares
    r2 = 1 - (ss_res / ss_tot)
    return r2

In [17]:
# 11. Train the model
epochs = 20
train_losses, val_losses = [], []
train_r2, val_r2 = [], []

for epoch in range(epochs):
    # Per-epoch buffers of targets and predictions, used for the R2 scores
    train_target, train_predict = [], []
    val_target, val_predict = [], []

    # --- Training pass: one optimizer step per mini-batch ---
    train_loss = 0.0
    model.train()
    for X_samples, y_samples in train_loader:
        X_samples, y_samples = X_samples.to(device), y_samples.to(device)
        optimizer.zero_grad()
        outputs = model(X_samples)
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_predict.extend(outputs.tolist())
        train_target.extend(y_samples.tolist())
    # The epoch loss is the mean of the per-batch losses
    train_loss = train_loss / len(train_loader)
    train_losses.append(train_loss)
    train_r2.append(r_squared(train_target, train_predict))

    # --- Validation pass: gradients disabled, parameters untouched ---
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples, y_samples = X_samples.to(device), y_samples.to(device)
            outputs = model(X_samples)
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
            val_predict.extend(outputs.tolist())
            val_target.extend(y_samples.tolist())
    val_loss = val_loss / len(val_loader)
    val_losses.append(val_loss)
    val_r2.append(r_squared(val_target, val_predict))

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}')


# 12. Evaluate the trained model on the held-out test set
model.eval()
with torch.no_grad():
    # The model was moved to `device`, so its inputs must be moved there too:
    # feeding the CPU-resident X_test directly fails when `device` is CUDA.
    # Predictions are brought back to the CPU so they sit alongside y_test.
    y_hat = model(X_test.to(device)).cpu()
    test_set_r2 = r_squared(y_test, y_hat)
    print('Evaluation on test set: ')
    print(f'R2: {test_set_r2}')

Epoch 1/20, Train Loss: 282.769, Val Loss: 88.672
Epoch 2/20, Train Loss: 137.669, Val Loss: 72.346
Epoch 3/20, Train Loss: 71.007, Val Loss: 19.143
Epoch 4/20, Train Loss: 25.083, Val Loss: 196.176
Epoch 5/20, Train Loss: 96.139, Val Loss: 20.444
Epoch 6/20, Train Loss: 17.765, Val Loss: 9.444
Epoch 7/20, Train Loss: 18.486, Val Loss: 14.535
Epoch 8/20, Train Loss: 37.859, Val Loss: 37.427
Epoch 9/20, Train Loss: 17.133, Val Loss: 38.134
Epoch 10/20, Train Loss: 22.991, Val Loss: 41.183
Epoch 11/20, Train Loss: 26.723, Val Loss: 20.063
Epoch 12/20, Train Loss: 9.852, Val Loss: 5.594
Epoch 13/20, Train Loss: 15.143, Val Loss: 16.025
Epoch 14/20, Train Loss: 12.213, Val Loss: 12.023
Epoch 15/20, Train Loss: 14.222, Val Loss: 7.731
Epoch 16/20, Train Loss: 10.845, Val Loss: 18.904
Epoch 17/20, Train Loss: 12.312, Val Loss: 14.885
Epoch 18/20, Train Loss: 15.474, Val Loss: 12.354
Epoch 19/20, Train Loss: 13.783, Val Loss: 5.380
Epoch 20/20, Train Loss: 7.285, Val Loss: 5.100
Evaluation on

## Câu hỏi trắc nghiệm

In [18]:
# Question 8.1: Linear Regression (no activation function)

# Reduce the MLP architecture to plain linear regression (no activation function)
class LinearRegressionModel(nn.Module):
    """Single affine layer mapping the input features straight to the output.

    Args:
        input_dims: Size of each input feature vector.
        output_dims: Size of the output layer.
    """

    def __init__(self, input_dims, output_dims):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dims, out_features=output_dims)

    def forward(self, x):
        """Apply the linear layer and drop dimension 1 of the result if it has size 1."""
        prediction = self.linear(x)  # no activation is applied to the output
        return prediction.squeeze(1)

# Instantiate the linear-regression model on the compute device
model_lr = LinearRegressionModel(input_dims=input_dims, output_dims=output_dims)
model_lr = model_lr.to(device)

# Optimizer for linear regression (the loss is the `criterion` declared earlier)
optimizer_lr = torch.optim.SGD(params=model_lr.parameters(), lr=lr)

# Train the linear-regression model
train_losses_lr, val_losses_lr = [], []
train_r2_lr, val_r2_lr = [], []

for epoch in range(epochs):
    # Per-epoch buffers of targets and predictions, used for the R2 scores
    train_target, train_predict = [], []
    val_target, val_predict = [], []

    # --- Training pass: one optimizer step per mini-batch ---
    train_loss = 0.0
    model_lr.train()
    for X_samples, y_samples in train_loader:
        X_samples, y_samples = X_samples.to(device), y_samples.to(device)
        optimizer_lr.zero_grad()
        outputs = model_lr(X_samples)
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer_lr.step()
        train_loss += loss.item()
        train_predict.extend(outputs.tolist())
        train_target.extend(y_samples.tolist())
    # The epoch loss is the mean of the per-batch losses
    train_loss = train_loss / len(train_loader)
    train_losses_lr.append(train_loss)
    train_r2_lr.append(r_squared(train_target, train_predict))

    # --- Validation pass: gradients disabled, parameters untouched ---
    val_loss = 0.0
    model_lr.eval()
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples, y_samples = X_samples.to(device), y_samples.to(device)
            outputs = model_lr(X_samples)
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
            val_predict.extend(outputs.tolist())
            val_target.extend(y_samples.tolist())
    val_loss = val_loss / len(val_loader)
    val_losses_lr.append(val_loss)
    val_r2_lr.append(r_squared(val_target, val_predict))

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}')

# Evaluate the linear-regression model on the test set
model_lr.eval()
with torch.no_grad():
    # The model was moved to `device`, so its inputs must be moved there too:
    # feeding the CPU-resident X_test directly fails when `device` is CUDA.
    # Predictions are brought back to the CPU so they sit alongside y_test.
    y_hat_lr = model_lr(X_test.to(device)).cpu()
    test_set_r2_lr = r_squared(y_test, y_hat_lr)
    print(f'Evaluation on test set for Linear Regression: R2: {test_set_r2_lr}')


Epoch 1/20, Train Loss: 519.211, Val Loss: 436.727
Epoch 2/20, Train Loss: 356.768, Val Loss: 310.773
Epoch 3/20, Train Loss: 249.179, Val Loss: 222.140
Epoch 4/20, Train Loss: 177.410, Val Loss: 159.997
Epoch 5/20, Train Loss: 128.735, Val Loss: 116.927
Epoch 6/20, Train Loss: 90.269, Val Loss: 85.884
Epoch 7/20, Train Loss: 68.482, Val Loss: 64.860
Epoch 8/20, Train Loss: 50.737, Val Loss: 49.201
Epoch 9/20, Train Loss: 41.014, Val Loss: 37.912
Epoch 10/20, Train Loss: 32.396, Val Loss: 30.654
Epoch 11/20, Train Loss: 25.552, Val Loss: 24.609
Epoch 12/20, Train Loss: 22.205, Val Loss: 20.082
Epoch 13/20, Train Loss: 18.990, Val Loss: 17.193
Epoch 14/20, Train Loss: 17.377, Val Loss: 15.012
Epoch 15/20, Train Loss: 16.494, Val Loss: 13.892
Epoch 16/20, Train Loss: 14.978, Val Loss: 12.466
Epoch 17/20, Train Loss: 15.892, Val Loss: 12.055
Epoch 18/20, Train Loss: 13.987, Val Loss: 11.341
Epoch 19/20, Train Loss: 13.200, Val Loss: 10.778
Epoch 20/20, Train Loss: 13.355, Val Loss: 10.108

In [19]:
# Question 8.2: Use the Sigmoid activation function

# Same architecture as the MLP, with Sigmoid as the activation function
class MLP_Sigmoid(nn.Module):
    """Fully connected network: two Sigmoid-activated hidden layers plus a linear head.

    Args:
        input_dims: Size of each input feature vector.
        hidden_dims: Width of both hidden layers.
        output_dims: Size of the output layer.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        # Layers are created in forward order so seeded initialization is stable
        self.linear1 = nn.Linear(in_features=input_dims, out_features=hidden_dims)
        self.linear2 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.output = nn.Linear(in_features=hidden_dims, out_features=output_dims)

    def forward(self, x):
        """Run the network on ``x`` and drop dimension 1 of the result if it has size 1."""
        hidden = torch.sigmoid(self.linear1(x))       # first hidden layer + Sigmoid
        hidden = torch.sigmoid(self.linear2(hidden))  # second hidden layer + Sigmoid
        prediction = self.output(hidden)
        return prediction.squeeze(1)

# Instantiate the Sigmoid model on the compute device
model_sigmoid = MLP_Sigmoid(input_dims, hidden_dims, output_dims)
model_sigmoid = model_sigmoid.to(device)

# Optimizer for the Sigmoid MLP (the loss is the `criterion` declared earlier)
optimizer_sigmoid = torch.optim.SGD(params=model_sigmoid.parameters(), lr=lr)

# Train the model that uses the Sigmoid activation
train_losses_sigmoid, val_losses_sigmoid = [], []
train_r2_sigmoid, val_r2_sigmoid = [], []

for epoch in range(epochs):
    # Per-epoch buffers of targets and predictions, used for the R2 scores
    train_target, train_predict = [], []
    val_target, val_predict = [], []

    # --- Training pass: one optimizer step per mini-batch ---
    train_loss = 0.0
    model_sigmoid.train()
    for X_samples, y_samples in train_loader:
        X_samples, y_samples = X_samples.to(device), y_samples.to(device)
        optimizer_sigmoid.zero_grad()
        outputs = model_sigmoid(X_samples)
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer_sigmoid.step()
        train_loss += loss.item()
        train_predict.extend(outputs.tolist())
        train_target.extend(y_samples.tolist())
    # The epoch loss is the mean of the per-batch losses
    train_loss = train_loss / len(train_loader)
    train_losses_sigmoid.append(train_loss)
    train_r2_sigmoid.append(r_squared(train_target, train_predict))

    # --- Validation pass: gradients disabled, parameters untouched ---
    val_loss = 0.0
    model_sigmoid.eval()
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples, y_samples = X_samples.to(device), y_samples.to(device)
            outputs = model_sigmoid(X_samples)
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
            val_predict.extend(outputs.tolist())
            val_target.extend(y_samples.tolist())
    val_loss = val_loss / len(val_loader)
    val_losses_sigmoid.append(val_loss)
    val_r2_sigmoid.append(r_squared(val_target, val_predict))

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}')

# Evaluate the Sigmoid model on the test set
model_sigmoid.eval()
with torch.no_grad():
    # The model was moved to `device`, so its inputs must be moved there too:
    # feeding the CPU-resident X_test directly fails when `device` is CUDA.
    # Predictions are brought back to the CPU so they sit alongside y_test.
    y_hat_sigmoid = model_sigmoid(X_test.to(device)).cpu()
    test_set_r2_sigmoid = r_squared(y_test, y_hat_sigmoid)
    print(f'Evaluation on test set for Sigmoid: R2: {test_set_r2_sigmoid}')


Epoch 1/20, Train Loss: 153.980, Val Loss: 52.652
Epoch 2/20, Train Loss: 47.394, Val Loss: 37.862
Epoch 3/20, Train Loss: 35.761, Val Loss: 28.605
Epoch 4/20, Train Loss: 25.665, Val Loss: 16.337
Epoch 5/20, Train Loss: 19.496, Val Loss: 10.216
Epoch 6/20, Train Loss: 17.013, Val Loss: 8.232
Epoch 7/20, Train Loss: 14.849, Val Loss: 7.465
Epoch 8/20, Train Loss: 14.112, Val Loss: 9.234
Epoch 9/20, Train Loss: 13.321, Val Loss: 7.253
Epoch 10/20, Train Loss: 12.945, Val Loss: 7.206
Epoch 11/20, Train Loss: 12.007, Val Loss: 8.929
Epoch 12/20, Train Loss: 12.153, Val Loss: 9.120
Epoch 13/20, Train Loss: 12.441, Val Loss: 8.774
Epoch 14/20, Train Loss: 12.169, Val Loss: 12.103
Epoch 15/20, Train Loss: 12.201, Val Loss: 6.851
Epoch 16/20, Train Loss: 11.218, Val Loss: 6.669
Epoch 17/20, Train Loss: 11.491, Val Loss: 6.968
Epoch 18/20, Train Loss: 11.668, Val Loss: 7.663
Epoch 19/20, Train Loss: 10.579, Val Loss: 6.361
Epoch 20/20, Train Loss: 11.354, Val Loss: 6.383
Evaluation on test set

In [20]:
# Question 8.3: Use the Tanh activation function

# Same architecture as the MLP, with Tanh as the activation function
class MLP_Tanh(nn.Module):
    """Fully connected network: two Tanh-activated hidden layers plus a linear head.

    Args:
        input_dims: Size of each input feature vector.
        hidden_dims: Width of both hidden layers.
        output_dims: Size of the output layer.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        # Layers are created in forward order so seeded initialization is stable
        self.linear1 = nn.Linear(in_features=input_dims, out_features=hidden_dims)
        self.linear2 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.output = nn.Linear(in_features=hidden_dims, out_features=output_dims)

    def forward(self, x):
        """Run the network on ``x`` and drop dimension 1 of the result if it has size 1."""
        hidden = torch.tanh(self.linear1(x))       # first hidden layer + Tanh
        hidden = torch.tanh(self.linear2(hidden))  # second hidden layer + Tanh
        prediction = self.output(hidden)
        return prediction.squeeze(1)

# Instantiate the Tanh model on the compute device
model_tanh = MLP_Tanh(input_dims, hidden_dims, output_dims)
model_tanh = model_tanh.to(device)

# Optimizer for the Tanh MLP (the loss is the `criterion` declared earlier)
optimizer_tanh = torch.optim.SGD(params=model_tanh.parameters(), lr=lr)

# Train the model that uses the Tanh activation
train_losses_tanh, val_losses_tanh = [], []
train_r2_tanh, val_r2_tanh = [], []

for epoch in range(epochs):
    # Per-epoch buffers of targets and predictions, used for the R2 scores
    train_target, train_predict = [], []
    val_target, val_predict = [], []

    # --- Training pass: one optimizer step per mini-batch ---
    train_loss = 0.0
    model_tanh.train()
    for X_samples, y_samples in train_loader:
        X_samples, y_samples = X_samples.to(device), y_samples.to(device)
        optimizer_tanh.zero_grad()
        outputs = model_tanh(X_samples)
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer_tanh.step()
        train_loss += loss.item()
        train_predict.extend(outputs.tolist())
        train_target.extend(y_samples.tolist())
    # The epoch loss is the mean of the per-batch losses
    train_loss = train_loss / len(train_loader)
    train_losses_tanh.append(train_loss)
    train_r2_tanh.append(r_squared(train_target, train_predict))

    # --- Validation pass: gradients disabled, parameters untouched ---
    val_loss = 0.0
    model_tanh.eval()
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples, y_samples = X_samples.to(device), y_samples.to(device)
            outputs = model_tanh(X_samples)
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
            val_predict.extend(outputs.tolist())
            val_target.extend(y_samples.tolist())
    val_loss = val_loss / len(val_loader)
    val_losses_tanh.append(val_loss)
    val_r2_tanh.append(r_squared(val_target, val_predict))

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}')

# Evaluate the Tanh model on the test set
model_tanh.eval()
with torch.no_grad():
    # The model was moved to `device`, so its inputs must be moved there too:
    # feeding the CPU-resident X_test directly fails when `device` is CUDA.
    # Predictions are brought back to the CPU so they sit alongside y_test.
    y_hat_tanh = model_tanh(X_test.to(device)).cpu()
    test_set_r2_tanh = r_squared(y_test, y_hat_tanh)
    print(f'Evaluation on test set for Tanh: R2: {test_set_r2_tanh}')


Epoch 1/20, Train Loss: 229.109, Val Loss: 74.251
Epoch 2/20, Train Loss: 25.180, Val Loss: 8.916
Epoch 3/20, Train Loss: 17.625, Val Loss: 10.553
Epoch 4/20, Train Loss: 13.618, Val Loss: 7.745
Epoch 5/20, Train Loss: 11.973, Val Loss: 8.701
Epoch 6/20, Train Loss: 11.126, Val Loss: 7.141
Epoch 7/20, Train Loss: 11.149, Val Loss: 5.337
Epoch 8/20, Train Loss: 9.696, Val Loss: 8.109
Epoch 9/20, Train Loss: 9.603, Val Loss: 6.747
Epoch 10/20, Train Loss: 8.458, Val Loss: 6.038
Epoch 11/20, Train Loss: 8.329, Val Loss: 5.067
Epoch 12/20, Train Loss: 8.272, Val Loss: 5.081
Epoch 13/20, Train Loss: 8.251, Val Loss: 5.481
Epoch 14/20, Train Loss: 8.314, Val Loss: 6.373
Epoch 15/20, Train Loss: 7.679, Val Loss: 5.615
Epoch 16/20, Train Loss: 7.589, Val Loss: 6.847
Epoch 17/20, Train Loss: 8.399, Val Loss: 5.784
Epoch 18/20, Train Loss: 7.942, Val Loss: 12.657
Epoch 19/20, Train Loss: 9.215, Val Loss: 5.368
Epoch 20/20, Train Loss: 7.401, Val Loss: 5.251
Evaluation on test set for Tanh: R2: 0