In [None]:
# Mount Google Drive inside the Colab runtime so that files under
# /content/drive become reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Switch the working directory to the project folder on the mounted Drive;
# the relative paths used later in this notebook resolve against it.
%cd /content/drive/MyDrive/HCMUTE/data-mining

/content/drive/MyDrive/HCMUTE/data-mining


In [None]:
# Data Analysis
import pandas as pd
import numpy as np

# Data Visualization
from matplotlib import pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Metrics (distance) and stats
from scipy.spatial.distance import euclidean
from scipy.stats import median_abs_deviation

# Unsupervised (Machine) Learning
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

#ANN
import torch
import torch.nn as nn
import torch.optim as optim

# save data
import pickle

In [None]:
# Load the training and validation splits (features and labels) from pickle files.
# NOTE(review): the file names suggest the features were scaled in an earlier
# preprocessing step — confirm which scaler was used.
# SECURITY: pickle.load can execute arbitrary code stored in the file, so these
# files must come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as f:
        return pickle.load(f)


X_train_scaled = _load_pickle('./ieee-fraud-detection/train-scaled.pickle')
y_train = _load_pickle('./ieee-fraud-detection/label-train-scaled.pickle')
X_val_scaled = _load_pickle('./ieee-fraud-detection/val-scaled.pickle')
y_val = _load_pickle('./ieee-fraud-detection/label-val-scaled.pickle')

In [None]:
# Keep only the training rows whose label equals 0; the autoencoder below is
# trained on this subset alone.
# NOTE(review): presumably label 0 is the non-fraud class — confirm against the
# label encoding used when the pickles were written.
X_train_anc = X_train_scaled[y_train == 0]

In [None]:
class AutoEncoder(nn.Module):
    """Symmetric fully connected autoencoder with sigmoid activations.

    The encoder narrows ``input_dim -> hidden_dim -> hidden_dim // 2 ->
    hidden_dim // 4`` and the decoder mirrors those widths back up to
    ``input_dim``. Every linear layer, including the final one, is followed
    by ``nn.Sigmoid``, so reconstructed values are bounded to the sigmoid
    range [0, 1].

    Args:
        input_dim: Number of features per sample.
        hidden_dim: Width of the first encoder layer; the deeper layers use
            its integer half and quarter.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        half_width = hidden_dim // 2
        code_width = hidden_dim // 4  # size of the bottleneck representation

        # Compress the input down to the bottleneck.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.Sigmoid(),
            nn.Linear(hidden_dim, half_width), nn.Sigmoid(),
            nn.Linear(half_width, code_width), nn.Sigmoid(),
        )
        # Expand the bottleneck back to the input width.
        self.decoder = nn.Sequential(
            nn.Linear(code_width, half_width), nn.Sigmoid(),
            nn.Linear(half_width, hidden_dim), nn.Sigmoid(),
            nn.Linear(hidden_dim, input_dim), nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` and return the decoder's reconstruction of it."""
        return self.decoder(self.encoder(x))

In [None]:
# Network shape.
# NOTE(review): input_dim is hard-coded; it has to equal the number of feature
# columns in the loaded training data — confirm when the dataset changes.
input_dim, hidden_dim = 245, 128

# Optimisation settings.
num_epochs = 54
batch_size = 32
learning_rate = 0.01

In [None]:
# Seed PyTorch's random number generator before the layers are created so the
# random weight initialisation is repeatable after a kernel restart.
torch.manual_seed(42)

# Build the autoencoder with the dimensions chosen in the configuration cell.
model = AutoEncoder(input_dim, hidden_dim)

In [None]:
# Plain stochastic gradient descent over every parameter of the model.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Mean squared error; the training loop uses it to compare each reconstruction
# with the batch that produced it.
criterion = nn.MSELoss()

In [None]:
# Train the model: minimise the reconstruction error between each mini-batch
# and the autoencoder's output for that same mini-batch.
num_samples = len(X_train_anc)
if num_samples == 0:
    raise ValueError('X_train_anc is empty; nothing to train on')

for epoch in range(num_epochs):
    running_loss = 0.0  # sample-weighted sum of the batch losses in this epoch

    # Batches are consecutive slices in storage order; rows are not shuffled.
    for start in range(0, num_samples, batch_size):
        batch = torch.tensor(X_train_anc[start:start + batch_size], dtype=torch.float32)

        # Forward pass
        outputs = model(batch)

        # Reconstruction loss: the input doubles as the target
        loss = criterion(outputs, batch)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a mean over the batch, so weight it by the batch
        # length; otherwise a shorter final batch would be over-represented.
        running_loss += loss.item() * len(batch)

    # Report the epoch-wide mean loss once per epoch rather than the loss of
    # whichever mini-batch happened to be last.
    epoch_loss = running_loss / num_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}')

Epoch [50/54], Loss: 0.011390430852770805
Epoch [51/54], Loss: 0.01139017567038536
Epoch [52/54], Loss: 0.011389922350645065
Epoch [53/54], Loss: 0.011389674618840218
Epoch [54/54], Loss: 0.011389431543648243


In [None]:
# Display the module's layer structure to check the layer sizes.
model

AutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=245, out_features=128, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): Sigmoid()
  )
  (decoder): Sequential(
    (0): Linear(in_features=32, out_features=64, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=128, out_features=245, bias=True)
    (5): Sigmoid()
  )
)

In [None]:
# Save only the learned parameters (the state_dict), not the module object itself.
# The relative file name is resolved against the current working directory.
torch.save(model.state_dict(), 'weights_sgd_1.pth')