In [None]:
# Βασικά imports
import os
import random
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import average_precision_score, classification_report, roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Device: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Reproducibility
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus all CUDA devices when CUDA is available). It also asks cuDNN
    to use deterministic kernels and disables its autotuner, because seeding
    alone does not make GPU runs repeatable.

    Args:
        seed: Integer seed shared by all generators.

    Returns:
        None. The global generator state is changed as a side effect.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Setting these flags is harmless on CPU-only machines; on GPU they trade
    # some speed for run-to-run repeatability.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)


Using device: cpu


In [None]:
# Upload creditcard.csv through the Files panel (or drag it into the left pane)
# and make sure it shows up as e.g. /content/creditcard.csv

csv_path = "creditcard.csv"   # CHANGE this if the file has a different name!

df = pd.read_csv(csv_path)

print("Shape:", df.shape)
df.head()


Shape: (284807, 31)


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() adds a stray "None" line.
df.info()

# Absolute and relative frequency of each label
print("\nClass value counts:")
print(df["Class"].value_counts())
print("\nClass distribution (%):")
print(df["Class"].value_counts(normalize=True) * 100)

# Summary statistics; as the last expression it is rendered as a table
print("\nDescribe numeric columns:")
df.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.168375e-15,3.416908e-16,-1.379537e-15,2.074095e-15,9.604066e-16,1.487313e-15,-5.556467e-16,1.213481e-16,-2.406331e-15,...,1.654067e-16,-3.568593e-16,2.578648e-16,4.473266e-15,5.340915e-16,1.683437e-15,-3.660091e-16,-1.22739e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


Το dataset έχει 31 στήλες. Η πρώτη είναι το Time: ο χρόνος σε δευτερόλεπτα που έχει περάσει ανάμεσα σε κάθε συναλλαγή και την πρώτη συναλλαγή του dataset. Ακολουθούν οι V1–V28, που είναι οι στήλες που προέκυψαν από PCA και αποτελούν τα features των συναλλαγών, και το Amount, το ποσό της συναλλαγής. Τέλος, έχουμε το Class, που είναι είτε 0 είτε 1: το 0 σημαίνει legit συναλλαγή, ενώ το 1 fraud.

In [None]:
# Sanity-check the label column before it is used as the target
class_column = df["Class"]

# How many labels are missing
print("NaN in Class:", class_column.isna().sum())

# Which distinct label values occur
print("Unique values in Class:", class_column.unique())


NaN in Class: 0
Unique values in Class: [0 1]


In [None]:
# Keep only the rows whose Class label is present and (optionally, to be
# safe) cast the label to int, as one chained expression that builds a new
# frame.
df = (
    df.loc[df["Class"].notna()]
    .assign(Class=lambda frame: frame["Class"].astype(int))
)

labels_after = df["Class"]
print("After cleaning:")
print("NaN in Class:", labels_after.isna().sum())
print(labels_after.value_counts())


After cleaning:
NaN in Class: 0
Class
0    284315
1       492
Name: count, dtype: int64


In [None]:
# Show the (rows, columns) of the DataFrame after the cleaning step
print(df.shape)


(284807, 31)


In [None]:
# Separate the label column from the feature matrix (both as NumPy arrays)
y = df["Class"].values
X = df.drop(columns=["Class"]).values

# First cut: 70% for training, 30% held out, stratified on the label
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Second cut: split the held-out part evenly into validation and test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# Size of every split
for shape_label, split_features in (
    ("Train:", X_train),
    ("Val:", X_val),
    ("Test:", X_test),
):
    print(shape_label, split_features.shape)

# Share of positive (Class == 1) labels in every split
for ratio_label, split_labels in (
    ("Fraud ratio train:", y_train),
    ("Fraud ratio val:", y_val),
    ("Fraud ratio test:", y_test),
):
    print(ratio_label, split_labels.mean())


Train: (199364, 30)
Val: (42721, 30)
Test: (42722, 30)
Fraud ratio train: 0.0017254870488152324
Fraud ratio val: 0.0017321691907960957
Fraud ratio test: 0.0017321286456626562


Εδώ κόβουμε το dataset σε τρία κομμάτια. Το train είναι αυτό με το οποίο θα εκπαιδεύσουμε τον teacher. Το validation το χρησιμοποιούμε για να παρακολουθούμε τι κάνει κατά τη διάρκεια του training. Το test το κρατάμε για το τέλος, για τελική αξιολόγηση. Το stratify=y κρατάει παρόμοιο fraud ratio σε όλα τα split.

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to the training, validation and test splits.
scaler = StandardScaler().fit(X_train)

X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)


Εδώ κανονικοποιούμε όλα τα features ώστε να έχουν παρόμοια κλίμακα. Αυτό βοηθάει πολύ τα νευρωνικά να εκπαιδευτούν σταθερά, ειδικά επειδή το Amount και το Time μπορεί να είναι σε άλλη κλίμακα από τα PCA components.


Χωρίς scaling:

- το Amount και το Time θα «σκεπάζουν» τα PCA features
- το μοντέλο θα βασίζεται υπερβολικά σε αυτά
- το KD (knowledge distillation) αργότερα θα αποστάζει λάθος patterns


Εδώ στην ουσία μετατρέπω τα numpy arrays σε tensors και τα χωρίζω σε batches, ώστε να χρησιμοποιηθούν στο νευρωνικό μου δίκτυο.

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class FraudDataset(Dataset):
    """In-memory dataset that pairs each feature row with its class label.

    Args:
        X: Array-like of features; converted to a ``float32`` tensor.
        y: Array-like of labels, one per row of ``X``; converted to a
            ``long`` (int64) tensor.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

        # Fail fast: __len__ is derived from X only, so a mismatch would
        # otherwise surface later as an IndexError (or silently ignore the
        # extra labels).
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]


# Wrap every split in a FraudDataset
train_ds, val_ds, test_ds = (
    FraudDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Only the training batches are shuffled; the validation and test loaders
# keep the stored order and use a batch size of 1024 instead of 512.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=512)
val_loader, test_loader = (
    DataLoader(split_ds, shuffle=False, batch_size=1024)
    for split_ds in (val_ds, test_ds)
)

# Number of samples per split (displayed as the cell output)
tuple(len(split_ds) for split_ds in (train_ds, val_ds, test_ds))


(199364, 42721, 42722)

Εδώ έχουμε φτιάξει το βασικό νευρωνικό δίκτυο του teacher model μας, όπου το input (30 features) θα περνάει από 256 νευρώνες στην αρχή, 128 μετά και 64 στο τέλος, και κάθε φορά θα εφαρμόζεται ReLU.

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Number of input features = number of columns of the training matrix;
# each row is a single transaction.
input_dim = X_train.shape[1]
print("Input dim:", input_dim)

class MLP(nn.Module):
    """Fully connected classifier that outputs raw logits.

    The network is a stack of ``Linear -> ReLU`` pairs, one per entry of
    ``hidden_dims``, followed by a final ``Linear`` layer that produces the
    logits. No softmax is applied.

    Args:
        input_dim: Number of input features.
        hidden_dims: Iterable with the width of each hidden layer, in order.
            An empty iterable yields a single linear layer.
        num_classes: Number of output logits. Defaults to 2, the value that
            used to be hard-coded.
    """

    def __init__(self, input_dim, hidden_dims, num_classes=2):
        super().__init__()
        layers = []
        prev = input_dim
        for h in hidden_dims:
            layers.extend([nn.Linear(prev, h), nn.ReLU()])
            prev = h
        # Output layer: one logit per class
        layers.append(nn.Linear(prev, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for a batch ``x``."""
        return self.net(x)

# Build the teacher with hidden layers of 256, 128 and 64 units and move it
# to the selected device; the bare name on the last line displays its layers.
teacher = MLP(input_dim, hidden_dims=[256, 128, 64]).to(device)
teacher


Using device: cpu
Input dim: 30


MLP(
  (net): Sequential(
    (0): Linear(in_features=30, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=2, bias=True)
  )
)

Εδώ ορίζουμε το νευρωνικό που παίζει τον ρόλο του teacher. Είναι ένα απλό MLP: 30 είσοδοι → κρυφά layers 256–128–64 → έξοδος 2 logits (για τις δύο κλάσεις). Αυτό είναι το “μεγάλο” μοντέλο που θέλουμε αργότερα να αποστάξουμε σε ένα μικρότερο student.

In [None]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Label values for which a weight is computed
classes = np.array([0, 1])

# "balanced" weights derived from the training labels
balanced_weights = compute_class_weight(
    class_weight="balanced", classes=classes, y=y_train
)

# The loss expects the weights as a float32 tensor on the training device
class_weights = torch.tensor(balanced_weights, dtype=torch.float32).to(device)
print("Class weights:", class_weights)


def train_teacher(model, train_loader, val_loader, epochs=5, lr=1e-3, weight=None):
    """Train a classifier with Adam and class-weighted cross-entropy.

    After every epoch one line is printed with the mean training loss and,
    on the validation loader, accuracy plus recall and precision for class 1.
    Accuracy alone says little when one class dominates the labels, so the
    class-1 metrics are reported next to it.

    Args:
        model: ``nn.Module`` mapping a batch of features to per-class logits.
            It is trained in place, on the device its parameters live on.
        train_loader: Iterable of ``(features, labels)`` batches used for
            optimisation; must expose ``.dataset`` with a length.
        val_loader: Iterable of ``(features, labels)`` batches used for the
            per-epoch validation metrics.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.
        weight: Optional 1-D tensor of per-class loss weights. When omitted,
            the notebook-level ``class_weights`` tensor is used.

    Returns:
        None. Progress is reported through ``print``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Follow the model's own device instead of a notebook-level global.
    model_device = next(model.parameters()).device

    if weight is None:
        weight = class_weights
    criterion = nn.CrossEntropyLoss(weight=weight.to(model_device))

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        for xb, yb in train_loader:
            xb, yb = xb.to(model_device), yb.to(model_device)

            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            # Weight each batch loss by its size so the epoch average is not
            # skewed by a smaller final batch.
            total_loss += loss.item() * xb.size(0)

        avg_loss = total_loss / len(train_loader.dataset)

        model.eval()
        correct, total = 0, 0
        true_pos, false_pos, false_neg = 0, 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(model_device), yb.to(model_device)
                preds = model(xb).argmax(dim=1)

                correct += (preds == yb).sum().item()
                total += yb.size(0)

                pred_pos = preds == 1
                actual_pos = yb == 1
                true_pos += (pred_pos & actual_pos).sum().item()
                false_pos += (pred_pos & ~actual_pos).sum().item()
                false_neg += (~pred_pos & actual_pos).sum().item()

        # Each ratio falls back to 0.0 when its denominator is empty.
        val_acc = correct / total if total > 0 else 0.0
        actual_total = true_pos + false_neg
        predicted_total = true_pos + false_pos
        val_recall = true_pos / actual_total if actual_total > 0 else 0.0
        val_precision = true_pos / predicted_total if predicted_total > 0 else 0.0

        print(
            f"Epoch {epoch+1}: train_loss={avg_loss:.4f}, val_acc={val_acc:.4f}, "
            f"val_recall={val_recall:.4f}, val_precision={val_precision:.4f}"
        )


Class weights: tensor([  0.5009, 289.7733])


Εδώ κάνουμε δύο πράγματα. Πρώτον, υπολογίζουμε class_weights για να δώσουμε πολύ μεγαλύτερο βάρος στα fraud samples, επειδή είναι πολύ λίγα. Αυτό μπαίνει μέσα στο CrossEntropyLoss ώστε το loss να “πονάει” περισσότερο όταν κάνουμε λάθος στην fraud κλάση. Δεύτερον, γράφουμε τον training loop του teacher: για κάθε epoch, περνάει όλα τα batches, κάνει forward → loss → backward → optimizer step και στο τέλος υπολογίζει validation accuracy για να δούμε αν βελτιώνεται.

In [None]:
# Train the teacher for 5 epochs with a learning rate of 1e-3
train_teacher(teacher, train_loader, val_loader, epochs=5, lr=1e-3)


Epoch 1: train_loss=0.2106, val_acc=0.9892
Epoch 2: train_loss=0.1219, val_acc=0.9835
Epoch 3: train_loss=0.0837, val_acc=0.9883
Epoch 4: train_loss=0.0775, val_acc=0.9875
Epoch 5: train_loss=0.0721, val_acc=0.9770


In [None]:
from sklearn.metrics import classification_report, roc_auc_score

def evaluate(model, loader, threshold=0.5):
    """Print classification metrics for ``model`` on ``loader``.

    The softmax probability of class 1 is collected for every sample and
    turned into a hard prediction with ``threshold``. The function prints a
    classification report, the ROC AUC and the PR AUC (average precision).
    PR AUC is included because ROC AUC can look optimistic when positives
    are rare.

    Args:
        model: ``nn.Module`` that returns per-class logits, with class 1 in
            column 1.
        loader: Iterable of ``(features, labels)`` batches.
        threshold: A sample is predicted as class 1 when its class-1
            probability is strictly greater than this value. Defaults to 0.5.

    Returns:
        None. All results are reported through ``print``.
    """
    model.eval()

    # Follow the model's own device instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    prob_chunks = []
    label_chunks = []
    with torch.no_grad():
        for xb, yb in loader:
            logits = model(xb.to(model_device))
            prob_chunks.append(F.softmax(logits, dim=1)[:, 1].cpu())
            label_chunks.append(yb.cpu())

    if not prob_chunks:
        print("No samples to evaluate")
        return

    all_p = torch.cat(prob_chunks).numpy()
    all_y = torch.cat(label_chunks).numpy()

    preds = (all_p > threshold).astype(int)
    print(classification_report(all_y, preds, digits=4))

    # A ValueError from a scorer is reported instead of aborting the cell.
    try:
        print("ROC AUC:", roc_auc_score(all_y, all_p))
    except ValueError:
        print("ROC AUC: cannot compute")
    try:
        print("PR AUC:", average_precision_score(all_y, all_p))
    except ValueError:
        print("PR AUC: cannot compute")

# Report the trained teacher's metrics on the test loader
print("Teacher performance on test set:")
evaluate(teacher, test_loader)


Teacher performance on test set:
              precision    recall  f1-score   support

           0     0.9998    0.9767    0.9881     42648
           1     0.0623    0.8919    0.1164        74

    accuracy                         0.9765     42722
   macro avg     0.5310    0.9343    0.5523     42722
weighted avg     0.9982    0.9765    0.9866     42722

ROC AUC: 0.9788475870355442


Αυτό είναι για την τελική αξιολόγηση. Υπολογίζουμε τις predicted probabilities για την fraud κλάση, βγάζουμε classification report (precision, recall, F1) και ROC AUC. Αυτά είναι τα νούμερα που θα χρησιμοποιήσεις στην εργασία σαν performance του teacher και μετά θα τα συγκρίνεις με τον student.