In [None]:
!pip install torch torchvision torch-geometric scikit-learn tqdm python-docx seaborn matplotlib

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torchvision import datasets, transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neighbors import kneighbors_graph
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive so the dataset and output folders below are reachable
# from the Colab runtime.
drive.mount('/content/drive')

# Images are read from input_folder (one sub-directory per class);
# checkpoints produced later are written to output_path.
input_folder = "/content/drive/MyDrive/defense/data/test"
output_path = "/content/drive/MyDrive/defense/output"
os.makedirs(output_path, exist_ok=True)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device:", device)

# Preprocessing applied to every image: fixed 224x224 size, conversion to a
# tensor, then per-channel normalisation with the mean/std values torchvision
# documents for its ImageNet-pretrained models.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder derives the class list from the sub-directory names.
dataset = datasets.ImageFolder(input_folder, transform=transform)
class_names = dataset.classes
print("Classes:", class_names)

# ImageNet-pretrained ResNet-18 used purely for inference. The `weights=`
# enum replaces the deprecated `pretrained=True` flag and selects the same
# IMAGENET1K_V1 checkpoint.
feature_extractor = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Drop the last child module (the classification head) so the network ends at
# the pooling layer and emits one feature vector per image.
feature_extractor = nn.Sequential(*list(feature_extractor.children())[:-1])
feature_extractor.eval().to(device)

features, labels = [], []
# shuffle=False keeps the rows of the feature matrix in dataset order, so row
# i of `features` corresponds to sample i of `dataset`.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

with torch.no_grad():
    for images, targets in tqdm(dataloader, desc='Extracting Features'):
        outputs = feature_extractor(images.to(device))
        # Collapse the trailing 1x1 spatial dimensions: (N, C, 1, 1) -> (N, C).
        features.append(outputs.flatten(1).cpu())
        labels.append(targets.cpu())

# Stack the per-batch results into single NumPy arrays.
features = torch.cat(features).numpy()
labels = torch.cat(labels).numpy()
print("Feature matrix shape:", features.shape)

# Stratified 70/15/15 split over node indices: first hold out 30% of the
# samples, then cut that hold-out set in half into validation and test.
idx = np.arange(len(labels))
idx_train, idx_holdout = train_test_split(
    idx,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)
idx_val, idx_test = train_test_split(
    idx_holdout,
    test_size=0.5,
    stratify=labels[idx_holdout],
    random_state=42,
)

# Link every sample to its 5 nearest neighbours under cosine distance.
# NOTE(review): the kNN relation is not guaranteed to be symmetric, so the
# resulting edge list may be directed -- confirm that this is intended.
adj = kneighbors_graph(
    features,
    n_neighbors=5,
    metric='cosine',
    mode='connectivity',
    include_self=False,
)
# Rows of the stacked array are the (row, col) indices of the non-zero
# adjacency entries, giving a 2 x num_edges edge list.
edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

# Bundle node features, edges and labels into one graph object on `device`.
data = Data(
    x=torch.tensor(features, dtype=torch.float),
    edge_index=edge_index,
    y=torch.tensor(labels, dtype=torch.long),
)
data = data.to(device)

class GCNModel(nn.Module):
    """Two-layer graph convolutional network producing per-node log-probabilities.

    Args:
        input_dim: Size of each node's input feature vector.
        hidden_dim: Number of channels produced by the first convolution.
        output_dim: Number of channels produced by the second convolution
            (one per class).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names determine the state_dict keys, so they are kept stable.
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, data):
        """Run both convolutions over ``data.x`` / ``data.edge_index``.

        Returns:
            Log-softmax over dimension 1 of the second convolution's output.
        """
        hidden = self.conv1(data.x, data.edge_index)
        # Dropout is applied after the non-linearity, between the two layers.
        hidden = self.dropout(F.relu(hidden))
        logits = self.conv2(hidden, data.edge_index)
        return F.log_softmax(logits, dim=1)

def train_model(model, name, epochs=200, lr=0.005, weight_decay=5e-4):
    """Train ``model`` full-batch on the module-level graph and evaluate its best checkpoint.

    The function reads the module-level ``data``, ``idx_train``, ``idx_val``,
    ``idx_test``, ``device``, ``output_path`` and ``class_names``. After every
    epoch the model is scored on all three splits; whenever validation accuracy
    improves, the weights are saved to ``<output_path>/<name>_best_model.pth``.
    When training ends, that checkpoint is reloaded and used for the final
    test-set confusion matrix and classification report.

    Args:
        model: Module called as ``model(data)`` that returns per-node
            log-probabilities (the loss is ``nn.NLLLoss``).
        name: Prefix for the checkpoint file name.
        epochs: Number of training epochs; must be at least 1.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.

    Returns:
        Tuple ``(best_val_acc, best_test_acc, best_epoch, history, cm, report)``:
        the best validation accuracy, the test accuracy and epoch at which it
        was reached, a dict of per-epoch ``{train,val,test}_{loss,acc}`` lists,
        the test confusion matrix, and the classification report as a
        ``pandas.DataFrame`` with one row per class/aggregate.

    Raises:
        ValueError: If ``epochs`` is smaller than 1 (no checkpoint would exist
            to evaluate).
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # mode='max' because the scheduler is stepped on validation accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10)
    criterion = nn.NLLLoss()
    checkpoint_path = os.path.join(output_path, f'{name}_best_model.pth')

    # Convert the index arrays to device tensors once instead of on every
    # indexing operation inside the epoch loop.
    splits = {
        'train': torch.as_tensor(idx_train, dtype=torch.long, device=device),
        'val': torch.as_tensor(idx_val, dtype=torch.long, device=device),
        'test': torch.as_tensor(idx_test, dtype=torch.long, device=device),
    }
    history = {f'{split}_{metric}': [] for metric in ('loss', 'acc') for split in splits}

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; otherwise a run whose validation accuracy stays at 0 would
    # later load a missing (or stale) file.
    best_val_acc, best_test_acc, best_epoch = float('-inf'), 0.0, 0

    for epoch in range(1, epochs + 1):
        # One full-batch gradient step on the training nodes only.
        model.train()
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out[splits['train']], data.y[splits['train']])
        loss.backward()
        optimizer.step()

        # Score all three splits with dropout disabled.
        model.eval()
        with torch.no_grad():
            out = model(data)
            pred = out.argmax(dim=1)
            for split, index in splits.items():
                target = data.y[index]
                history[f'{split}_loss'].append(criterion(out[index], target).item())
                history[f'{split}_acc'].append((pred[index] == target).sum().item() / index.numel())

        train_acc = history['train_acc'][-1]
        val_acc = history['val_acc'][-1]
        test_acc = history['test_acc'][-1]
        scheduler.step(val_acc)

        if val_acc > best_val_acc:
            best_val_acc, best_test_acc, best_epoch = val_acc, test_acc, epoch
            torch.save(model.state_dict(), checkpoint_path)

        if epoch % 20 == 0:
            print(f"Epoch {epoch} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Test Acc: {test_acc:.4f}")

    # Evaluate the best checkpoint rather than the last-epoch weights.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.eval()
    with torch.no_grad():
        pred = model(data).argmax(dim=1)

    y_true = data.y[splits['test']].cpu().numpy()
    y_pred = pred[splits['test']].cpu().numpy()
    # Passing the full label list keeps the matrix and report aligned with
    # class_names even if some class is absent from the test split.
    class_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    report = classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )
    return best_val_acc, best_test_acc, best_epoch, history, cm, pd.DataFrame(report).transpose()

# Build the GCN: the input width comes from the graph's node features and
# there is one output channel per class.
gcn_model = GCNModel(input_dim=data.num_features, hidden_dim=64, output_dim=len(class_names))

# Train, then unpack the metrics of the best checkpoint.
(
    best_val_acc,
    best_test_acc,
    best_epoch,
    history,
    cm,
    report,
) = train_model(gcn_model, "GCN")

print("Best Epoch:", best_epoch)
print("Validation Accuracy:", best_val_acc)
print("Test Accuracy:", best_test_acc)
print(report)

# Confusion matrix on the test split, drawn as an annotated heatmap.
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=class_names,
    yticklabels=class_names,
    cmap="Blues",
    ax=ax,
)
ax.set_title("GCN Confusion Matrix")
plt.show()
