<a href="https://colab.research.google.com/github/roksanaoni/Maching-learning-/blob/main/GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Step 1: Import libraries
import pandas as pd
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Step 2: Load CSV
# Only the 'v1' (class label) and 'v2' (message text) columns are used; they are
# given self-describing names straight away.
data = (
    pd.read_csv('spam.csv', encoding='latin-1')[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'message'})
)

# Step 3: Encode labels
# The string labels are overwritten with integer class ids (spam=1, ham=0) and
# exposed as a long tensor, the dtype the NLL loss expects for targets.
le = LabelEncoder().fit(data['label'])
data['label'] = le.transform(data['label'])
y = torch.tensor(data['label'].to_numpy(), dtype=torch.long)

# Step 4: Convert text to numeric features
# TF-IDF over at most 500 terms, densified so it can serve both as the node
# feature matrix and as the input to the pairwise similarity below.
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(data['message']).toarray()
x = torch.tensor(X, dtype=torch.float)

# Step 5: Build edges based on similarity (cosine similarity)
# Every ordered pair (i, j) whose cosine similarity exceeds the threshold
# becomes an edge. The matrix is symmetric, so both directions are emitted,
# and the diagonal (i == i) is included for any message with a non-zero vector.
threshold = 0.5
similarity = cosine_similarity(X)
edge_index = torch.tensor(
    np.stack(np.nonzero(similarity > threshold)),  # shape (2, num_edges)
    dtype=torch.long,
)

# Step 6: Create PyG Data object
# Bundle node features, connectivity and labels into a single graph.
graph_data = Data(x=x, edge_index=edge_index, y=y)

# Step 7: Define GCN model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    The layer sizes are constructor arguments so the model can follow the
    actual feature width of the graph (e.g. when the TF-IDF vocabulary ends up
    smaller than the requested maximum). The defaults reproduce the original
    fixed architecture, so ``GCN()`` keeps working unchanged.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the hidden representation.
        num_classes: Number of output classes (2 = spam or ham).
    """

    def __init__(self, in_channels=500, hidden_channels=128, num_classes=2):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``
                (graph connectivity), such as a PyG ``Data`` instance.

        Returns:
            Tensor with one row per node and one column per class, holding
            log-softmax scores (suitable for ``torch.nn.NLLLoss``).
        """
        x, edge_index = data.x, data.edge_index
        hidden = F.relu(self.conv1(x, edge_index))
        logits = self.conv2(hidden, edge_index)
        return F.log_softmax(logits, dim=1)

# Step 8: Train GCN
# Seed first so that weight initialisation and the train/test split below are
# reproducible across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Hold out 20% of the nodes. All nodes stay in the graph (message passing sees
# every node's features), but only the training nodes contribute to the loss,
# so the accuracy reported in Step 9 is measured on labels the model never fit.
num_nodes = graph_data.y.size(0)
shuffled = torch.randperm(num_nodes)
num_train = int(0.8 * num_nodes)
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[shuffled[:num_train]] = True
test_mask = ~train_mask

model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.NLLLoss()

model.train()
for epoch in range(20):
    optimizer.zero_grad()
    out = model(graph_data)
    loss = loss_fn(out[train_mask], graph_data.y[train_mask])
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Step 9: Evaluate
# Run a fresh forward pass in eval mode without autograd. The `out` left over
# from the loop was produced before the final optimizer step, so it does not
# reflect the trained weights.
model.eval()
with torch.no_grad():
    out = model(graph_data)
pred = out.argmax(dim=1)

correct = pred == graph_data.y
train_accuracy = correct[train_mask].sum().item() / max(int(train_mask.sum()), 1)
# `accuracy` is the held-out (test) accuracy; max(..., 1) guards tiny graphs
# where the 20% split would be empty.
accuracy = correct[test_mask].sum().item() / max(int(test_mask.sum()), 1)
print("Train accuracy:", train_accuracy)
print("Accuracy:", accuracy)


ModuleNotFoundError: No module named 'torch_geometric'

In [17]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


After the installation is complete, please run the code cell again.