# **FCNN**

The Fully Connected Neural Network (FCNN) is a more advanced neural network. The text data is converted into numeric features, and a PyTorch-based model is trained to make predictions. The FCNN contains 2 layers, one of which is a hidden layer.
The FCNN also uses activation functions such as ReLU.
The downside of the FCNN is that its computational cost is higher than that of the SC.



In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
import pandas as pd
# Load the phishing dataset ("texts" configuration) from the Hugging Face Hub.
# NOTE(review): trust_remote_code=True allows the dataset repository to run its
# own loading script on this machine -- keep it only for sources you trust.
dataset = load_dataset("ealvaradob/phishing-dataset", "texts", trust_remote_code=True)
# Check the structure of the dataset
print("Dataset Head:")
print(dataset['train'][:5])  # Print the first 5 entries of the dataset

# Extract text and labels
train_split = dataset['train']
texts = [example['text'] for example in train_split]
labels = [example['label'] for example in train_split]

# Convert to DataFrame for better visualization and sanity check
df = pd.DataFrame({'text': texts, 'label': labels})
print("\nDataFrame Head:")
print(df.head())

# Step 1: Train-Test Split on the raw text.
# Splitting BEFORE vectorizing keeps the held-out documents out of the TF-IDF
# vocabulary and IDF weights, so the test metrics are not inflated by
# information leaked from the test set.
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)

# Step 2: TF-IDF Vectorization (fit on the training texts only; the test texts
# are projected onto the vocabulary learned from the training texts).
vectorizer = TfidfVectorizer(max_features=150000)
X_train = vectorizer.fit_transform(text_train).toarray()
X_test = vectorizer.transform(text_test).toarray()

# Convert to PyTorch Tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# Labels become float column vectors of shape (N, 1) via unsqueeze(1)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Step 3: Define Fully Connected Neural Network
class SimpleFCNN(nn.Module):
    """Two-layer fully connected network for binary classification.

    The input is passed through a linear layer with ``hidden_size`` units and
    a ReLU, then through a second linear layer with a single output unit whose
    value is squashed by a sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Build the layers.

        Args:
            input_size: Number of features in each input sample.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        # Input -> hidden projection
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        # Hidden -> single output unit
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)
        # Maps the output unit to a value between 0 and 1
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the network on ``x`` and return one sigmoid output per sample.

        Args:
            x: Input tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with a last dimension of size 1 holding the sigmoid outputs.
        """
        hidden = F.relu(self.fc1(x))
        raw_output = self.fc2(hidden)
        return self.sigmoid(raw_output)

# Initialize Model
input_size = X_train_tensor.shape[1]  # one input unit per feature column
hidden_size = 128  # Number of neurons in the hidden layer
model = SimpleFCNN(input_size, hidden_size)

# Define Loss and Optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train the Model
epochs = 10
batch_size = 64
num_train = X_train_tensor.size(0)
for epoch in range(epochs):
    model.train()
    # Draw a fresh random sample order for every epoch
    permutation = torch.randperm(num_train)
    running_loss = 0.0  # sum of per-sample losses accumulated over this epoch

    for i in range(0, num_train, batch_size):
        optimizer.zero_grad()

        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

        # Forward Pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # Backward Pass and Optimization
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * batch_x.size(0)

    # Report the mean loss over the whole epoch instead of the last batch only,
    # which is a noisy indicator of training progress.
    epoch_loss = running_loss / max(num_train, 1)
    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss:.4f}")

# Step 5: Evaluate the Model
model.eval()
with torch.no_grad():
    y_pred_probs = model(X_test_tensor)
    y_pred = (y_pred_probs > 0.5).int()  # threshold the outputs at 0.5

# The scikit-learn metrics are documented for 1-D label arrays, so flatten the
# (N, 1) tensors into integer NumPy vectors explicitly instead of relying on
# implicit tensor-to-array conversion.
y_true_np = y_test_tensor.squeeze(1).int().numpy()
y_pred_np = y_pred.squeeze(1).numpy()

# Print Metrics
accuracy = accuracy_score(y_true_np, y_pred_np)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_true_np, y_pred_np, target_names=["Legitimate", "Phishing"]))

```
DataFrame Head:
                                                text  label
0  re : 6 . 1100 , disc : uniformitarianism , re ...      0
1  the other side of * galicismos * * galicismo *...      0
2  re : equistar deal tickets are you still avail...      0
3  \nHello I am your hot lil horny toy.\n    I am...      1
4  software at incredibly low prices ( 86 % lower...      1

Epoch [1/10], Loss: 0.1542
Epoch [2/10], Loss: 0.0516
Epoch [3/10], Loss: 0.0044
Epoch [4/10], Loss: 0.0032
Epoch [5/10], Loss: 0.0022
Epoch [6/10], Loss: 0.0041
Epoch [7/10], Loss: 0.0010
Epoch [8/10], Loss: 0.0010
Epoch [9/10], Loss: 0.0006
Epoch [10/10], Loss: 0.0011

Accuracy: 0.9767

Classification Report:
              precision    recall  f1-score   support

  Legitimate       0.98      0.99      0.98      2493
    Phishing       0.98      0.96      0.97      1535

    accuracy                           0.98      4028
   macro avg       0.98      0.97      0.98      4028
weighted avg       0.98      0.98      0.98      4028

```

