In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
import pandas as pd


In [26]:
# Load the preprocessed IFND dataset and turn it into a numeric feature matrix.
# NOTE(review): this is an absolute, machine-specific path. It is kept in one named
# constant so it can be repointed in a single place; consider a path relative to
# the notebook instead.
DATA_PATH = 'C:/Users/Rudra Thakar/Jupyter/preprocessed_IFND_dataset.csv'
MAX_FEATURES = 5000  # size of the bag-of-words vocabulary kept by CountVectorizer

df = pd.read_csv(DATA_PATH)

# Features: the raw statement text.
X = df['Statement']

# Target: 1 for statements labelled true, 0 for everything else.
# The label is normalised (string cast, stripped, upper-cased) before comparing so
# the encoding does not silently collapse to all zeros when pandas parses the column
# as booleans (True -> 'True' -> 'TRUE') or when the labels differ in case/whitespace.
# Missing labels become the string 'nan' and therefore still map to 0, as before.
y = (
    df['Label']
    .astype(str)
    .str.strip()
    .str.upper()
    .eq('TRUE')
    .astype('int64')
)

# Convert text data to numerical data using CountVectorizer (token counts).
# NOTE(review): the vocabulary is fitted on the full dataset, i.e. before the
# train/validation/test split performed in a later cell.
vectorizer = CountVectorizer(max_features=MAX_FEATURES)
# .toarray() materialises the sparse count matrix as a dense NumPy array, which the
# later tensor conversion needs; this is the memory-heavy step of the notebook.
X_vectorized = vectorizer.fit_transform(X).toarray()

In [27]:
# Sanity check: report how many distinct values the encoded target `y` contains.
print(f"Number of unique values in y: {y.nunique()}")

Number of unique values in y: 2


In [28]:
# Two-stage hold-out split.
# Stage 1: keep 70% of the rows for training and set the remaining 30% aside.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_vectorized,
    y,
    test_size=0.3,
    random_state=1,
)
# Stage 2: cut the held-out 30% in half, giving validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=9,
)


In [29]:
# Inspect the training labels (pandas Series; the original row index is preserved by the split).
print(y_train)

14676    1
40635    0
35219    1
29293    1
47372    0
        ..
50057    0
32511    1
5192     1
12172    1
33003    1
Name: Label, Length: 39699, dtype: int64


In [30]:
# Convert the split arrays/Series into the float32 tensors the network consumes.
def _features_to_tensor(features):
    """Copy a feature matrix into a float32 tensor."""
    return torch.tensor(features, dtype=torch.float32)


def _labels_to_tensor(labels):
    """Copy a label Series into a float32 column tensor of shape (n_samples, 1)."""
    return torch.tensor(labels.values, dtype=torch.float32).unsqueeze(1)


X_train_tensor, X_val_tensor, X_test_tensor = map(
    _features_to_tensor, (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = map(
    _labels_to_tensor, (y_train, y_val, y_test)
)


In [31]:
# Define the ANN model
class ANNModel(nn.Module):
    """Feed-forward binary classifier: two ReLU hidden layers and a sigmoid output.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer (default 128).
        hidden2: Width of the second hidden layer (default 64).

    The forward pass returns sigmoid probabilities, one per sample, so the model
    is meant to be paired with ``nn.BCELoss`` rather than a logits-based loss.
    """

    def __init__(self, input_size, hidden1=128, hidden2=64):
        super().__init__()
        # Layers are created in the order fc1, fc2, fc3 and keep these attribute
        # names so state_dict keys and seeded initialisation stay unchanged.
        self.fc1 = nn.Linear(input_size, hidden1)  # First hidden layer
        self.relu = nn.ReLU()  # Stateless, shared by both hidden layers
        self.fc2 = nn.Linear(hidden1, hidden2)  # Second hidden layer
        self.fc3 = nn.Linear(hidden2, 1)  # Output layer (single unit)
        self.sigmoid = nn.Sigmoid()  # Sigmoid activation for binary classification

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, 1)`` probabilities."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Initialize the model, loss function, and optimizer
# Seed PyTorch's global RNG before the model is built: the layer weights are drawn
# randomly at construction time, so without a seed the logged losses and accuracies
# differ on every run even though the data splits are seeded.
SEED = 42
torch.manual_seed(SEED)

input_size = X_train.shape[1]  # number of feature columns in the training matrix
model = ANNModel(input_size)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss on the model's sigmoid outputs
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [34]:
# Train the model (full batch: each epoch is one optimizer step on the whole training set)
def _accuracy_from_probs(probs, targets):
    """Threshold probabilities at 0.5 and score the hard predictions against `targets`."""
    hard_preds = (probs > 0.5).float()
    return accuracy_score(targets.numpy(), hard_preds.numpy())


epochs = 20
for epoch in range(epochs):
    # --- Training step ---
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Training accuracy is taken from the forward pass made before this step's
    # weight update, so no second pass over the training set is needed.
    with torch.no_grad():
        train_accuracy = _accuracy_from_probs(outputs, y_train_tensor)

    # --- Validation ---
    model.eval()
    with torch.no_grad():
        val_accuracy = _accuracy_from_probs(model(X_val_tensor), y_val_tensor)

    # Print loss, training accuracy, and validation accuracy
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Training Accuracy: {train_accuracy * 100:.2f}%, Validation Accuracy: {val_accuracy * 100:.2f}%')


Epoch 1/20, Loss: 0.6581, Training Accuracy: 89.92%, Validation Accuracy: 89.69%
Epoch 2/20, Loss: 0.6515, Training Accuracy: 90.32%, Validation Accuracy: 90.16%
Epoch 3/20, Loss: 0.6441, Training Accuracy: 90.69%, Validation Accuracy: 90.68%
Epoch 4/20, Loss: 0.6359, Training Accuracy: 91.16%, Validation Accuracy: 91.16%
Epoch 5/20, Loss: 0.6270, Training Accuracy: 91.59%, Validation Accuracy: 91.48%
Epoch 6/20, Loss: 0.6174, Training Accuracy: 91.94%, Validation Accuracy: 91.74%
Epoch 7/20, Loss: 0.6070, Training Accuracy: 92.30%, Validation Accuracy: 91.92%
Epoch 8/20, Loss: 0.5958, Training Accuracy: 92.62%, Validation Accuracy: 92.08%
Epoch 9/20, Loss: 0.5838, Training Accuracy: 92.82%, Validation Accuracy: 92.19%
Epoch 10/20, Loss: 0.5710, Training Accuracy: 92.98%, Validation Accuracy: 92.25%
Epoch 11/20, Loss: 0.5574, Training Accuracy: 93.10%, Validation Accuracy: 92.32%
Epoch 12/20, Loss: 0.5430, Training Accuracy: 93.24%, Validation Accuracy: 92.49%
Epoch 13/20, Loss: 0.5279

In [35]:
# Final check on the held-out test set
model.eval()
with torch.no_grad():
    test_probs = model(X_test_tensor)

# Probabilities above 0.5 count as the positive class.
test_labels_pred = (test_probs > 0.5).float()
test_accuracy = accuracy_score(y_test_tensor.numpy(), test_labels_pred.numpy())
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 93.58%
