In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize

In [None]:
# Download the 'punkt_tab' tokenizer resource (presumably what word_tokenize
# needs in the preprocessing step — confirm against the installed NLTK version).
import nltk
nltk.download(info_or_id='punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Load the labelled dataset from the Excel workbook.
data = pd.read_excel('/content/finaldataset.xlsx')

# Fail fast with a readable message if a column used later in the notebook is
# absent, instead of a bare KeyError deep inside .apply() or Dataset.__getitem__.
required_columns = {'Question', 'Context', 'Answer', 'Label'}
missing_columns = required_columns - set(data.columns)
if missing_columns:
    raise KeyError(f"finaldataset.xlsx is missing required columns: {sorted(missing_columns)}")

In [None]:
def preprocess_text(text):
    """Lower-case ``text`` and split it into word tokens.

    Args:
        text: A cell value from the dataset. Non-string values are converted
            with ``str()`` before tokenising.

    Returns:
        list[str]: The tokens produced by ``word_tokenize``. Missing values
        (``None``, ``NaN``, ``pd.NA``) yield an empty list.
    """
    # An empty spreadsheet cell arrives as NaN; str(NaN) is the literal string
    # "nan", which would otherwise be tokenised into a bogus ['nan'] token.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return []
    return word_tokenize(str(text).lower())

In [None]:
# Tokenise each raw text column into its companion "*_tokens" column.
token_columns = (
    ('Question', 'question_tokens'),
    ('Context', 'context_tokens'),
    ('Answer', 'answer_tokens'),
)
for source_col, token_col in token_columns:
    data[token_col] = data[source_col].apply(preprocess_text)

In [None]:
# Build the embedding corpus: one "sentence" per dataset row, formed by
# concatenating that row's question, context and answer token lists.
all_tokens = (
    data['question_tokens']
    .add(data['context_tokens'])
    .add(data['answer_tokens'])
)

# Train 100-dimensional Word2Vec embeddings on that corpus.
# sg=1 selects the skip-gram objective; min_count=1 keeps every token.
w2v_model = Word2Vec(
    sentences=all_tokens,
    vector_size=100,
    window=5,
    min_count=1,
    sg=1,
)

In [None]:
class QuestionDataset(Dataset):
    """Map-style dataset turning tokenised rows into fixed-size embedding matrices.

    Each item is ``(question, context, answer, label)`` where the first three
    are ``float32`` tensors of shape ``(max_len, embed_dim)`` and ``label`` is a
    scalar ``long`` tensor read from the ``'Label'`` column.

    Args:
        dataframe: Frame with ``question_tokens``, ``context_tokens``,
            ``answer_tokens`` (token lists) and ``Label`` columns.
        w2v_model: Object exposing ``.wv`` with ``in`` / ``[]`` lookup and a
            ``vector_size`` attribute (e.g. a trained gensim ``Word2Vec``).
        max_len: Number of token vectors kept per sequence; shorter sequences
            are zero-padded at the end, longer ones are truncated.
    """

    def __init__(self, dataframe, w2v_model, max_len=50):
        self.data = dataframe
        self.w2v = w2v_model
        self.max_len = max_len
        # Take the embedding width from the model instead of hard-coding 100,
        # so the padding always matches the trained vectors.
        self.embed_dim = w2v_model.wv.vector_size

    def vectorize_tokens(self, tokens):
        """Return a ``(max_len, embed_dim)`` float32 array for ``tokens``.

        Tokens missing from the embedding vocabulary are skipped; rows past
        the last known token stay zero (padding).
        """
        wv = self.w2v.wv
        matrix = np.zeros((self.max_len, self.embed_dim), dtype=np.float32)
        filled = 0
        for token in tokens:
            if filled == self.max_len:
                break  # truncate: everything beyond max_len is dropped
            if token in wv:
                matrix[filled] = wv[token]
                filled += 1
        return matrix

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        # Each vectorize_tokens call returns a fresh array, so sharing its
        # memory with the tensor via from_numpy is safe.
        question_vec = torch.from_numpy(self.vectorize_tokens(row['question_tokens']))
        context_vec = torch.from_numpy(self.vectorize_tokens(row['context_tokens']))
        answer_vec = torch.from_numpy(self.vectorize_tokens(row['answer_tokens']))
        label = torch.tensor(row['Label'], dtype=torch.long)
        return question_vec, context_vec, answer_vec, label


In [None]:
# Stratified 80/20 split on 'Label' with a fixed random_state.
train_df, test_df = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data['Label'],
)

# Wrap both partitions in the embedding dataset.
train_dataset, test_dataset = (
    QuestionDataset(frame, w2v_model) for frame in (train_df, test_df)
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [None]:
class CNN_GRU_Classifier(nn.Module):
    """Three-branch classifier over (question, context, answer) embedding sequences.

    Question and context each pass through Conv1d -> ReLU -> MaxPool1d(2) and
    then a GRU; the answer goes straight into a GRU. The final time step of
    each GRU is used as that sequence's summary. A linear "keyword match"
    layer is applied to the element-wise product of the question and context
    summaries, and all four vectors are concatenated for the classifier head.

    Args:
        embed_dim: Size of each input token vector (last input dimension).
        conv_channels: Output channels of the question/context convolutions.
        hidden_dim: Hidden size of every GRU and of the keyword-match layer.
        num_classes: Number of output logits.
        dropout: Dropout probability in the classifier head.
        fc_dim: Width of the hidden layer in the classifier head.

    The defaults reproduce the original fixed architecture (100/128/64/2/0.3/128).
    Layers are created in the same order under the same attribute names, so
    existing ``state_dict`` keys still match.
    """

    def __init__(self, embed_dim=100, conv_channels=128, hidden_dim=64,
                 num_classes=2, dropout=0.3, fc_dim=128):
        super().__init__()

        # Convolutional feature extractors for question and context.
        self.cnn_q = self._conv_block(embed_dim, conv_channels)
        self.cnn_c = self._conv_block(embed_dim, conv_channels)

        # Recurrent encoders; the answer branch has no CNN in front of it.
        self.gru_q = nn.GRU(conv_channels, hidden_dim, batch_first=True)
        self.gru_c = nn.GRU(conv_channels, hidden_dim, batch_first=True)
        self.gru_a = nn.GRU(embed_dim, hidden_dim, batch_first=True)

        # Projection of the question*context interaction vector.
        self.keyword_match = nn.Linear(hidden_dim, hidden_dim)

        # Head input is [q, c, a, keyword] -> 4 * hidden_dim features.
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 4, fc_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(fc_dim, num_classes),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Conv1d(k=3, pad=1) -> ReLU -> MaxPool1d(2); halves the sequence length."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

    def forward(self, question, context, answer):
        """Compute class logits.

        Args:
            question, context, answer: Float tensors of shape
                ``(batch, seq_len, embed_dim)``. Question and context need
                ``seq_len >= 2`` because of the pooling layer.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        # Conv1d expects (batch, channels, seq); GRU(batch_first) expects
        # (batch, seq, features) — hence the permute before and after the CNN.
        q = self.cnn_q(question.permute(0, 2, 1))
        q, _ = self.gru_q(q.permute(0, 2, 1))
        q_out = q[:, -1, :]

        c = self.cnn_c(context.permute(0, 2, 1))
        c, _ = self.gru_c(c.permute(0, 2, 1))
        c_out = c[:, -1, :]

        a, _ = self.gru_a(answer)
        a_out = a[:, -1, :]
        # NOTE(review): taking the last time step includes any trailing padding
        # steps the caller appended — confirm this is intended.

        # Element-wise product as a simple question/context similarity signal.
        keyword_features = self.keyword_match(q_out * c_out)

        combined = torch.cat((q_out, c_out, a_out, keyword_features), dim=1)
        return self.fc(combined)

In [None]:
# Seed PyTorch's RNGs so weight initialisation and DataLoader shuffling are
# repeatable across "Restart & Run All" (same value as the split's random_state).
# Some GPU kernels may still be non-deterministic.
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_GRU_Classifier().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # enable dropout for training
    total_loss = 0.0
    for question, context, answer, labels in train_loader:
        question, context, answer, labels = question.to(device), context.to(device), answer.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(question, context, answer)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Reported value is the mean of the per-batch mean losses for this epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')


Epoch [1/10], Loss: 0.6866
Epoch [2/10], Loss: 0.6690
Epoch [3/10], Loss: 0.6193
Epoch [4/10], Loss: 0.5840
Epoch [5/10], Loss: 0.5609
Epoch [6/10], Loss: 0.5648
Epoch [7/10], Loss: 0.5565
Epoch [8/10], Loss: 0.5506
Epoch [9/10], Loss: 0.5301
Epoch [10/10], Loss: 0.5227


In [None]:
# Measure accuracy on the test split: eval mode disables dropout and
# no_grad skips autograd bookkeeping.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        question, context, answer, labels = (tensor.to(device) for tensor in batch)
        outputs = model(question, context, answer)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')


Accuracy: 73.61%


In [None]:
!pip install torchviz

Collecting torchviz
  Downloading torchviz-0.0.3-py3-none-any.whl.metadata (2.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->torchviz)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->torchviz)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->torchviz)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->torchviz)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->torchviz)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->torchviz)
  Downloading nvidia_cufft_cu12-11.2.1.3-py

In [None]:
from torchviz import make_dot

# Random stand-in inputs shaped [batch size, sequence length, embedding size];
# only the shapes matter for tracing the graph.
sample_question, sample_context, sample_answer = (
    torch.randn(1, 50, 100).to(device) for _ in range(3)
)

# One forward pass records the autograd graph that torchviz draws.
output = model(sample_question, sample_context, sample_answer)

# Render the graph to PNG; the cell displays the value returned by render().
graph = make_dot(output, params=dict(model.named_parameters()))
graph.render("cnn_gru_architecture", format="png")


'cnn_gru_architecture.png'