In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load dataset
# Later cells read the 'Question' and 'Category' columns from this table.
DATASET_PATH = "chatbot_dataset.csv"
df = pd.read_csv(DATASET_PATH)

In [3]:
# Encode labels
# Learn the Category -> integer-id mapping first, then apply it; the fitted
# encoder is kept so ids can be mapped back to category names later.
label_encoder = LabelEncoder()
label_encoder.fit(df['Category'])
df['label'] = label_encoder.transform(df['Category'])

In [4]:
# Split data
# 80% of the rows go to training and 20% to testing; random_state pins the shuffle.
questions, labels = df['Question'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    questions,
    labels,
    test_size=0.2,
    random_state=42,
)


In [11]:
# Convert text to a numerical representation using a sentence-embedding model
from sentence_transformers import SentenceTransformer

In [12]:
# Embed the question text of each split with the same pretrained encoder.
model_embed = SentenceTransformer("all-MiniLM-L6-v2")
X_train_vectors, X_test_vectors = (
    model_embed.encode(split.tolist()) for split in (X_train, X_test)
)

In [13]:
# Features become float32 tensors; targets become int64 (torch.long) tensors,
# the dtype PyTorch classification losses expect for class indices.
X_train_torch, X_test_torch = (
    torch.tensor(vectors, dtype=torch.float32)
    for vectors in (X_train_vectors, X_test_vectors)
)
y_train_torch, y_test_torch = (
    torch.tensor(targets.to_numpy(), dtype=torch.long)
    for targets in (y_train, y_test)
)


In [14]:
# Define Chatbot Model
class ChatbotModel(nn.Module):
    """Two-layer feed-forward classifier over fixed-size input vectors.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> output_size) -> log-softmax.

    Because ``forward`` returns log-probabilities, ``nn.NLLLoss`` is the
    matching training criterion.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the hidden layer.
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Compute class log-probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has ``input_size`` features; either
                a batch ``(batch, input_size)`` or a single vector
                ``(input_size,)``.

        Returns:
            Tensor of log-probabilities normalised over the last (class)
            dimension, with the same leading dimensions as ``x``.
        """
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        # Normalise over the last axis rather than a hard-coded axis 1 so that
        # an unbatched 1-D input works; for 2-D batches the result is identical.
        return F.log_softmax(logits, dim=-1)

In [15]:
# Initialize Model
# Layer sizes: the hidden width is a free hyperparameter, while the input and
# output widths are dictated by the embeddings and the label set.
hidden_size = 128  # Tunable
input_size = X_train_vectors.shape[1]  # features per embedded question
output_size = len(label_encoder.classes_)  # one output per distinct category


In [16]:
# Seed PyTorch so weight initialisation (and hence training) is repeatable
# across Restart & Run All, matching the fixed random_state used for the split.
torch.manual_seed(42)
model = ChatbotModel(input_size, hidden_size, output_size)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# ChatbotModel.forward already returns log-probabilities (log_softmax), so the
# matching criterion is NLLLoss. CrossEntropyLoss expects raw logits and would
# apply log-softmax a second time.
criterion = nn.NLLLoss()

In [None]:
# Training Loop
# Full-batch training: every epoch runs one forward/backward pass over the
# whole training tensor followed by a single optimizer step.
num_epochs = 50
model.train()  # make training mode explicit in case the model was put in eval mode
for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report every 10th epoch and also the last one, so the final training
    # loss is always visible.
    if epoch % 10 == 0 or epoch == num_epochs - 1:
        print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}")

In [18]:
# Evaluate Model
model.eval()  # switch to inference mode before scoring
# No gradients are needed for scoring, so skip building the autograd graph.
with torch.no_grad():
    test_log_probs = model(X_test_torch)
# The predicted class is the index with the highest score in each row.
y_test_pred = torch.argmax(test_log_probs, dim=1)
accuracy = accuracy_score(y_test, y_test_pred.numpy())

In [None]:
# Report the accuracy as a percentage with two decimal places.
print(
    f"Model Accuracy: {accuracy * 100:.2f}%"
)

In [24]:
import torch

# Persist the trained model's parameters (its state_dict) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, "chatbot_model.pth")
