In [1]:
%pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt

# Load the pre-trained embedding model
emb_model = SentenceTransformer('all-MiniLM-L6-v2')

  from tqdm.autonotebook import tqdm, trange


In [1]:
# Generate Sample Data
knowledge_questions = [
    "Who directed The Godfather?",
    "What's the release year for Inception?",
    "Who was the screenwriter for The Mask?",
    "What's the box office revenue of Titanic?",
    "Who composed the score for The Lord of the Rings?",
    "What's the MPAA rating for Jaws?",
    "Who starred in Pulp Fiction?",
    "Who edited Jurassic Park?",
    "What awards did Parasite win?",
    "What's the runtime of Gladiator?",
    "Who produced The Godfather Part II?",
    "Who wrote the screenplay for Schindler's List?",
    "What is the production budget of Avatar?",
    "Who won an Oscar for the movie Joker?",
    "What's the genre of The Silence of the Lambs?",
    "Who was the director of Titanic?",
    "How long is the movie The Irishman?",
    "What's the release date of Forrest Gump?",
    "Who played the lead role in The Matrix?",
    "Who is the composer for the soundtrack of Dune?",
    "How many Academy Awards did La La Land win?",
    "Who was the cinematographer for Blade Runner 2049?",
    "What awards did Moonlight receive?",
    "Who is the executive producer of The Shawshank Redemption?",
    "Who wrote the book that inspired The Shining?",
    "What's the budget of Avengers: Endgame?",
    "Who directed The Grand Budapest Hotel?",
    "Who was the editor for The Wolf of Wall Street?",
    "How many Oscars did Parasite win?",
    "Who played Frodo in The Lord of the Rings?",
    "Who is the director of 1917?",
    "What's the age rating for Toy Story 4?",
    "Who did the visual effects for Inception?",
    "What studio produced Frozen?",
    "Who was the lead actress in Gravity?",
    "What's the box office collection of Black Panther?",
    "Who wrote the screenplay for Gladiator?",
    "What year was The Lion King originally released?",
    "Who directed the movie Interstellar?",
    "What's the MPAA rating of The Dark Knight?",
    "What is the genre of Good Neighbors?",
    "Do you know Shoplifters? What genre does this movie have?",
    "What genres are in Good Will Hunting?",
    "Tell me all of the players in Avatar",
    "What do you think of the genre of Shoplifters?",
]

multimedia_questions = [
    "Show me a picture of Leonardo DiCaprio.",
    "What does Scarlett Johansson look like?",
    "Can I see an image of Robert De Niro?",
    "Show me Harrison Ford’s appearance.",
    "Let me know what Viola Davis looks like.",
    "Can I see a picture of Emma Stone?",
    "Show me Robert Downey Jr.'s photo.",
    "What does Gal Gadot look like?",
    "Can I see what Tom Cruise looks like?",
    "Display an image of Gal Gadot.",
    "Show me a recent photo of Keanu Reeves.",
    "What does Denzel Washington look like these days?",
    "Can I see an image of Angelina Jolie?",
    "What does Matt Damon look like?",
    "Show me a picture of Natalie Portman.",
    "What does Morgan Freeman look like now?",
    "Can you display an image of Hugh Jackman?",
    "Show me a photo of Charlize Theron.",
    "What does Johnny Depp look like currently?",
    "Can I see a picture of Chris Hemsworth?",
    "Show me what Jennifer Lawrence looks like.",
    "What does Ryan Gosling look like?",
    "Can I see an image of Zoe Saldana?",
    "Show me a photo of Anne Hathaway.",
    "What does Chris Evans look like?",
    "Show me a recent image of Brad Pitt.",
    "Can I see a picture of Emily Blunt?",
    "Show me what Tom Hardy looks like.",
    "What does Sandra Bullock look like now?",
    "Display an image of Will Smith.",
    "What does Jason Momoa look like?",
    "Can I see an image of Robert Pattinson?",
    "Show me a picture of Mila Kunis.",
    "What does Benedict Cumberbatch look like?",
    "Can you show me a photo of Kristen Stewart?",
    "Show me a recent image of Gal Gadot.",
    "What does Scarlett Johansson look like in her latest movie?",
    "Can I see a picture of Margot Robbie?",
    "Show me what Mark Ruffalo looks like.",
    "What does Idris Elba look like now?",
]

recommendation_questions = [
    "Recommend movies like Jurassic Park.",
    "Suggest films similar to The Matrix.",
    "Can you recommend dramas like The Godfather?",
    "Recommend animated films similar to Toy Story.",
    "What are movies like Casablanca?",
    "Any horror films like Halloween?",
    "Suggest romantic comedies like Notting Hill.",
    "Recommend science fiction movies like Blade Runner.",
    "Can you suggest musicals like La La Land?",
    "Suggest family films like Home Alone.",
    "What are some action movies similar to Die Hard?",
    "Recommend movies like The Notebook.",
    "Can you suggest thrillers like Seven?",
    "Any fantasy films similar to Harry Potter?",
    "Recommend heist movies like Ocean's Eleven.",
    "Suggest animated films like Finding Nemo.",
    "What are some movies similar to The Shawshank Redemption?",
    "Recommend sci-fi films like Interstellar.",
    "Can you recommend horror films like The Conjuring?",
    "Suggest adventure films like Indiana Jones.",
    "Any films like Pulp Fiction?",
    "Recommend romantic dramas like Pride and Prejudice.",
    "Suggest superhero movies like The Avengers.",
    "Can you recommend biopics like Bohemian Rhapsody?",
    "What are some comedies like Superbad?",
    "Suggest movies based on true stories like Spotlight.",
    "Recommend war films like Saving Private Ryan.",
    "Any sci-fi films like The Terminator?",
    "Can you suggest documentaries like Free Solo?",
    "Recommend courtroom dramas like A Few Good Men.",
    "What are some musicals like The Greatest Showman?",
    "Suggest dystopian movies like The Hunger Games.",
    "Recommend animated movies like Moana.",
    "Any crime thrillers similar to The Departed?",
    "Suggest movies with time travel like Back to the Future.",
    "What are some horror movies like A Nightmare on Elm Street?",
    "Recommend psychological thrillers like Black Swan.",
    "Can you suggest classic movies like Gone with the Wind?",
    "Any adventure films similar to Jumanji?",
    "Recommend spy movies like James Bond.",
]

smalltalk = [
    "Hi, how are you?",
    "Hello! What's up?",
    "Good morning!",
    "Good afternoon!",
    "Good evening!",
    "Hey there!",
    "How's your day going?",
    "Nice to meet you!",
    "What's new with you?",
    "How's it going?",
    "Thank you!",
    "You're welcome!",
    "No problem!",
    "Have a great day!",
    "See you later!",
    "Take care!",
    "How can I help you?",
    "What's on your mind?",
    "Long time no see!",
    "How have you been?",
    "Goodbye!",
    "Catch you later!",
    "What's happening?",
    "It's nice to chat with you!",
    "Hope you're doing well!",
    "I'm here if you need anything!",
    "Have a nice weekend!",
    "Let's talk about something fun!",
    "You're awesome!",
    "Thanks for stopping by!"
]


In [3]:
# Prepare Data
sentences = knowledge_questions + multimedia_questions + recommendation_questions + smalltalk
labels = ( ['knowledge'] * len(knowledge_questions) + 
            ['multimedia'] * len(multimedia_questions) + 
            ['recommendation'] * len(recommendation_questions) + 
            ['smalltalk'] * len(smalltalk)
)

# Convert labels to numeric
label_mapping = {'knowledge': 0, 'multimedia': 1, 'recommendation': 2, 'smalltalk': 3}
y = [label_mapping[label] for label in labels]

# Embedding the Sentences
print("Generating embeddings for the sentences...")
X = emb_model.encode(sentences)

# Split Data into Train and Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Generating embeddings for the sentences...


In [8]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
import torch
import numpy as np

# Prepare Dataset for PyTorch
class TextDataset(Dataset):
    def __init__(self, embeddings, labels):
        self.embeddings = embeddings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return torch.tensor(self.embeddings[idx], dtype=torch.float32), self.labels[idx]

# Convert data to PyTorch tensors
train_data = TextDataset(X_train, y_train)
test_data = TextDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Define Neural Network
class ClassifierNN(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassifierNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize Model
input_dim = X_train.shape[1]  # Embedding dimension
num_classes = len(label_mapping)
model = ClassifierNN(input_dim, num_classes)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        embeddings, labels = batch
        embeddings, labels = embeddings.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for batch in test_loader:
        embeddings, labels = batch
        embeddings = embeddings.to(device)
        outputs = model(embeddings)
        predictions = torch.argmax(outputs, dim=1).cpu().numpy()
        y_pred.extend(predictions)
        y_true.extend(labels)

# Classification Report
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_mapping.keys()))

# Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))


Epoch 1/50, Loss: 1.3829
Epoch 2/50, Loss: 1.3640
Epoch 3/50, Loss: 1.3408
Epoch 4/50, Loss: 1.3080
Epoch 5/50, Loss: 1.2654
Epoch 6/50, Loss: 1.2116
Epoch 7/50, Loss: 1.1296
Epoch 8/50, Loss: 1.0312
Epoch 9/50, Loss: 0.9161
Epoch 10/50, Loss: 0.7805
Epoch 11/50, Loss: 0.6456
Epoch 12/50, Loss: 0.5161
Epoch 13/50, Loss: 0.3768
Epoch 14/50, Loss: 0.2664
Epoch 15/50, Loss: 0.1860
Epoch 16/50, Loss: 0.1355
Epoch 17/50, Loss: 0.0869
Epoch 18/50, Loss: 0.0588
Epoch 19/50, Loss: 0.0452
Epoch 20/50, Loss: 0.0348
Epoch 21/50, Loss: 0.0243
Epoch 22/50, Loss: 0.0216
Epoch 23/50, Loss: 0.0178
Epoch 24/50, Loss: 0.0148
Epoch 25/50, Loss: 0.0139
Epoch 26/50, Loss: 0.0118
Epoch 27/50, Loss: 0.0097
Epoch 28/50, Loss: 0.0091
Epoch 29/50, Loss: 0.0085
Epoch 30/50, Loss: 0.0072
Epoch 31/50, Loss: 0.0076
Epoch 32/50, Loss: 0.0053
Epoch 33/50, Loss: 0.0053
Epoch 34/50, Loss: 0.0045
Epoch 35/50, Loss: 0.0042
Epoch 36/50, Loss: 0.0047
Epoch 37/50, Loss: 0.0052
Epoch 38/50, Loss: 0.0040
Epoch 39/50, Loss: 0.

In [11]:
# Function to Classify a New Sentence
def classify_sentence(test_sentence):
    test_embedding = emb_model.encode([test_sentence])  # Ensure this uses the same model as training

    # Convert embedding to PyTorch tensor
    test_tensor = torch.tensor(test_embedding, dtype=torch.float32).to(device)

    # Make prediction
    model.eval()
    with torch.no_grad():
        output = model(test_tensor)
        prediction = torch.argmax(output, dim=1).item()  # Get the class index

    # Map numeric prediction to label
    predicted_label = [label for label, index in label_mapping.items() if index == prediction][0]
    # print(f"Input: {test_sentence}")
    # print(f"Predicted Label: {predicted_label}")
    return predicted_label

In [12]:
# Example Usage with Multiple Sentences
new_sentences = [
    "What is the release date for Titanic?",
    "Can you show me an image of Emma Watson?",
    "Suggest a movie similar to The Godfather.",
    "Who was the director of Avatar?",
    "Show me a photo of Tom Hanks.",
    "Recommend horror films like The Exorcist.",
    "What's the budget of Titanic?",
    "Display an image of Brad Pitt.",
    "Suggest science fiction movies like Interstellar.",
    "Who won an Oscar for The Revenant?",
    "Can I see a picture of Ryan Gosling?",
    "What is the genre of Inception?",
    "Show me what Natalie Portman looks like.",
    "Suggest romantic movies like Pride and Prejudice.",
    "Who produced The Godfather?",
    "What does Keanu Reeves look like?",
    "Recommend animated films similar to Shrek.",
    "Can you show me a photo of Johnny Depp?",
    "What is the runtime of The Dark Knight?",
    "Recommend family-friendly movies like Finding Nemo.",
    "Who composed the music for Star Wars?",
    "Show me a picture of Angelina Jolie.",
    "What is the box office revenue of Avengers: Endgame?",
    "Suggest comedy films like The Hangover.",
    "Can I see what Benedict Cumberbatch looks like?",
    "Recommend dramas similar to Schindler's List.",
    "Who edited the movie Gladiator?",
    "Show me a photo of Chris Hemsworth.",
    "What is the rating for Jaws?",
    "Suggest musicals like The Greatest Showman.",
    "Can you show me a picture of Jennifer Lawrence?",
    "What awards did The Shape of Water win?",
    "Recommend movies like The Lion King.",
    "What does Brad Pitt look like?",
    "Who wrote the screenplay for Inception?",
    "Suggest thriller movies like Gone Girl.",
    "Show me what Scarlett Johansson looks like.",
    "What is the genre of The Matrix?",
    "Recommend action films like Die Hard.",
    "Can I see a picture of Charlize Theron?",
    "What is the MPAA rating for Finding Nemo?",
    "Suggest animated films like Toy Story.",
    "Who was the executive producer for Titanic?",
    "Show me a picture of Morgan Freeman.",
    "Recommend horror films from the 1980s.",
    "What does Robert De Niro look like?",
    "Who starred in The Shawshank Redemption?",
    "Can Suggest adventure movies like Indiana Jones, thank you!",
    "Heyo, Show me an image of Matt Damon.",
    "Hi, What is the release year of The Godfather?",
    "Thank you have recommended be good answers!"
]
# Classify and Display Results in a DataFrame
results = []

for sentence in new_sentences:
    category = classify_sentence(sentence)
    results.append({'Sentence': sentence, 'Category': category})

# Create a DataFrame to display the results
results_df = pd.DataFrame(results)
print(results_df)


                                             Sentence        Category
0               What is the release date for Titanic?       knowledge
1            Can you show me an image of Emma Watson?      multimedia
2           Suggest a movie similar to The Godfather.  recommendation
3                     Who was the director of Avatar?       knowledge
4                       Show me a photo of Tom Hanks.      multimedia
5           Recommend horror films like The Exorcist.  recommendation
6                       What's the budget of Titanic?       knowledge
7                      Display an image of Brad Pitt.      multimedia
8   Suggest science fiction movies like Interstellar.  recommendation
9                  Who won an Oscar for The Revenant?       knowledge
10               Can I see a picture of Ryan Gosling?      multimedia
11                    What is the genre of Inception?       knowledge
12           Show me what Natalie Portman looks like.      multimedia
13  Suggest romantic

In [9]:
%pip install plotly nbformat>=4.2.0

Note: you may need to restart the kernel to use updated packages.


# save the model

In [None]:
# Step 7: Save the Model    
torch.save(model.state_dict(), "./question_classifier_model.pth")
print("Model saved to question_classifier_model.pth")

Model saved to question_classifier_model.pth


: 