In [None]:
!pip install torch
!pip install transformers

#  Creating Knowlege-base and Queries

In [24]:
# Example usage
# Enhanced knowledge base with more challenging questions
# Expanded knowledge base with nuanced and potentially tricky questions
knowledge_base = [
    # Python basics
    {'question': 'What is Python?',
     'answer': 'Python is a high-level, interpreted programming language known for its readability and versatility.'},

    {'question': 'What are Python libraries?',
     'answer': 'Python libraries are collections of pre-written code that users can import and use in their programs.'},

    # Specific technical concept
    {'question': 'What is list comprehension in Python?',
     'answer': 'List comprehension is a concise way to create lists in Python, allowing you to generate new lists based on existing lists or other iterable objects using a compact syntax.'},

    # Specific programming concept
    {'question': 'What is object-oriented programming?',
     'answer': 'Object-oriented programming (OOP) is a programming paradigm that uses objects and classes to organize and structure code, focusing on creating reusable and modular software design.'},

    # A tricky question with semantic nuance
    {'question': 'Explain the concept of functional programming',
     'answer': 'Functional programming is a programming paradigm that treats computation as the evaluation of mathematical functions, emphasizing immutable data, pure functions, and avoiding changing state and mutable data.'}
]



In [25]:
# Queries to test Assistants.
test_queries = [
    "Tell me about lists and tuples in Python",
    "What are Python decorators and how do they work?",
    "Explain the Global Interpreter Lock",
    "How does Python manage memory?",
    "What are lambda functions used for?",
    "Tell me about creating lists in a pythonic way",  # Should match list comprehension
    "What is programming that focuses on functions?",  # Should match functional programming
    "Describe how to write modular code using objects",  # Should match OOP

    # A deliberately tricky query to test semantic matching
    "How do programmers create efficient and clean code structures?"
]



# **TF-IDF Assistant**

In [26]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#TF-IDF Class

In [29]:
# Class consisting of functions for TF-IDF Assistant
class TFIDFAssistant:
    def __init__(self, knowledge_base):
        """
        Initialize the assistant with a knowledge base.

        Parameters:
        knowledge_base (list): List of dictionaries with 'question' and 'answer' keys
        """
        self.questions = [item['question'] for item in knowledge_base]
        self.answers = [item['answer'] for item in knowledge_base]

        # Initialize and fit TF-IDF vectorizer
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.question_vectors = self.vectorizer.fit_transform(self.questions)

    def answer(self, query, threshold=0.3):
        """
        Find the most relevant answer to the query.

        Parameters:
        query (str): User's question
        threshold (float): Minimum similarity score to return an answer

        Returns:
        str: Most relevant answer or a default message if no match found
        """
        # Transform query to TF-IDF vector
        query_vector = self.vectorizer.transform([query])

        # Calculate cosine similarity between query and all questions
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]

        # Find index of most similar question
        best_match_idx = np.argmax(similarities)
        best_match_score = similarities[best_match_idx]

        if best_match_score >= threshold:
            return {
                'answer': self.answers[best_match_idx],
                'matched_question': self.questions[best_match_idx],
                'confidence': float(best_match_score)
            }
        else:
            return {
                'answer': "I'm sorry, I don't have an answer for that question.",
                'matched_question': None,
                'confidence': float(best_match_score)
            }



#Testing TF-IDF Assistant with queries

In [30]:
print("TF-IDF Assistant Results:")
tfidf_assistant = TFIDFAssistant(knowledge_base)
for query in test_queries:
    result = tfidf_assistant.answer(query)
    print(f"\nQuery: {query}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Matched Question: {result['matched_question']}")
    print(f"Answer: {result['answer']}")

TF-IDF Assistant Results:

Query: Tell me about lists and tuples in Python
Confidence: 1.00
Matched Question: What is Python?
Answer: Python is a high-level, interpreted programming language known for its readability and versatility.

Query: What are Python decorators and how do they work?
Confidence: 1.00
Matched Question: What is Python?
Answer: Python is a high-level, interpreted programming language known for its readability and versatility.

Query: Explain the Global Interpreter Lock
Confidence: 0.52
Matched Question: Explain the concept of functional programming
Answer: Functional programming is a programming paradigm that treats computation as the evaluation of mathematical functions, emphasizing immutable data, pure functions, and avoiding changing state and mutable data.

Query: How does Python manage memory?
Confidence: 1.00
Matched Question: What is Python?
Answer: Python is a high-level, interpreted programming language known for its readability and versatility.

Query: Wha

# **BERT Assistant**

In [19]:
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity

#BERT Assistant Class

In [20]:
# Class consisting of functions for BERT Assistant
class BERTAssistant:
    def __init__(self, knowledge_base):
        """
        Initialize the assistant with a knowledge base.

        Parameters:
        knowledge_base (list): List of dictionaries with 'question' and 'answer' keys
        """
        self.questions = [item['question'] for item in knowledge_base]
        self.answers = [item['answer'] for item in knowledge_base]

        # Load pre-trained BERT model and tokenizer
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = BertModel.from_pretrained('bert-base-uncased')

        # Generate embeddings for all questions in the knowledge base
        self.question_embeddings = self._encode_questions(self.questions)

    def _encode_text(self, text):
        """
        Generate BERT embedding for a single text.

        Parameters:
        text (str): Input text

        Returns:
        numpy.ndarray: BERT embedding vector
        """
        # Tokenize input
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)

        # Generate embeddings
        with torch.no_grad():
            outputs = self.model(**inputs)

        # Use the [CLS] token embedding as the sentence representation
        embedding = outputs.last_hidden_state[:, 0, :].numpy()
        return embedding[0]

    def _encode_questions(self, questions):
        """
        Generate BERT embeddings for all questions.

        Parameters:
        questions (list): List of question strings

        Returns:
        numpy.ndarray: Matrix of question embeddings
        """
        embeddings = []
        for question in questions:
            embedding = self._encode_text(question)
            embeddings.append(embedding)
        return np.array(embeddings)

    def answer(self, query, threshold=0.7):
        """
        Find the most relevant answer to the query.

        Parameters:
        query (str): User's question
        threshold (float): Minimum similarity score to return an answer

        Returns:
        dict: Dictionary containing the answer, matched question, and confidence score
        """
        # Generate embedding for the query
        query_embedding = self._encode_text(query)

        # Calculate cosine similarity between query and all questions
        similarities = cosine_similarity([query_embedding], self.question_embeddings)[0]

        # Find index of most similar question
        best_match_idx = np.argmax(similarities)
        best_match_score = similarities[best_match_idx]

        if best_match_score >= threshold:
            return {
                'answer': self.answers[best_match_idx],
                'matched_question': self.questions[best_match_idx],
                'confidence': float(best_match_score)
            }
        else:
            return {
                'answer': "I'm sorry, I don't have an answer for that question.",
                'matched_question': None,
                'confidence': float(best_match_score)
            }



#Testing Bert Assistant with Queries

In [21]:
# Use the same knowledge_base
# Initialize and test the assistant
bert_assistant = BERTAssistant(knowledge_base)

# Test with various queries
for query in test_queries:
    result = bert_assistant.answer(query)
    print(f"Query: {query}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Matched Question: {result['matched_question']}")
    print(f"Answer: {result['answer']}")
    print("-" * 80)

Query: Tell me about lists and tuples in Python
Confidence: 0.92
Matched Question: What is list comprehension in Python?
Answer: List comprehension is a concise way to create lists in Python, allowing you to generate new lists based on existing lists or other iterable objects using a compact syntax.
--------------------------------------------------------------------------------
Query: What are Python decorators and how do they work?
Confidence: 0.96
Matched Question: What are Python libraries?
Answer: Python libraries are collections of pre-written code that users can import and use in their programs.
--------------------------------------------------------------------------------
Query: Explain the Global Interpreter Lock
Confidence: 0.91
Matched Question: Explain the concept of functional programming
Answer: Functional programming is a programming paradigm that treats computation as the evaluation of mathematical functions, emphasizing immutable data, pure functions, and avoiding ch