<a href="https://colab.research.google.com/github/negarslh97/AI_assistant/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# Mount Google Drive at /content/drive; the FAQ JSON file opened later in this
# notebook is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
!pip install faiss-cpu

from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import json

from faiss import IndexFlatL2



## Base Layer

In [44]:
class QuestionProcessor:
  """Light-weight normalizer applied to a user question before matching."""

  def preprocess(self, question):
    """Return ``question`` lower-cased with surrounding whitespace removed.

    Only ``str.lower`` and ``str.strip`` are applied; lemmatization and
    stop-word removal are not implemented here.
    """
    return question.lower().strip()


In [45]:
class FAQMatcher:
  """Match a user question against FAQ entries by embedding similarity.

  FAQ questions are embedded once at construction time. An incoming question
  is answered from the FAQ only when the cosine similarity of its closest FAQ
  question reaches ``threshold``.
  """

  def __init__(self, faq_data, threshold=0.75):
    """Load the sentence-embedding model and embed every FAQ question.

    Args:
      faq_data: sequence of dicts; each must have a ``'question'`` key, and
        an ``'answer'`` key that ``find_match`` returns.
      threshold: minimum cosine similarity for a FAQ entry to count as a match.
    """
    self.faq_data = faq_data
    self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
    self.threshold = threshold
    self.faq_embeddings = self._create_embeddings([item['question'] for item in faq_data])

  def _create_embeddings(self, texts):
    """Encode a list of texts with the embedding model."""
    return self.model.encode(texts)

  def _best_match(self, question):
    """Return ``(index, similarity)`` of the FAQ entry closest to ``question``.

    Returns ``(None, None)`` when there are no FAQ entries, so ``argmax`` is
    never taken over an empty similarity row.
    """
    if len(self.faq_data) == 0:
        return None, None

    # Encode the input question and compare it with every FAQ question
    question_embedding = self.model.encode([question])
    similarities = cosine_similarity(question_embedding, self.faq_embeddings)[0]

    best_idx = int(similarities.argmax())
    return best_idx, similarities[best_idx]

  def find_match(self, question):
    """Return the answer of the best-matching FAQ entry, or ``None``.

    ``None`` is returned when the best similarity is below ``self.threshold``
    or when there are no FAQ entries.
    """
    best_idx, best_similarity = self._best_match(question)
    if best_idx is not None and best_similarity >= self.threshold:
        return self.faq_data[best_idx]['answer']
    return None

  def _score_validation_pairs(self, questions, labels):
    """Pair each validation question's best similarity with its label."""
    return [(self._best_match(question)[1], label)
            for question, label in zip(questions, labels)]

  @staticmethod
  def _accuracy_from_pairs(pairs, total, threshold):
    """Fraction of ``total`` examples whose match decision equals the label.

    An example counts as "matched" when its similarity reaches ``threshold``.
    Returns 0.0 when ``total`` is 0 instead of dividing by zero.
    """
    if total == 0:
        return 0.0
    correct = 0
    for similarity, label in pairs:
        matched = similarity is not None and similarity >= threshold
        if matched == label:
            correct += 1
    return correct / total

  def optimize_threshold(self, questions, labels):
    """Set ``self.threshold`` to the candidate with the best validation accuracy.

    Candidates are 0.50, 0.51, ..., 0.99; on ties the lowest candidate wins.
    Each validation question is embedded once and its similarity is reused
    for every candidate. If no candidate scores above zero accuracy (which
    includes empty validation data), the current threshold is kept.

    Args:
      questions: validation questions.
      labels: for each question, whether a FAQ match is expected (truthy)
        or not (falsy); must support ``len()``.
    """
    pairs = self._score_validation_pairs(questions, labels)
    total = len(labels)

    best_threshold = self.threshold
    best_accuracy = 0
    for t in [i/100 for i in range(50, 100)]:
        accuracy = self._accuracy_from_pairs(pairs, total, t)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = t
    self.threshold = best_threshold

  def _calculate_accuracy(self, questions, labels, threshold):
    """Accuracy of the match / no-match decision at the given ``threshold``.

    The decision uses the ``threshold`` argument, not ``self.threshold``.
    """
    pairs = self._score_validation_pairs(questions, labels)
    return self._accuracy_from_pairs(pairs, len(labels), threshold)

In [46]:
class KnowledgeRetriever:
  """Retrieve the context passages nearest to a query from a flat L2 index."""

  def __init__(self, context_data):
    """Embed every context passage and add the embeddings to the index.

    Args:
      context_data: sequence of dicts, each with a ``'text'`` key.
    """
    self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
    self.context_data = context_data
    self.context_embeddings = self.model.encode([item['text'] for item in context_data])
    self.index = IndexFlatL2(self.context_embeddings.shape[1])
    self.index.add(self.context_embeddings)

  def retrieve_context(self, query, k=5):
    """Return the texts of the nearest passages, joined by single spaces.

    Args:
      query: the question to search for.
      k: maximum number of passages to return (default 5).

    Returns:
      The ``'text'`` of up to ``k`` passages in the order the index returned
      them, or ``""`` when there is nothing to retrieve.
    """
    # Never ask for more neighbours than there are passages: the index pads
    # missing results with label -1, and a negative label used as a Python
    # list index would silently repeat the last passage.
    k = min(k, len(self.context_data))
    if k <= 0:
        return ""

    query_embedding = self.model.encode([query])
    _, indices = self.index.search(query_embedding, k=k)
    # Defensive filter in case the index still reports padded (-1) labels.
    results = [self.context_data[int(i)]['text'] for i in indices[0] if i >= 0]
    return " ".join(results)

In [49]:
class GPTResponder:
  """Adapter that forwards questions to a wrapped generative model."""

  def __init__(self, model):
    """Keep a reference to ``model``, which must expose ``generate_response``."""
    self.model = model

  def generate_response(self, question, context=None):
    """Ask the wrapped model to answer ``question`` given optional ``context``.

    Both arguments are passed positionally to ``model.generate_response`` and
    its return value is handed back unchanged.
    """
    backend = self.model
    reply = backend.generate_response(question, context)
    return reply

## Factory Layer

In [54]:
# Use Factory Pattern
class ResponseFactory:
  """Choose how a question is answered: FAQ first, generative fallback second."""

  def __init__(self, faq_matcher, gpt_responder, knowledge_retriever):
    """Store the collaborators used by ``get_response``.

    Args:
      faq_matcher: object with ``find_match(question)``.
      gpt_responder: object with ``generate_response(question, context=...)``.
      knowledge_retriever: object with ``retrieve_context(question)``.
    """
    self.faq_matcher = faq_matcher
    self.gpt_responder = gpt_responder
    self.knowledge_retriever = knowledge_retriever

  def get_response(self, question):
    """Return the FAQ answer if one matches, otherwise a generated answer.

    A falsy FAQ result (``None`` or an empty answer) triggers the fallback:
    context is retrieved for the question and passed to the responder.
    """
    response = self.faq_matcher.find_match(question)
    if response:
        return response

    # Use the retriever injected into this instance, not a module-level global.
    retrieved_context = self.knowledge_retriever.retrieve_context(question)

    # Fall back to the generative responder with the retrieved context
    return self.gpt_responder.generate_response(question, context=retrieved_context)

## Application Layer

In [51]:
class Assistant:
  """Entry point that normalizes a question and asks the factory for a reply."""

  def __init__(self, processor, factory):
    """Store the question ``processor`` and the response ``factory``."""
    self.processor = processor
    self.factory = factory

  def handle_question(self, question):
    """Preprocess ``question`` and return the factory's response to it."""
    return self.factory.get_response(self.processor.preprocess(question))

## Example Usage

In [53]:
if __name__ == "__main__":
  # Load the FAQ entries from the mounted Google Drive.
  faq_file_path = "/content/drive/My Drive/SadGan/Preprocessed_Amazon_sagemaker_Faq.json"
  # Read as UTF-8 explicitly so decoding does not depend on the platform's
  # default text encoding.
  with open(faq_file_path, 'r', encoding='utf-8') as file:
      faq_data = json.load(file)

  # Create mock GPT model: a stand-in that echoes the question and context
  # instead of calling a real generative model.
  class MockGPTModel:
      def generate_response(self, question, context=None):
          return f"GPT Response for: {question} with context: {context}"

  # Create components
  question_processor = QuestionProcessor()
  faq_matcher = FAQMatcher(faq_data)
  gpt_responder = GPTResponder(MockGPTModel())

  # Create context data used by the retriever for the fallback path
  context_data = [
      {"text": "Amazon SageMaker is a cloud machine learning platform."},
      {"text": "It provides developers with the ability to build, train, and deploy ML models."}
  ]
  knowledge_retriever = KnowledgeRetriever(context_data)

  # Create response factory and the assistant that later cells call
  response_factory = ResponseFactory(faq_matcher, gpt_responder, knowledge_retriever)
  assistant = Assistant(question_processor, response_factory)

## Test

In [43]:
# Ask for one question interactively; typing 'exit' skips the assistant call
# instead of sending the word "exit" through the pipeline.
user_question = input("Enter your question (or type 'exit' to quit): ").strip()
if user_question.lower() == 'exit':
  print("Goodbye!")
else:
  response = assistant.handle_question(user_question)
  print(f"Response: {response}")

Enter your question (or type 'exit' to quit): in which regions is amazon sagemaker available
Response: For a list of the supported Amazon SageMaker AWS regions, please visit the AWS Region Table for all AWS global infrastructure. Also for more information, see Regions and Endpoints in the AWS General Reference.


In [55]:
# Response flow test: query the factory directly (the Assistant's
# preprocessing step is not involved) and show the question with its answer.
user_question = "What can Amazon SageMaker do?"
response = response_factory.get_response(user_question)
print(f"User Question: {user_question}", f"Response: {response}", sep="\n")

User Question: What can Amazon SageMaker do?
Response: Amazon SageMaker is a fully managed service that provides every developer and data scientist with the ability to build, train, and deploy machine learning (ML) models quickly. SageMaker removes the heavy lifting from each step of the machine learning process to make it easier to develop high quality models.
