In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the FAQ JSON loaded later in this
# notebook is read from a path underneath this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import json

  from tqdm.autonotebook import tqdm, trange


## Base Layer

In [3]:
class QuestionProcessor:
    """Minimal text normalizer applied to a user question before matching."""

    def preprocess(self, question):
        """Return ``question`` lower-cased with surrounding whitespace removed.

        Only ``str.lower`` and ``str.strip`` are applied. Lemmatization and
        stop-word removal are NOT performed here.

        Args:
            question: Raw question text (a ``str``).

        Returns:
            The normalized question string.
        """
        lowered = question.lower()
        return lowered.strip()

In [4]:
class FAQMatcher:
    """Match a question against a list of FAQ entries by embedding similarity.

    Every FAQ question is embedded once at construction time. ``find_match``
    embeds the incoming question, compares it with the stored embeddings using
    cosine similarity, and returns the answer of the closest entry when its
    score reaches ``threshold``.
    """

    def __init__(self, faq_data, threshold=0.75,
                 model_name='paraphrase-multilingual-MiniLM-L12-v2'):
        """Load the embedding model and pre-compute the FAQ embeddings.

        Args:
            faq_data: Indexable sequence of mappings, each providing a
                ``'question'`` and an ``'answer'`` key.
            threshold: Minimum cosine similarity for a match to be accepted.
            model_name: Name of the SentenceTransformer model to load. The
                default is a multilingual model used for semantic similarity.
        """
        self.faq_data = faq_data
        # Use Multilingual model for Semantic Similarity
        self.model = SentenceTransformer(model_name)
        self.threshold = threshold
        questions = [item['question'] for item in faq_data]
        # With no FAQ entries there is nothing to embed; keep None so that
        # find_match can short-circuit instead of comparing against an empty
        # matrix.
        self.faq_embeddings = self._create_embeddings(questions) if questions else None

    def _create_embeddings(self, texts):
        """Encode a list of texts with the loaded model and return the result."""
        return self.model.encode(texts)

    def find_match(self, question):
        """Return the answer of the best-matching FAQ entry, or ``None``.

        Args:
            question: The (already preprocessed) question text.

        Returns:
            The ``'answer'`` of the FAQ entry with the highest cosine
            similarity when that similarity is at least ``self.threshold``;
            ``None`` when the best score is lower or when there are no FAQ
            entries to compare against.
        """
        # Guard: an empty FAQ set can never produce a match, and passing an
        # empty embedding matrix to cosine_similarity would raise.
        if self.faq_embeddings is None or len(self.faq_embeddings) == 0:
            return None

        question_embedding = self.model.encode([question])
        # A single query is passed in, so only the first row of the result
        # is needed.
        similarities = cosine_similarity(question_embedding, self.faq_embeddings)[0]
        # Convert to a plain int so the index works with any sequence type.
        best_idx = int(similarities.argmax())
        best_score = similarities[best_idx]

        if best_score >= self.threshold:
            return self.faq_data[best_idx]['answer']
        return None

In [5]:
class GPTResponder:
    """Thin adapter that forwards questions to a GPT-like model object."""

    def __init__(self, model):
        """Keep a reference to ``model``.

        ``model`` must expose ``generate_response(question, context)``.
        """
        self.model = model

    def generate_response(self, question, context=None):
        """Delegate to the wrapped model and return whatever it produces.

        Args:
            question: The question text to answer.
            context: Optional extra context, passed through unchanged.
        """
        backend = self.model
        reply = backend.generate_response(question, context)
        return reply

## Factory Layer

In [6]:
# Use Factory Pattern
class ResponseFactory:
    """Pick the source of a response: FAQ lookup first, GPT responder second."""

    def __init__(self, faq_matcher, gpt_responder):
        """Store the two collaborators used by ``get_response``."""
        self.faq_matcher, self.gpt_responder = faq_matcher, gpt_responder

    def get_response(self, question):
        """Return the FAQ answer for ``question`` or fall back to GPT.

        The GPT responder is only invoked when the FAQ matcher yields a falsy
        result (for example ``None``).
        """
        faq_answer = self.faq_matcher.find_match(question)
        if not faq_answer:
            # No usable FAQ answer: delegate to the generative fallback.
            return self.gpt_responder.generate_response(question)
        return faq_answer

## Application Layer

In [7]:
class Assistant:
    """Entry point that ties question preprocessing to response selection."""

    def __init__(self, processor, factory):
        """Store the preprocessor and the response factory.

        Args:
            processor: Object exposing ``preprocess(question)``.
            factory: Object exposing ``get_response(question)``.
        """
        self.processor = processor
        self.factory = factory

    def handle_question(self, question):
        """Preprocess ``question`` and return the factory's response to it."""
        return self.factory.get_response(self.processor.preprocess(question))

## Example Usage

In [8]:
if __name__ == "__main__":
    # Load FAQ data from file
    faq_file_path = "/content/drive/My Drive/SadGan/Preprocessed_Amazon_sagemaker_Faq.json"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding (the FAQ text may be non-ASCII).
    with open(faq_file_path, 'r', encoding='utf-8') as file:
        faq_data = json.load(file)

    class MockGPTModel:
        """Stand-in for a real GPT backend; echoes the question it was given."""

        def generate_response(self, question, context=None):
            return f"GPT Response for: {question}"

    # Wire the layers together: preprocessing -> FAQ match -> GPT fallback.
    question_processor = QuestionProcessor()
    faq_matcher = FAQMatcher(faq_data)
    gpt_responder = GPTResponder(MockGPTModel())

    response_factory = ResponseFactory(faq_matcher, gpt_responder)
    assistant = Assistant(question_processor, response_factory)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.12k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## Test

In [9]:
# Ask for a single question; typing 'exit' (any casing) ends the interaction
# without querying the assistant.
user_question = input("Enter your question (or type 'exit' to quit): ").strip()
if user_question.lower() == 'exit':
    print("Goodbye!")
else:
    # Only reach the assistant for real questions, so the literal word
    # "exit" is never sent through the FAQ/GPT pipeline.
    response = assistant.handle_question(user_question)
    print(f"Response: {response}")

Enter your question (or type 'exit' to quit): in which regions is amazon sagemaker available
Response: For a list of the supported Amazon SageMaker AWS regions, please visit the AWS Region Table for all AWS global infrastructure. Also for more information, see Regions and Endpoints in the AWS General Reference.
