### Med RAG

In [1]:
import json
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

from langchain.vectorstores import FAISS
from openai import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def load_articles(file_path):
    """Load the list of articles stored in a JSON file.

    The file must contain a JSON object with a top-level ``"articles"`` key.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The value stored under the ``"articles"`` key.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON object has no ``"articles"`` key.
    """
    # Decode explicitly as UTF-8: without it, open() uses the locale's
    # preferred encoding, which can corrupt or reject non-ASCII article text.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['articles']

# Load the article records from the raw-data JSON file (the path is relative,
# so it resolves against the notebook's working directory).
articles = load_articles('../../data/raw/articles.json')

In [None]:
from langchain.schema import Document

# Wrap every raw article in a Document: its 'content' becomes the page text,
# and articles without a 'metadata' entry get an empty dict.
documents = [
    Document(
        page_content=entry['content'],
        metadata=entry.get('metadata', {}),
    )
    for entry in articles
]

In [None]:
# Loads the "all-MiniLM-L6-v2" model directly through sentence-transformers.
# NOTE(review): `hf_model` is not referenced by any other cell visible in this
# notebook (indexing goes through the HuggingFaceEmbeddings wrapper instead) —
# confirm whether this load is still needed.
hf_model = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
# Embedding wrapper that is later handed to the vector store.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Chunking settings: chunks of at most 1000 units with 200 units of overlap
# between neighbours (units are whatever the splitter's length function counts).
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split every Document into chunks; these chunks feed all later retrieval.
splits = text_splitter.split_documents(documents)

Retriever

In [None]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(documents=splits, embedding=embedding)

# Expose the index as a retriever configured with k=3.
retriever_search_kwargs = {"k": 3}
retriever = vectorstore.as_retriever(search_kwargs=retriever_search_kwargs)

KeyBERT keyword search

In [None]:
from keybert import KeyBERT

# Shared KeyBERT instance, built with default constructor arguments; it is the
# object extract_keywords_from_text calls for every extraction.
keybert_model = KeyBERT()

def extract_keywords_from_text(text, top_n=5):
    """Return the single-word keywords KeyBERT extracts from ``text``.

    Args:
        text: Text to extract keywords from.
        top_n: Number of keywords requested from KeyBERT.

    Returns:
        A list with the first element of each result produced by
        ``keybert_model.extract_keywords``, in the order they were returned.
    """
    scored_keywords = keybert_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 1),  # unigrams only
        stop_words='english',
        top_n=top_n,
    )
    terms = []
    for scored in scored_keywords:
        # Keep only the leading element of each result; the rest is dropped.
        terms.append(scored[0])
    return terms

def search_using_keywords(user_query, k_candidates=3):
    """Return the first chunks in ``splits`` that share a keyword with the query.

    Keywords are extracted from ``user_query`` and from each chunk with
    ``extract_keywords_from_text``; a chunk matches when the two keyword lists
    have at least one element in common. Chunks are returned in ``splits``
    order (no relevance ranking is applied).

    Args:
        user_query: The user's question.
        k_candidates: Maximum number of chunks to return. The result is
            ``matches[:k_candidates]``, exactly as before.

    Returns:
        A list of matching chunks, or an empty list when the query yields no
        keywords or nothing matches.
    """
    query_keywords = set(extract_keywords_from_text(user_query))
    if not query_keywords:
        # Nothing can match, so skip keyword extraction over the whole corpus.
        return []

    # Keyword extraction per chunk is the expensive step; once enough matches
    # are collected the remaining chunks cannot change the returned slice.
    stop_early = isinstance(k_candidates, int) and k_candidates > 0

    relevant_docs = []
    for doc in splits:
        doc_keywords = extract_keywords_from_text(doc.page_content)
        if not query_keywords.isdisjoint(doc_keywords):
            relevant_docs.append(doc)
            if stop_early and len(relevant_docs) >= k_candidates:
                break

    return relevant_docs[:k_candidates]

# Module-level cap on retrieved chunks; rag_system reads this global and
# passes it to search_using_keywords.
k_candidates = 3  

Generator

In [None]:
import openai
from openai import OpenAI

class LLMClient:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    Args:
        api_key: Token passed to the ``OpenAI`` client.
        model_name: Model identifier sent with every request.
        base_url: Endpoint root passed to the ``OpenAI`` client. Defaults to
            the URL this class has always used.
    """

    def __init__(self, api_key, model_name="meta-llama/Llama-3.2-3B-Instruct",
                 base_url="https://api-inference.huggingface.co/v1/"):
        self.client = OpenAI(
            base_url=base_url,
            api_key=api_key
        )
        self.model_name = model_name

    def get_response_from_model(self, context, user_query, max_tokens=500):
        """Ask the model to answer ``user_query`` using ``context``.

        Args:
            context: Text inserted under the "Context:" heading of the prompt.
            user_query: Question inserted under the "Question:" heading.
            max_tokens: Completion length limit forwarded to the API
                (defaults to 500, the value that used to be hard-coded).

        Returns:
            The ``content`` of the first choice's message in the completion.
        """
        # The prompt text (including its indentation) is sent as-is in a
        # single user message; it is intentionally left unchanged.
        prompt_text = f"""
        Answer the following question based on the context provided:
        
        Context:
        {context}
        
        Question: 
        {user_query}
        """

        messages = [{"role": "user", "content": prompt_text}]

        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            max_tokens=max_tokens
        )

        return completion.choices[0].message.content

In [None]:
import os

# Read the Hugging Face access token from the environment instead of storing
# it in the notebook. The token that was previously committed here should be
# treated as leaked and revoked.
api_key = os.environ.get("HF_TOKEN")
if not api_key:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face access token "
        "before running this cell."
    )
llm_client = LLMClient(api_key)

In [None]:
def rag_system(user_query):
    """Answer ``user_query`` from keyword-matched chunks.

    Retrieves chunks with ``search_using_keywords`` (limited by the
    module-level ``k_candidates``), joins their ``page_content`` with newlines
    to form the context, and forwards context and query to ``llm_client``.

    Args:
        user_query: The question to answer.

    Returns:
        Whatever ``llm_client.get_response_from_model`` returns.
    """
    matched_chunks = search_using_keywords(user_query, k_candidates)
    chunk_texts = (chunk.page_content for chunk in matched_chunks)
    return llm_client.get_response_from_model("\n".join(chunk_texts), user_query)

In [None]:
# Run one example question through the pipeline and print the model's answer.
# NOTE(review): the output recorded beneath this cell discusses "how many rules
# there are", which is not this question — it looks stale; re-run to confirm.
user_query = "Why does Peterson believe that opportunities often lie in places others avoid?"
response = rag_system(user_query)

print("Response from RAG:", response)

Response from RAG: There is not enough context provided to determine the specific area or topic you are referring to. The phrase "how many rules there are" could be related to various fields such as:

- Laws and regulations
- Sports
- Traffic rules
- Social norms
- Game rules

Without more context, it's difficult to give a specific answer. If you could provide more information about the topic, I'd be happy to try and help.
