In [None]:
# Show the kernel's current working directory.
%pwd

In [None]:
import os
os.chdir('../')
%pwd

In [None]:
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
def load_pdf_file(data):
    """Load the PDF files of a directory as documents.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files matching the
            ``*.pdf`` glob are read with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return pdf_loader.load()
    

In [None]:
# Load the PDFs from the Data/ directory (relative to the current working directory).
extracted_data = load_pdf_file('Data/')

In [None]:
#extracted_data

In [None]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: Documents to split, as passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value this notebook has always used.

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [None]:
# Chunk the loaded documents, then display how many chunks were produced.
text_chunks = text_split(extracted_data)
len(text_chunks)

In [None]:
#text_chunks

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
def download_huggingface_embeddings():
    """Create the embedding model used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_huggingface_embeddings()

In [None]:
# Sanity check: embed a short string, then print the vector's length and the vector itself.
query_vector = embeddings.embed_query("hello World")
print(f"length {len(query_vector)}")
print(query_vector)

In [None]:
import os
from dotenv import load_dotenv

# Read the .env file into the process environment, then fetch the two API
# keys this notebook needs. Each value is None when the variable is not set.
load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import  ServerlessSpec

index_name = 'medicalbot'
pc = Pinecone(api_key=PINECONE_API_KEY)

# create_index is rejected when an index with this name already exists, which
# made this cell fail on every run after the first. Only create it when missing
# so the notebook survives "Restart Kernel -> Run All".
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must match the embedding model's vector length
        metric="cosine",  # similarity metric used by the index
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [None]:
# os.environ only accepts strings, so a key that was not found (None) used to
# surface here as an opaque TypeError. Fail with a message that names the
# missing variable instead; an empty value is treated as missing as well.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if not _key_value:
        raise RuntimeError(
            f"{_key_name} is not set; add it to your .env file or environment."
        )
    os.environ[_key_name] = _key_value

In [None]:
from langchain_pinecone import PineconeVectorStore

# Build the vector store from the chunks, using the embedding model and the
# Pinecone index named above; the resulting store is kept for querying.
docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
from langchain_pinecone import PineconeVectorStore

# Attach to the index by name, passing no documents.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
#docsearch

In [None]:
# Similarity-search retriever configured with k=3.
retreiver = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
# Try the retriever on a sample question.
retrieved_doc = retreiver.invoke("what is Acne?")

In [None]:
# Display the retrieval result for inspection.
retrieved_doc

In [None]:
from langchain_openai import OpenAI

# LLM used for answer generation: temperature 0.4, max_tokens 500.
llm = OpenAI(max_tokens=500, temperature=0.4)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model. These are adjacent string
# literals, which Python joins with no separator, so every fragment that
# continues a sentence ends with an explicit space. {context} is the
# placeholder for the retrieved documents.
system_prompts = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that "
    "you don't know. Use three sentences maximum "
    "and keep the answer concise."
    "\n\n"
    "{context}"
)

# {input} is the placeholder for the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ('system', system_prompts),
        ('human', "{input}"),
    ]
)

In [None]:
# First the chain that combines documents with the prompt and LLM, then the
# retrieval chain that pairs it with the retriever.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retreiver,
    combine_docs_chain=question_answer_chain,
)

In [None]:
import time
from openai import RateLimitError

max_retries = 5
retry_delay = 2  # seconds; doubled after every rate-limited attempt

# Reset first so a failed run can never leave a stale answer from an earlier
# execution of this cell on display.
response = None

for attempt in range(1, max_retries + 1):
    try:
        response = rag_chain.invoke({"input": "what is Acne?"})
        break
    except RateLimitError:
        # Out of attempts: surface the error instead of finishing the loop
        # silently with no response (and do not sleep for nothing).
        if attempt == max_retries:
            raise
        print(f"Rate limited. Retry {attempt}/{max_retries} in {retry_delay} seconds.")
        time.sleep(retry_delay)
        retry_delay *= 2  # exponential backoff


# Usage
response
