In [1]:
from langchain_community.document_loaders import DirectoryLoader , PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
import os

In [2]:
# OpenAIEmbeddings() and ChatOpenAI(...) are constructed further down without
# an explicit key, so they presumably rely on this environment variable.
# Writing os.getenv(...) straight back into os.environ was a no-op when the
# key existed and raised an opaque "TypeError: str expected, not NoneType"
# when it did not, so check explicitly and fail with an actionable message.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Export it in the environment that starts "
        "the Jupyter kernel before running this notebook."
    )

In [3]:
# Presumably the location for a persisted FAISS index.
# NOTE(review): none of the visible cells read this constant, and the leading
# "/" makes it an absolute path at the filesystem root — confirm both are intended.
FAISS_PATH = "/faiss"

In [4]:
# Snapshot the kernel's current working directory and point UPLOAD_FOLDER at
# its "uploads" child. Both values are computed once, here, so they do not
# follow any os.chdir() call made afterwards.
path = os.getcwd()
UPLOAD_FOLDER = os.path.join(path, "uploads")

In [5]:
# Show the resolved uploads directory as a quick check of where the kernel is running.
print(UPLOAD_FOLDER)

c:\LangchainProjects\Multi Document Chatbot\research\uploads


In [6]:
# Step up one level from the notebook's folder so that the relative 'static'
# path used by the PDF loader resolves. The target is derived from `path`
# (the working directory captured earlier, before any chdir) instead of '..',
# which keeps this cell idempotent: a bare os.chdir('..') climbed one more
# directory every time the cell was re-run.
os.chdir(os.path.dirname(path))

In [7]:
def get_document_loader():
    """Load every PDF found under the ``static`` directory.

    The ``**/*.pdf`` glob also matches PDFs in sub-directories, each file is
    parsed with ``PyPDFLoader``, and a progress bar is shown while loading.
    Note that ``static`` is resolved against the current working directory.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields: a list of documents
        (despite the function's name, not the loader itself).
    """
    return DirectoryLoader(
        'static',
        glob="**/*.pdf",
        show_progress=True,
        loader_cls=PyPDFLoader,
    ).load()

In [8]:
# Optional sanity check — uncomment to print the metadata of every loaded document:
# docs = get_document_loader()
#
# for i in docs:
#     print(i.metadata)

In [9]:
def get_text_chunks(
    documents: list[Document],
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
):
    """Split documents into overlapping chunks for embedding.

    Args:
        documents: The documents to split.
        chunk_size: Upper bound on the size of a chunk. Sizes are measured
            with ``len``, i.e. in characters. Defaults to 1000, the value
            that used to be hard-coded here.
        chunk_overlap: How much consecutive chunks overlap, in the same
            unit. Defaults to 200, the previously hard-coded value.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_documents(documents)

In [10]:
def get_embeddings():
    """Build a FAISS vector store from the PDFs under ``static``.

    Runs the whole ingestion path on every call: load the documents, split
    them into chunks, then embed the chunks with ``OpenAIEmbeddings`` while
    constructing the store. Nothing is cached or written to disk here.

    Returns:
        The store returned by ``FAISS.from_documents`` (despite the
        function's name, not the raw embeddings).
    """
    chunks = get_text_chunks(get_document_loader())
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_documents(chunks, embedding_model)

In [11]:
def get_retriever():
    """Return a retriever over a freshly built vector store.

    Each call rebuilds the store through ``get_embeddings()`` and wraps it
    with ``as_retriever()`` using that method's default settings.
    """
    return get_embeddings().as_retriever()

In [12]:
# Build the two notebook-level objects the QA chain is assembled from.
retriever = get_retriever()
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Smoke test: display the metadata of the first hit for a sample query.
sample_query = "Explain commision of european Communities"
retriever.get_relevant_documents(sample_query)[0].metadata

100%|██████████| 2/2 [00:03<00:00,  1.84s/it]


{'source': 'static\\CELEX_31994L0062_EN_TXT-packaging_and_packaging_waste.pdf',
 'page': 7}

In [13]:
# Run the same sample query again and keep the first hit's metadata so the
# type of its 'source' entry and the value of its 'page' entry can be checked.
first_hit = retriever.get_relevant_documents("Explain commision of european Communities")[0]
relevant_document = first_hit.metadata
print(type(relevant_document['source']))
print(relevant_document['page'])

<class 'str'>
7


In [14]:
from langchain.chains import RetrievalQA

# Assemble the question-answering chain from the chat model and retriever
# built above. return_source_documents=True makes every response carry a
# 'source_documents' entry next to 'result', which the cells below rely on.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


In [15]:
def process_llm_response(chain, question):
    """Ask ``chain`` a question, print its sources and return the first one.

    Args:
        chain: A retrieval QA chain whose response is a mapping with a
            'result' entry and a 'source_documents' list, each item exposing
            a ``metadata`` dict with 'source' and 'page' keys. Objects that
            have an ``invoke`` method are run through it; plain callables
            are called directly.
        question: The question text to send to the chain.

    Returns:
        A ``(result, source_document, page_number)`` tuple: the answer text,
        the first source's path with a leading ``static`` directory removed,
        and that source's page number. The last two items are ``None`` when
        the chain returned no source documents.
    """
    # Calling the chain object directly emits a deprecation warning in
    # LangChain; go through invoke() when the object offers it.
    invoke = getattr(chain, "invoke", None)
    llm_response = invoke(question) if callable(invoke) else chain(question)
    print(llm_response)

    result = llm_response['result']
    sources = llm_response['source_documents']

    # List every source. The previous version returned from inside this loop,
    # so only the first source was ever printed or collected.
    print('Sources:')
    list_of_pages = []
    for source in sources:
        print(source.metadata)
        list_of_pages.append(source.metadata['page'])
    print(f"page {list_of_pages}")

    # Keep the three-item return shape even when nothing was retrieved, so a
    # caller's tuple unpacking does not fail on an implicit None.
    if not sources:
        return result, None, None

    first_metadata = sources[0].metadata
    source_document = first_metadata['source']
    # Drop the leading 'static' directory only when it is really there; a
    # blind [7:] slice mangled any path that did not start with it.
    for prefix in ("static\\", "static/"):
        if source_document.startswith(prefix):
            source_document = source_document[len(prefix):]
            break

    return result, source_document, first_metadata['page']

# Try the helper on a sample question; being the cell's last expression, the
# value it returns is displayed below the printed output.
question = "explain CHROMATOGRAPHIC conditions"
process_llm_response(question=question, chain=chain)

  warn_deprecated(


{'query': 'explain CHROMATOGRAPHIC conditions', 'result': 'The chromatographic conditions outlined in the provided context detail the specific parameters and settings used for conducting gas chromatography. These conditions include information on the pre-column, column, temperature settings, gas supplies, detector specifications, and calculations. \n\nFor example:\n- The pre-column tubing is made of stainless steel, with a length of 300mm and a diameter of 3 or 6mm.\n- The column uses a stationary phase made of Hallcomid M18 on chromosorb, with specific dimensions and packing material indicated.\n- Temperature conditions for the injector, detector, and column are specified.\n- Gas supplies such as carrier gas (nitrogen) and auxiliary gas (hydrogen) pressure, flow rates, and types are provided.\n- The recommended termination of chromatography by adjusting temperature over time to eliminate interfering substances is mentioned.\n- Calculation details for coefficient of proportionality and

('The chromatographic conditions outlined in the provided context detail the specific parameters and settings used for conducting gas chromatography. These conditions include information on the pre-column, column, temperature settings, gas supplies, detector specifications, and calculations. \n\nFor example:\n- The pre-column tubing is made of stainless steel, with a length of 300mm and a diameter of 3 or 6mm.\n- The column uses a stationary phase made of Hallcomid M18 on chromosorb, with specific dimensions and packing material indicated.\n- Temperature conditions for the injector, detector, and column are specified.\n- Gas supplies such as carrier gas (nitrogen) and auxiliary gas (hydrogen) pressure, flow rates, and types are provided.\n- The recommended termination of chromatography by adjusting temperature over time to eliminate interfering substances is mentioned.\n- Calculation details for coefficient of proportionality and other factors are outlined.\n\nOverall, these chromatogr

In [26]:
llm_response = chain("explain IDENTIFICATION AND DETERMINATION OF NITROMETHANE")
# 'page' metadata of every returned source document, in the order returned.
pages_list = [doc.metadata['page'] for doc in llm_response['source_documents']]

In [28]:
# Page numbers collected from the response's source documents; repeats are
# possible when several returned chunks carry the same 'page' value.
print(pages_list)

[11, 0, 11, 13]


In [25]:
# Ask a second question and keep the three values the helper returns for
# the cells that follow.
question = "explain IDENTIFICATION AND DETERMINATION OF NITROMETHANE"
(result,
 source_document,
 page_number) = process_llm_response(question=question, chain=chain)

{'query': 'explain IDENTIFICATION AND DETERMINATION OF NITROMETHANE', 'result': "The identification and determination of nitromethane involve a method that is suitable for products like cosmetic items that are packed in aerosol dispensers. The nitromethane content is expressed as a percentage by mass in the total aerosol dispenser content. The principle involves identifying nitromethane through a color reaction and then determining it gas chromatographically after the addition of an internal standard. The method uses specific reagents and procedures, like sodium hydroxide solution and Folin's reagent, to detect nitromethane. The calculations involve response factors and concentration measurements using gas chromatography with specific temperature settings and gas supplies.", 'source_documents': [Document(page_content="expressed inpercentage bymassofnitromethane ,inthetotalaerosoldispenser\ncontent .\n3. PRINCIPLE\nThenitromethane isidentified bycolourreaction.Nitromethane isdetermined\

In [19]:
# print() rather than a bare expression so the "\n" sequences in the answer
# render as real line breaks.
# NOTE(review): the saved output under this cell looks like it came from an
# earlier run with a different question — re-run top to bottom to refresh it.
print(result)

The chromatographic conditions described in the provided context include details about the pre-column, column, and associated parameters. 

1. **Precolumn:**
   - Tubing: stainless steel
   - Length: 300mm
   - Diameter: 3 or 6mm
   - Packing: Same material as used for the analytical column packing

2. **Column:**
   - The stationary phase is made of Hallcomid M18 on chromosorb.
   - The column must yield a resolution 'R' equal to or better than 1.5.
   - Examples of columns that meet these criteria are provided, including details such as material, length, diameter, support, sieve analysis, and stationary phase.
   - It is recommended to terminate chromatography by regulating the temperature from 90 to 150°C at a rate of 10°C per minute to eliminate substances that may interfere with subsequent measurements.

3. **Gas Chromatographic Conditions:**
   - Column: stainless steel
   - Length: 1.7m
   - Diameter: 3mm
   - Support: chromosorb - WAW, sieve analysis: 80 to 100 mesh
   - Statio

In [20]:
# Source path as returned by process_llm_response, i.e. with its first seven
# characters (the "static\" directory prefix) already removed.
source_document

'CELEX_31983L0514_EN_TXT-methods_of_analysis.pdf'

In [2]:
# Map each position in the sample list to the value stored at that position.
dic = dict(enumerate([13, 1, 4, 5, 9, 3]))
print(dic)

{0: 13, 1: 1, 2: 4, 3: 5, 4: 9, 5: 3}


In [10]:
def give_me_indexes():
    
    

[4, 5]
[5, 4]
