### LLM Configuration

In [1]:
import google.generativeai as genai
from dotenv import load_dotenv
import os

from langchain_google_genai import GoogleGenerativeAI

In [2]:
def create_llm(model='gemini-pro', temperature=1.0):
    """Create the Google Generative AI LLM used by the rest of the notebook.

    Environment variables are loaded from a ``.env`` file (via ``load_dotenv``)
    and the ``GOOGLE_API_KEY`` value is handed to ``genai.configure``.

    Args:
        model: Name of the model to instantiate. Defaults to ``'gemini-pro'``.
        temperature: Sampling temperature passed to the model. Defaults to
            ``1.0``, which is the numeric value the previous ``temperature=True``
            evaluated to; it is now spelled as an explicit float.

    Returns:
        A ``GoogleGenerativeAI`` instance.
    """
    load_dotenv()
    # NOTE(review): os.getenv returns None when GOOGLE_API_KEY is not set;
    # confirm the key is present in the environment or the .env file.
    genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

    # NOTE(review): `convert_system_message_to_human` looks like a chat-model
    # option -- confirm it is honored by GoogleGenerativeAI.
    llm = GoogleGenerativeAI(
        model=model,
        temperature=float(temperature),
        convert_system_message_to_human=True,
    )

    return llm

In [3]:
# Instantiate the LLM once; the RetrievalQA chain built later in this notebook receives it as `llm`.
llm = create_llm()

### Loaders

In [16]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import TextLoader

In [17]:
# Define a function to create a DirectoryLoader for a specific file type
def create_directory_loader(file_type, directory_path):
    """Build a ``DirectoryLoader`` for all files of one type under a folder.

    Args:
        file_type: File extension to load, e.g. ``'.pdf'``, ``'.txt'`` or
            ``'.csv'``. The leading dot is optional and the loader lookup is
            case-insensitive.
        directory_path: Folder handed to ``DirectoryLoader`` as ``path``.

    Returns:
        A ``DirectoryLoader`` whose glob is ``**/*<extension>`` and whose
        ``loader_cls`` is the loader registered for that extension.

    Raises:
        KeyError: If no loader is registered for ``file_type``.
    """
    # Map file extensions to their respective loaders
    loaders = {
        '.pdf': PyPDFLoader,
        '.txt': TextLoader,
        '.csv': CSVLoader,
    }

    # Accept both 'pdf' and '.pdf'; keep the caller's casing for the glob so
    # that e.g. '.PDF' still matches files named with that exact suffix.
    suffix = file_type if file_type.startswith('.') else f'.{file_type}'

    try:
        loader_cls = loaders[suffix.lower()]
    except KeyError:
        supported = ', '.join(sorted(loaders))
        # Same exception type as the plain dict lookup, but with a usable message.
        raise KeyError(
            f"Unsupported file type {file_type!r}; supported types: {supported}"
        ) from None

    directory_loader = DirectoryLoader(
        path=directory_path,
        glob=f"**/*{suffix}",
        loader_cls=loader_cls,
    )

    return directory_loader

In [18]:
def load_all_documents(folder_path):
    """Load the PDF, text and CSV documents found under ``folder_path``.

    Args:
        folder_path: Folder passed to ``create_directory_loader`` for each
            of the three file types.

    Returns:
        A 3-tuple ``(pdf_documents, txt_documents, csv_documents)`` holding
        the result of ``.load()`` for each file type, in that order.
    """
    # Build one loader per extension first, then load them in the same order.
    extensions = ('.pdf', '.txt', '.csv')
    directory_loaders = [
        create_directory_loader(extension, folder_path)
        for extension in extensions
    ]

    loaded = [directory_loader.load() for directory_loader in directory_loaders]

    pdf_docs, txt_docs, csv_docs = loaded
    return pdf_docs, txt_docs, csv_docs


In [19]:
# Load the PDF, text and CSV documents from the 'Files' folder (a relative path).
pdf_documents, txt_documents, csv_documents = load_all_documents('Files')

### Splitters

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [21]:
# Split every loaded document into chunks in a single pass
def split_documents(pdf_documents, txt_documents, csv_documents):
    """Split all loaded documents into overlapping character chunks.

    Args:
        pdf_documents: Documents loaded from PDF files.
        txt_documents: Documents loaded from text files.
        csv_documents: Documents loaded from CSV files.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``
        for the three inputs concatenated in the order PDF, text, CSV.
    """
    # Concatenate the inputs so a single splitter call covers everything.
    all_documents = pdf_documents + txt_documents + csv_documents

    # Chunks are measured with len(): 300 per chunk, 60 shared between neighbours.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=60,
        length_function=len,
    )

    return splitter.split_documents(all_documents)

In [22]:
# Chunk all loaded documents; `splits` is what gets embedded into the vector store below.
splits = split_documents(pdf_documents, txt_documents, csv_documents)

### Embeddings

In [23]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

def create_embeddings():
    """Return a ``GoogleGenerativeAIEmbeddings`` client for ``models/embedding-001``."""
    return GoogleGenerativeAIEmbeddings(model='models/embedding-001')

In [24]:
# Create the embedding client that is passed to the vector store when the retriever is built.
embeddings = create_embeddings()

### Vectorstore

In [25]:
from langchain.vectorstores import Chroma

In [26]:
def create_retriever(embeddings, splits):
    """Index the document chunks in Chroma and expose them as a retriever.

    Args:
        embeddings: Embedding object handed to Chroma as ``embedding``.
        splits: Document chunks to store in the vector database.

    Returns:
        The retriever obtained from the Chroma store via ``as_retriever()``.
    """
    # Build the vector database from the chunks, then wrap it as a retriever.
    vector_store = Chroma.from_documents(documents=splits, embedding=embeddings)
    return vector_store.as_retriever()

In [27]:
# Build the Chroma-backed retriever over the document chunks; the QA chain below queries it.
retriever = create_retriever(embeddings, splits)

### Question Answering with Memory

In [30]:
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory

In [36]:
# Assemble the question-answering chain from the LLM and retriever created
# above, attaching a fresh conversation buffer as its memory.
# NOTE(review): confirm the default RetrievalQA prompt actually consumes the
# stored history; if it does not, the memory only records turns.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    memory=ConversationBufferMemory(),
    verbose=True,
)

In [47]:
# Ask a question about the CSV data; the returned answer string is shown as the cell output.
# NOTE(review): newer LangChain releases may deprecate `run` in favor of `invoke` -- confirm against the installed version.
qa.run('In the csv file, How many genders are there?')



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'I cannot provide an answer to this question as the provided text does not contain any information about a csv file or the number of genders mentioned in it.'