In [2]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [3]:
llm = Ollama(model="mistral",  callbacks=CallbackManager([StreamingStdOutCallbackHandler()]))

In [62]:
import google.generativeai as palm
import os
import dotenv
dotenv.load_dotenv()
from langchain.llms import GooglePalm

google_api_key=os.environ.get('PALM')
palm.configure(api_key=google_api_key)

prompt = 'Explain the difference between effective and affective with examples'

llm = GooglePalm(google_api_key=google_api_key)


In [44]:
from langchain.embeddings import HuggingFaceEmbeddings


modelPath = "BAAI/bge-base-en-v1.5"

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device':'cuda:0'}
encode_kwargs = {'normalize_embeddings': True}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embedding = HuggingFaceEmbeddings(
    model_name=modelPath,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs, # Pass the model configuration options
    encode_kwargs=encode_kwargs # Pass the encoding options
)

In [35]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

loader = DirectoryLoader('mytext', glob='*.txt', loader_cls=TextLoader)
documents = loader.load()


In [49]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

In [54]:
len(texts)
import torch
torch.cuda.empty_cache()
vectordb = None
print(torch.cuda.memory_summary(device=None, abbreviated=False))
import gc
torch.cuda.empty_cache()
gc.collect()


|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 6            |        cudaMalloc retries: 6         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   1390 MiB |   1404 MiB | 109024 MiB | 107633 MiB |
|       from large pool |   1387 MiB |   1401 MiB | 107266 MiB | 105878 MiB |
|       from small pool |      2 MiB |      3 MiB |   1757 MiB |   1755 MiB |
|---------------------------------------------------------------------------|
| Active memory         |   1390 MiB |   1404 MiB | 109024 MiB | 107633 MiB |
|       from large pool |   1387 MiB |   1401 MiB | 107266 MiB | 105878 MiB |
|       from small pool |      2 MiB |      3 MiB |   1757 MiB |   1755 MiB |
|---------------------------------------------------------------

12071

In [58]:
from langchain.vectorstores import Chroma
persist_directory = './db'
vectordb = Chroma.from_documents(documents=texts, 
                                 embedding=embedding,
                                 persist_directory=persist_directory)
vectordb.persist()


In [59]:
import torch
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
dev

'cuda:0'

In [60]:
retriever = vectordb.as_retriever(search_kwargs={'k':5})

In [63]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                  retriever=retriever, 
                                  return_source_documents=True)
     

In [64]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [70]:
query = "What happened after bees won the lawsuit?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

Bee work camps were shut down, the bees got back their honey, and the glorification of honey was ended.


Sources:
mytext/bee-movie.txt
mytext/bee-movie.txt
mytext/bee-movie.txt
mytext/bee-movie.txt
mytext/bee-movie.txt
