In [None]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [33]:
llm = Ollama(model="mistral",  callbacks=CallbackManager([StreamingStdOutCallbackHandler()]),num_gpu=1, base_url="http://localhost:11434")

In [34]:
llm("Hello world")

```python
print("Hello World!")
```

'```python\nprint("Hello World!")\n```'

In [35]:
from langchain.embeddings import HuggingFaceEmbeddings


modelPath = "BAAI/bge-base-en-v1.5"

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device':'cuda:0'}
encode_kwargs = {'normalize_embeddings': True}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embedding = HuggingFaceEmbeddings(
    model_name=modelPath,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs, # Pass the model configuration options
    encode_kwargs=encode_kwargs # Pass the encoding options
)

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

loader = DirectoryLoader('mytext', glob='*.txt', loader_cls=TextLoader)
documents = loader.load()


In [37]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

In [38]:
len(texts)
import torch
torch.cuda.empty_cache()
print(torch.cuda.memory_summary(device=None, abbreviated=False))
import gc
torch.cuda.empty_cache()
gc.collect()

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 11           |        cudaMalloc retries: 11        |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      | 660537 KiB |    872 MiB | 158678 MiB | 158033 MiB |
|       from large pool | 659840 KiB |    871 MiB | 156571 MiB | 155927 MiB |
|       from small pool |    697 KiB |      2 MiB |   2106 MiB |   2105 MiB |
|---------------------------------------------------------------------------|
| Active memory         | 660537 KiB |    872 MiB | 158678 MiB | 158033 MiB |
|       from large pool | 659840 KiB |    871 MiB | 156571 MiB | 155927 MiB |
|       from small pool |    697 KiB |      2 MiB |   2106 MiB |   2105 MiB |
|---------------------------------------------------------------

938

In [39]:
from langchain.vectorstores import Chroma
persist_directory = './db'
vectordb = Chroma.from_documents(documents=texts, 
                                 embedding=embedding,
                                 persist_directory=persist_directory)
vectordb.persist()

In [40]:
import torch
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
dev

'cuda:0'

In [41]:
retriever = vectordb.as_retriever(search_kwargs={'k':5})

In [42]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                  retriever=retriever, 
                                  return_source_documents=True)
     

In [43]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [44]:
query = "Who won the Piston cup?"
llm_response = qa_chain(query)
process_llm_response(llm_response)


[Fanfare]
Ladies and gentlemen,
for the first time
in Piston Cup history...
[rewing] A rookie
has won the Piston Cup.
Yes!
[Bob]... we have a three-way tie.
[Crowd cheers]
[Cameras flash]
[Chuckling] Hey,
that must be really embarrassing.
But I wouldn't worry about it.
Because I didn't do it!

Based on the context given:

* The Piston Cup is being contested by three drivers: The King, Chick Hicks, and Lightning McQueen.
* There will be a tiebreaker race between these three drivers to determine the winner of the Piston Cup.
* The country has almost shut down in anticipation of this race.
* Tickets to the race are hotter than a black leather seat on a hot summer day.
* There is a crowd of nearly 200,000 cars at the Los Angeles International Speedway for this race.
* Darrell is also present at the race and is excited about it.

It is not specified in the context who won the Piston Cup, but it is mentioned that a rookie has won it for the first time.
[Fanfare]
Ladies and gentlemen,
for th