In [13]:
import ollama
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.vectorstores.utils import filter_complex_metadata
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from PyPDF2 import PdfReader
import os 

os.environ["COHERE_API_KEY"] = "<token>"

In [14]:
def ask_gemma(question):
    """
    
    Sends a question to the Ollama API and returns the response.
    """
    response = ollama.chat(
        model='gemma:2b-instruct-v1.1-q2_K',
        messages=[
            {'role': 'user', 'content': question},
        ],
    )

    return response['message']['content']

In [15]:
response_content = ask_gemma("What are Random variables?")
print(response_content)

**Random variables** are mathematical objects that represent quantities whose values are subject to uncertainty. They are typically described by probability distributions, which specify the likelihood of different outcomes occurring.

**Characteristics of Random Variables:**

* **Uncertainty:** Their values are unknown until they are measured or observed.
* **Probability distribution:** A probability distribution defines the likelihood of different outcomes occurring.
* **Randomness:** Random variables have a random nature, meaning their values are not predetermined.

**Types of Random Variables:**

* **Continuous:** Values can take any value within a certain range.
* **Discrete:** Values can only take specific integer or non-integer values.
* **Categorical:** Values can only belong to a finite set of categories.

**Examples of Random Variables:**

* Coin flip
* Temperature
* Stock price
* Test outcome

**Uses of Random Variables:**

* Statistical inference
* Probability estimation
* R

In [16]:
reader = docs = PyPDFLoader(file_path='./cs229-prob.pdf').load()
reader

[Document(page_content='Probability Theory Review for Machine Learning\nSamuel Ieong\nNovember 6, 2006\n1 Basic Concepts\nBroadly speaking, probability theory is the mathematical study of uncertainty. It plays a\ncentral role in machine learning, as the design of learning algorithms often relies on proba-\nbilistic assumption of the data. This set of notes attempts to cover some basic probability\ntheory that serves as a background for the class.\n1.1 Probability Space\nWhen we speak about probability, we often refer to the probability of an event of uncertain\nnature taking place. For example, we speak about the probability of rain next Tuesday.\nTherefore, in order to discuss probability theory formally, we must ﬁrst clarify what the\npossible events are to which we would like to attach probability.\nFormally, a probability space is deﬁned by the triple (Ω ,F, P), where\n•Ω is the space of possible outcomes (oroutcome space ),\n• F ⊆ 2Ω(the power set of Ω) is the space of (measurable

In [20]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(reader)
chunks = filter_complex_metadata(chunks)

model = ChatOllama(model="gemma:2b-instruct-v1.1-q2_K")

prompt = PromptTemplate.from_template(
            """
            <s> [INST] You are a helper for question answering tasks. Use the following context to answer the question. 
            If you don't know the answer, just say you don't know. Use three sentences
             maximum and be concise in your response. [/INST] </s> 
            [INST] Question: {question} 
            Context: {context} 
            Answer: [/INST]
            """
        )

vector_store = Chroma.from_documents(documents=chunks, embedding=FastEmbedEmbeddings())

retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 3,
                "score_threshold": 0.5,
            },
        )

chain = ({"context": retriever, "question": RunnablePassthrough()}
                      | prompt
                      | model
                      | StrOutputParser())

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [24]:
query="What are Random variables?"

chain.invoke(query)

'Random variables are functions that map outcomes to real values, allowing probability theory to handle outcomes that are not variables.'

# Re-Ranking

In [25]:
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

compressed_docs = compression_retriever.get_relevant_documents(query)
pretty_print_docs(compressed_docs)

  warn_deprecated(


Document 1:

since each of these events are disjoint.
1.2 Random Variables
Random variables play an important role in probability theory. The most important fact
about random variables is that they are notvariables. They are actually functions that
map outcomes (in the outcome space) to real values. In terms of notation, we usually denote
random variables by a capital letter. Let’s see an example.
Example 3. Again, consider the process of throwing a dice. Let Xbe a random variable that
depends on the outcome of the throw. A natural choice for Xwould be to map the outcome
ito the value i, i.e., mapping the event of throwing an “one” to the value of 1. Note that
we could have chosen some strange mappings too. For example, we could have a random
variable Ythat maps all outcomes to 0, which would be a very boring function, or a random
variable Zthat maps the outcome ito the value of 2iifiis odd and the value of −iifiis
even, which would be quite strange indeed.
----------------------------

In [26]:
chain_reranker = ({"context": compression_retriever, "question": RunnablePassthrough()}
                      | prompt
                      | model
                      | StrOutputParser())

In [27]:
chain.invoke(query)

'Random variables are functions that map outcomes to real values, allowing probability theory to be applied to events in a discrete way.'