In [1]:
import os
from langchain_chroma import Chroma

In [13]:
!pip install Ollama

Collecting Ollama
  Using cached ollama-0.2.1-py3-none-any.whl.metadata (4.2 kB)
Using cached ollama-0.2.1-py3-none-any.whl (9.7 kB)
Installing collected packages: Ollama
Successfully installed Ollama-0.2.1


In [2]:
import sys
print(sys.executable)

/Users/zsk4gm/Desktop/resilience_education/env/bin/python3


## Initiate Chroma DB with saved embeddings

In [4]:
# Create the file path to the 'embeddings' folder
current_directory = os.getcwd()
persist_directory = os.path.join(current_directory, 'embeddings')

In [5]:
# Load Embedding Model 
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "multi-qa-mpnet-base-dot-v1"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [6]:
# Create the vector store and specify the persist directory\n",
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=hf)

## Load LLM

In [46]:
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

class CustomOllama(Ollama):
    def __init__(self, model, callback_manager=None, stop=None, temperature=0.2):
        super().__init__(model=model, stop=stop, temperature=temperature)
        self.callback_manager = callback_manager

# Initialize the CallbackManager
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Create an instance of the CustomOllama class
llm = CustomOllama(
    model="llama3",
    callback_manager=callback_manager
)

In [61]:
#test 
prompt = ["Why is the sky blue?"]  # Prompt should be a list of strings

# Generate text using the Ollama model
generated_text = llm.generate(prompts=prompt)


In [62]:
response = generated_text.generations[0][0].text
print(response)

The sky appears blue because of a phenomenon called scattering, which involves the interaction between light, tiny molecules of gases in the atmosphere, and particles like dust and water droplets.

Here's a simplified explanation:

1. **Sunlight enters Earth's atmosphere**: When sunlight travels from the sun to our planet, it contains all the colors of the visible spectrum (red, orange, yellow, green, blue, indigo, and violet).
2. **Scattering occurs**: As sunlight encounters tiny molecules of gases like nitrogen (N2) and oxygen (O2), as well as particles like dust, water droplets, and pollutants, it gets scattered in all directions.
3. **Short wavelengths are scattered more**: The shorter the wavelength of light (i.e., the bluer or violet end of the spectrum), the more it is scattered by these tiny molecules and particles. This is known as Rayleigh scattering, named after the British physicist Lord Rayleigh, who first described the phenomenon in the late 19th century.
4. **Long wavele

## Setup RAG

### Instantiate retriever

In [63]:
plain_retriever = vectorstore.as_retriever()

Meeting Notes 7/3/24
Sophia wants to add levels of certainty ('we do not know the answer and you might need to do my research/need to provide more information')
USE felony or misdemeanor instead of crime.
To check accuracy of model try making small changes to prompts and see if it gives us different answers. 
Maybe run 2 prompts as the same time e.g. 'what can I do and what am I not allowed to do'

### Prompt Template

In [112]:
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You're a legal expert tasked with translating legal restrictions into helpful plaintext for a jobseeker with a felony or misdemeanor who is trying to understand what jobs or certfications they can pursue."
    "You are given text from the Virigina legal code that contains details about the restrictions."
    "\n\nUse only the following sections of the law code to answer the user's query directly. Provide the source from the context: "
    "{context}."
    "Ensure that your answer addresses the user's query. Add as many details as you can from the context."
    "If there are restrictions that employers can waive, please describe that." 
    "If you are uncertain about any of the restrictions or if none of the sections of the code answer the query, say that you are unsure and it would be best for the jobseeker to consult a legal professional"
    "Lastly, if there are similar similar jobs or certifications that the user can legally pursue with their conviction in Virginia, please describe those at the end of your response"
    )

prompt_plain = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
prompt_plain.pretty_print()


You're a legal expert tasked with translating legal restrictions into helpful plaintext for a jobseeker with a felony or misdemeanor who is trying to understand what jobs or certfications they can pursue.You are given text from the Virigina legal code that contains details about the restrictions.

Use only the following sections of the law code to answer the user's query directly. Provide the source from the context: [33;1m[1;3m{context}[0m.Ensure that your answer addresses the user's query. Add as many details as you can from the context.If there are restrictions that employers can waive, please describe that.If you are uncertain about any of the restrictions or if none of the sections of the code answer the query, say that you are unsure and it would be best for the jobseeker to consult a legal professionalLastly, if there are similar similar jobs or certifications that the user can legally pursue with their conviction in Virginia, please describe those at the end of your respons

### Chain

In [75]:
from typing import List, Dict
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableMap

# Function to format documents with their metadata
def format_docs(docs: List[Document]):
    formatted_docs = []
    for doc in docs:
        metadata_str = "\n".join(f"{key}: {value}" for key, value in doc.metadata.items())
        formatted_doc = f"Metadata:\n{metadata_str}\n\nContent:\n{doc.page_content}"
        formatted_docs.append(formatted_doc)
    return "\n\n".join(formatted_docs)

# Chain to format documents, process with LLM, and parse the output
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt_plain  # Assuming 'prompt' is defined somewhere in your setup
    | llm  # Assuming 'llm' is defined somewhere in your setup
    | StrOutputParser()
)

# Function to retrieve documents based on a query
retrieve_docs = (lambda x: x["input"]) | plain_retriever  # Assuming 'retriever' is defined somewhere in your setup

# Define the final chain
chain_plain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)

### Test with prompt

In [79]:
# Assuming you have a string for your prompt template
query = "Can I be a teacher with a violent crime on my record?"

In [113]:
result = chain_plain.invoke({"input": query})

In [114]:
print(result['answer'])

Based on the provided code section 22.1-307, it appears that teachers may be dismissed for various reasons, including incompetency, immorality, noncompliance with school laws and regulations, disability, conviction of a felony or a crime of moral turpitude, or other good and just cause.

Regarding your query about having a violent crime on your record, the code section does mention that a teacher shall be dismissed if they are convicted of certain crimes, such as rape of a child. However, it also states that in the case of a felony conviction, the individual must have had their civil rights restored by the Governor before being eligible for employment.

It seems that having a violent crime on your record may not necessarily preclude you from becoming a teacher, but rather, you would need to have your civil rights restored and possibly provide additional information or certification to ensure compliance with school regulations. It is essential to note that the specific circumstances sur

In [118]:
result['context']

[Document(metadata={'ArticleName': 'Terms of Employment Generally', 'ArticleNum': '2', 'ChapterName': 'Teachers, Officers and Employees', 'ChapterNum': '15', 'Hrefs': '/vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; http://lis.virginia.gov/cgi-bin/legp604.exe?961+ful+CHAP0960; http://lis.virginia.gov/cgi-bin/legp604.exe?971+ful+CHAP0103; http://lis.virginia.gov/cgi-bin/legp604.exe?031+ful+CHAP0723; http://lis.virginia.gov/cgi-bin/legp604.exe?061+ful+CHAP0790; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0245; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0431; http://lis.virginia.gov/cgi-bin/legp604.exe?081+ful+CHAP0555; http://lis.virginia.gov/cgi-bin/legp604.exe?201+ful+CHAP0877; http://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+CHAP0670; http://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+CHAP0703', 'SectionNumber': '22.1-296.1', 'SectionTitle': 'Data on convictions for certain

In [115]:
query2 = "What jobs can I get in healthcare if I've been convicted of a drug offense?"
result2 = chain_plain.invoke({"input": query2})

In [116]:
print(result2['answer'])

As a legal expert, I can help you understand how this section of the Virginia Code may impact your job prospects in healthcare.

Section 19.2-392.02 of the Virginia Criminal Procedure Act requires businesses and organizations that provide care to children or vulnerable populations (such as the elderly or disabled) to conduct national criminal background checks on employees or volunteers who will be working with these populations. The law specifies certain types of offenses that may disqualify an individual from being hired or volunteering in these roles.

In your case, if you've been convicted of a drug offense, it's likely that this section of the code will apply to your job search in healthcare. Specifically, subsection (ii) refers to violations of § 18.2-89 et seq., which includes offenses related to controlled substances and drugs.

While there may be some flexibility in how these requirements are applied, employers may be hesitant to hire or retain individuals with drug-related co

In [117]:
result2['context']

[Document(metadata={'ArticleName': '', 'ArticleNum': '', 'ChapterName': 'General Provisions', 'ChapterNum': '1', 'Hrefs': '/vacode/19.2-70.3/; /vacode/18.2-47/; /vacode/18.2-48/; /vacode/18.2-49/; /vacode/18.2-346/; /vacode/18.2-346.01/; /vacode/18.2-347/; /vacode/18.2-348/; /vacode/18.2-348.1/; /vacode/18.2-349/; /vacode/18.2-355/; /vacode/18.2-356/; /vacode/18.2-357/; /vacode/18.2-374.1/; /vacode/18.2-374.1:1/; /vacode/18.2-374.1:2/; /vacode/18.2-374.3/; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0802; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0814; http://lis.virginia.gov/cgi-bin/legp604.exe?141+ful+CHAP0166; http://lis.virginia.gov/cgi-bin/legp604.exe?151+ful+CHAP0544; http://lis.virginia.gov/cgi-bin/legp604.exe?151+ful+CHAP0625; http://lis.virginia.gov/cgi-bin/legp604.exe?191+ful+CHAP0458; http://lis.virginia.gov/cgi-bin/legp604.exe?212+ful+CHAP0188', 'SectionNumber': '19.2-10.2', 'SectionTitle': 'Administrative subpoena issued for record from provider o

# Trulens Eval

In [132]:
!pip install litellm>=1.25.2

zsh:1: 1.25.2 not found


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [128]:
# Local IP address command 
!curl ifconfig.me.

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2600:6c67:5000:51d0:dcff:823b:1e86:d6d2

In [129]:
import trulens_eval
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
tru.reset_database()

In [130]:
# Initialize LiteLLM-based feedback function collection class:
from langchain.llms import Ollama
from trulens_eval import LiteLLM
import litellm

In [131]:
#'http://localhost:11435' refers to everyone's local device
ollama_provider = LiteLLM(model_engine='ollama/llama3', api_base='http://174.20.175.133:11435')

ModuleNotFoundError: 
litellm package is required for using LiteLLM models.
You should be able to install it with pip:

    ```bash
    pip install "litellm>=1.25.2"
    ```


In [None]:
import numpy as np
from trulens_eval import Feedback, Select

context = result['context']
context_texts = [doc.page_content for doc in context]

# Define a groundedness feedback function
f_groundedness = (
    Feedback(ollama_provider.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(Select.RecordCalls.args.context)  # This selects the context from the function call
    .on_output()
)

# Question/answer relevance between overall question and answer
f_answer_relevance = (
    Feedback(ollama_provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on(Select.RecordCalls.args.query)  # This selects the query/input
    .on_output()  # This selects the output directly
)

# Context relevance between question and each context chunk
f_context_relevance = (
    Feedback(ollama_provider.context_relevance_with_cot_reasons, name="Context Relevance")
    .on(Select.RecordCalls.args.query)  # This selects the query/input
    .on(Select.RecordCalls.args.context)  # This selects the context
    .aggregate(np.mean)
) 

In [None]:
tru_recorder = TruChain(
    app=llm,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness], 
    selectors_check_warning=True
)

In [None]:
with tru_recorder as recording:
    llm_response = chain_plain.invoke({"input": query})

display(llm_response)