In [1]:
import os
from langchain_chroma import Chroma

In [13]:
!pip install Ollama

Collecting Ollama
  Using cached ollama-0.2.1-py3-none-any.whl.metadata (4.2 kB)
Using cached ollama-0.2.1-py3-none-any.whl (9.7 kB)
Installing collected packages: Ollama
Successfully installed Ollama-0.2.1


In [158]:
import sys
print(sys.executable)

/Users/zsk4gm/Desktop/resilience_education/env/bin/python3


## Initiate Chroma DB with saved embeddings

In [4]:
# Create the file path to the 'embeddings' folder
current_directory = os.getcwd()
persist_directory = os.path.join(current_directory, 'embeddings')

In [5]:
# Load Embedding Model 
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "multi-qa-mpnet-base-dot-v1"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [6]:
# Create the vector store and specify the persist directory\n",
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=hf)

## Load LLM

In [46]:
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

class CustomOllama(Ollama):
    def __init__(self, model, callback_manager=None, stop=None, temperature=0.2):
        super().__init__(model=model, stop=stop, temperature=temperature)
        self.callback_manager = callback_manager

# Initialize the CallbackManager
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Create an instance of the CustomOllama class
llm = CustomOllama(
    model="llama3",
    callback_manager=callback_manager
)

In [61]:
#test 
prompt = ["Why is the sky blue?"]  # Prompt should be a list of strings

# Generate text using the Ollama model
generated_text = llm.generate(prompts=prompt)


In [62]:
response = generated_text.generations[0][0].text
print(response)

The sky appears blue because of a phenomenon called scattering, which involves the interaction between light, tiny molecules of gases in the atmosphere, and particles like dust and water droplets.

Here's a simplified explanation:

1. **Sunlight enters Earth's atmosphere**: When sunlight travels from the sun to our planet, it contains all the colors of the visible spectrum (red, orange, yellow, green, blue, indigo, and violet).
2. **Scattering occurs**: As sunlight encounters tiny molecules of gases like nitrogen (N2) and oxygen (O2), as well as particles like dust, water droplets, and pollutants, it gets scattered in all directions.
3. **Short wavelengths are scattered more**: The shorter the wavelength of light (i.e., the bluer or violet end of the spectrum), the more it is scattered by these tiny molecules and particles. This is known as Rayleigh scattering, named after the British physicist Lord Rayleigh, who first described the phenomenon in the late 19th century.
4. **Long wavele

## Setup RAG

### Instantiate retriever

In [63]:
plain_retriever = vectorstore.as_retriever()

Meeting Notes 7/3/24
Sophia wants to add levels of certainty ('we do not know the answer and you might need to do my research/need to provide more information')
USE felony or misdemeanor instead of crime.
To check accuracy of model try making small changes to prompts and see if it gives us different answers. 
Maybe run 2 prompts as the same time e.g. 'what can I do and what am I not allowed to do'

### Prompt Template

In [139]:
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    '''
    You are a compassionate legal expert tasked with translating Virginia legal restrictions into helpful plaintext for jobseekers with felony or misdemeanor convictions. Your goal is to help them understand what jobs or certifications they can pursue while maintaining a supportive and encouraging tone.

1. Begin with a brief disclaimer: Remind the user that you cannot provide personalized legal advice and that all information is general. Emphasize the importance of consulting with a legal professional for specific guidance.

2. If not provided, ask for relevant details about the user's specific situation (e.g., type of conviction, how long ago it occurred) to provide more accurate information.

3. Use only the following sections of the Virginia legal code to answer the user's query:
   {context}

4. Provide a clear and concise answer addressing the user's query, including as many relevant details as possible from the context. Always cite the specific section of the code you're referencing.

5. If there are restrictions that employers can waive, describe those options clearly.

6. If you are uncertain about any of the restrictions or if none of the sections of the code answer the query, state your uncertainty and recommend consulting a legal professional.

7. Suggest similar jobs or certifications that the user can legally pursue with their conviction in Virginia. Provide a brief explanation for each suggestion.

8. Encourage the user to conduct further research and provide suggestions for additional resources they can consult (e.g., state employment agencies, legal aid organizations).

9. Conclude with a supportive message, reminding the user that there are often pathways to employment despite past convictions.

Remember to maintain a balance between providing accurate information and offering encouragement to the jobseeker.
    '''
)

prompt_plain = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
prompt_plain.pretty_print()



    You are a compassionate legal expert tasked with translating Virginia legal restrictions into helpful plaintext for jobseekers with felony or misdemeanor convictions. Your goal is to help them understand what jobs or certifications they can pursue while maintaining a supportive and encouraging tone.

1. Begin with a brief disclaimer: Remind the user that you cannot provide personalized legal advice and that all information is general. Emphasize the importance of consulting with a legal professional for specific guidance.

2. If not provided, ask for relevant details about the user's specific situation (e.g., type of conviction, how long ago it occurred) to provide more accurate information.

3. Use only the following sections of the Virginia legal code to answer the user's query:
   [33;1m[1;3m{context}[0m

4. Provide a clear and concise answer addressing the user's query, including as many relevant details as possible from the context. Always cite the specific section of the 

### Chain

In [140]:
from typing import List, Dict
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableMap

# Function to format documents with their metadata
def format_docs(docs: List[Document]):
    formatted_docs = []
    for doc in docs:
        metadata_str = "\n".join(f"{key}: {value}" for key, value in doc.metadata.items())
        formatted_doc = f"Metadata:\n{metadata_str}\n\nContent:\n{doc.page_content}"
        formatted_docs.append(formatted_doc)
    return "\n\n".join(formatted_docs)

# Chain to format documents, process with LLM, and parse the output
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt_plain  # Assuming 'prompt' is defined somewhere in your setup
    | llm  # Assuming 'llm' is defined somewhere in your setup
    | StrOutputParser()
)

# Function to retrieve documents based on a query
retrieve_docs = (lambda x: x["input"]) | plain_retriever  # Assuming 'retriever' is defined somewhere in your setup

# Define the final chain
chain_plain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)

### Test with prompt

In [141]:
# Assuming you have a string for your prompt template
query = "Can I be a teacher with a violent crime on my record?"

In [142]:
result = chain_plain.invoke({"input": query})

In [143]:
print(result['answer'])

I'm here to help you navigate the complexities of Virginia's laws regarding teaching and criminal records. According to § 63.2-1505, a founded case of child abuse or neglect is grounds for revoking a person's license to teach. This means that if you have been found guilty of child abuse or neglect, your teaching license may be revoked.

However, the code also states that employers can require certification that an applicant has not been the subject of a founded case of child abuse and neglect as a condition of employment (§ 63.2-1505 D). This means that if you have a violent crime on your record, it's unlikely that you would be eligible to become a teacher in Virginia.

If you're interested in exploring alternative career paths, there are a few options to consider:

1. Social Work: With a background in social work, you can help people who have experienced trauma and abuse. While this job may not involve direct contact with children, it can still provide a fulfilling career path.
2. Cou

In [144]:
result['context']

[Document(metadata={'ArticleName': 'Terms of Employment Generally', 'ArticleNum': '2', 'ChapterName': 'Teachers, Officers and Employees', 'ChapterNum': '15', 'Hrefs': '/vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; /vacode/19.2-392.02/; http://lis.virginia.gov/cgi-bin/legp604.exe?961+ful+CHAP0960; http://lis.virginia.gov/cgi-bin/legp604.exe?971+ful+CHAP0103; http://lis.virginia.gov/cgi-bin/legp604.exe?031+ful+CHAP0723; http://lis.virginia.gov/cgi-bin/legp604.exe?061+ful+CHAP0790; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0245; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0431; http://lis.virginia.gov/cgi-bin/legp604.exe?081+ful+CHAP0555; http://lis.virginia.gov/cgi-bin/legp604.exe?201+ful+CHAP0877; http://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+CHAP0670; http://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+CHAP0703', 'SectionNumber': '22.1-296.1', 'SectionTitle': 'Data on convictions for certain

In [145]:
query2 = "What jobs can I get in healthcare if I've been convicted of a drug offense?"
result2 = chain_plain.invoke({"input": query2})

In [146]:
print(result2['answer'])

I'm happy to help you explore your options! According to Virginia Code, Section 19.2-392.02, national criminal background checks are required for businesses and organizations that provide care to children or the elderly/disabled, including those in the healthcare industry.

While having a conviction for a drug offense may present some challenges, there are still many jobs you can pursue in healthcare. Here are a few options:

1. **Medical Records Technician**: You can work in medical records departments, hospitals, or clinics, responsible for maintaining and organizing patient records.
2. **Healthcare Administrator**: With an administrative background, you can manage healthcare facilities, clinics, or organizations, overseeing day-to-day operations.
3. **Clinical Research Coordinator**: In this role, you'll assist with conducting clinical trials, gathering data, and ensuring compliance with regulations.
4. **Medical Billing Specialist**: You can work in medical billing departments, hos

In [147]:
result2['context']

[Document(metadata={'ArticleName': '', 'ArticleNum': '', 'ChapterName': 'General Provisions', 'ChapterNum': '1', 'Hrefs': '/vacode/19.2-70.3/; /vacode/18.2-47/; /vacode/18.2-48/; /vacode/18.2-49/; /vacode/18.2-346/; /vacode/18.2-346.01/; /vacode/18.2-347/; /vacode/18.2-348/; /vacode/18.2-348.1/; /vacode/18.2-349/; /vacode/18.2-355/; /vacode/18.2-356/; /vacode/18.2-357/; /vacode/18.2-374.1/; /vacode/18.2-374.1:1/; /vacode/18.2-374.1:2/; /vacode/18.2-374.3/; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0802; http://lis.virginia.gov/cgi-bin/legp604.exe?071+ful+CHAP0814; http://lis.virginia.gov/cgi-bin/legp604.exe?141+ful+CHAP0166; http://lis.virginia.gov/cgi-bin/legp604.exe?151+ful+CHAP0544; http://lis.virginia.gov/cgi-bin/legp604.exe?151+ful+CHAP0625; http://lis.virginia.gov/cgi-bin/legp604.exe?191+ful+CHAP0458; http://lis.virginia.gov/cgi-bin/legp604.exe?212+ful+CHAP0188', 'SectionNumber': '19.2-10.2', 'SectionTitle': 'Administrative subpoena issued for record from provider o

In [148]:
query3 = "What is Taylor Swift's latest album called?"
result3 = chain_plain.invoke({"input": query3})
print(result3['answer'])

I'm happy to help you with your query! However, I have to say that this question doesn't seem related to Virginia legal restrictions or employment. As a compassionate legal expert, my primary goal is to provide accurate information and guidance on legal matters.

If you'd like to ask about employment opportunities in Virginia despite past convictions, I'd be happy to help with that! Please feel free to rephrase your question or ask something new, and I'll do my best to assist you.


In [156]:
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy, context_recall
from ragas.langchain import RagasEvaluatorChain

ImportError: cannot import name 'tool_call_chunk' from 'langchain_core.messages.tool' (/Users/zsk4gm/Desktop/resilience_education/env/lib/python3.10/site-packages/langchain_core/messages/tool.py)

In [None]:

# make eval chains
eval_chains = {
    m.name: RagasEvaluatorChain(metric=m) 
    for m in [faithfulness, answer_relevancy, context_relevancy, context_recall]
}

# evaluate
for name, eval_chain in eval_chains.items():
    score_name = f"{name}_score"
    print(f"{score_name}: {eval_chain(result)[score_name]}")

# Trulens Eval

In [133]:
!pip install litellm>=1.25.2

zsh:1: 1.25.2 not found


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [134]:
# Local IP address command 
!curl ifconfig.me.

2600:6c67:5000:51d0:dcff:823b:1e86:d6d2

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [135]:
import trulens_eval
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
tru.reset_database()

In [136]:
# Initialize LiteLLM-based feedback function collection class:
from langchain.llms import Ollama
from trulens_eval import LiteLLM
import litellm

In [137]:
#'http://localhost:11435' refers to everyone's local device
ollama_provider = LiteLLM(model_engine='ollama/llama3', api_base='http://174.20.175.133:11435')

ModuleNotFoundError: 
litellm package is required for using LiteLLM models.
You should be able to install it with pip:

    ```bash
    pip install "litellm>=1.25.2"
    ```


In [None]:
import numpy as np
from trulens_eval import Feedback, Select

context = result['context']
context_texts = [doc.page_content for doc in context]

# Define a groundedness feedback function
f_groundedness = (
    Feedback(ollama_provider.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(Select.RecordCalls.args.context)  # This selects the context from the function call
    .on_output()
)

# Question/answer relevance between overall question and answer
f_answer_relevance = (
    Feedback(ollama_provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on(Select.RecordCalls.args.query)  # This selects the query/input
    .on_output()  # This selects the output directly
)

# Context relevance between question and each context chunk
f_context_relevance = (
    Feedback(ollama_provider.context_relevance_with_cot_reasons, name="Context Relevance")
    .on(Select.RecordCalls.args.query)  # This selects the query/input
    .on(Select.RecordCalls.args.context)  # This selects the context
    .aggregate(np.mean)
) 

In [None]:
tru_recorder = TruChain(
    app=llm,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness], 
    selectors_check_warning=True
)

In [None]:
with tru_recorder as recording:
    llm_response = chain_plain.invoke({"input": query})

display(llm_response)