In [9]:
import os
import together

import logging
from typing import Any, Dict, List, Mapping, Optional

from pydantic import Extra, Field, root_validator, model_validator

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

os.environ["TOGETHER_API_KEY"] = ""

In [5]:


# set your API key
together.api_key = os.environ["TOGETHER_API_KEY"]

# list available models and descriptons
models = together.Models.list()
print(f"{len(models)} models available")

# print the first 10 models on the menu
model_names = [model_dict['name'] for model_dict in models]
model_names[:100]

89 models available


['Austism/chronos-hermes-13b',
 'EleutherAI/llemma_7b',
 'EleutherAI/pythia-12b-v0',
 'EleutherAI/pythia-1b-v0',
 'EleutherAI/pythia-2.8b-v0',
 'EleutherAI/pythia-6.9b',
 'Gryphe/MythoMax-L2-13b',
 'HuggingFaceH4/starchat-alpha',
 'NousResearch/Nous-Hermes-13b',
 'NousResearch/Nous-Hermes-Llama2-13b',
 'NousResearch/Nous-Hermes-Llama2-70b',
 'NousResearch/Nous-Hermes-llama-2-7b',
 'NumbersStation/nsql-llama-2-7B',
 'Open-Orca/Mistral-7B-OpenOrca',
 'OpenAssistant/llama2-70b-oasst-sft-v10',
 'OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5',
 'OpenAssistant/stablelm-7b-sft-v7-epoch-3',
 'Phind/Phind-CodeLlama-34B-Python-v1',
 'Phind/Phind-CodeLlama-34B-v2',
 'SG161222/Realistic_Vision_V3.0_VAE',
 'WizardLM/WizardCoder-15B-V1.0',
 'WizardLM/WizardCoder-Python-34B-V1.0',
 'WizardLM/WizardLM-70B-V1.0',
 'bigcode/starcoder',
 'databricks/dolly-v2-12b',
 'databricks/dolly-v2-3b',
 'databricks/dolly-v2-7b',
 'defog/sqlcoder',
 'garage-bAInd/Platypus2-70B-instruct',
 'huggyllama/llama-13b',
 'h

In [10]:
#together.Models.start("togethercomputer/llama-2-70b-chat")

In [12]:

class TogetherLLM(LLM):
    """Together large language models."""

    model: str = "togethercomputer/llama-2-70b-chat"
    """model endpoint to use"""

    together_api_key: str = os.environ["TOGETHER_API_KEY"]
    """Together API key"""

    temperature: float = 0.7
    """What sampling temperature to use."""

    max_tokens: int = 512
    """The maximum number of tokens to generate in the completion."""

    class Config:
        extra = Extra.forbid

#     @model_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is set."""
        api_key = get_from_dict_or_env(
            values, "together_api_key", "TOGETHER_API_KEY"
        )
        values["together_api_key"] = api_key
        return values

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        **kwargs: Any,
    ) -> str:
        """Call to Together endpoint."""
        together.api_key = self.together_api_key
        output = together.Complete.create(prompt,
                                          model=self.model,
                                          max_tokens=self.max_tokens,
                                          temperature=self.temperature,
                                          )
        text = output['output']['choices'][0]['text']
        return text


/tmp/ipykernel_38518/994963610.py:17: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  extra = Extra.forbid


In [16]:
loader = DirectoryLoader('/home/austin/code/ai/RAGS/data', glob="./*.pdf", loader_cls=PyPDFLoader)

documents = loader.load()

In [17]:
len(documents)

57

In [18]:
#splitting the text into
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

len(texts)

178

In [20]:
texts[3]

Document(page_content='N.R., L.Z., and C.A.V . synthesized synthetic guides and advised on synthetic RNA experiments. K.H., J.A.W, A.P.K, and A.E.Z. \nsynthesized synthetic guides and advised on synthetic RNA experiments. H.M., J.X., and G.G. produced AA V and adenovirus. S.K.D., \nY .M., and D.R.R. provided primary human hepatocytes and advice for in vivo  experiments with humanized mouse models. L.F. and \nG.B. provided humanized-liver mice, managed in vivo  injections and harvests, and advised on the in vivo  aspects of the project. \nO.O.A. and J.S.G. wrote the manuscript with help from all authors. M.Y ., E.I.I., C.S., and R.N.K. contributed equally and have the \nright to list their name first in their CV .\nCode Availability:  Code to predict atgRNA efficiency and support information are available at https://github.com/abugoot-lab/\natgRNA_rank .\nCompeting interests:  O.O.A. and J.S.G. are co-inventors on patent applications filed by MIT relating to work in this manuscript.', m

In [24]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-base-en"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
#     model_kwargs={'device': 'cuda'},
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs
)


In [25]:
# create db (may need a gpu here)

# %%time
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
import time
t1 = time.perf_counter()

persist_directory = 'db'

# persist_directory = '/home/austin/code/ai/RAGS/db'
## Here is the nmew embeddings being used
embedding = model_norm

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

t2 = time.perf_counter()
print(f'time taken to run embed ${len(texts)} chunks:',t2-t1)


time taken to run: 45.213992424993194


In [26]:
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

In [27]:
## Default LLaMA-2 prompt style
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

In [28]:
sys_prompt = """You are both a professor of medicine and a highly esteemed researcher in human genetic engineering. Your goal is to invent novel treatments for human cancers.

Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.

If a question does not make any sense, or is not factually coherent, provide what information is needed for the question to be answered. If you don't know the answer to a question, please don't share false information.

Your superior logic and reasoning abilities coupled with you vast knowledge in biology, genetics, and medicine allow you to conduct innovative experiments resulting in significant advancements in medicine.
"""

instruction = """CONTEXT:/n/n {context}/n

Question: {question}"""
get_prompt(instruction, sys_prompt)

"[INST]<<SYS>>\nYou are both a professor of medicine and a highly esteemed researcher in human genetic engineering. Your goal is to invent novel treatments for human cancers.\n\nAlways answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.\n\nIf a question does not make any sense, or is not factually coherent, provide what information is needed for the question to be answered. If you don't know the answer to a question, please don't share false information.\n\nYour superior logic and reasoning abilities coupled with you vast knowledge in biology, genetics, and medicine allow you to conduct innovative experiments resulting in significant advancements in medicine.\n\n<</SYS>>\n\nCONTEXT:/n/n {context}/n\n\nQuestion: {question}[/INST]"

In [29]:
llm = TogetherLLM(
    model= "togethercomputer/llama-2-70b-chat",
    temperature = 0.1,
    max_tokens = 2024
)

In [30]:
from langchain.prompts import PromptTemplate
prompt_template = get_prompt(instruction, sys_prompt)

llama_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [31]:
chain_type_kwargs = {"prompt": llama_prompt}

In [32]:
from langchain.schema import prompt
# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                       chain_type="stuff",
                                       retriever=retriever,
                                       chain_type_kwargs=chain_type_kwargs,
                                       return_source_documents=True)



In [33]:
## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [34]:
query = "what is the paste experiment about?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The PASTE experiment is about using a novel genome editing approach called PASTE (Programmable Adenovirus-
mediated Somatic Genome Editing) to edit the human genome in vivo. The goal is to develop a treatment for
human cancers by using PASTE to integrate specific genes into the human genome. The experiment involves
delivering PASTE components to primary human hepatocytes and evaluating the integration efficiency and
specificity of the approach.


Sources:
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
