In [20]:
import os
import together
import shutil
import logging
import time
from typing import Any, Dict, List, Mapping, Optional

from pydantic import Extra, Field, root_validator, model_validator

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings





In [11]:


# set your API key
os.environ["TOGETHER_API_KEY"] = ""
together.api_key = os.environ["TOGETHER_API_KEY"]

# list available models and descriptons
models = together.Models.list()
print(f"{len(models)} models available")

# # print the first 10 models on the menu
# model_names = [model_dict['name'] for model_dict in models]
# model_names[:100]

89 models available


In [12]:
# set your API key
os.environ["TOGETHER_API_KEY"] = ""
together.api_key = os.environ["TOGETHER_API_KEY"]

# list available models and descriptons
models = together.Models.list()
print(f"{len(models)} models available")

# WizardLM/WizardLM-70B-V1.0

# print the first 10 models on the menu
model_names = [model_dict['name'] for model_dict in models]
models[:100]

89 models available


[{'modelInstanceConfig': {'appearsIn': [], 'order': 0},
  '_id': '64e831864b84b428b8d322d0',
  'name': 'Austism/chronos-hermes-13b',
  'display_name': 'Chronos Hermes (13B)',
  'display_type': 'chat',
  'description': 'This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a narrative.',
  'license': 'other',
  'creator_organization': 'Austism',
  'hardware_label': '2x A100 80GB',
  'num_parameters': 13000000000,
  'show_in_playground': True,
  'isFeaturedModel': True,
  'context_length': 2048,
  'config': {'stop': ['</s>'],
   'prompt_format': '### Instruction:\n{prompt}\n### Response:\n'},
  'pricing': {'input': 100, 'output': 100, 'hourly': 0},
  'created_at': '2023-08-24T17:08:25.379Z',
  'update_at': '2023-08-24T17:08:25.379Z',
  'access': '',
  'link': '',
  'descriptionLink': '',
  'depth': {'num_asks': 2,
   'num_bids': 0,
   'num_running': 0,
   'asks': {'0xFA5C96b20a10cAC5d21E0

In [13]:
#together.Models.start("togethercomputer/llama-2-70b-chat")

In [14]:

class TogetherLLM(LLM):
    """Together large language models."""

    model: str = "togethercomputer/llama-2-70b-chat"
    """model endpoint to use"""

    together_api_key: str = os.environ["TOGETHER_API_KEY"]
    """Together API key"""

    temperature: float = 0.7
    """What sampling temperature to use."""

    max_tokens: int = 512
    """The maximum number of tokens to generate in the completion."""

    class Config:
        extra = Extra.forbid

#     @model_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is set."""
        api_key = get_from_dict_or_env(
            values, "together_api_key", "TOGETHER_API_KEY"
        )
        values["together_api_key"] = api_key
        return values

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        **kwargs: Any,
    ) -> str:
        """Call to Together endpoint."""
        together.api_key = self.together_api_key
        output = together.Complete.create(prompt,
                                          model=self.model,
                                          max_tokens=self.max_tokens,
                                          temperature=self.temperature,
                                          )
        text = output['output']['choices'][0]['text']
        return text


/tmp/ipykernel_76196/994963610.py:17: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  extra = Extra.forbid


In [15]:
################################################################################
# split documents into chunks, create embeddings, store embeddings in chromaDB #
################################################################################

chunk_size = 1000
chunk_overlap=200

loader = DirectoryLoader('/home/austin/code/ai/RAGS/stage_data', glob="./*.pdf", loader_cls=PyPDFLoader)

documents = loader.load()

print(f' number of documents {len(documents)}')

#splitting the text into
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
texts = text_splitter.split_documents(documents)

print(f' number of chunks {len(texts)}')


# Instantiate embeddings model
model_name = "BAAI/bge-base-en"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
#     model_kwargs={'device': 'cuda'},
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs
)

# create db
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk

t1 = time.perf_counter()

persist_directory = 'db'

# persist_directory = '/home/austin/code/ai/RAGS/db'
## Here is the nmew embeddings being used
embedding = model_norm

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

t2 = time.perf_counter()
print(f'time taken to embed {len(texts)} chunks:',t2-t1)



##############################################################
# move pdf files from staging directory to archive directory #
##############################################################
def list_files(directory):
    """Return a list of filenames in the given directory."""
    return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]

src_dir = '/home/austin/code/ai/RAGS/stage_data'
dst_dir = '/home/austin/code/ai/RAGS/data'

# List files in both directories before moving

# Check if the destination directory exists, if not create it
if not os.path.exists(dst_dir):
    os.makedirs(dst_dir)

# List all files in the source directory
files = list_files(src_dir)

# Move each file to the destination directory
for file in files:
    src_file_path = os.path.join(src_dir, file)
    dst_file_path = os.path.join(dst_dir, file)
    shutil.move(src_file_path, dst_file_path)


print(f"Moved {len(files)} files from {src_dir} to {dst_dir}.")
print(f"Files moved: {files}")
print("\n".join(list_files(src_dir)))

 number of documents 14
 number of chunks 82
time taken to embed 82 chunks: 29.00968432202353
Moved 1 files from /home/austin/code/ai/RAGS/stage_data to /home/austin/code/ai/RAGS/data.
Files moved: ['biosensors-12-00617.pdf']



In [16]:


# persist_directory = 'db'

# embedding = HuggingFaceBgeEmbeddings(
#     model_name="BAAI/bge-base-en",
#     encode_kwargs = {'normalize_embeddings': True}
# )



# vectordb = Chroma.from_documents(documents=texts,
#                                  embedding=embedding,
#                                  persist_directory=persist_directory)





In [29]:
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
# retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 7})
# memory = ConversationBufferMemory(return_messages=True)
memory = ConversationBufferMemory()



# print("Memory: " + str(memory))

In [18]:
## Default LLaMA-2 prompt style
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """You are both a professor of medicine and a highly esteemed researcher in human genetic engineering. Your goal is to invent novel treatments for human cancers.

Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.

If a question does not make any sense, or is not factually coherent, provide what information is needed for the question to be answered. If you don't know the answer to a question, please don't share false information.

Your superior logic and reasoning abilities coupled with you vast knowledge in biology, genetics, and medicine allow you to conduct innovative experiments resulting in significant advancements in medicine.
"""

instruction = """CONTEXT:/n/n {context}/n

Question: {question}"""


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

get_prompt(instruction, DEFAULT_SYSTEM_PROMPT)

"[INST]<<SYS>>\nYou are both a professor of medicine and a highly esteemed researcher in human genetic engineering. Your goal is to invent novel treatments for human cancers.\n\nAlways answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.\n\nIf a question does not make any sense, or is not factually coherent, provide what information is needed for the question to be answered. If you don't know the answer to a question, please don't share false information.\n\nYour superior logic and reasoning abilities coupled with you vast knowledge in biology, genetics, and medicine allow you to conduct innovative experiments resulting in significant advancements in medicine.\n\n<</SYS>>\n\nCONTEXT:/n/n {context}/n\n\nQuestion: {question}[/INST]"

In [39]:
from langchain.prompts import PromptTemplate
from langchain.schema import prompt

llm = TogetherLLM(
    model= "togethercomputer/llama-2-70b-chat",
    temperature = 0.1,
    max_tokens = 2024
)


prompt_template = get_prompt(instruction, DEFAULT_SYSTEM_PROMPT)

llama_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

chain_type_kwargs = {"prompt": llama_prompt}

# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                       chain_type="stuff",
                                       memory=memory,
                                       retriever=retriever,
                                       chain_type_kwargs=chain_type_kwargs,
#                                        return_source_documents=True
                                      )


# qa_chain = ConversationalRetrievalChain.from_llm(llm=llm,
#                                            chain_type="stuff",
#                                            retriever=retriever, 
#                                            memory=memory,
#                                            chain_type_kwargs=chain_type_kwargs,
#                                            return_source_documents=True,)

## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

# def process_llm_response(llm_response):
#     print(wrap_text_preserve_newlines(llm_response['result']))
#     print('\n\nSources:')
#     for source in llm_response["source_documents"]:
#         print(source.metadata['source'])


def wrap_text_preserve_newlines(text):
    return text


def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response))


In [40]:
query = "Provide a realistic mock Genetic profile of a cancer patient? This genetic profile should include all necessary genetic nformation to develop a personalized treatment plan for the patient."
llm_response = qa_chain(query)
process_llm_response(llm_response)

{'query': 'Provide a realistic mock Genetic profile of a cancer patient? This genetic profile should include all necessary genetic nformation to develop a personalized treatment plan for the patient.', 'history': "Human: Provide a realistic mock Genetic profile of a cancer patient? This genetic profile should include all necessary genetic nformation to develop a personalized treatment plan for the patient.\nAI:  Sure, here's a mock genetic profile for a cancer patient:\n\nPatient Information:\n\nName: John Doe\nAge: 55\nGender: Male\n\nTumor Information:\n\nType: Colorectal cancer\nStage: III\nLocation: Rectum\n\nGenetic Information:\n\n1. TP53 mutation: The TP53 gene is a tumor suppressor gene that is commonly mutated in many types of cancer. The patient's tumor has a missense mutation in the TP53 gene, which means that the gene is producing a faulty protein that cannot function properly.\n2. KRAS mutation: The KRAS gene is a gene that regulates cell signaling pathways. The patient's 

In [42]:
query = "what is the patient's name, and how old are they?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

{'query': "what is the patient's name, and how old are they?", 'history': "Human: Provide a realistic mock Genetic profile of a cancer patient? This genetic profile should include all necessary genetic nformation to develop a personalized treatment plan for the patient.\nAI:  Sure, here's a mock genetic profile for a cancer patient:\n\nPatient Information:\n\nName: John Doe\nAge: 55\nGender: Male\n\nTumor Information:\n\nType: Colorectal cancer\nStage: III\nLocation: Rectum\n\nGenetic Information:\n\n1. TP53 mutation: The TP53 gene is a tumor suppressor gene that is commonly mutated in many types of cancer. The patient's tumor has a missense mutation in the TP53 gene, which means that the gene is producing a faulty protein that cannot function properly.\n2. KRAS mutation: The KRAS gene is a gene that regulates cell signaling pathways. The patient's tumor has a G12C mutation in the KRAS gene, which means that the gene is sending abnormal signals to the cancer cells, promoting their grow

In [33]:
query = "what is Doody Review Services?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 Doody Review Services is a service that provides peer review and rating of medical apps, including those
related to cancer diagnosis and treatment. The service is named after Dr. Doody, a radiologist who developed
the first mammography app for the iPhone. The service aims to provide healthcare professionals with reliable
and unbiased information about medical apps, helping them to make informed decisions about which apps to use
in their practice.


Sources:
/home/austin/code/ai/RAGS/stage_data/Alison M.R. (ed.) - The Cancer Handbook-Wiley (2007).pdf
/home/austin/code/ai/RAGS/stage_data/Alison M.R. (ed.) - The Cancer Handbook-Wiley (2007).pdf
/home/austin/code/ai/RAGS/stage_data/Alison M.R. (ed.) - The Cancer Handbook-Wiley (2007).pdf
/home/austin/code/ai/RAGS/stage_data/Alison M.R. (ed.) - The Cancer Handbook-Wiley (2007).pdf
/home/austin/code/ai/RAGS/stage_data/Zodwa Dlamini - Artificial Intelligence and Precision Oncology_ Bridging Cancer Research and Clinical Decision Support-Sprin

 The PASTE experiment is about using a novel genome editing approach called PASTE (Programmable Adenovirus-
mediated Somatic Genome Editing) to edit the human genome in vivo. The goal is to develop a treatment for
human cancers by using PASTE to integrate specific genes into the human genome. The experiment involves
delivering PASTE components to primary human hepatocytes and evaluating the integration efficiency and
specificity of the approach.


Sources:
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
/home/austin/code/ai/RAGS/data/PASTE.pdf
