In [1]:
import os
from typing import Optional, TypedDict

# Third-party (original relative import order preserved)
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.docstore import InMemoryDocstore
from langchain_community.vectorstores import FAISS, InMemoryVectorStore
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
import faiss
from sentence_transformers import SentenceTransformer
import torch

from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
from deepseek_vl.utils.io import load_pil_images


# Location of the source PDF to index. The default is the original author's
# machine-specific path; set the PDF_STORE environment variable to point the
# notebook at a different copy without editing this cell.
PDF_STORE = os.environ.get(
    "PDF_STORE",
    r"/Users/revathsankar/Documents/Adv Big AI/Midterm/Pub1564webNew-74666420.pdf",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the PDF in "page" mode.
# Image extraction is currently switched off; the options that were tried are
#   images_inner_format='markdown-img', images_parser=RapidOCRBlobParser()
loader = PyPDFLoader(PDF_STORE, mode="page")
docs = loader.load()
docs

[Document(metadata={'producer': 'Adobe PDF Library 10.0.1', 'creator': 'Adobe InDesign CS6 (Windows)', 'creationdate': '2014-09-04T10:08:27+02:00', 'author': 'IAEA', 'keywords': 'Diagnostic Radiology Physics: A Handbook for Teachers and Students', 'moddate': '2015-03-11T11:20:36+01:00', 'subject': 'STI/PUB/1564 Diagnostic Radiology Physics', 'title': 'Diagnostic Radiology Physics: A Handbook for Teachers and Students', 'trapped': '/False', 'source': '/Users/revathsankar/Documents/Adv Big AI/Midterm/Pub1564webNew-74666420.pdf', 'total_pages': 710, 'page': 0, 'page_label': 'I'}, page_content='@\nDiagnostic\nRadiology \nPhysics\nA Handbook for  \nTeachers and  \nStudents\nD.R. Dance\nS. Christofides\nA.D.A. Maidment\nI.D. McLean\nK.H. Ng\nTechnical Editors\nDiagnostic Radiology Physics\nA Handbook for Teachers and Students\n1\nInternational Atomic Energy Agency\nVienna\nISBN 978–92–0–131010–1\nThis publication provides a comprehensive review of topics relevant to \ndiagnostic radiology ph

In [3]:
# Cut the loaded pages into 500-character chunks that overlap by 200
# characters; add_start_index=True asks the splitter to record each chunk's
# start offset.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200, add_start_index=True)
all_splits = text_splitter.split_documents(docs)
print(f"{len(all_splits)} chunks generated")

4717 chunks generated


In [5]:
# Hugging Face model ids: the vision-language chat model (loaded in a later
# cell via model_path) and the text-embedding model used for retrieval.
model_path = "deepseek-ai/deepseek-vl-7b-chat"
embed_model = "BAAI/bge-large-en-v1.5"
# NOTE(review): `model` is not referenced by any other visible cell, and the
# same checkpoint is loaded again by HuggingFaceBgeEmbeddings below -- confirm
# whether this second copy is still needed.
model = SentenceTransformer(embed_model)

# Embedding wrapper: run on CPU and normalize the produced vectors.
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=True)
embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=embed_model,
)

# Flat L2 FAISS index. Its dimensionality is taken from the length of the
# embedding of a throwaway query.
index = faiss.IndexFlatL2(len(embeddings.embed_query("what is the weather?")))

# Start from an empty store (no documents, empty id mapping) ...
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

# ... then embed and insert every chunk, keeping the returned ids.
doc_ids = vector_store.add_documents(all_splits)

In [6]:
# Persist the populated vector store to the local 'radiology_faiss_index'
# folder; retrieve_context() reloads it from that same folder.
vector_store.save_local('radiology_faiss_index')

In [None]:
def retrieve_context(query, k=4, index_dir='radiology_faiss_index'):
    """Return the stored chunks most similar to ``query``, with their scores.

    Args:
        query: Natural-language question to search the index with.
        k: Number of results to return. Defaults to 4, which is the
            library default the original call relied on.
        index_dir: Folder the FAISS store was saved to with ``save_local``.

    Returns:
        The result of ``FAISS.similarity_search_with_score``: a list of
        ``(Document, score)`` pairs. The index in this notebook is built as
        ``IndexFlatL2``, so the score is presumably an L2 distance (lower means
        closer) -- confirm against the installed LangChain version.
    """
    # NOTE(security): allow_dangerous_deserialization=True permits unpickling
    # the saved store, which can execute arbitrary code. Only point index_dir
    # at folders this notebook created itself.
    # NOTE(review): the store is re-read from disk on every call; reuse a
    # loaded store if this is called in a loop.
    store = FAISS.load_local(
        index_dir,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
    return store.similarity_search_with_score(query, k=k)

: 

In [None]:
# Build the DeepSeek-VL chat processor from the checkpoint named by model_path
# and reuse its tokenizer for generation and decoding.
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

# Load the multimodal language model (the checkpoint ships custom code, hence
# trust_remote_code=True), cast it to bfloat16, keep it on the CPU and switch
# it to evaluation mode.
vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
)
vl_gpt = vl_gpt.to(torch.bfloat16)
vl_gpt = vl_gpt.cpu()
vl_gpt = vl_gpt.eval()

class IMGQAPrompts:
    """Namespace of conversation builders for the image question-answering flow.

    This is a plain class rather than a ``TypedDict``: a ``TypedDict`` may not
    declare methods, and instantiating one yields an ordinary ``dict`` that
    does not carry them. Both builders are static, so they can be called as
    ``IMGQAPrompts.get_initial_analysis(...)`` or through an instance.
    """

    @staticmethod
    def get_initial_analysis(img_path) -> list:
        """Build the first-pass conversation asking for a description of the image.

        Args:
            img_path: Path of the image to attach; it is converted to ``str``.

        Returns:
            A two-message list: the user turn (prompt plus ``images`` list)
            followed by an empty assistant turn for the model to fill in.
        """
        return [
            {
                "role": "User",
                "content": "<image_placeholder>Describe each stage of this image.",
                "images": [str(img_path)],
            },
            {
                "role": "Assistant",
                "content": "",
            },
        ]

    @staticmethod
    def extend_analysis(query: str, context: str, img_path: str) -> list:
        """Build a follow-up conversation grounded in the image and retrieved context.

        Args:
            query: The user's question.
            context: Retrieved text inserted into the prompt after ``Context:``.
            img_path: Path of the image to attach; it is converted to ``str``.

        Returns:
            A two-message list: the user turn (instruction, question, context
            and ``images`` list) followed by an empty assistant turn.
        """
        return [
            {
                "role": "User",
                "content": f"<image_placeholder>Reference this image when responding to user questions as well as the context provided, look at no other sources. Question: {query}\n\nContext: {context}",
                "images": [str(img_path)],
            },
            {
                "role": "Assistant",
                "content": "",
            },
        ]



def analyze_image(conversation: list, max_new_tokens: int = 512) -> tuple:
    """Run the vision-language model on one image conversation.

    Args:
        conversation: List of message dicts such as those built by
            ``IMGQAPrompts`` (``role``/``content`` keys, with an ``images``
            list of image paths on the user turn).
        max_new_tokens: Upper bound on generated tokens. Defaults to 512,
            the value that was previously hard-coded.

    Returns:
        A ``(prompt, answer)`` tuple: the first entry of the processor's
        ``sft_format`` output rendered as a string, and the generated tokens
        decoded with special tokens skipped.
    """
    # Load the images referenced by the conversation and batch the inputs on
    # the model's device.
    pil_images = load_pil_images(conversation)
    prepare_inputs = vl_chat_processor(
        conversations=conversation,
        images=pil_images,
        force_batchify=True
    ).to(vl_gpt.device)

    # Inference only: disable autograd so the embedding pass below does not
    # record a graph that is never used.
    with torch.no_grad():
        # run image encoder to get the image embeddings
        inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)

        # run the model to get the response (sampling disabled)
        outputs = vl_gpt.language_model.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=prepare_inputs.attention_mask,
            pad_token_id=tokenizer.eos_token_id,
            bos_token_id=tokenizer.bos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            use_cache=True
        )

    answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
    return (f"{prepare_inputs['sft_format'][0]}", answer)
