In [1]:
!pip install langchain
!pip install torch
!pip install sentence_transformers
!pip install faiss-cpu
!pip install huggingface-hub
!pip install pypdf
!pip -q install accelerate
!pip install llama-cpp-python
!pip -q install git+https://github.com/huggingface/transformers

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader

In [3]:
# Read every PDF in the data directory into a list of documents
pdf_dir = "/content/sample_data/Data/"
loader = PyPDFDirectoryLoader(pdf_dir)
data = loader.load()

In [4]:
# Preview only the first documents; echoing the whole list floods the output.
data[:2]


[Document(page_content="None:\nNone:\nHRCT - Basic Interpretation:\nRobin Smithuis, Otto van Delden and Cornelia Schaefer-Prokop\nRadiology Department of the Rijnland Hospital, Leiderdorp and the Academical Medical Centre, Amsterdam, the Netherlands:\nPublicationdate 2006-12-24 / Update 2022-03-19 In this article a practical approach is given for the interpretation of HRCT examinations. We will discuss the following subjects: Dif\nferential diagnosis of interstitial lung diseases Introduction\nIntroduction:\nSecundary lobules. The centrilobular artery (in blue: oxygen-poor blood) and the terminal bronchiole run in the center. Lymphatics and veins (in red: oxygen-rich blood) run within t\nhe interlobular septa\nAnatomy of Secondary lobule:\nKnowledge of the lung anatomy is essential for understanding HRCT. The interpretation of interstitial lung diseases is based on the type of involvement of the secondary lobule. The \nsecondary lobule is the basic anatomic unit of pulmonary structure 

In [5]:
# Splitter configured for chunks of at most 1000 characters with a 20-character overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
# Cut the loaded documents into chunks
text_chunks = text_splitter.split_documents(data)


In [6]:
# Number of chunks produced by the splitter
len(text_chunks)

3468

In [7]:
# Inspect a single chunk (index 2) to sanity-check the split
text_chunks[2]

Document(page_content='al part of the secundary lobule. It is usually the site of diseases, that enter the lung through the airways ( i.e. hypersensitivity pneumonitis, respiratory bronchiolitis, centrilo\nbular emphysema ). Perilymphatic area is the peripheral part of the secundary lobule. It is usually the site of diseases, that are located in the lymphatics of in the interlobular s\nepta ( i.e. sarcoid, lymphangitic carcinomatosis, pulmonary edema). These diseases are usually also located in the central network of lymphatics that surround the bronchovascular bu\nndle.\nBasic Interpretation:\nA structured approach to interpretation of HRCT involves the following questions: Typical UIP with honeycombing and traction bronchiectasis in a patient with idiopathic pulmonary fi\nbrosis (IPF) These morphologic findings have to be combined with the history of the patient and important clinical findings. When we study patients with HRCT, we have to realize tha', metadata={'source': '/content/s

In [8]:
# Embedding model used to turn text into vectors
embedding_model_name = "thenlper/gte-large"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [9]:
# Embed the document chunks and index them in a FAISS vector store
vector_store = FAISS.from_documents(
    text_chunks,
    embedding=embeddings,
)

In [10]:
#connecting to google drive
from google.colab import drive

In [11]:
# Mount Google Drive at /content/drive; the model file loaded later lives under MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Load the GGUF model file from Drive with LlamaCpp
zephyr_gguf_path = "/content/drive/MyDrive/models/zephyr-7b-beta.Q4_K_M.gguf"
llm = LlamaCpp(
    model_path=zephyr_gguf_path,
    n_ctx=4096,
    temperature=0.75,
    top_p=1,
    streaming=True,
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 
Model metadata: {'tokenizer.ggml.padding_token_id': '2', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.rope.freq_base': '10000.000000', 'llama.context_length': '32768', 'general.name': 'huggingfaceh4_zephyr-7b-beta', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '14336', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.dimension_count': '128', 'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'llama.block_count': '32', 'llama.attention.head_count_kv': '8', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}


In [13]:
# Retrieval QA chain: the retriever returns the top 2 chunks for the "stuff" chain
top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_k_retriever,
)

In [14]:
# Example question used to try out the QA chain
query = "What is the radiological presentation of Glioma?"

In [15]:
# Run the chain on the example question; the answer is read from result['result'] below
result = qa.invoke(query)

In [16]:
# Show the 'result' entry of the chain output
result['result']

' The radiological presentation of glioma varies based on several factors such as location, grade, and histology. However, in general, gliomas are often of low signal intensity on T1WI and high signal intensity on T2WI. They may also have contrast enhancement and demonstrate restricted diffusion on DWI. Radiological evaluation of gliomas involves assessment of the tissue characteristics like calcifications, fat, cystic components, and signal intensity on various sequences to differentiate them from other conditions that can simulate tumors such as abscesses, multiple sclerosis plaques, vascular malformations, aneurysms, or infarcts with luxury perfusion. The incidence of CNS tumors is approximately one-third metastatic lesions, one-third gliomas, and one-third of non-glial origin.'

In [17]:
pip install --upgrade gradio



In [18]:
import gradio as gr
def qa_invoke(query):
    """Answer a question with the retrieval QA chain.

    Args:
        query: Question text submitted from the Gradio text box.

    Returns:
        The chain's answer string (the 'result' entry of the chain output),
        or a short prompt asking for input when the question is blank.
    """
    # A blank submission would still trigger retrieval and a full LLM
    # generation, so short-circuit it instead.
    if not query or not query.strip():
        return "Please enter a question."
    return qa.invoke(query)['result']

# Gradio front end: one text box for the question, one for the answer
iface = gr.Interface(
    title="Radiology Q & A",
    description="Ask a question",
    fn=qa_invoke,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
)

# Start the web UI
iface.launch()



Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://00fcb436afb8280d20.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


