In [1]:
# Run python-dotenv's loader first, before the os.getenv lookups further down
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import os
import re

import time 
import glob
import json
import shutil

from pprint import pprint
from tqdm.notebook import tqdm

In [3]:
# Service credentials are read from the process environment; a variable that
# is not set yields None rather than raising.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_KEY = os.environ.get("PINECONE_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV")

In [4]:
import openai
import pinecone
import langchain

from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI


  from tqdm.autonotebook import tqdm


In [5]:
# Collect every .txt file directly under the data directory, in sorted order,
# then show the list followed by how many files were found
datadir = "data"
filenames = sorted(glob.glob(os.path.join(datadir, "*.txt")))
display(filenames, len(filenames))

['data/Corona-Figueroa et al. - 2022 - MedNeRF Medical Neural Radiance Fields for Recons.txt',
 'data/Fridovich-Keil et al. - 2022 - Plenoxels Radiance Fields without Neural Networks.txt',
 'data/Ge et al. - 2022 - X-CTRSNet 3D cervical vertebra CT reconstruction .txt',
 'data/Jiang et al. - 2021 - Reconstruction of 3D CT from A Single X-ray Projec.txt',
 'data/Lin et al. - 2021 - BARF Bundle-Adjusting Neural Radiance Fields.txt',
 'data/Loyen et al. - 2023 - Patient-specific three-dimensional image reconstru.txt',
 'data/Mildenhall et al. - 2020 - NeRF Representing Scenes as Neural Radiance Field.txt',
 'data/Muller et al. - 2022 - Instant neural graphics primitives with a multires.txt',
 'data/Ratul et al. - 2021 - CCX-rayNet A Class Conditioned Convolutional Neur.txt',
 'data/Shen et al. - 2019 - Harnessing the power of deep learning for volumetr.txt',
 'data/Shen et al. - 2019 - Patient-specific reconstruction of volumetric comp.txt',
 'data/Shen et al. - 2022 - Novel-view X-ray pr

20

In [6]:
# Build one TextLoader per file, then concatenate everything the loaders
# return into a single flat list of documents
loaders = list(map(TextLoader, filenames))
docs = []
for loader in loaders:
    docs += loader.load()
docs

[Document(page_content='2022 44th Annual International Conference of the IEEE Engineering in Medicine \\& Biology Society (EMBC) Scottish Event Campus, Glasgow, UK, July 11-15, 2022 \n\nMedNeRF: Medical Neural Radiance Fields for Reconstructing 3D-aware CT-Projections from a Single X-ray \n\nAbstract{\\textemdash} Computed tomography (CT) is an effective medical imaging modality, widely used in the field of clinical medicine for the diagnosis of various pathologies. Advances in Multidetector CT imaging technology have enabled additional functionalities, including generation of thin slice multiplanar cross-sectional body imaging and 3D reconstructions. However, this involves patients being exposed to a considerable dose of ionising radiation. Excessive ionising radiation can lead to deterministic and harmful effects on the body. This paper proposes a Deep Learning model that learns to reconstruct CT projections from a few or even a single-view X-ray. This is based on a novel architectur

In [7]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200)
# and report how many chunks were produced
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)
len(texts)

875

In [8]:
# Embedding client, authenticated with the OpenAI key read from the environment
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Initialise the Pinecone client with the key/environment read from the environment
pinecone.init(environment=PINECONE_ENV, api_key=PINECONE_KEY)

index_name = "document"

# Fail fast with an actionable message: every later step in this cell and the
# following ones uses this index, so merely printing a warning and carrying on
# only postpones the failure to a less obvious place.
if index_name not in pinecone.list_indexes():
    raise RuntimeError(
        f"Pinecone index {index_name!r} does not exist; "
        "create it first or correct index_name."
    )

# Handle to the existing index; the stats are the cell's displayed output
index = pinecone.Index(index_name)
index.describe_index_stats()
# index.delete(deleteAll='true', namespace='')

{'dimension': 1536,
 'index_fullness': 0.014,
 'namespaces': {'': {'vector_count': 1400}},
 'total_vector_count': 1400}

In [9]:
# Embed the chunks and upsert them into the Pinecone index.
# from_documents is used (instead of from_texts on page_content alone) so each
# chunk's metadata travels with its vector; passing only the raw text discards
# it, leaving nothing for later metadata['source'] lookups to find.
# NOTE(review): no ids are passed, so re-running this cell appears to add
# another copy of every chunk -- confirm, and clear the index before re-ingesting.
docsearch = Pinecone.from_documents(texts, embeddings, index_name=index_name)
type(docsearch)

langchain.vectorstores.pinecone.Pinecone

In [10]:
# Language model used by the QA chains below (temperature fixed at 0)
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)

In [11]:
# Question to ask about the indexed papers
query = "What is NeRF?"

# Build the "stuff" QA chain on top of the LLM
chain = load_qa_chain(llm, chain_type="stuff")

# Fetch the chunks most similar to the question from the vector store.
# NOTE: this rebinds `docs`, which until now held the loaded papers consumed by
# the text-splitter cell; re-run the loading cell before re-running the splitter.
docs = docsearch.similarity_search(query)

# Answer the question using the retrieved chunks as context
response = chain.run(question=query, input_documents=docs)

In [12]:
# Pretty-print the value returned by chain.run for the query above
pprint(response)

(' NeRF is a neural scene representation that combines a single-scene '
 'optimization setting with a neural scene representation capable of '
 'representing complex scenes much more efficiently than a discrete 3D voxel '
 'grid.')


In [13]:
# Helper function to process the response from the QA chain
# and isolate result and source docs and page numbers
def parse_response(response):
    """Print the answer of a QA-chain response followed by its sources.

    Args:
        response: Mapping with a ``'result'`` entry (the answer) and a
            ``'source_documents'`` entry (an iterable of objects exposing a
            ``metadata`` mapping).

    Returns:
        None. Everything is written to standard output: the answer, a
        ``Sources:`` header, then one line per source document.

    The ``page #:`` suffix is printed only when a document's metadata has a
    ``'page'`` entry, and a placeholder is printed when ``'source'`` is
    missing, so documents without those keys no longer raise ``KeyError``.
    """
    print(response['result'])
    print('\n\nSources:')
    for document in response["source_documents"]:
        # Tolerate documents whose metadata is missing or empty
        metadata = document.metadata or {}
        source = metadata.get('source', 'unknown source')
        if 'page' in metadata:
            print(source, "page #:", metadata['page'])
        else:
            print(source)

# Expose the Pinecone vector store as a retriever.
# NOTE(review): include_metadata / metadata_key are passed through unchanged;
# confirm that as_retriever in the installed LangChain version acts on them.
retriever = docsearch.as_retriever(metadata_key='source', include_metadata=True)

# RetrievalQA chain ("stuff" chain type) over that retriever;
# return_source_documents=True is set so the sources come back with the answer
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
)

In [14]:
# Ask a second question through the RetrievalQA chain; this rebinds both
# `query` and `response` from the earlier cells
query = "How are NeRF and iNeRF different?"
response = qa_chain(query)


In [15]:
# Inspect the RetrievalQA response: its type first, then its full contents
pprint(type(response))
pprint(response)

<class 'dict'>
{'query': 'How are NeRF and iNeRF different?',
 'result': ' NeRF optimizes a set of given camera poses and image observations '
           'to recover a scene or object parameterized by Θ, while iNeRF '
           'solves the inverse problem of recovering the camera pose T given '
           'the weights Θ and the image I as input.',
 'source_documents': [Document(page_content='and pixels in an observed image. In our experiments, we first study 1) how to sample rays during pose refinement for iNeRF to collect informative gradients and 2) how different batch sizes of rays affect iNeRF on a synthetic dataset. We then show that for complex real-world scenes from the LLFF dataset, iNeRF can improve NeRF by estimating the camera poses of novel images and using these images as additional training data for NeRF. Finally, we show iNeRF can perform category-level object pose estimation, including object instances not seen during training, with RGB images by inverting a NeRF model