In [36]:
import os

base_dir = base_dir = os.getcwd()  
parent_dir = os.path.dirname(base_dir)  


file_path = os.path.join(parent_dir, "artifacts", "research_paper_yolo.pdf")
output_path = os.path.join(parent_dir, "artifacts", "images")



In [37]:
from unstructured.partition.pdf import partition_pdf
import os


chunks = partition_pdf(
    filename=file_path,
    infer_table_structure=True,            # extract tables
    strategy="hi_res",                     # mandatory to infer tables

    extract_image_block_types=["Image","Table"],   # Add 'Table' to list to extract image of tables
    image_output_dir_path= output_path,   # if None, images and tables will saved in base64

    extract_image_block_to_payload=True,   # if true, will extract base64 for API usage

    chunking_strategy="by_title",          # or 'basic'
    max_characters=5000,                  # defaults to 500
    combine_text_under_n_chars=2000,       # defaults to 0
    new_after_n_chars=4000,

    # extract_images_in_pdf=True,          # deprecated
)



In [38]:
print(chunks)

[<unstructured.documents.elements.CompositeElement object at 0x320754a40>, <unstructured.documents.elements.CompositeElement object at 0x320754260>, <unstructured.documents.elements.CompositeElement object at 0x320757fe0>, <unstructured.documents.elements.CompositeElement object at 0x320754080>, <unstructured.documents.elements.CompositeElement object at 0x320777110>, <unstructured.documents.elements.CompositeElement object at 0x31f64c290>, <unstructured.documents.elements.CompositeElement object at 0x3203cd8e0>, <unstructured.documents.elements.CompositeElement object at 0x30cfb9d60>, <unstructured.documents.elements.CompositeElement object at 0x3357d6060>, <unstructured.documents.elements.CompositeElement object at 0x320754410>, <unstructured.documents.elements.CompositeElement object at 0x31f389cd0>, <unstructured.documents.elements.CompositeElement object at 0x1744c4c20>, <unstructured.documents.elements.CompositeElement object at 0x17ccfb080>, <unstructured.documents.elements.Comp

In [39]:
from dotenv import load_dotenv
import os

# Load variables from .env
load_dotenv()

True

In [40]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [41]:
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE_API_KEY)

In [43]:
from pinecone import ServerlessSpec

# Create a new index with chosen dimension and metric
index_name = "rag-demo"
dimension = 1536  # Dimension should match your embedding model
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    index = pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    print(f"Index '{index_name}' created!")
else:
    # Connect to existing index
    index = pc.Index(index_name)
    print(f"Index '{index_name}' already exists and is ready.")


Index 'rag-demo' already exists and is ready.


In [44]:
print(OPENAI_API_KEY)

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


In [61]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY
)

for i, chunk in enumerate(chunks):
    if "CompositeElement" in str(type(chunk)):
        chunk_elements = chunk.metadata.orig_elements
        
        # Combine all elements in this chunk into a single string
        combined_content = ""
        for element in chunk_elements:
            if "Text" in str(type(element)):
                combined_content += element.text + "\n"
            elif "Table" in str(type(element)):
                combined_content += element.metadata.text_as_html + "\n"
            #elif "Image" in str(type(element)):
                # Optional: store a placeholder or base64 if needed
            #    combined_content += "[IMAGE]\n"

        # Generate a single embedding for the entire chunk
        embedded_chunk = embeddings.embed_documents([combined_content])[0]

        # Upsert into Pinecone with one ID per chunk
        chunk_id = f"chunk-{i}"
        index.upsert([{
            "id": chunk_id,
            "values": embedded_chunk,
            "metadata": {
                "chunk_id": i,
                "type": "composite",
                "content": combined_content,
                "text": combined_content
            }
        }])

In [62]:
query_text = "what is YOLO"
query_vector = embeddings.embed_query(query_text)

# Step 2: Query Pinecone index
results = index.query(
    vector=query_vector,
    top_k=5,  # number of most similar results
    include_metadata=True,
    include_values=True
)

# Step 3: Print results
for match in results['matches']:
    print(f"ID: {match['id']}")
    print(f"Content: {match['metadata'].get('content', '')}")
    print(f"Score: {match['score']}")
    print(f"Metadata: {match['metadata']}")
    print("----")

ID: chunk-12
Content: This paper gives us a review of the YOLO versions. Here we draw the following remarks. First, the YOLO version has a lot of differences. However, they still have some features in common. Hence, they are still similar. Second. The YOLO versions are still very new, have a lot of room for future research. Especially for scenario implementations.
There is still room for future improvement. This paper can focus more on the implementations comparing, such as scenario analysis. Further, the research for YOLO V1 is very limited in this paper. For example, in the trend subsection, both the figure and tabular have ignored YOLO V1. Future research can do better on this point.
This research has been partially supported by grants from the National Natural Science Foundation of China (Nos. 71774134, U1811462). This research is also supported by the Fundamental Research Funds for the Central Universities,Southwest Minzu University(Grant Number 2020NGD04,and 2018NZD02).
Author na

In [63]:
from langchain.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

import pinecone

index = pc.Index(index_name)
vectorstore = PineconeVectorStore(index=index, embedding=embeddings)




In [64]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)

In [65]:
query = "What is YOLO"
docs = retriever.get_relevant_documents(query)
for doc in docs:
    print(doc.page_content)

This paper gives us a review of the YOLO versions. Here we draw the following remarks. First, the YOLO version has a lot of differences. However, they still have some features in common. Hence, they are still similar. Second. The YOLO versions are still very new, have a lot of room for future research. Especially for scenario implementations.
There is still room for future improvement. This paper can focus more on the implementations comparing, such as scenario analysis. Further, the research for YOLO V1 is very limited in this paper. For example, in the trend subsection, both the figure and tabular have ignored YOLO V1. Future research can do better on this point.
This research has been partially supported by grants from the National Natural Science Foundation of China (Nos. 71774134, U1811462). This research is also supported by the Fundamental Research Funds for the Central Universities,Southwest Minzu University(Grant Number 2020NGD04,and 2018NZD02).
Author name / Procedia Computer

In [69]:
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnableMap

template = """Answer the question based on the following context:

{context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
prompt


PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based on the following context:\n\n{context}\n\nQuestion: {question}\n')

In [71]:
## LLM
from langchain.chat_models.base import init_chat_model
from langchain_core.output_parsers import StrOutputParser
llm=init_chat_model("openai:gpt-3.5-turbo")
#llm=init_chat_model("groq:")
llm
### LCEL Chain With retrieval

rag_chain=(
    RunnableMap(
        {
        "context": lambda x: retriever.invoke(x["question"]),
        "question": lambda x: x["question"],  
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

# --- 8. Run Query ---
query = {"question": "What is Yolo used for? what is yolo stand for"}
result = rag_chain.invoke(query)

print(result)

YOLO (You Only Look Once) is an algorithm used for object detection in the field of artificial intelligence. It is designed to directly output the position and category of bounding boxes through a neural network, allowing for fast detection of objects in images or videos.
