In [16]:
# Environment setup: process-wide settings applied before the heavier imports below.
from dotenv import load_dotenv
import os
import warnings
# Lets the process continue when more than one OpenMP runtime gets loaded
# (presumably faiss plus another numeric library each bundle one — TODO confirm).
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# NOTE(review): this silences every warning for the rest of the session, including
# deprecation notices from the libraries used below; consider a narrower filter.
warnings.filterwarnings("ignore")
# Read variables from a .env file into the environment; the call's return value
# is what the cell displays.
load_dotenv()

True

In [17]:
import faiss

from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

from langchain_text_splitters import MarkdownHeaderTextSplitter

from langchain_ollama import ChatOllama, OllamaEmbeddings

from docling.document_converter import DocumentConverter

### Convert PDF document to Markdown

In [18]:
def load_and_convert_document(file_path):
    """Convert the document at ``file_path`` and return it as Markdown.

    Args:
        file_path: Location of the source document; passed unchanged to
            ``DocumentConverter.convert``.

    Returns:
        The value produced by ``export_to_markdown()`` on the converted document.
    """
    conversion = DocumentConverter().convert(file_path)
    parsed_document = conversion.document
    return parsed_document.export_to_markdown()

# NOTE(review): the same path literal is repeated in the "Piecing together
# everything" cell further down; keep the two in sync.
source = "./data/input/document_1.pdf"
markdown_content = load_and_convert_document(source)

# Prints the whole converted document into the cell output.
print(markdown_content)

<!-- image -->

THlE ARAB OIL CUTBACK AND HIGHER PRICES: .IMPLICATIONS AND REACTEONS

Secret

## Secret

19 October

1973

## A. The Oil Weapon and  Its  Effects

On 17 October  the Organization. f Arab  Petroleum Exporting o Countries  (OAPEC) decided to cut  oil  production.

- -Production  will  be reduced by  not less  than 5% a month until  an  Israeli ithdrawal from , ccupied territories w o is completed and  the "legal  rights"  of the Palestinians e ar restored;
- -The Arab countries  lso  promised  to  maintain il eliveries a o d to "friendly"  ountries hat ive  Arabs "effective  aterial c t g m help"; nd a
- --Threatened a  total mbargo  of countries hat  used their e t armed  forces  to  aid Israel.

Many  parts f  the  statement ere left eliberately  gue in  brder  to o w d va allow  each Arab  country  a degree  of  freedom to  act  according o ts  wn t i o best  interests.  is  ambiguity Th -similar o that f  OPEC t o decisions  n  the i past -is  intended o  give  the AP

### Split markdown into chunks to feed the database

In [19]:
def get_markdown_splits(markdown_content):
    """Split Markdown text into chunks at level 1-3 headings.

    Args:
        markdown_content: The Markdown text to split.

    Returns:
        The result of ``MarkdownHeaderTextSplitter.split_text`` for the text.
        ``strip_headers=False`` is passed so the heading lines stay inside the
        chunk text.
    """
    # (heading marker, label) pairs handed to the splitter, one per heading depth.
    heading_levels = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]
    return MarkdownHeaderTextSplitter(
        heading_levels,
        strip_headers=False,
    ).split_text(markdown_content)


chunks = get_markdown_splits(markdown_content)

# Spot-check one chunk. NOTE(review): 13 is a hard-coded position and raises
# IndexError when the document yields fewer than 14 chunks.
print(chunks[13].page_content)

## ANNEX


### Create embeddings and set up the vector store

In [20]:
def setup_vector_store(chunks, model='nomic-embed-text', base_url="http://localhost:11434"):
    """Embed ``chunks`` with an Ollama embedding model and index them in FAISS.

    Args:
        chunks: Documents to add to the store (here, the output of the
            Markdown splitter).
        model: Name of the Ollama embedding model. Defaults to the model this
            notebook was written against.
        base_url: URL of the Ollama server. Defaults to a local instance.

    Returns:
        A ``FAISS`` vector store backed by a flat L2 index and an in-memory
        docstore, with ``chunks`` already added.
    """
    embeddings = OllamaEmbeddings(model=model, base_url=base_url)

    # Embed a throwaway string purely to learn the vector length, which the
    # FAISS index needs at construction time.
    embedding_dim = len(embeddings.embed_query("this is some text data"))

    vector_store = FAISS(
        embedding_function=embeddings,
        index=faiss.IndexFlatL2(embedding_dim),
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )
    vector_store.add_documents(documents=chunks)
    return vector_store

In [21]:
# Embed and index the chunks produced by the splitter.
vector_store = setup_vector_store(chunks)
# Setup retriever: "mmr" search type, asking for 3 documents per query.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={'k': 3})

In [22]:
# Sanity check: vectors held by the index next to the number of chunks; the two should match.
vector_store.index.ntotal, len(chunks)

(16, 16)

In [23]:
# Run the retriever alone on a sample question to inspect what it returns.
docs = retriever.invoke('What were 5 effects of the arabic oil cutback and higher prices?')

### Format documents for RAG

In [24]:
def format_docs(docs):
    """Join the text of several documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in order, separated by one blank line.
    """
    separator = "\n\n"
    page_texts = (document.page_content for document in docs)
    return separator.join(page_texts)

# Build the context string from the sample retrieval above.
content = format_docs(docs)

# Show the exact text that would fill the prompt's {context} slot.
print(content)

<!-- image -->  
THlE ARAB OIL CUTBACK AND HIGHER PRICES: .IMPLICATIONS AND REACTEONS  
Secret

## The  Effects of a 5% Cutback Over Time  
The following  graphs  show  the cumulative ffects  f  a five ercent 5 e o p all-Arab  production utback on  the United States,  estern Europe,  and c W Japan under different  onditions. c  
-  
3

## B; The  Impact of Increased  Oil Prices  
The increase  n oil  prices ill  be felt rimarily  n the  United  States, i w p i Japan,  and Western Europe.  The oil  import  bills  f  the  United  States  nd o a Japan will  each increase y about  S3  billion, ile the  additional st o b wh co t Western Europe will approach  S8  billion.  is  assumes  that eliveries om Th d fr the Gulf  will  continue as  scheduled  prior  to the announcement  of production imitations. l  
- -The oil  price ise ill urn n expected   I billion  r  so  1974 r w t a S o US  trade  surplus nto  a roughly $2 billion  eficit i d
- The  Canadian trade  balance  will  be virtually  

### RAG chain setup

In [25]:
def create_rag_chain(retriever, model="deepseek-r1:8b", base_url="http://localhost:11434"):
    """Build a retrieval-augmented question-answering chain.

    The chain takes a question string, fills ``{context}`` with the retrieved
    documents formatted by ``format_docs``, sends the prompt to an Ollama chat
    model, and parses the model output to a plain string.

    Args:
        retriever: Runnable that maps a question to a list of documents.
        model: Name of the Ollama chat model. Defaults to the model this
            notebook was written against.
        base_url: URL of the Ollama server. Defaults to a local instance.

    Returns:
        A runnable chain; call ``invoke(question)`` or ``stream(question)``.
    """
    prompt = """
        You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know.
        Answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
        ### Question: {question} 
        
        ### Context: {context} 
        
        ### Answer:
    """
    prompt_template = ChatPromptTemplate.from_template(prompt)

    # NOTE(review): the default model emits its reasoning inside <think> tags
    # (visible in the notebook output); nothing in this chain removes it.
    llm = ChatOllama(model=model, base_url=base_url)

    # The incoming question feeds both branches: it is the retriever's query
    # and is also passed through untouched into the prompt.
    prompt_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    return prompt_inputs | prompt_template | llm | StrOutputParser()

### Piecing together everything

In [26]:
# One-time process: builds the whole pipeline, from PDF to RAG chain, in one cell.
# NOTE(review): this repeats the conversion, splitting and embedding already done
# in the earlier cells and rebinds the same variable names.

# Load document
source = "./data/input/document_1.pdf"
markdown_content = load_and_convert_document(source)
chunks = get_markdown_splits(markdown_content)

# Create vector store
vector_store = setup_vector_store(chunks)

# Setup retriever ("mmr" search type, 3 documents per query — same settings as the earlier cell)
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={'k': 3})

# Create RAG chain
rag_chain = create_rag_chain(retriever)

### Retrieval

In [27]:
# Questions here: edit `question` and re-run the cell to query the document.

question = "What were 5 effects of the arabic oil cutback and higher prices?"

print(f"Question: {question}")
# Stream the answer and print each piece as it arrives, with no newline between
# pieces. The printed text includes the model's <think> section (see output below).
for chunk in rag_chain.stream(question):
    print(chunk, end="", flush=True)
# Visual separator after the answer.
print("\n" + "-" * 50 + "\n")

Question: What were 5 effects of the arabic oil cutback and higher prices?
<think>
Okay, I need to figure out the five effects of the Arab oil cutback and higher prices based on the given context. Let me read through the context carefully.

First, the context talks about a 5% production cutback by Arab countries and how that affects oil import bills for the US, Japan, and Western Europe. It mentions specific increases in billions for each: the US around $3 billion trade surplus turning into a deficit, Japan's surplus going down from $6 to $3 billion, West Germans paying an extra $1.8 billion but still having a significant trade surplus, UK's trade deficit increasing from $3.5 to almost $5 billion, and France and Italy seeing their deficits also increase.

So the first effect is that oil import bills for these countries rise significantly. That seems straightforward.

Next, it mentions that some impacts of higher oil prices will be offset by increased repatriation of oil company profits