# Combination

In [16]:
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
import os
from azure.core.credentials import AzureKeyCredential
from typing import List
import pandas as pd
from openai import AzureOpenAI
import numpy as np

## Get data from document

In [2]:
endpoint = os.environ.get("DOCUMENT_INTELLIGENCE_ENDPOINT")
credential = AzureKeyCredential(os.environ.get("DOCUMENT_INTELLIGENCE_KEY"))

client: DocumentIntelligenceClient = DocumentIntelligenceClient(endpoint, credential)

url = "https://d34ji3l0qn3w2t.cloudfront.net/0d690f62-d221-403d-8128-1e786f897615/1/lffi_E.pdf"

document = AnalyzeDocumentRequest(url_source=url)

result: AnalyzeResult = client.begin_analyze_document(
    "prebuilt-layout",
    analyze_request=document,
    output_content_format="markdown"
).result()

## Split in chunks

In [3]:
def chunk_document(text) -> List[str]:
    from langchain_text_splitters import MarkdownTextSplitter
    splitter = MarkdownTextSplitter.from_tiktoken_encoder(
                encoding_name="o200k_base", 
                chunk_size=500, 
                chunk_overlap=100
            )
    return splitter.split_text(text)

In [7]:
chunks = chunk_document(result.content)
data = pd.DataFrame(chunks, columns=["content"])

## Embed

In [12]:
openai_client: AzureOpenAI = AzureOpenAI(
    api_version = "2024-06-01",
    max_retries=5
)

In [13]:
def get_embedding(text):
    response = openai_client.embeddings.create(input=text, model="text-embedding-ada-002")
    return response.data[0].embedding

In [14]:
data["embedding"] = data["content"].apply(lambda x: get_embedding(x))

## Search

In [19]:
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [31]:
def search(df, question, n=1):
    #Convert the question in an embedding
    q_embedding = get_embedding(question)

    #Calculate the similarity between the question and the embeddings in the dataframe
    df["similarity"] = df["embedding"].apply(lambda x: cosine_similarity(x, q_embedding))

    #Return the top n most similar rows
    res = df.sort_values("similarity", ascending=False).head(n).reset_index(drop=True)
    return res

## Generate an answer

In [44]:
question = "What are some questions that the Bible answers?"
search_results = search(data, question).iloc[0]["content"]
print(search_results)

The Ethiopian man needed help to understand the Scriptures. Many people today find it helpful to discuss the Bible with others

SOME PEOPLE SAY: "Studying the Bible is a waste of time."

· What would you say? Why?

<!-- PageNumber="5" -->

## SUMMARY

The Bible offers advice for daily life, answers important questions, and gives people comfort and hope.


## Review

· What kind of advice can we find in the Bible?

· What are some questions that the Bible answers?

· What would you like to learn from the Bible?

Lesson completed on


## Goal
 :unselected:
Read the first part of the next lesson.
 :unselected:
Other:


## EXPLORE

<figure>

![](figures/6)

</figure>


Consider how the Bible's advice is practical today.

"Bible Teachings -Timeless Wisdom" (The Watchtower No. 1 2018)

<figure>

![](figures/7)

<!-- FigureContent="2:53" -->

</figure>


See how the Bible helped a man who had struggled with his emotions since childhood.

How My Happy New Life Began

<figure>

![](figures/8)



In [45]:
completion = openai_client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role":"system",
            "content": f"""You are a friendly AI assistant returning answers based only on the given context.
            If the context does not provide an answer, tell the user you have no information on the topic.
            CONTEXT:
            {search_results}
            """
        },
        {
            "role":"user",
            "content": question
        }
    ]
)
completion

ChatCompletion(id='chatcmpl-A4A72YgQazOjGcsnI3GWXzF0mnIcM', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='I have no information on that topic.', refusal=None, role='assistant', function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1725555548, model='gpt-4o-mini', object='chat.completion', service_tier=None, system_fingerprint='fp_80a1bad4c7', usage=CompletionUsage(completion_tokens=8, prompt_tokens=524, total_tokens=532), prompt_filter_results=[{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detect

In [46]:
print(completion.choices[0].message.content)

I have no information on that topic.
