# Azure setup

In [19]:
import os
import openai
import sys
from openai import AzureOpenAI
from dotenv import load_dotenv

In [20]:
load_dotenv()

True

In [25]:
AZURE_KEY = os.getenv('AZURE_KEY')
AZURE_GPT4MINI_ENDPOINT = os.getenv('AZURE_GPT4MINI_ENDPOINT')
#AZURE_EMBEDINGS_ENDPOINT = os.getenv('AZURE_EMBEDINGS_ENDPOINT')
LLM_MODEL = os.getenv('LLM_MODEL')

In [11]:
client_azure = AzureOpenAI(
    api_key=AZURE_KEY,  
    #api_version="2024-02-01",
    api_version="2023-03-15-preview", #this one works for gpt-4o
    azure_endpoint = AZURE_GPT4MINI_ENDPOINT
    )

In [12]:
def get_completion_from_messages_azure(messages, 
                                 model=LLM_MODEL, 
                                 temperature=0.1, 
                                 max_tokens=4096):
    response = client_azure.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content

In [15]:
#test call
messages =  [  
{'role':'system', 
    'content': "no context"},    
{'role':'user', 
    'content': "how are you?"},  
] 

# Read txt files

## Read all documents in the folder and stage in JSONs for review

In [16]:
import json
from pathlib import Path

In [17]:
def save_to_json(data, filename):
    json_object = json.dumps(data, indent=4)   
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(json_object)

In [18]:
from langchain.document_loaders import DirectoryLoader
from unstructured.partition.text import partition_text
from unstructured.cleaners.core import clean
from unstructured.cleaners.core import clean_non_ascii_chars
from unstructured.cleaners.core import group_broken_paragraphs
from unstructured.cleaners.core import clean_ordered_bullets
from unstructured.cleaners.core import replace_unicode_quotes
from unstructured.documents.elements import Title, NarrativeText
import docx
from unstructured.partition.docx import partition_docx

from os import listdir
from os.path import isfile, join

jsons_folder = "jsons/"

chunks = []
single_chunk = {}

elements = partition_docx(filename="kb/NLBotReview.docx", chunking_strategy="by_title", max_characters=5000, 
                          new_after_n_chars=5000, overlap_all="true", overlap=5000)
for chunk in elements:
    clean_chunk = clean(str(chunk), bullets=True, lowercase=False, 
                            extra_whitespace=True, dashes=True)
    clean_chunk = clean_non_ascii_chars(clean_chunk)
    clean_chunk = clean_ordered_bullets(clean_chunk)
    clean_chunk = group_broken_paragraphs(clean_chunk)
    clean_chunk = replace_unicode_quotes(clean_chunk)    

    #ask llm to generate possible questions
    #we will save them with the corresponding chunks
    context = clean_chunk

    delimiter = "####"
    # potential questions
    prompt = f"""Act as person who just relocated to the Netherlands and using the information from the context,
                 provide a list of possible 50 questions which you can have for this context. Only provide list of questions."
                 """
    messages =  [  
    {'role':'system', 
     'content': context},    
    {'role':'user', 
     'content': f"{delimiter}{prompt}{delimiter}"},  
    ] 
    questions = get_completion_from_messages_azure(messages).split("?\n")
    print(chunk)
    print(questions)
    print("\n" + "-" * 70 + "\n")

    single_chunk = {"content": str(clean_chunk), 
                    "questions": questions}
                    # "summary": summary}
        
    chunks.append(single_chunk)

save_to_json(chunks, jsons_folder + "Output.json")

Review NL Bot Review EPM-NL
['1. What are the best neighborhoods to live in the Netherlands', '2. How do I register my residence in the Netherlands', '3. What documents do I need to open a bank account', '4. How do I find a job in the Netherlands', '5. What is the cost of living in the Netherlands', '6. How do I get a Dutch health insurance plan', '7. What are the public transportation options available', '8. How do I apply for a Dutch residence permit', "9. What is the process for getting a Dutch driver's license", '10. Are there any language requirements for living in the Netherlands', '11. What are the best schools for expat children', '12. How do I find a good doctor or healthcare provider', '13. What are the cultural norms I should be aware of', '14. How do I set up utilities like gas, water, and electricity', '15. What is the tax system like in the Netherlands', '16. Are there any expat communities I can join', '17. How do I get a SIM card for my phone', '18. What are the best gr