# Setup

Setup Azure Services and generate or use data in the data folders to train and test any ML models or filling up Cognitive Search Index etc...

## Prerequisites
1. Please use this specific package version for Azure OpenAI `pip install openai`.

2. Please use this specific package version for Document Intelligence `pip install azure-ai-formrecognizer==3.3.0`

2. All training and testing data is store in /DOCAI/data folder

- > ! pip install azure-search-documents==11.4.0b6
- > ! pip install azure-ai-formrecognizer==3.3.0
- > ! pip install azure.identity
- > ! pip install openai

## Load all the AOAI API keys and model parameters

In [1]:
import os
import aoai

MY_AOAI_ENDPOINT = os.environ['OPENAI_API_ENDPOINT']
MY_AOAI_KEY = os.environ['OPENAI_API_KEY']
MY_AOAI_VERSION = os.environ['OPENAI_API_VERSION']
MY_GPT_ENGINE = os.environ['OPENAI_API_ENGINE']
MY_AOAI_EMBEDDING_ENGINE = 'text-embedding-3-small'

status, client = aoai.setupOpenai(
                        aoai_endpoint=MY_AOAI_ENDPOINT,
                        aoai_api_key=MY_AOAI_KEY,
                        aoai_version=MY_AOAI_VERSION
                 )
if status == True:
    print("AOAI setup succeeded")
else:
    print("AOAI setup failed")


Got OPENAI API Key from environment variable
AOAI setup succeeded


# Test AOAI

In [6]:
theMessage = "The US Presidential Election is in 2024"
my_prompt = [
              {
                "role": "user", 
                "content": f"Classify the following news headline into 1 of the following categories: \
                            Business, Tech, Politics, Sport, Entertainment, Other\
                            \n\nHeadline 1: Donna Steffensen Is Cooking Up a New Kind of Perfection. The Internet's most beloved cooking guru has a \
                            buzzy new book and a fresh new perspective\
                            \nCategory: Entertainment\
                            \n\nHeadline 2: Major Retailer Announces Plans to Close Over 100 Stores\
                            \nCategory: Business\
                            \n\nHeadline 3: {theMessage}\
                            \nCategory:"
                }
              ]      
tokens_used, finish_reason, classified_category = aoai.getChatCompletion(
                                                    the_client=client,
                                                    the_model=MY_GPT_ENGINE, 
                                                    the_messages=my_prompt)
print(f"Tokens: {tokens_used}")
print(f"Finish Reason: {finish_reason}")
print(f"Category: {classified_category}")


Tokens: 116
Finish Reason: stop
Category: Politics


# Test AOAI Embedding APIs

In [9]:
query_dutch = "tools voor softwareontwikkeling"  
  
dutch_vector=aoai.generate_embedding(
                the_client=client,
                the_model=MY_AOAI_EMBEDDING_ENGINE,
                the_text=query_dutch
       )
print(f'VECTOR:\n{dutch_vector}')

VECTOR:
[-0.00852495152503252, 0.02800823375582695, 0.04272773116827011, -0.057317160069942474, -0.009863580577075481, -0.05207103490829468, -0.00022406451171264052, 0.028636900708079338, -0.013299575075507164, 0.016952352598309517, 0.026750897988677025, -0.023260707035660744, -0.01173874456435442, -0.06529473513364792, 0.03171520680189133, 0.019705483689904213, 0.005511682014912367, -0.007424783427268267, -0.04339975491166115, -0.002166464924812317, 0.06208636239171028, -0.005132313817739487, 0.007619887124747038, 0.028918717056512833, 0.04222913086414337, 0.004910111892968416, -0.02896207384765148, 0.020507575944066048, -0.007560271769762039, -0.027639703825116158, -0.034815188497304916, -0.036527764052152634, 0.038977403193712234, 0.04433191567659378, 0.042532626539468765, -0.01083367969840765, 0.0019320695428177714, -0.028203336521983147, 0.020388346165418625, 0.014957956969738007, -0.007684921380132437, -0.0383487343788147, 0.024713147431612015, 0.015391521155834198, -0.0017925160

In [3]:
# Find the cosine similarty between two vectors

In [10]:
query_english = "tools for software development"
english_vector=aoai.generate_embedding(
                the_client=client,
                the_model=MY_AOAI_EMBEDDING_ENGINE,
                the_text=query_english
       )
print(f'VECTOR:\n{english_vector}')

VECTOR:
[-0.017953157424926758, 0.0003067270736210048, 0.040466681122779846, -0.044371820986270905, 0.021858297288417816, -0.06284915655851364, 0.0072205765172839165, 0.029144397005438805, -0.033259209245443344, 0.001139272004365921, 0.04759552329778671, -0.012757225893437862, 0.017311038449406624, -0.049010809510946274, 0.05178896337747574, 0.029616158455610275, 0.011689209379255772, -0.021897610276937485, -0.05367600917816162, 0.036404285579919815, 0.060804855078458786, -0.004557088017463684, -0.004825730342417955, 0.009487654082477093, 0.017258619889616966, 0.012285463511943817, -0.05178896337747574, 0.05367600917816162, -0.01742897741496563, -0.009841475635766983, -0.04903702065348625, -0.032263267785310745, 0.027702901512384415, 0.056244492530822754, 0.039287276566028595, 0.007646472658962011, 0.01934223435819149, -0.028462963178753853, 0.03163424879312515, -0.0018362672999501228, -0.027388393878936768, -0.0415150411427021, 0.005883918143808842, 0.020167818292975426, -0.0120102697

In [13]:
the_similarity = aoai.cosine_similarity(dutch_vector, english_vector)
print(f'Sentence in Dutch: {query_dutch}\nSentence in English: {query_english}\nThe cosine similarity: {the_similarity}')

Sentence in Dutch: tools voor softwareontwikkeling
Sentence in English: tools for software development
The cosine similarity: 0.7801345331151927


# Setup & Test Logic App and Azure Functions

# Setup and test Azure Document Intelligence

- Setup and create the custom extraction and classification models

# Setup Cosmos DB NoSQL API
- Create the database and container