### Install Python Packages

In [None]:
!pip install azure-functions
!pip install azure-core
!pip install azure-cosmos
!pip install openai
!pip install numpy
!pip install requests
!pip install pandas
!pip install azure-storage-blob 
!pip install azure-identity
!pip install smart_open
!pip install tenacity
!pip install pinecone-client
!pip install redis
!pip install tiktoken
!pip install azure-storage-file-share
!pip install python-dotenv
!pip install azure-search-documents
!pip install azure-ai-formrecognizer
!pip install beautifulsoup4
!pip install lxml
!pip install azure-ai-textanalytics

### Import Packages

In [1]:
import os
from dotenv import load_dotenv
# Load the variables from the .env file into os.environ; override=True lets
# values from .env replace variables that are already set in the environment.
load_dotenv(override=True)

import shutil
import sys
# Add ./utils to the module search path.
sys.path.append('./utils')

### Uncomment below imports as needed -- make sure that all relevant values and keys in the .env file are properly populated
# from utils import redis_helpers
# from utils import helpers
# from utils import language
# from utils import openai_helpers
# from utils import storage
# from utils import bot_helpers

# Enable the autoreload extension in mode 2, so that edited modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

### Activate Cognitive Search Ingestion 
#### First Run - Create Index and Indexer 
##### Caution: this will destroy any data you might already have in your index

In [35]:
#### Ingest all knowledge base documents
from utils import cogsearch_helpers

# Name of the knowledge base blob container; a missing variable raises KeyError here.
KB_BLOB_CONTAINER = os.environ['KB_BLOB_CONTAINER']

# Caution: the recorded output of this cell shows the indexes, skillset, indexer
# and data source being deleted and recreated before the indexer is run.
cogsearch_helpers.ingest_kb(container=KB_BLOB_CONTAINER)

Index km-openai-sem Deleted
Index km-openai-sem created
Index km-openai Deleted
Index km-openai created
Deleted Skillset - km-openai-skills
Created new Skillset - km-openai-skills
Deleted Indexer - km-openai-indexer
Deleted Data Source - km-openai-skills
Created new Data Source Connection - km-openai-docs
Created new Indexer - km-openai-indexer
Running Indexer km-openai-indexer


#### Additional Runs - Re-indexing with delta documents

In [None]:
### Re-index additional documents
# Import the helper in this cell: the only other import of cogsearch_helpers is in
# the destructive "First Run" cell, which is skipped when only re-indexing, so on a
# fresh kernel this cell would otherwise fail with a NameError.
from utils import cogsearch_helpers

cogsearch_helpers.run_indexer()

### Activate Form Recognizer Ingestion

In [6]:
#### Ingest all form documents

from utils import storage
from utils import fr_helpers

# Container names are read from the environment; a missing variable raises KeyError.
FR_CONTAINER = os.environ["FR_CONTAINER"]
OUTPUT_BLOB_CONTAINER = os.environ["OUTPUT_BLOB_CONTAINER"]

# FR_CONTAINER is passed as the input container, OUTPUT_BLOB_CONTAINER as the output container.
fr_helpers.process_forms(
    in_container=FR_CONTAINER,
    out_container=OUTPUT_BLOB_CONTAINER,
)



### Interrogate the APIs with the sample Knowledge Base

In [41]:
### Use this cell to query Redis with the below queries
import json
from utils import bot_helpers

# Model names come from the environment; later cells in this notebook reuse these constants.
DAVINCI_003_COMPLETIONS_MODEL = os.environ['DAVINCI_003_COMPLETIONS_MODEL']
ADA_002_EMBEDDING_MODEL = os.environ['ADA_002_EMBEDDING_MODEL']

# Questions to send, one call per entry.
queries = ["What was mentioned about the Volcano hotel?"]

for q in queries:
    # The helper's return value is parsed as JSON; only its 'answer' field is printed.
    output = json.loads(bot_helpers.openai_interrogate_text(q, None, 'orig_lang:en'))
    print("\n\n", output['answer'], '\n\n\n###############################')
    


PROMPT ID 469c00d4-b05f-4644-b16c-26535a8ebb8e

looking up: Volcano hotel
CogLookup terms: Volcano hotel filter: None
pre_context  and cultural center for Nevada.  Las Vegas Hotels  Margie’s Travel offers the  following hotels in Las  Vegas:  The Volcano Hotel  In the heart of The Strip.  Stylish casino hotel with  live entertainment and an  extensive pool area.  The Fountain Hotel  Luxury accommodation in  Las Vegas with a range of  restaurants and cocktail  bars  The Canal Hotel  An opulent Italian-themed  resort with luxurious suite  accommodation.    To book your trip to Las Vegas, visit www.margiestravel.com     .


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can use the information provided in the initial context to answer this question. I will use Redis Lookup to find information about the Volcano Hotel.

Action: 

Redis Lookup

Action Input:

"Volcano Hotel"
[0m
Observation: [33;1m[1;3m[https://storageseh2222.blob.core.windows.net/kmoaidemo/Las%20Vegas%20Bro

In [39]:
### other containers could be used as the sources of documents to index
# Import the helper here so this cell does not depend on the "First Run"
# ingestion cell having been executed in the current kernel session.
from utils import cogsearch_helpers

# Recreate the indexer for the 'kmoaidemo2' container, then run it. The recorded
# output shows the existing indexer and data source being deleted and recreated.
cogsearch_helpers.create_indexer('kmoaidemo2')
cogsearch_helpers.run_indexer()

Deleted Indexer - km-openai-indexer
Deleted Data Source - km-openai-skills
Created new Data Source Connection - km-openai-docs
Created new Indexer - km-openai-indexer
Running Indexer km-openai-indexer


## Experimentation Code Below - NO NEED TO RUN 
### For your reference only

In [26]:
#### Reset Index in Redis
from utils import redis_helpers

# Manual switch: set to False to run this cell without touching Redis.
reset_index = True

if reset_index:
    # Open a new connection and hand it to the reset helper.
    redis_conn = redis_helpers.get_new_conn()
    redis_helpers.redis_reset_index(redis_conn)

In [None]:
### Use this cell to load embeddings directly into Redis from this notebook

import json

# `helpers` is only imported in a commented-out line of the top import cell, so
# import it here to keep this cell runnable on a fresh kernel.
from utils import helpers

# Embedding model name and the token sizes for the small / medium / large variants.
# A medium or large size of 0 disables that variant; the small variant is always generated.
CHOSEN_EMB_MODEL = os.environ['CHOSEN_EMB_MODEL']
SMALL_EMB_TOKEN_NUM = int(os.environ['SMALL_EMB_TOKEN_NUM'])
MEDIUM_EMB_TOKEN_NUM = int(os.environ['MEDIUM_EMB_TOKEN_NUM'])
LARGE_EMB_TOKEN_NUM = int(os.environ['LARGE_EMB_TOKEN_NUM'])


emb_documents = []


for item in os.listdir("dump"):
    path = os.path.join("dump", item)

    # os.listdir also returns sub-directories (e.g. Jupyter's .ipynb_checkpoints),
    # which open() cannot read -- skip anything that is not a regular file.
    if not os.path.isfile(path):
        continue

    with open(path, 'r') as openfile:
        data = json.load(openfile)

    emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, SMALL_EMB_TOKEN_NUM, text_suffix='S')

    if MEDIUM_EMB_TOKEN_NUM != 0:
        emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, MEDIUM_EMB_TOKEN_NUM, text_suffix='M')

    if LARGE_EMB_TOKEN_NUM != 0:
        emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, LARGE_EMB_TOKEN_NUM, text_suffix='L')


# Push everything collected from the "dump" folder to Redis in a single call.
helpers.load_embedding_docs_in_redis(emb_documents)

In [None]:
# Import and settings are repeated here so the cell does not rely on other cells
# having been run first (`helpers` is not imported anywhere else in the notebook).
from utils import helpers

ADA_002_EMBEDDING_MODEL = os.environ['ADA_002_EMBEDDING_MODEL']
# NOTE(review): ADA_002_MODEL_MAX_TOKENS is not defined anywhere in the visible
# notebook; it must be set before running this cell -- TODO confirm where it is
# meant to come from.

emb_documents = []

emb_documents += helpers.generate_embeddings_from_json_docs('dump', ADA_002_EMBEDDING_MODEL, ADA_002_MODEL_MAX_TOKENS, text_suffix='XL', limit=-1)

print(f"Generated {len(emb_documents)} embeddings.")
# Persist the generated documents to "test.pkl" (the helper name is spelled as called here).
helpers.save_embdding_docs_to_pkl(emb_documents, "test.pkl")

In [24]:
# `helpers` is not imported by any other active cell, so import it here to avoid
# a NameError on a fresh kernel.
from utils import helpers

# Read back the documents saved to "test.pkl" and load them into Redis.
# NOTE(review): presumably a pickle file -- only load files you generated yourself.
emb_documents = helpers.load_embedding_docs_from_pkl("test.pkl")
helpers.load_embedding_docs_in_redis(emb_documents)

Loading 141 embeddings into Redis


In [None]:
# Experimental query cell. It relies on bot_helpers, DAVINCI_003_COMPLETIONS_MODEL and
# ADA_002_EMBEDDING_MODEL having been defined by the earlier query cell in this notebook.
queries = [
        "in which classes did the Danish sailors qualify?",
        "what are the reviews of the Lost City hotel?"
    ]


for q in queries:
    # NOTE(review): this call passes five positional arguments, whereas the earlier query
    # cell calls openai_interrogate_text(q, None, 'orig_lang:en') and JSON-decodes the
    # result -- confirm this call still matches the helper's current signature.
    output = bot_helpers.openai_interrogate_text(q, DAVINCI_003_COMPLETIONS_MODEL, ADA_002_EMBEDDING_MODEL, 5, False)
    print("\n\n", output, '\n\n\n###############################')
    # Stop after the first iteration, so only the first query is actually sent.
    break

