In [None]:
! pip install bs4 chardet

### Environment setup

In [1]:
import os
from typing import Tuple
import semantic_kernel as sk
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding, AzureChatCompletion, AzureTextEmbedding

# Azure OpenAI credentials, read from the environment (os.getenv yields None when unset).
api_key = os.getenv("AZURE_OPENAI_API_KEY")
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

# OpenAI (non-Azure) credentials; in this cell they are only consumed by the else branch below.
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_org_id = os.getenv("OPENAI_ORG_ID")

kernel = sk.Kernel()

# Switch between the Azure OpenAI and the plain OpenAI service registrations.
useAzureOpenAI = True

# Configure AI service used by the kernel
if useAzureOpenAI:
    # NOTE(review): the first constructor argument is presumably the Azure deployment name — confirm.
    kernel.add_chat_service("chat_completion", AzureChatCompletion("startping_deploymodel", endpoint, api_key))
    kernel.add_text_embedding_generation_service("ada", AzureTextEmbedding("startping_embedding", endpoint, api_key))
else:
    kernel.add_chat_service("chat-gpt", OpenAIChatCompletion("gpt-4", openai_api_key, openai_org_id))
    kernel.add_text_embedding_generation_service("ada", OpenAITextEmbedding("text-embedding-ada-002", openai_api_key, openai_org_id))


# Attach a VolatileMemoryStore as the kernel's memory store and import the text-memory skill;
# the import call is the cell's last expression, so its returned function map is the cell output.
kernel.register_memory_store(memory_store=sk.memory.VolatileMemoryStore())
kernel.import_skill(sk.core_skills.TextMemorySkill())

{'recall': <semantic_kernel.orchestration.sk_function.SKFunction at 0x1cc2031b040>,
 'save': <semantic_kernel.orchestration.sk_function.SKFunction at 0x1cc2031aec0>}

### Loading the HTML file into the memory vector index

In [None]:
from bs4 import BeautifulSoup
import chardet

file_path = '../../data/html/wmt-20230430.html'

def load_local_html(file_path):
    """Parse a local HTML file into a BeautifulSoup tree.

    The file's raw bytes are handed to ``chardet`` to guess the text encoding,
    then the file is re-read as text with that encoding and parsed with the
    ``html.parser`` backend.

    Args:
        file_path: Path of the HTML file to read.

    Returns:
        The ``BeautifulSoup`` object built from the file's content.
    """
    with open(file_path, 'rb') as file:
        result = chardet.detect(file.read())

    # chardet reports None when it cannot make a guess (e.g. an empty file); passing
    # None to open() would silently pick the platform default, so fall back to UTF-8.
    encoding = result['encoding'] or 'utf-8'

    with open(file_path, 'r', encoding=encoding) as file:
        html_content = file.read()

    return BeautifulSoup(html_content, 'html.parser')

soup = load_local_html(file_path)

# Write the text of every <span> element in the parsed document to a UTF-8 text file,
# each entry followed by a blank line.
outfile_path = '../../data/txt/wmt-20230430.txt'
with open(outfile_path, 'w', encoding='utf-8') as out_file:
    out_file.writelines(
        span.get_text() + '\n\n' for span in soup.find_all('span')
    )
        

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# The text file is produced with encoding='utf-8' by the extraction cell, so read it back
# with the same encoding rather than the platform default (which differs on Windows).
loader = TextLoader("../../data/txt/wmt-20230430.txt", encoding="utf-8")

documents = loader.load()

# Split the loaded document into chunks (chunk_size=3000, no overlap) for embedding.
text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)


In [3]:
# docs[1].page_content
memory_collection_name = "wmt10Q12023"
print("Adding WalMart 2023 Quarter Report and their descriptions to a volatile Semantic Memory.");
i = 0
for entry, value in enumerate(docs):
    await kernel.memory.save_reference_async(
        collection=memory_collection_name,
        description=value.page_content,
        text=value.page_content,
        external_id=entry,
        external_source_name="wmt10Q12023"
    )
    i += 1
    print("  URL {} saved".format(i))

Adding WalMart 2023 Quarter Report and their descriptions to a volatile Semantic Memory.
  URL 1 saved
  URL 2 saved
  URL 3 saved
  URL 4 saved
  URL 5 saved
  URL 6 saved
  URL 7 saved
  URL 8 saved
  URL 9 saved
  URL 10 saved
  URL 11 saved
  URL 12 saved
  URL 13 saved
  URL 14 saved
  URL 15 saved
  URL 16 saved
  URL 17 saved
  URL 18 saved
  URL 19 saved
  URL 20 saved
  URL 21 saved
  URL 22 saved
  URL 23 saved
  URL 24 saved
  URL 25 saved
  URL 26 saved
  URL 27 saved
  URL 28 saved
  URL 29 saved
  URL 30 saved
  URL 31 saved
  URL 32 saved
  URL 33 saved
  URL 34 saved
  URL 35 saved
  URL 36 saved
  URL 37 saved
  URL 38 saved
  URL 39 saved
  URL 40 saved
  URL 41 saved
  URL 42 saved
  URL 43 saved
  URL 44 saved
  URL 45 saved
  URL 46 saved
  URL 47 saved


In [None]:
ask = """find context relevent to Consolidated net income"""
# ask = """Rating agency for Long-term debt"""
ask = """ Are there any legal proceedings? If yes, please summarize each legal proceeding in one paragraph with no more than five sentences."""
# ask = """find context relevent to cash equivalents and restricted cash at end of period"""

debugs = await kernel.memory.search_async(memory_collection_name, ask, limit=1, min_relevance_score=0.77)
print(debugs[0].relevance)
print(debugs[0].description)

### Native Function

In [4]:
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

async def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send a single user message to the OpenAI chat API and return the reply text.

    Args:
        prompt: Text sent as the content of the one ``user`` message.
        model: Chat model name passed through to the API.

    Returns:
        The ``content`` field of the first choice's message.
    """
    messages = [{"role": "user", "content": prompt}]
    # Use the awaitable variant: the synchronous `create` call would block the event
    # loop for the whole request even though this function is a coroutine.
    response = await openai.ChatCompletion.acreate(
        model=model,
        messages=messages,
        temperature=0, # a parameter that controls the randomness or creativity of the generated text, examples are 0,1,0.5
    )
    return response.choices[0].message["content"]

In [5]:
class ChatGPTSearchEngineSkill:
    """
    A ChatGPT search engine skill.

    Wraps an awaitable callable (the ``connector``) that takes the query text and
    returns the answer; the skill exposes it to the kernel as ``searchAsync``.
    """
    from semantic_kernel.orchestration.sk_context import SKContext
    from semantic_kernel.skill_definition import sk_function, sk_function_context_parameter

    def __init__(self, connector) -> None:
        # `connector` is called as `await connector(query)` in search_async.
        self._connector = connector

    @sk_function(
        description="Performs a ChatGPT search on sector according to the Global Industry Classification Standard (GICS)", name="searchAsync"
    )
    @sk_function_context_parameter(
        name="query",
        description="The search query",
    )
    async def search_async(self, query: str, context: SKContext) -> str:
        """Answer ``query`` through the connector, falling back to the "query" context variable."""
        if not query:
            fallback = context.variables.get("query")
            # Some semantic-kernel releases return a (found, value) tuple from
            # ContextVariables.get; unwrap it so the connector always receives the text.
            query = fallback[1] if isinstance(fallback, tuple) else fallback
        result = await self._connector(query)
        return result

In [6]:
class WebSearchEngineSkill:
    """
    A search engine skill.

    Delegates to a connector object exposing ``search_async(query, num_results, offset)``
    and returns the connector's result converted to a string.
    """
    from semantic_kernel.orchestration.sk_context import SKContext
    from semantic_kernel.skill_definition import sk_function, sk_function_context_parameter

    def __init__(self, connector) -> None:
        self._connector = connector

    @sk_function(
        description="Performs NEGATIVE news web search for a given query", name="searchAsync"
    )
    @sk_function_context_parameter(
        name="query",
        description="The search query",
    )
    async def search_async(self, query: str, context: SKContext) -> str:
        """Run a web search for ``query``, falling back to the "query" context variable."""
        if not query:
            fallback = context.variables.get("query")
            # Some semantic-kernel releases return a (found, value) tuple from
            # ContextVariables.get; unwrap it so the connector always receives the text.
            query = fallback[1] if isinstance(fallback, tuple) else fallback
        # Ask for the first five hits only.
        result = await self._connector.search_async(query, num_results=5, offset=0)
        return str(result)

In [7]:
class VectorSearchEngineSkill:
    """
    A vector search engine skill.

    Searches the notebook-level ``kernel`` memory in the collection named by the
    notebook-level ``memory_collection_name``; both must be defined before the
    skill function is invoked.
    """
    from semantic_kernel.orchestration.sk_context import SKContext
    from semantic_kernel.skill_definition import sk_function, sk_function_context_parameter

    @sk_function(
        description="find context relevent to Consolidated net income or \
            Rating agency for Long-term debt or \
            legal proceedings or \
            cash equivalents and restricted cash at end of period \
            ", name="searchAsync"
    )
    @sk_function_context_parameter(
        name="query",
        description="The search query",
    )
    async def search_async(self, query: str, context: SKContext) -> str:
        """Return the description of the best-matching memory record, or "" when none qualifies."""
        if not query:
            fallback = context.variables.get("query")
            # Some semantic-kernel releases return a (found, value) tuple from
            # ContextVariables.get; unwrap it so the prompt below embeds the text itself.
            query = fallback[1] if isinstance(fallback, tuple) else fallback
        ask = f"find context relevent to {query}"

        memories = await kernel.memory.search_async(memory_collection_name, ask, limit=1, min_relevance_score=0.77)
        # Nothing is returned when no record reaches min_relevance_score; avoid an IndexError.
        if not memories:
            return ""
        return memories[0].description

In [9]:
from semantic_kernel.connectors.search_engine import BingConnector

# Bing Web Search key comes from the environment; os.getenv returns None if it is unset.
BING_API_KEY = os.getenv("BING_SEARCH_V7_SUBSCRIPTION_KEY")
connector = BingConnector(BING_API_KEY)

# Register the three custom skills; each exposes one function named "searchAsync", so the
# planner addresses them as e.g. "vectorsearch.searchAsync".
kernel.import_skill(VectorSearchEngineSkill(), skill_name="vectorsearch")
kernel.import_skill(WebSearchEngineSkill(connector), skill_name="websearch")
kernel.import_skill(ChatGPTSearchEngineSkill(get_completion), skill_name="chatgptsearch")


{'searchAsync': <semantic_kernel.orchestration.sk_function.SKFunction at 0x1cc2086fb50>}

### Stepwise Planner

In [10]:
from semantic_kernel.planning import StepwisePlanner
from semantic_kernel.planning.stepwise_planner.stepwise_planner_config import (
    StepwisePlannerConfig,
)

# Build the stepwise planner on top of the configured kernel.
# NOTE(review): going by the parameter names, this caps planning at 2 iterations with at
# least 1000 ms per iteration — confirm against the StepwisePlannerConfig documentation.
planner = StepwisePlanner(
    kernel, StepwisePlannerConfig(max_iterations=2, min_iteration_time_ms=1000)
)

Good Questions
```
ask = """find context relevent to Consolidated net income"""
ask = """Rating agency for Long-term debt"""
ask = """find legal proceedings"""
ask = """find context relevent to cash equivalents and restricted cash at end of period"""
ask = """What is the sector of Walmart according to the Global Industry Classification Standard (GICS)?"""
ask = """Are there any negatives news about Walmart in last 12 months?"""
```


In [None]:
ask = """Rating agency for Long-term debt"""
plan = planner.create_plan(goal=ask)

# Inspect the plan's steps before running it (relies on private attributes of the plan object).
for step in plan._steps:
    print(step.description, ":", step._state.__dict__)

# `ask`, `plan` and `result` are read again by the reporting cell that follows.
result = await plan.invoke_async()

In [None]:


# Report the query, the final result and, per plan step, its function and first output.
divider = "===========================\n"
print(divider + "Query: " + ask + "\n")
print(divider + "Result: \n")
print(result)
print(divider + "Thoughts: \n")
for step_no, step in enumerate(plan._steps):
    print("Step:", step_no)
    print("Description:", step.description)
    print("Function:", step.skill_name + "." + step._function.name)
    if step._outputs:
        # Indent continuation lines of the step's first output by two spaces.
        first_output = result[step._outputs[0]]
        print("  Output:\n", first_output.replace("\n", "\n  "))
print("============END===============\n\n")


ask = """
Could you please find relevent information for the following questions for WalMart \
Provide answer to question 1, once finish, answer to question 2 , and then answer to question 3\

question 1: find context relevent to Consolidated net income\
question 2: Rating agency for Long-term debt\
question 3: find legal proceedings
"""

The stepwise planner allows the AI to form "thoughts" and "observations" and to execute actions based on them to achieve a user's goal. This continues until all required functions have completed and a final output is generated.

In [11]:
# ask = """Are there any negatives news about Walmart in last 12 months?"""
async def execute_plan(ask):
    """Plan and run one goal with the notebook-level ``planner``, then print a report.

    The report shows the query, the final result, and for every plan step its
    description, its function, and (when the step declares outputs) its first
    output with continuation lines indented.

    Args:
        ask: The goal text handed to ``planner.create_plan``.
    """
    divider = "===========================\n"

    plan = planner.create_plan(goal=ask)
    result = await plan.invoke_async()

    print(divider + "Query: " + ask + "\n")
    print(divider + "Result: \n")
    print(result)
    print(divider + "Thoughts: \n")

    for position, step in enumerate(plan._steps):
        print("Step:", position)
        print("Description:", step.description)
        print("Function:", step.skill_name + "." + step._function.name)
        if step._outputs:
            first_output = result[step._outputs[0]]
            print("  Output:\n", first_output.replace("\n", "\n  "))
    print("============END===============\n\n")


In [12]:
# Include all questions
async def execute_project(questions) -> None:
    """Run ``execute_plan`` for each entry of ``questions``, one after another, in order."""
    for goal in questions:
        await execute_plan(goal)


In [13]:
# Goals handed to execute_project, one planner run per entry. The two commented-out
# entries are the web-search and GICS-sector questions, currently disabled.
questions = [
    # "Are there any negatives news about Walmart in last 12 months?",
    # "What is the sector of Walmart according to the Global Industry Classification Standard (GICS)?",
    "find context relevent to amount for {cash equivalents and restricted cash at end of period} as of April 30, 2023 ",
    "find context relevent to amount for {Consolidated net income} as of April 30, 2023",
    "find context relevent to information for {legal proceedings or certain regulatory matters}",
    "find context relevent to Rating for {Rating agency for Long-term debt}",
]



In [14]:
await execute_project(questions)

Query: find context relevent to amount for {cash equivalents and restricted cash at end of period} as of April 30, 2023 

Result: 

The amount for cash equivalents and restricted cash at the end of the period as of April 30, 2023, is $11,882.
Thoughts: 

Step: 0
Description: Execute a plan
Function: StepwisePlanner.ExecutePlan
  Output:
 This was my previous work (but they haven't seen any of it! They only see what I return as final answer):
  [THOUGHT]
  To find the context relevant to the amount for "cash equivalents and restricted cash at end of period" as of April 30, 2023, we can use the vectorsearch.searchAsync function with the query "cash equivalents and restricted cash at end of period". This function will search for relevant information related to the query.
  [ACTION]
  {"action": "vectorsearch.searchAsync", "action_variables": {"query": "cash equivalents and restricted cash at end of period"}}
  [OBSERVATION]
  Other financing activities
  
  (845)
  
  (838)
  
  Net cash 