### Step 1

Initialize the notebook, loading the configuration and importing libraries.

In [13]:
# Import the libraries required for the notebook
import asyncio
import sys

# Make the parent directory importable so the `config` package imported below
# can be found. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# Import only the class this notebook uses rather than a wildcard import, so
# the origin of every name in the notebook namespace stays explicit.
from config.notebook_config import notebook_config

# Instantiate the config class
config = notebook_config()
# Load config from file
config.load_config_from_file()


### Step 2

List the files in the data folder and collect their full paths in a list.

In [14]:
# Collect the PDF files from the data folder (these will be invoices)
from os import listdir
from os.path import isfile, join
from pathlib import Path

mypath = "../data/"

# Keep only regular files with a .pdf extension, so that any other file that
# happens to live in the folder is not treated as an invoice. The result is
# sorted because listdir() returns entries in arbitrary order, while a later
# cell selects one invoice by its position in this list.
pdf_files = sorted(
    str(Path(mypath) / f)
    for f in listdir(mypath)
    if isfile(join(mypath, f)) and f.lower().endswith(".pdf")
)

# Print the full path of the files
print(pdf_files)

['..\\data\\2023_1-025 Sara Dias González.pdf', '..\\data\\2023_10-114 Agota Kanapienyte.pdf', '..\\data\\2023_2-015 Sara Dias González.pdf', '..\\data\\2023_2-112 Agota Kanapienyte.pdf', '..\\data\\2023_3-065 Agota Kanapienyte.pdf', '..\\data\\2023_3-134 Sara Dias González.pdf', '..\\data\\2023_4-037 Sara Dias González.pdf', '..\\data\\2023_4-113 Agota Kanapienyte.pdf', '..\\data\\2023_5-089 Sara Dias González.pdf', '..\\data\\2023_5-125 Agota Kanapienyte.pdf', '..\\data\\2023_6-031 Sara Dias González.pdf', '..\\data\\2023_6-083 Agota Kanapienyte.pdf', '..\\data\\2023_7-032 Sara Dias González.pdf', '..\\data\\2023_7-066 Agota Kanapienyte.pdf', '..\\data\\2023_7_032 Sara Dias González.pdf', '..\\data\\2023_9-015 Sara Dias González.pdf']


In [9]:
import semantic_kernel
from plugins.form_recognizer.sk_form_recognizer import FormRecognizer
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# Create the kernel; the chat service and the skills are registered on it below
kernel = semantic_kernel.Kernel()
print("Kernel loaded.")

# Connection settings for the chat model, read from the notebook configuration
deployment, endpoint, api_key = config.model, config.endpoint, config.azure_api_key

# Register the Azure chat completion service on the kernel under the name "dv"
chat_service = AzureChatCompletion(deployment, endpoint, api_key)
kernel.add_chat_service("dv", chat_service)
print(f"Fire-up the kernel with {deployment}")

# Build the FormRecognizer skill from the configured endpoint/key and import it
form_recognizer_skill = FormRecognizer(
    config.get_form_regonizer_endpoint(),
    config.get_form_regonizer_key(),
)
form_recognizer = kernel.import_skill(form_recognizer_skill)

# Import the semantic functions of the "form_recognizer" skill from the plugins directory
plugins_directory = "../plugins"
my_functions = kernel.import_semantic_skill_from_directory(plugins_directory, "form_recognizer")
print("Plugins loaded.")

Kernel loaded.
Fire-up the kernel with gpt-35-turbo
Plugins loaded.


In [16]:
# Run the semantic function to process the invoice
result = await kernel.run_async(
    form_recognizer["process_invoice"],
    input_str=pdf_files[4],
)

# Result is a json file with the information extracted from the invoice, print in pretty format
import json
# Load the JSON string into a Python object
result_obj = json.loads(result.result)
# Pretty print the object
print(json.dumps(result_obj, indent=4, sort_keys=True, ensure_ascii=False))

{
    "CustomerAddress": "AddressValue(house_number=14, po_box=None, road=C/ Carlos Zabaleta, city=Illescas, state=None, postal_code=45200, country_region=España, street_address=14 C/ Carlos Zabaleta A, unit=A, city_district=None, state_district=None, suburb=None, house=None, level=ático)",
    "CustomerAddressRecipient": "Agota Kanapienyte",
    "CustomerName": "Agota Kanapienyte",
    "InvoiceDate": "2023-03-28",
    "InvoiceId": "#2023_3-065",
    "InvoiceTotal": "€140.0",
    "Items": "[DocumentField(value_type=dictionary, value={'Amount': DocumentField(value_type=currency, value=CurrencyValue(amount=140.0, symbol=€, code=EUR), content=140,00€, bounding_regions=[BoundingRegion(page_number=1, polygon=[Point(x=7.0978, y=4.1369), Point(x=7.5795, y=4.1318), Point(x=7.5795, y=4.2788), Point(x=7.1029, y=4.2737)])], spans=[DocumentSpan(offset=423, length=7)], confidence=0.885), 'Description': DocumentField(value_type=string, value='Clase reeducación pedagógica', content=Clase reeducación 

In [11]:
# Run the semantic function to summarize the invoice
input_str = result.result
invoice_summary = asyncio.create_task(kernel.run_async(my_functions["summarize"], input_str=input_str))
invoice_summary_result = await invoice_summary
print(invoice_summary_result.result)

{
  "invoice_number": "2023_1-025",
  "customer_name": "Sara Dias González",
  "date": "2023-01-28",
  "total": 30.0,
  "description": "Clase reeducación pedagógica"
}


In [15]:
for invoice in pdf_files:
    my_execution = await kernel.run_async(
        form_recognizer["process_invoice"],
        my_functions["summarize"],
        input_str=invoice)

    print("Invoice: " + invoice)
    print(my_execution.result)



{
  "invoice_number": "2023_1-025",
  "customer_name": "Sara Dias González",
  "date": "2023-01-28",
  "total": 30.0,
  "description": "Clase reeducación pedagógica"
}
{
  "invoice_number": "2023_10-114",
  "customer_name": "Agota Kanapienyte",
  "date": "2023-10-28",
  "total": 140.0,
  "description": "Clase reeducación pedagógica"
}
{
  "invoice_number": "2023_2-015",
  "customer_name": "Sara Dias González",
  "date": "2023-02-28",
  "total": 30.0,
  "description": "Clase reeducación pedagógica"
}
{
  "invoice_number": "2023_2-112",
  "customer_name": "Agota Kanapienyte",
  "date": "2023-02-28",
  "total": 105.0,
  "description": "Clase reeducación pedagógica"
}
{
  "invoice_number": "12345",
  "customer_name": "John Doe",
  "date": "2019-01-01",
  "total": 123.45,
  "description": "This is a description"
}
{
  "invoice_number": "2023_3-134",
  "customer_name": "Sara Dias González",
  "date": "2023-03-28",
  "total": 35.0,
  "description": "Clase reeducación pedagógica"
}
{
  "invoic