# Receipt-Extraction

In [1]:
!pip install azure-ai-formrecognizer



In [26]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
import pandas as pd
import os

## Settings

In [3]:
# document intelligence resource
# Read the endpoint and key from environment variables so real credentials are never
# saved inside the notebook. The literal placeholders are kept as fallbacks and must
# be replaced (or the variables exported) before the analysis cell can succeed.
endpoint = os.environ.get("AZURE_FORM_RECOGNIZER_ENDPOINT", "endpoint")
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "key")

In [4]:
# Publicly hosted sample receipt used as the analysis input.
# The address is assembled from adjacent string literals purely to keep lines short.
url = (
    "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/"
    "sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/"
    "receipt/contoso-receipt.png"
)

## Analyze the receipt

In [12]:
# Client for the resource identified by `endpoint` and `key`.
document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Submit the image URL to the prebuilt receipt model and wait for the result.
poller = document_analysis_client.begin_analyze_document_from_url("prebuilt-receipt", url)
receipts = poller.result()

# (field name in the service result, label used in the text output) for each item.
item_field_labels = [
    ("Description", "name"),
    ("Quantity", "quantity"),
    ("Price", "individual-price"),
    ("TotalPrice", "total-price"),
]

# Collect one "label: value" line per recognised field. Joining the lines at the end
# guarantees every entry sits on its own line; previously the tax and tip entries had
# no line break, so "tip: ..." (or the next receipt) was glued onto the tax line.
result_lines = []
for receipt in receipts.documents:
    merchant_name = receipt.fields.get("MerchantName")
    if merchant_name:
        result_lines.append(f"merchant-name: {merchant_name.value}")

    transaction_date = receipt.fields.get("TransactionDate")
    if transaction_date:
        result_lines.append(f"date: {transaction_date.value}")

    items = receipt.fields.get("Items")
    if items:
        # A dedicated counter name avoids shadowing any outer loop variable.
        for item_number, item in enumerate(items.value, start=1):
            result_lines.append(f"item #{item_number}")
            for field_name, label in item_field_labels:
                item_field = item.value.get(field_name)
                if item_field:
                    result_lines.append(f"---- {label}: {item_field.value}")

    tax = receipt.fields.get("TotalTax")
    if tax:
        result_lines.append(f"tax: {tax.value}")
    tip = receipt.fields.get("Tip")
    if tip:
        result_lines.append(f"tip: {tip.value}")

# Same text as before apart from the corrected line breaks (no trailing newline).
analysis_result = "\n".join(result_lines)

In [15]:
# Strip leading/trailing whitespace from the accumulated analysis text and print it.
data = analysis_result.strip()
print(data)

merchant-name: Contoso
date: 2019-06-10
item #1
---- name: Surface Pro 6
---- quantity: 1.0
---- total-price: 999.0
item #2
---- name: SurfacePen
---- quantity: 1.0
---- total-price: 99.99
tax: 104.4


## Write the results to a CSV file

In [31]:
# Placeholder used for cells that have no value (same text the Tax row already used).
_BLANK = " "

# Output columns, in the order they are written to the CSV file.
_COLUMNS = ["Merchant", "Date", "Item", "Quantity", "Price"]

# Item sub-lines that feed a table column; "Price" holds the item's total price.
_ITEM_COLUMNS = {
    "---- name": "Item",
    "---- quantity": "Quantity",
    "---- total-price": "Price",
}


def _parse_receipt_text(text):
    """Convert the "label: value" receipt text into a list of row dictionaries.

    Lines are matched by their label instead of by position, so a missing or
    additional line (for example "---- individual-price" or "tip") no longer
    shifts values into the wrong column.

    Args:
        text: Receipt text with one "label: value" entry per line; every
            "item #N" line starts a new item.

    Returns:
        One dict per item, plus a final "Tax" row when a "tax" line is present.
        Each dict has the keys Merchant, Date, Item, Quantity and Price, all
        holding strings.
    """
    merchant, date, tax = _BLANK, _BLANK, None
    rows = []
    for line in text.strip().split("\n"):
        if line.startswith("item #"):
            rows.append({"Item": _BLANK, "Quantity": _BLANK, "Price": _BLANK})
            continue
        label, separator, value = line.partition(": ")
        if not separator:
            continue  # not a "label: value" line
        if label == "merchant-name":
            merchant = value
        elif label == "date":
            date = value
        elif label == "tax":
            tax = value
        elif label in _ITEM_COLUMNS and rows:
            rows[-1][_ITEM_COLUMNS[label]] = value
    if tax is not None:
        rows.append({"Item": "Tax", "Quantity": _BLANK, "Price": tax})
    return [{"Merchant": merchant, "Date": date, **row} for row in rows]


df = pd.DataFrame(_parse_receipt_text(data), columns=_COLUMNS)

csv_path = "receipt.csv"
if not os.path.isfile(csv_path):
    df.to_csv(csv_path, encoding="utf-8-sig", index=False)
else:
    # Read every column back as text and keep blanks as-is. With type inference,
    # numeric-looking columns such as Price come back as floats, never compare
    # equal to the string values in `df`, and each re-run appended duplicate rows.
    df_origin = pd.read_csv(
        csv_path, encoding="utf-8-sig", dtype=str, keep_default_na=False
    )

    df_combined = pd.concat([df_origin, df], ignore_index=True).drop_duplicates()
    df_combined.to_csv(csv_path, encoding="utf-8-sig", index=False)

In [30]:
# Display the `df` DataFrame as the cell output.
df

Unnamed: 0,Merchant,Date,Item,Quantity,Price
0,Contoso,2019-06-10,Surface Pro 6,1.0,999.0
1,Contoso,2019-06-10,SurfacePen,1.0,99.99
2,Contoso,2019-06-10,Tax,,104.4
