In [9]:
from dotenv import load_dotenv
from langchain_teddynote import logging

# Load variables from a local .env file into the process environment;
# later cells read UPSTAGE_API_KEY and OPENAI_API_KEY via os.environ.
load_dotenv()
# Start LangSmith tracing under the project name "TeddyNote-Parser"
# (the cell output confirms that tracing was started for that project).
logging.langsmith("TeddyNote-Parser")

LangSmith 추적을 시작합니다.
[프로젝트명]
TeddyNote-Parser


In [10]:
# Optional one-time setup: uncomment to install/upgrade langchain-teddynote,
# which the first cell imports (`from langchain_teddynote import logging`).
# !pip install -qU langchain-teddynote

## 저장할 State 정의

In [11]:
from typing import TypedDict


# GraphState is the shared state that every parser node reads from and updates.
class GraphState(TypedDict, total=False):
    """Shared pipeline state passed from node to node.

    Declared with ``total=False`` because the state starts out with only
    ``filepath`` and each node adds its own keys afterwards.

    The value types below follow what the nodes in this notebook actually
    store: cropped images/tables and their summaries are keyed by element id,
    page texts and page summaries are keyed by page number.
    """

    filepath: str  # path of the source document
    filetype: str  # document type, e.g. "pdf"
    page_numbers: list[int]  # page numbers found in the document
    batch_size: int  # number of pages per split file
    split_filepaths: list[str]  # paths of the split PDF files
    analyzed_files: list[str]  # paths of the layout-analysis JSON files
    page_elements: dict[int, dict[str, list[dict]]]  # page -> element group -> elements
    page_metadata: dict[int, dict]  # page -> metadata (e.g. page size)
    images: dict[int, str]  # image element id -> cropped image path
    tables: dict[int, str]  # table element id -> cropped table image path
    texts: dict[int, str]  # page number -> extracted page text
    text_summary: dict[int, str]  # page number -> page summary
    image_summary: dict[int, str]  # image element id -> image summary
    table_summary: dict[int, str]  # table element id -> table summary
    table_summary_data_batches: list[dict]  # inputs batched for table summarization
    table_markdown: dict[int, str]  # table element id -> table as markdown

    # Keys from the original declaration, kept for backward compatibility.
    # None of the nodes executed in this notebook populate them.
    page_summary: dict[int, str]  # page summary
    images_summary: list[str]  # image summary
    tables_summary: dict[int, str]  # table summary
    texts_summary: list[str]  # text summary

## 문서를 배치 단위로 분할 (10 page)

In [12]:
import graphrags.core as parser_core
import graphrags.layout_utils as layout_utils
import graphrags.pdf as pdf

import importlib

# Re-execute the local graphrags modules so that edits made to their source
# files are picked up without restarting the kernel.
# The repr of the last reload call becomes the cell output.
importlib.reload(parser_core)
importlib.reload(layout_utils)
importlib.reload(pdf)

<module 'graphrags.pdf' from '/Users/teddy/Dev/github/teddynote-parser/graphrags/pdf.py'>

In [13]:
# Node that splits the source PDF into files of 10 pages each (batch_size=10);
# the last file holds the remaining pages.
split_pdf_node = pdf.SplitPDFFilesNode(batch_size=10)

In [14]:
# Start from a state that only carries the source path; nodes add the rest.
state = GraphState(filepath="data/sample-report.pdf")
# Calling the node yields a callable, which is then applied to the state.
state_out = split_pdf_node()(state)
# Merge the node's result (here: split_filepaths) into the shared state.
state.update(state_out)
state

총 페이지 수: 22
분할 PDF 생성: data/sample-report_0000_0009.pdf
분할 PDF 생성: data/sample-report_0010_0019.pdf
분할 PDF 생성: data/sample-report_0020_0021.pdf


{'filepath': 'data/sample-report.pdf',
 'split_filepaths': ['data/sample-report_0000_0009.pdf',
  'data/sample-report_0010_0019.pdf',
  'data/sample-report_0020_0021.pdf']}

## Layout Analyzer 를 사용하여 문서를 element 단위로 분할

In [15]:
import os

# Build the layout-analysis node with the Upstage API key from the environment.
# NOTE(review): os.environ.get returns None when UPSTAGE_API_KEY is unset —
# confirm how LayoutAnalyzerNode behaves with a missing key.
layout_analyze_node = parser_core.LayoutAnalyzerNode(os.environ.get("UPSTAGE_API_KEY"))
state_out = layout_analyze_node()(state)
# Merge the result into the state; the output shows analyzed_files with one
# JSON path per split PDF.
state.update(state_out)
state

{'filepath': 'data/sample-report.pdf',
 'split_filepaths': ['data/sample-report_0000_0009.pdf',
  'data/sample-report_0010_0019.pdf',
  'data/sample-report_0020_0021.pdf'],
 'analyzed_files': ['data/sample-report_0000_0009.json',
  'data/sample-report_0010_0019.json',
  'data/sample-report_0020_0021.json']}

## 페이지 메타데이터 및 Element 추출

In [16]:
# Run the page-element extraction node and merge its result into the state.
# Judging by the cells that follow, this is where page_elements and
# page_numbers (and presumably page_metadata) become available.
page_element_extractor_node = parser_core.ExtractPageElementsNode()
state_out = page_element_extractor_node()(state)
state.update(state_out)

## 페이지별 HTML Element 추출

In [17]:
# Page numbers (0-based) for which elements were extracted.
state["page_elements"].keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

## 각 페이지의 태그를 분해합니다.

- images, tables, text 를 분해합니다.

In [18]:
# page_elements is keyed by page number; each page is broken down below.
state["page_elements"].keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [19]:
# Element groups stored for page 2 (images, tables, texts and all elements).
state["page_elements"][2].keys()

dict_keys(['image_elements', 'table_elements', 'text_elements', 'elements'])

In [20]:
# Image elements on page 2; the output shows entries with category 'figure'.
state["page_elements"][2]["image_elements"]

[{'bounding_box': [{'x': 89, 'y': 1228},
   {'x': 597, 'y': 1228},
   {'x': 597, 'y': 1557},
   {'x': 89, 'y': 1557}],
  'category': 'figure',
  'html': '<figure><img id=\'52\' style=\'font-size:14px\' alt="Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct" data-coord="top-left:(89,1228); bottom-right:(597,1557)" /></figure>',
  'id': 52,
  'page': 2,
  'text': 'Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct'}]

In [21]:
# Table elements on page 2. Read from the accumulated `state` (like the
# neighbouring cells) rather than `state_out`, which only holds the result of
# whichever node ran last and is overwritten by every later node.
state["page_elements"][2]["table_elements"]

[{'bounding_box': [{'x': 631, 'y': 261},
   {'x': 1146, 'y': 261},
   {'x': 1146, 'y': 1022},
   {'x': 631, 'y': 1022}],
  'category': 'table',
  'html': '<br><table id=\'50\' style=\'font-size:14px\'><tr><td></td><td colspan="3">€/t</td><td colspan="3">$/t</td></tr><tr><td></td><td>Low</td><td>high</td><td>±</td><td>Low</td><td>high</td><td>±</td></tr><tr><td>Domestic prices, ex-works</td><td></td><td></td><td></td><td></td><td></td><td></td></tr><tr><td>Southern UK £/t</td><td>380</td><td>390</td><td>+20.00</td><td>516</td><td>529</td><td>+22.50</td></tr><tr><td>Rotterdam, Netherlands</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Antwerp, Belgium</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Northern Germany</td><td>385</td><td>395</td><td>+10.00</td><td>449</td><td>460</td><td>+8.50</td></tr><tr><td>Northeast Germany</td><td>350</td><td>360</td><td>+10.00</td><td>408</td><td>420</td><td

In [22]:
# Text elements on page 2 (headers, paragraphs, ... per the output).
state["page_elements"][2]["text_elements"]

[{'bounding_box': [{'x': 90, 'y': 83},
   {'x': 221, 'y': 83},
   {'x': 221, 'y': 106},
   {'x': 90, 'y': 106}],
  'category': 'header',
  'html': "<header id='38' style='font-size:18px'>Argus Bitumen</header>",
  'id': 38,
  'page': 2,
  'text': 'Argus Bitumen'},
 {'bounding_box': [{'x': 786, 'y': 78},
   {'x': 1150, 'y': 78},
   {'x': 1150, 'y': 107},
   {'x': 786, 'y': 107}],
  'category': 'header',
  'html': "<br><header id='39' style='font-size:20px'>Issue 21-39 | Friday 1 October 2021</header>",
  'id': 39,
  'page': 2,
  'text': 'Issue 21-39 | Friday 1 October 2021'},
 {'bounding_box': [{'x': 90, 'y': 181},
   {'x': 670, 'y': 181},
   {'x': 670, 'y': 209},
   {'x': 90, 'y': 209}],
  'category': 'paragraph',
  'html': "<p id='40' data-category='paragraph' style='font-size:22px'>North aNd CeNtral europe market CommeNtary</p>",
  'id': 40,
  'page': 2,
  'text': 'North aNd CeNtral europe market CommeNtary'},
 {'bounding_box': [{'x': 90, 'y': 238},
   {'x': 196, 'y': 238},
   {'x': 

## 페이지 번호를 추출합니다.

In [23]:
# List of page numbers stored in the state (0-based).
state["page_numbers"]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

## 이미지와 테이블을 추출합니다.

In [24]:
# Keep the first table element of page 2 as a sample and display its fields
# (bounding_box, category, html, ...).
sample_table = state["page_elements"][2]["table_elements"][0]
sample_table

{'bounding_box': [{'x': 631, 'y': 261},
  {'x': 1146, 'y': 261},
  {'x': 1146, 'y': 1022},
  {'x': 631, 'y': 1022}],
 'category': 'table',
 'html': '<br><table id=\'50\' style=\'font-size:14px\'><tr><td></td><td colspan="3">€/t</td><td colspan="3">$/t</td></tr><tr><td></td><td>Low</td><td>high</td><td>±</td><td>Low</td><td>high</td><td>±</td></tr><tr><td>Domestic prices, ex-works</td><td></td><td></td><td></td><td></td><td></td><td></td></tr><tr><td>Southern UK £/t</td><td>380</td><td>390</td><td>+20.00</td><td>516</td><td>529</td><td>+22.50</td></tr><tr><td>Rotterdam, Netherlands</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Antwerp, Belgium</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Northern Germany</td><td>385</td><td>395</td><td>+10.00</td><td>449</td><td>460</td><td>+8.50</td></tr><tr><td>Northeast Germany</td><td>350</td><td>360</td><td>+10.00</td><td>408</td><td>420</td><td>+9.00

In [25]:
# Keep the first image element of page 2 as a sample and display its fields
# (bounding_box, category, html, id, page, text).
sample_image = state["page_elements"][2]["image_elements"][0]
sample_image

{'bounding_box': [{'x': 89, 'y': 1228},
  {'x': 597, 'y': 1228},
  {'x': 597, 'y': 1557},
  {'x': 89, 'y': 1557}],
 'category': 'figure',
 'html': '<figure><img id=\'52\' style=\'font-size:14px\' alt="Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct" data-coord="top-left:(89,1228); bottom-right:(597,1557)" /></figure>',
 'id': 52,
 'page': 2,
 'text': 'Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct'}

In [26]:
# Run the image cropper node and merge its result into the state.
# Per the output, it writes one PNG per image element under data/sample-report/
# and state["images"] maps each image element id to its file path.
image_cropper_node = parser_core.ImageCropperNode()
state_out = image_cropper_node()(state)
state.update(state_out)
state["images"]

page:0, id:15, path: data/sample-report/15.png
page:1, id:23, path: data/sample-report/23.png
page:1, id:29, path: data/sample-report/29.png
page:2, id:52, path: data/sample-report/52.png
page:3, id:68, path: data/sample-report/68.png
page:4, id:105, path: data/sample-report/105.png
page:5, id:126, path: data/sample-report/126.png
page:6, id:138, path: data/sample-report/138.png
page:8, id:193, path: data/sample-report/193.png
page:9, id:218, path: data/sample-report/218.png
page:10, id:251, path: data/sample-report/251.png
page:11, id:268, path: data/sample-report/268.png
page:13, id:300, path: data/sample-report/300.png
page:18, id:404, path: data/sample-report/404.png
page:19, id:424, path: data/sample-report/424.png
page:20, id:458, path: data/sample-report/458.png
page:20, id:470, path: data/sample-report/470.png
page:21, id:477, path: data/sample-report/477.png
page:21, id:501, path: data/sample-report/501.png


{15: 'data/sample-report/15.png',
 23: 'data/sample-report/23.png',
 29: 'data/sample-report/29.png',
 52: 'data/sample-report/52.png',
 68: 'data/sample-report/68.png',
 105: 'data/sample-report/105.png',
 126: 'data/sample-report/126.png',
 138: 'data/sample-report/138.png',
 193: 'data/sample-report/193.png',
 218: 'data/sample-report/218.png',
 251: 'data/sample-report/251.png',
 268: 'data/sample-report/268.png',
 300: 'data/sample-report/300.png',
 404: 'data/sample-report/404.png',
 424: 'data/sample-report/424.png',
 458: 'data/sample-report/458.png',
 470: 'data/sample-report/470.png',
 477: 'data/sample-report/477.png',
 501: 'data/sample-report/501.png'}

In [27]:
# Keys collected in the state so far.
state.keys()

dict_keys(['filepath', 'split_filepaths', 'analyzed_files', 'page_metadata', 'page_elements', 'page_numbers', 'images'])

In [28]:
# First table element on page 2, shown before the tables are cropped.
state["page_elements"][2]["table_elements"][0]

{'bounding_box': [{'x': 631, 'y': 261},
  {'x': 1146, 'y': 261},
  {'x': 1146, 'y': 1022},
  {'x': 631, 'y': 1022}],
 'category': 'table',
 'html': '<br><table id=\'50\' style=\'font-size:14px\'><tr><td></td><td colspan="3">€/t</td><td colspan="3">$/t</td></tr><tr><td></td><td>Low</td><td>high</td><td>±</td><td>Low</td><td>high</td><td>±</td></tr><tr><td>Domestic prices, ex-works</td><td></td><td></td><td></td><td></td><td></td><td></td></tr><tr><td>Southern UK £/t</td><td>380</td><td>390</td><td>+20.00</td><td>516</td><td>529</td><td>+22.50</td></tr><tr><td>Rotterdam, Netherlands</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Antwerp, Belgium</td><td>420</td><td>435</td><td>+20.00</td><td>490</td><td>507</td><td>+20.50</td></tr><tr><td>Northern Germany</td><td>385</td><td>395</td><td>+10.00</td><td>449</td><td>460</td><td>+8.50</td></tr><tr><td>Northeast Germany</td><td>350</td><td>360</td><td>+10.00</td><td>408</td><td>420</td><td>+9.00

In [29]:
# Run the table cropper node and merge its result into the state.
# Per the output, it writes one PNG per table element under data/sample-report/.
table_cropper_node = parser_core.TableCropperNode()
state_out = table_cropper_node()(state)
state.update(state_out)
# state["tables"] is not displayed here because the node already prints one
# line per cropped table; it is shown in the next cell.

page:0, id:12, path: data/sample-report/12.png
page:0, id:18, path: data/sample-report/18.png
page:1, id:32, path: data/sample-report/32.png
page:1, id:33, path: data/sample-report/33.png
page:2, id:50, path: data/sample-report/50.png
page:2, id:54, path: data/sample-report/54.png
page:2, id:55, path: data/sample-report/55.png
page:5, id:121, path: data/sample-report/121.png
page:5, id:122, path: data/sample-report/122.png
page:5, id:125, path: data/sample-report/125.png
page:8, id:196, path: data/sample-report/196.png
page:8, id:197, path: data/sample-report/197.png
page:11, id:270, path: data/sample-report/270.png
page:12, id:276, path: data/sample-report/276.png
page:12, id:278, path: data/sample-report/278.png
page:13, id:302, path: data/sample-report/302.png
page:13, id:303, path: data/sample-report/303.png
page:14, id:317, path: data/sample-report/317.png
page:15, id:348, path: data/sample-report/348.png
page:15, id:351, path: data/sample-report/351.png
page:17, id:386, path: dat

In [30]:
# Mapping of table element id -> path of the cropped table image.
state["tables"]

{12: 'data/sample-report/12.png',
 18: 'data/sample-report/18.png',
 32: 'data/sample-report/32.png',
 33: 'data/sample-report/33.png',
 50: 'data/sample-report/50.png',
 54: 'data/sample-report/54.png',
 55: 'data/sample-report/55.png',
 121: 'data/sample-report/121.png',
 122: 'data/sample-report/122.png',
 125: 'data/sample-report/125.png',
 196: 'data/sample-report/196.png',
 197: 'data/sample-report/197.png',
 270: 'data/sample-report/270.png',
 276: 'data/sample-report/276.png',
 278: 'data/sample-report/278.png',
 302: 'data/sample-report/302.png',
 303: 'data/sample-report/303.png',
 317: 'data/sample-report/317.png',
 348: 'data/sample-report/348.png',
 351: 'data/sample-report/351.png',
 386: 'data/sample-report/386.png'}

In [31]:
# Dump the whole state to review everything collected so far (large output).
state

{'filepath': 'data/sample-report.pdf',
 'split_filepaths': ['data/sample-report_0000_0009.pdf',
  'data/sample-report_0010_0019.pdf',
  'data/sample-report_0020_0021.pdf'],
 'analyzed_files': ['data/sample-report_0000_0009.json',
  'data/sample-report_0010_0019.json',
  'data/sample-report_0020_0021.json'],
 'page_metadata': {0: {'size': [1241, 1754]},
  1: {'size': [1241, 1754]},
  2: {'size': [1241, 1754]},
  3: {'size': [1241, 1754]},
  4: {'size': [1241, 1754]},
  5: {'size': [1241, 1754]},
  6: {'size': [1241, 1754]},
  7: {'size': [1241, 1754]},
  8: {'size': [1241, 1754]},
  9: {'size': [1241, 1754]},
  10: {'size': [1241, 1754]},
  11: {'size': [1241, 1754]},
  12: {'size': [1241, 1754]},
  13: {'size': [1241, 1754]},
  14: {'size': [1241, 1754]},
  15: {'size': [1241, 1754]},
  16: {'size': [1241, 1754]},
  17: {'size': [1241, 1754]},
  18: {'size': [1241, 1754]},
  19: {'size': [1241, 1754]},
  20: {'size': [1241, 1754]},
  21: {'size': [1241, 1754]}},
 'page_elements': {0: {

In [32]:
# Pages available for the text extraction that follows.
state["page_elements"].keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

## 페이지 텍스트 추출 및 요약

In [33]:
# Run the page-text extraction node and merge its result into the state.
# Per the output, state["texts"] maps page number -> that page's text.
extract_page_text = parser_core.ExtractPageTextNode()
state_out = extract_page_text()(state)
state.update(state_out)
state["texts"]

{0: 'Argus BitumenEurope, Africa, Middle East and Asia-Pacific prices and commentary\nIncorporating Argus Asphalt ReportIssue 21-39 | Friday 1 October 2021SummaryBitumen cargo values gained across Europe and Asia-Pacific,\nboosted by a jump in crude and fuel oil values.Domestic truck prices across Europe also mostly\nstrengthened as October started.Cargo premiums to high-sulphur fuel oil (HSFO) in Rot-\nterdam and the Baltics fell by $2-3/t, but outright values\ncontinued to rise, while in the Mediterranean premium\nvalues were steady on the week as refiners held their ground\namid rising outright values and slowing demand.The Asia-Pacific bitumen market remained mostly firm,\ndespite the expected slowdown due to the week-long break\nin China for Golden Week holidays.The continued buoyancy in crude and HSFO prices in the\nAsia-Pacific region pushed refiners to either seek price hikes\nor mull production cuts at their units. Supply for October\nwas seen to be balanced in key export expo

In [34]:
# Build the page-summary node with the OpenAI API key from the environment.
# NOTE(review): os.environ.get returns None when OPENAI_API_KEY is unset —
# confirm how the node handles a missing key.
page_summary_node = parser_core.CreatePageSummaryNode(
    api_key=os.environ.get("OPENAI_API_KEY")
)
state_out = page_summary_node()(state)
state.update(state_out)
# The summaries are stored under "text_summary", keyed by page number.
state["text_summary"]

{0: "- Bitumen cargo values increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe mostly strengthened at the start of October.\n- Cargo premiums to high-sulphur fuel oil (HSFO) in Rotterdam and the Baltics decreased by $2-3/t, but outright values continued to rise.\n- Mediterranean premium values remained steady amid rising outright values and slowing demand.\n- The Asia-Pacific bitumen market remained firm despite a slowdown expected during China's Golden Week holidays.\n- Crude and HSFO prices in Asia-Pacific led refiners to consider price hikes or production cuts.\n- Supply for October was balanced in key export locations like Singapore and South Korea due to lower bitumen production.\n- Demand is anticipated to improve from Vietnam, Australia, China, and India due to seasonal factors and easing pandemic restrictions.\n- Bitumen prices surged in sub-Saharan Africa, with higher delivered cargo and drummed import prices, as we

## Image, Table 요약을 위한 데이터 배치 생성

In [35]:
# First image element on page 2 (id 52 in the output).
state["page_elements"][2]["image_elements"][0]

{'bounding_box': [{'x': 89, 'y': 1228},
  {'x': 597, 'y': 1228},
  {'x': 597, 'y': 1557},
  {'x': 89, 'y': 1557}],
 'category': 'figure',
 'html': '<figure><img id=\'52\' style=\'font-size:14px\' alt="Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct" data-coord="top-left:(89,1228); bottom-right:(597,1557)" /></figure>',
 'id': 52,
 'page': 2,
 'text': 'Southern UK domestic Rotterdam domestic\n550\n500\n450 hhhhhhhhhhh\n400\n350\n22 Jan 16 Apr 9 Jul 1 Oct'}

In [36]:
# print() renders the line breaks of the page-2 summary instead of showing
# the escaped "\n" of the string repr.
print(state["text_summary"][2])

- Domestic truck prices increased in several markets due to October supply revisions.
- German domestic prices rose by €10/t; Benelux, UK, and France saw gains of €20/t, £20/t, and €20-25/t respectively.
- Not all monthly deals were confirmed by 1 October; further details expected on 4 October.
- Czech domestic and export prices strengthened due to tight refinery supply, despite overall plentiful availability in central and southeast Europe.
- High-sulphur fuel oil (HSFO) price increases in late September raised bitumen values, but early winter signs led to a $2-3/t decrease in Rotterdam and Baltic cargo export premiums, assessed at $25-30/t and around $20/t fob respectively.
- Cross regional freight rates for 5,000t cargoes showed slight declines: £22-24/t from Rotterdam, £29-31/t from Hamburg, £47-49/t from Klaipeda, and £39-43/t from La Coruna.
- UK domestic truck prices were assessed at £380-390/t ex-works and £395-405/t delivered after October price hikes.


## Image, Table 요약 추출

In [37]:
# Build the image-summary node with the OpenAI API key from the environment
# (os.environ.get yields None if the variable is unset).
image_summary_node = parser_core.CreateImageSummaryNode(
    api_key=os.environ.get("OPENAI_API_KEY")
)
state_out = image_summary_node()(state)
state.update(state_out)
# Per the output, "image_summary" maps image element id -> a tagged summary
# string (<image>, <title>, <summary>, <entities>).
state["image_summary"]

{15: '<image>\n<title>Bitumen Cargo Prices Trend</title>\n<summary>This line graph illustrates the trend of bitumen cargo prices from January 22 to October 1. The prices show a general upward trajectory, with fluctuations throughout the months, indicating increased values influenced by rising crude and fuel oil prices in Europe and Asia-Pacific.</summary>\n<entities>\n- Bitumen cargo prices\n- Europe\n- Asia-Pacific\n- Crude oil prices\n- High-sulphur fuel oil (HSFO)\n- Rotterdam\n- Baltics\n- Mediterranean\n- Vietnam\n- Australia\n- China\n- India\n- Sub-Saharan Africa\n- South Africa\n</entities>\n</image>',
 23: '<image>\n<title>Global Bitumen Export Prices</title>\n<summary>This image displays the export prices of bitumen from various regions, highlighting the prices per ton in different countries. Spain has a price of $460/t, while other notable prices include Ivory Coast at $552/t and Iran at $298/t.</summary>\n<entities>\n- Spain: $460/t\n- Ivory Coast: $552/t\n- Iran: $298/t\n-

In [38]:
# Build the table-summary node with the OpenAI API key from the environment
# (os.environ.get yields None if the variable is unset).
table_summary_node = parser_core.CreateTableSummaryNode(
    api_key=os.environ.get("OPENAI_API_KEY")
)
state_out = table_summary_node()(state)
state.update(state_out)
# Per the output, "table_summary" maps table element id -> a tagged summary
# string. The final key listing suggests this step also stores
# "table_summary_data_batches" — TODO confirm against the node's source.
state["table_summary"]

{12: "<table>\n<title>Bitumen Prices at Key Locations (25 Sep - 1 Oct)</title>\n<table_summary>Summary of bitumen prices across various regions, highlighting export and domestic values.</table_summary>\n<key_entities>\n- Mediterranean: $448.58 - $453.58\n- Rotterdam: $460.90 - $465.90\n- Baltic: $453.90 - $457.90\n- Singapore: $405 - $415\n- South Korea: $395 - $405\n- North Africa (Alexandria): $486 - $496\n- East Africa (Mombasa): $465 - $475\n- West Africa (Lagos): $600 - $610\n- East China coast: $425 - $475\n- Antwerp: $490 - $507\n- Southern Germany: $431 - $449\n- Hungary: $443 - $455\n- Italy: $455 - $466\n- Indonesia: $444\n- Mumbai: $544 - $594\n</key_entities>\n<data_insights>\n- Bitumen prices increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe strengthened at the start of October.\n- Cargo premiums to HSFO in Rotterdam and the Baltics decreased by $2-3/t, while outright values rose.\n- Mediterranean premium value

## Table Markdown 추출

In [39]:
import graphrags.core as parser_core
import graphrags.layout_utils as layout_utils
import graphrags.pdf as pdf

import importlib

# Reload the local graphrags modules again so that edits made since they were
# first loaded take effect before the next node is created.
importlib.reload(parser_core)
importlib.reload(layout_utils)
importlib.reload(pdf)

<module 'graphrags.pdf' from '/Users/teddy/Dev/github/teddynote-parser/graphrags/pdf.py'>

In [40]:
# Run the table-markdown extraction node and merge its result into the state;
# afterwards the state contains "table_markdown" (see the key listing below).
table_markdown_extractor = parser_core.TableMarkdownExtractorNode()
state_out = table_markdown_extractor()(state)
state.update(state_out)

## 결과물

In [41]:
# All keys produced by the pipeline.
state.keys()

dict_keys(['filepath', 'split_filepaths', 'analyzed_files', 'page_metadata', 'page_elements', 'page_numbers', 'images', 'tables', 'texts', 'text_summary', 'image_summary', 'table_summary_data_batches', 'table_summary', 'table_markdown'])

In [42]:
# Result: table summaries, keyed by table element id.
state["table_summary"]

{12: "<table>\n<title>Bitumen Prices at Key Locations (25 Sep - 1 Oct)</title>\n<table_summary>Summary of bitumen prices across various regions, highlighting export and domestic values.</table_summary>\n<key_entities>\n- Mediterranean: $448.58 - $453.58\n- Rotterdam: $460.90 - $465.90\n- Baltic: $453.90 - $457.90\n- Singapore: $405 - $415\n- South Korea: $395 - $405\n- North Africa (Alexandria): $486 - $496\n- East Africa (Mombasa): $465 - $475\n- West Africa (Lagos): $600 - $610\n- East China coast: $425 - $475\n- Antwerp: $490 - $507\n- Southern Germany: $431 - $449\n- Hungary: $443 - $455\n- Italy: $455 - $466\n- Indonesia: $444\n- Mumbai: $544 - $594\n</key_entities>\n<data_insights>\n- Bitumen prices increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe strengthened at the start of October.\n- Cargo premiums to HSFO in Rotterdam and the Baltics decreased by $2-3/t, while outright values rose.\n- Mediterranean premium value

In [43]:
# Result: tables rendered as markdown, keyed by table element id.
state["table_markdown"]

{12: '| Location                     | Low   | High  | ±      |\n|------------------------------|-------|-------|--------|\n| **Export cargo prices fob**  |       |       |        |\n| Mediterranean                 | 448.58| 453.58| +20.93 |\n| Rotterdam                     | 460.90| 465.90| +18.95 |\n| Baltic                        | 453.90| 457.90| +18.95 |\n| Singapore                     | 405   | 415   | +5.00  |\n| South Korea                   | 395   | 405   | nc     |\n| Mideast Gulf                  | 298   | 425   | +5.50  |\n| **Delivered cargo prices cfr**|       |       |        |\n| North Africa                  | 486   | 496   | +18.00 |\n| East Africa                   | 465   | 475   | +26.00 |\n| West Africa                   | 600   | 610   | +21.00 |\n| East China coast              | 425   | 475   | +2.50  |\n| **Domestic prices**           |       |       |        |\n| Antwerp                       | 490   | 507   | +20.50 |\n| Southern Germany              | 431

In [44]:
# Result: image summaries, keyed by image element id.
state["image_summary"]

{15: '<image>\n<title>Bitumen Cargo Prices Trend</title>\n<summary>This line graph illustrates the trend of bitumen cargo prices from January 22 to October 1. The prices show a general upward trajectory, with fluctuations throughout the months, indicating increased values influenced by rising crude and fuel oil prices in Europe and Asia-Pacific.</summary>\n<entities>\n- Bitumen cargo prices\n- Europe\n- Asia-Pacific\n- Crude oil prices\n- High-sulphur fuel oil (HSFO)\n- Rotterdam\n- Baltics\n- Mediterranean\n- Vietnam\n- Australia\n- China\n- India\n- Sub-Saharan Africa\n- South Africa\n</entities>\n</image>',
 23: '<image>\n<title>Global Bitumen Export Prices</title>\n<summary>This image displays the export prices of bitumen from various regions, highlighting the prices per ton in different countries. Spain has a price of $460/t, while other notable prices include Ivory Coast at $552/t and Iran at $298/t.</summary>\n<entities>\n- Spain: $460/t\n- Ivory Coast: $552/t\n- Iran: $298/t\n-

In [45]:
# Result: page summaries, keyed by page number.
state["text_summary"]

{0: "- Bitumen cargo values increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe mostly strengthened at the start of October.\n- Cargo premiums to high-sulphur fuel oil (HSFO) in Rotterdam and the Baltics decreased by $2-3/t, but outright values continued to rise.\n- Mediterranean premium values remained steady amid rising outright values and slowing demand.\n- The Asia-Pacific bitumen market remained firm despite a slowdown expected during China's Golden Week holidays.\n- Crude and HSFO prices in Asia-Pacific led refiners to consider price hikes or production cuts.\n- Supply for October was balanced in key export locations like Singapore and South Korea due to lower bitumen production.\n- Demand is anticipated to improve from Vietnam, Australia, China, and India due to seasonal factors and easing pandemic restrictions.\n- Bitumen prices surged in sub-Saharan Africa, with higher delivered cargo and drummed import prices, as we

In [46]:
# Result: raw page texts, keyed by page number.
state["texts"]

{0: 'Argus BitumenEurope, Africa, Middle East and Asia-Pacific prices and commentary\nIncorporating Argus Asphalt ReportIssue 21-39 | Friday 1 October 2021SummaryBitumen cargo values gained across Europe and Asia-Pacific,\nboosted by a jump in crude and fuel oil values.Domestic truck prices across Europe also mostly\nstrengthened as October started.Cargo premiums to high-sulphur fuel oil (HSFO) in Rot-\nterdam and the Baltics fell by $2-3/t, but outright values\ncontinued to rise, while in the Mediterranean premium\nvalues were steady on the week as refiners held their ground\namid rising outright values and slowing demand.The Asia-Pacific bitumen market remained mostly firm,\ndespite the expected slowdown due to the week-long break\nin China for Golden Week holidays.The continued buoyancy in crude and HSFO prices in the\nAsia-Pacific region pushed refiners to either seek price hikes\nor mull production cuts at their units. Supply for October\nwas seen to be balanced in key export expo

In [48]:
# Inputs that were batched for table summarization: each entry pairs a cropped
# table image path ('table') with a text ('text'); in the output the first
# text matches the page summary of the page containing that table.
state["table_summary_data_batches"]

[{'table': 'data/sample-report/12.png',
  'text': "- Bitumen cargo values increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe mostly strengthened at the start of October.\n- Cargo premiums to high-sulphur fuel oil (HSFO) in Rotterdam and the Baltics decreased by $2-3/t, but outright values continued to rise.\n- Mediterranean premium values remained steady amid rising outright values and slowing demand.\n- The Asia-Pacific bitumen market remained firm despite a slowdown expected during China's Golden Week holidays.\n- Crude and HSFO prices in Asia-Pacific led refiners to consider price hikes or production cuts.\n- Supply for October was balanced in key export locations like Singapore and South Korea due to lower bitumen production.\n- Demand is anticipated to improve from Vietnam, Australia, China, and India due to seasonal factors and easing pandemic restrictions.\n- Bitumen prices surged in sub-Saharan Africa, with higher de

In [47]:
# Page summaries again, for comparison with the 'text' field of the batches.
state["text_summary"]

{0: "- Bitumen cargo values increased in Europe and Asia-Pacific due to rising crude and fuel oil prices.\n- Domestic truck prices in Europe mostly strengthened at the start of October.\n- Cargo premiums to high-sulphur fuel oil (HSFO) in Rotterdam and the Baltics decreased by $2-3/t, but outright values continued to rise.\n- Mediterranean premium values remained steady amid rising outright values and slowing demand.\n- The Asia-Pacific bitumen market remained firm despite a slowdown expected during China's Golden Week holidays.\n- Crude and HSFO prices in Asia-Pacific led refiners to consider price hikes or production cuts.\n- Supply for October was balanced in key export locations like Singapore and South Korea due to lower bitumen production.\n- Demand is anticipated to improve from Vietnam, Australia, China, and India due to seasonal factors and easing pandemic restrictions.\n- Bitumen prices surged in sub-Saharan Africa, with higher delivered cargo and drummed import prices, as we