In [29]:
import psycopg
import config
import pandas as pd
from typing import List, Dict, Any
from datetime import datetime

DB 연결

In [30]:
def get_db_connection():
    """Open a psycopg connection using the settings in the ``config`` module.

    Reads DB_NAME, DB_USER, DB_PASSWORD, DB_HOST and DB_PORT from ``config``.

    Returns:
        The open connection on success, or None when anything goes wrong
        (the error is printed, not raised).
    """
    try:
        # Built inside the try so a missing config attribute is reported the
        # same way as a failed connection attempt.
        settings = {
            "dbname": config.DB_NAME,
            "user": config.DB_USER,
            "password": config.DB_PASSWORD,
            "host": config.DB_HOST,
            "port": config.DB_PORT,
        }
        connection = psycopg.connect(**settings)
        print("데이터베이스에 성공적으로 연결되었습니다.")
    except Exception as err:
        print(f"데이터베이스 연결 실패: {err}")
        return None
    return connection

In [31]:
def list_tables():
    """Return the names of all tables in the ``public`` schema, sorted by name.

    Opens its own connection via ``get_db_connection`` and always closes it,
    whether the query succeeds or fails.

    Returns:
        A list of table-name strings; an empty list when the connection could
        not be opened or the query failed (the error is printed).
    """
    conn = get_db_connection()
    if conn is None:
        return []

    try:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                ORDER BY table_name;
            """)
            tables = cur.fetchall()
        # Rows come back as 1-tuples: [('table1',), ('table2',)] -> ['table1', 'table2']
        return [t[0] for t in tables]
    except Exception as e:
        print(f"테이블 조회 실패: {e}")
        return []
    finally:
        # Close on every path; previously a failing query leaked the connection.
        conn.close()


In [32]:
# Smoke test: list the tables found in the 'public' schema.
list_tables()

데이터베이스에 성공적으로 연결되었습니다.


['bkpf',
 'bseg',
 'likp',
 'lips',
 'mkpf',
 'mseg',
 'vbak',
 'vbap',
 'vbfa',
 'vbrk',
 'vbrp']

In [33]:
def list_columns(table_name, schema="public"):
    """Look up a table's columns in information_schema, in ordinal order.

    Args:
        table_name: Table to inspect (matched exactly against table_name).
        schema: Schema to look in (matched exactly against table_schema).

    Returns:
        A list of ``{"name": ..., "type": ...}`` dicts, one per column, or an
        empty list when no connection could be opened.
    """
    conn = get_db_connection()
    if conn is None:
        return []

    sql = """
    SELECT
        column_name,
        data_type
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    ORDER BY ordinal_position;
    """
    # The connection itself is used as a context manager around the cursor.
    with conn:
        with conn.cursor() as cur:
            cur.execute(sql, (schema, table_name))
            records = cur.fetchall()

    # Each record is indexed positionally: (column_name, data_type).
    return [{"name": rec[0], "type": rec[1]} for rec in records]


In [34]:
# Usage example: collect just the column names of the bkpf table.
cols = list_columns("bkpf")
cols_list = [c["name"] for c in cols]
print(cols_list)

데이터베이스에 성공적으로 연결되었습니다.
['belnr', 'xblnr', 'waers', 'cpudt', 'budat']


In [35]:
def insert_excel_to_table(excel_file, table_name, schema="public"):
    """Insert the rows of an Excel sheet into a DB table.

    Only Excel columns whose normalised header (stripped, lower-cased, spaces
    replaced by underscores) equals a column name of the target table are
    inserted. All cells are read as strings and missing cells are stored as
    empty strings.

    Args:
        excel_file: Path of the workbook to read.
        table_name: Target table name.
        schema: Schema of the target table.

    Returns:
        True when the rows were inserted, False when no column matched, no
        connection could be opened, or the insert failed (the error is
        printed).
    """
    # 1. Column names of the target table.
    cols = list_columns(table_name, schema)
    db_columns = [c["name"] for c in cols]
    print(f"DB 컬럼: {db_columns}")

    # 2. Read the sheet with every cell as text.
    df = pd.read_excel(excel_file, dtype=str)
    print(f"Excel 컬럼: {list(df.columns)}")

    # 3. Normalise headers. str() guards against non-string headers (for
    #    example a numeric header cell), which have no .strip().
    df.columns = [str(col).strip().lower().replace(" ", "_") for col in df.columns]

    # 4. Keep only the columns both sides know, in DB column order.
    matched_cols = [col for col in db_columns if col in df.columns]
    print(f"매칭된 컬럼: {matched_cols}")

    if not matched_cols:
        print("매칭되는 컬럼이 없습니다.")
        return False

    # 5. NaN -> empty string, then one plain tuple per row.
    insert_df = df[matched_cols].fillna('')
    data = list(insert_df.itertuples(index=False, name=None))

    # Imported here so the block stays self-contained; psycopg itself is
    # already a dependency of this notebook.
    from psycopg import sql as pgsql

    # 6. Connect and insert.
    conn = get_db_connection()
    if conn is None:
        return False

    try:
        with conn, conn.cursor() as cur:
            # Compose identifiers through psycopg.sql so the schema, table
            # and column names are all quoted consistently instead of being
            # pasted into an f-string.
            statement = pgsql.SQL("INSERT INTO {table} ({columns}) VALUES ({values})").format(
                table=pgsql.Identifier(schema, table_name),
                columns=pgsql.SQL(", ").join(pgsql.Identifier(col) for col in matched_cols),
                values=pgsql.SQL(", ").join(pgsql.Placeholder() * len(matched_cols)),
            )
            cur.executemany(statement, data)
    except Exception as e:
        print(f"삽입 오류: {e}")
        return False

    # Reported only after the connection block has exited without error.
    print(f"{len(data)}개 행이 삽입되었습니다.")
    return True

In [36]:
def format_number_column_excel(file_path: str, column_name: str):
    """Rewrite one column of an Excel file as formatted number strings.

    The workbook is read with every cell as text, the chosen column is parsed
    as numbers (unparseable cells become empty strings), formatted, and the
    whole sheet is written back to the same path.

    Args:
        file_path: Workbook to read and overwrite.
        column_name: Header of the column to format (must match exactly).

    Returns:
        The modified DataFrame.

    Raises:
        ValueError: If ``column_name`` is not a column of the sheet.
    """
    df = pd.read_excel(file_path, engine='openpyxl', dtype=str)

    if column_name not in df.columns:
        raise ValueError(f"컬럼 '{column_name}' 이 엑셀에 존재하지 않습니다.")

    def _render(value):
        # NOTE(review): this yields e.g. "1,234,56" (comma for both the
        # thousands separator and the decimal mark) - confirm that is the
        # intended target format.
        if pd.isnull(value):
            return ""
        return f"{value:,.2f}".replace(".", ",")

    parsed = pd.to_numeric(df[column_name], errors="coerce")
    df[column_name] = parsed.apply(_render)
    df.to_excel(file_path, index=False, engine='openpyxl')
    return df


In [37]:
# Target table and source workbook referenced by the cells that follow.
table_name = "mseg"
excel_file = "Table_MSEG(Material Document Segment).XLSX"

In [38]:
# Inspect the column names and types of the target table.
list_columns(table_name)

데이터베이스에 성공적으로 연결되었습니다.


[{'name': 'mblnr', 'type': 'character varying'},
 {'name': 'zeile', 'type': 'character varying'},
 {'name': 'vbeln_im', 'type': 'character varying'},
 {'name': 'vbelp_im', 'type': 'character varying'},
 {'name': 'menge', 'type': 'character varying'},
 {'name': 'meins', 'type': 'character varying'},
 {'name': 'dmbtr', 'type': 'character varying'},
 {'name': 'waers', 'type': 'character varying'},
 {'name': 'matnr', 'type': 'character varying'},
 {'name': 'cpudt_mkpf', 'type': 'character varying'}]

In [39]:
# result = format_number_column_excel(excel_file, "WRBTR")

In [40]:
# insert_excel_to_table(excel_file, table_name)

In [41]:
# conn= get_db_connection()
# with conn.cursor() as cur:
#     sql = f"""SELECT * FROM {table_name}"""
#     cur.execute(sql)
#     df = pd.DataFrame(cur.fetchall(), columns=[desc[0] for desc in cur.description])
#     print(df)

# conn.close()

------

전체 경로 조회

In [42]:
def get_document_flow_path(sales_order_number, connection):
    """Fetch the VBFA document-flow rows that follow a sales order.

    Selects the VBFA rows that link the (zero-padded) sales order to a
    document of type 'J', plus every VBFA row whose preceding document is one
    of those 'J' documents, ordered by ERDAT and ERZET.

    Args:
        sales_order_number: Sales order number; it is converted to text and
            left-padded to 10 characters with zeros inside the query.
        connection: Open DB connection. A falsy value yields an empty list.

    Returns:
        A list of dicts with keys pre_doc_no, post_doc_no, pre_doc_type and
        post_doc_type; an empty list on error (the error is printed and the
        transaction is rolled back so the connection stays usable).
    """
    if not connection:
        return []

    print(f"1단계: VBFA 테이블에서 Document Flow 경로를 조회 중입니다... (Sales Order: {sales_order_number})")

    query = """
    SELECT
        VBELV AS pre_doc_no,
        VBELN AS post_doc_no,
        VBTYP_V AS pre_doc_type,
        VBTYP_N AS post_doc_type
    FROM
        VBFA
    WHERE
        (VBELV = LPAD(%s, 10, '0') AND VBTYP_N = 'J')
        OR VBELV IN (
            SELECT VBELN
            FROM VBFA
            WHERE VBELV = LPAD(%s, 10, '0') AND VBTYP_N = 'J'
        )
    ORDER BY
        ERDAT,
        ERZET;
    """

    # LPAD() takes a text argument, so always bind the number as a string
    # (an int would be sent as a numeric type and the call would not resolve).
    order_key = str(sales_order_number)

    try:
        # The context manager closes the cursor on every path.
        with connection.cursor() as cursor:
            cursor.execute(query, (order_key, order_key))
            columns = [c.name if hasattr(c, "name") else c[0] for c in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]
    except Exception as e:
        print(f"쿼리 실행 중 오류 발생: {e}")
        # The caller keeps using this connection; clear the failed
        # transaction so its later queries are not rejected.
        try:
            connection.rollback()
        except Exception as rollback_error:
            print(f"롤백 실패: {rollback_error}")
        return []

실행 (전체 경로 조회)

In [43]:
# sales_no = input(int())
# conn = get_db_connection()
# doc_path = get_document_flow_path(sales_no, conn)
# pd.DataFrame(doc_path)

document별 데이터 출력

In [44]:
def execute_sql(conn, sql, params=None):
    """Run one SQL statement and return its result set as a DataFrame.

    Args:
        conn: Open DB connection.
        sql: Statement text, with ``%s`` placeholders when ``params`` is given.
        params: Optional sequence of values bound to the placeholders.

    Returns:
        A DataFrame with one column per result column. Statements that
        produce no result set (no cursor description) return an empty
        DataFrame.

    Raises:
        ValueError: If ``conn`` is None.
        Exception: Any error raised while executing; the transaction is rolled
            back and the statement and parameters are printed before the
            original exception is re-raised.
    """
    if conn is None:
        raise ValueError("연결된 DB가 없습니다 (conn is None).")

    try:
        with conn.cursor() as cur:
            if params is not None:
                cur.execute(sql, params)
            else:
                cur.execute(sql)

            # Check for a result set BEFORE fetching: fetching from a
            # statement without one raises instead of returning no rows.
            if not cur.description:
                return pd.DataFrame()

            cols = [desc[0] for desc in cur.description]
            return pd.DataFrame(cur.fetchall(), columns=cols)
    except Exception:
        conn.rollback()
        print("SQL 실패:\n", sql)
        print("params:", params)
        # Bare raise keeps the original traceback intact.
        raise

In [45]:
def get_sales_order_details(conn, sales_order_number):
    """Fetch the item rows of one sales order (doc_code 'C').

    Joins VBAK with VBAP on VBELN and filters on the order number, which is
    left-padded to 10 characters with zeros inside the query.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        sales_order_number: Sales order number to look up.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    order_sql = """
    SELECT
      'Sales Order' AS document_type,
      VBAK.VBELN AS doc_no,
      VBAP.POSNR AS item,
      VBAP.VGBEL AS preced_doc,
      VBAP.VGPOS AS orig_item,
      VBAP.KWMENG AS quantity,
      VBAP.VRKME AS unit,
      VBAP.NETWR AS ref_value,
      VBAK.WAERK AS curr,
      VBAK.ERDAT AS created_on,
      VBAP.MATNR AS material,
      VBAP.ARKTX AS description,
      VBAK.GBSTK AS status,
      'C' AS doc_code
    FROM VBAK
    JOIN VBAP ON VBAK.VBELN = VBAP.VBELN
    WHERE VBAK.VBELN = LPAD(%s, 10, '0')
    """
    bind_values = (sales_order_number,)
    return execute_sql(conn, order_sql, bind_values)


def get_outbound_delivery_details(conn, doc_no, pre_doc_no=None):
    """Fetch the item rows of one outbound delivery (doc_code 'J').

    Joins LIKP with LIPS on VBELN and filters on the delivery number.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Delivery number, compared to LIKP.VBELN as given.
        pre_doc_no: Accepted but not used by this query.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    delivery_sql = """ 
    SELECT
      'Outbound Delivery' AS document_type,
      LIKP.VBELN AS doc_no,
      LIPS.POSNR AS item,
      LIPS.VGBEL AS preced_doc,
      LIPS.VGPOS AS orig_item,
      LIPS.LFIMG AS quantity,
      LIPS.VRKME AS unit,
      LIKP.ERDAT AS created_on,
      LIPS.MATNR AS material,
      LIPS.ARKTX AS description,
      LIKP.GBSTK AS status,
      'J' AS doc_code
    FROM LIKP
    JOIN LIPS ON LIKP.VBELN = LIPS.VBELN
    WHERE LIKP.VBELN = %s
    """
    bind_values = (doc_no,)
    return execute_sql(conn, delivery_sql, bind_values)


def get_picking_request_details(conn, doc_no, pre_doc_no):
    """Fetch the rows shown as a picking request (doc_code 'Q').

    The picking request number is parsed as a ``YYYYMMDD`` date and matched
    against LIKP.ERDAT, together with the delivery number in ``pre_doc_no``.
    The ``doc_no`` column of the result is turned back into the dash-free
    form.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Picking request number in ``YYYYMMDD`` form.
        pre_doc_no: Delivery number compared to LIKP.VBELN.

    Returns:
        The frame from ``execute_sql`` with ``doc_no`` rewritten as text
        without dashes.

    Raises:
        ValueError: If ``doc_no`` does not parse as ``%Y%m%d``.
    """
    iso_date = datetime.strptime(doc_no, '%Y%m%d').strftime('%Y-%m-%d')
    picking_sql = """ 
    SELECT
      'Picking Request' AS document_type,
      LIKP.ERDAT AS doc_no,
      LIPS.POSNR AS item,
      LIKP.VBELN AS preced_doc,
      LIPS.VGPOS AS orig_item,
      LIPS.LFIMG AS quantity,
      LIPS.VRKME AS unit,
      LIKP.ERDAT AS created_on,
      LIPS.MATNR AS material,
      LIPS.ARKTX AS description,
      LIKP.GBSTK AS status,
      'Q' AS doc_code
    FROM LIKP
    JOIN LIPS ON LIKP.VBELN = LIPS.VBELN
    WHERE LIKP.ERDAT = %s AND LIKP.VBELN = %s
    """
    picked = execute_sql(conn, picking_sql, (iso_date, pre_doc_no))
    # ERDAT comes back dash-separated; strip the dashes so doc_no matches the
    # form used in the document-flow path.
    dashless = picked['doc_no'].astype(str).str.replace('-', '', regex=False)
    picked['doc_no'] = dashless
    return picked


def get_goods_issue_details(conn, doc_no, pre_doc_no=None):
    """Fetch the rows of one goods-issue material document (doc_code 'R').

    Reads MSEG by MBLNR and left-joins LIPS (on delivery number and item) and
    LIKP to pick up the description and the WBSTK status.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Material document number, compared to MSEG.MBLNR.
        pre_doc_no: Accepted but not used by this query.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    goods_issue_sql = """
    SELECT
      'GD Goods Issue' AS document_type,
      MSEG.MBLNR AS doc_no,
      MSEG.ZEILE AS item,
      MSEG.VBELN_IM AS preced_doc,
      MSEG.VBELP_IM AS orig_item,
      MSEG.MENGE AS quantity,
      MSEG.MEINS AS unit,
      MSEG.DMBTR AS ref_value,
      MSEG.WAERS AS curr,
      MSEG.CPUDT_MKPF AS created_on,
      MSEG.MATNR AS material,
      LIPS.ARKTX AS description,
      LIKP.WBSTK AS status,
      'R' AS doc_code
    FROM MSEG
    LEFT JOIN LIPS ON MSEG.VBELN_IM = LIPS.VBELN AND MSEG.VBELP_IM = LIPS.POSNR
    LEFT JOIN LIKP ON LIPS.VBELN = LIKP.VBELN
    WHERE MSEG.MBLNR = %s
"""
    bind_values = (doc_no,)
    return execute_sql(conn, goods_issue_sql, bind_values)


def get_re_goods_delivery_details(conn, doc_no, pre_doc_no=None):
    """Fetch the rows of one 'RE Goods Delivery' material document (doc_code 'h').

    Reads MSEG by MBLNR and left-joins LIPS (on delivery number and item) and
    LIKP to pick up the description and the GBSTK status.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Material document number, compared to MSEG.MBLNR.
        pre_doc_no: Accepted but not used by this query.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    return_delivery_sql = """
    SELECT
      'RE Goods Delivery' AS document_type,
      MSEG.MBLNR AS doc_no,
      MSEG.ZEILE AS item,
      MSEG.VBELN_IM AS preced_doc,
      MSEG.VBELP_IM AS orig_item,
      MSEG.MENGE AS quantity,
      MSEG.MEINS AS unit,
      MSEG.DMBTR AS ref_value,
      MSEG.WAERS AS curr,
      MSEG.CPUDT_MKPF AS created_on,
      MSEG.MATNR AS material,
      LIPS.ARKTX AS description,
      LIKP.GBSTK AS status,
      'h' AS doc_code
    FROM MSEG
    LEFT JOIN LIPS ON MSEG.VBELN_IM = LIPS.VBELN AND MSEG.VBELP_IM = LIPS.POSNR
    LEFT JOIN LIKP ON LIPS.VBELN = LIKP.VBELN
    WHERE MSEG.MBLNR = %s
    """
    bind_values = (doc_no,)
    return execute_sql(conn, return_delivery_sql, bind_values)


def get_invoice_details(conn, doc_no, pre_doc_no=None):
    """Fetch the item rows of one invoice (doc_code 'M').

    Joins VBRK with VBRP on VBELN and filters on the billing document number.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Billing document number, compared to VBRK.VBELN.
        pre_doc_no: Accepted but not used by this query.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    invoice_sql = """
    SELECT
        'Invoice' AS document_type,
        VBRK.VBELN AS doc_no,
        VBRP.POSNR AS item,
        VBRP.VGBEL AS preced_doc,
        VBRP.VGPOS AS orig_item,
        VBRP.FKIMG AS quantity,
        VBRP.VRKME AS unit,
        VBRP.NETWR AS ref_value,
        VBRK.WAERK AS curr,
        VBRK.ERDAT AS created_on,
        VBRP.MATNR AS material,
        VBRP.ARKTX AS description,
        VBRK.GBSTK AS status,
        'M' as doc_code
    FROM VBRK
    JOIN VBRP ON VBRK.VBELN = VBRP.VBELN
    WHERE VBRK.VBELN = %s
    """
    bind_values = (doc_no,)
    return execute_sql(conn, invoice_sql, bind_values)


def get_cancel_invoice_details(conn, doc_no, pre_doc_no=None):
    """Fetch the item rows of one cancellation invoice (doc_code 'N').

    Joins VBRK with VBRP on VBELN and filters on the billing document number;
    VBRK.STBLG is reported as the preceding document.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Billing document number, compared to VBRK.VBELN.
        pre_doc_no: Accepted but not used by this query.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    cancel_sql = """
    SELECT
      'Cancel Invoice' AS document_type,
      VBRK.VBELN AS doc_no,         
      VBRP.POSNR AS item,          
      VBRK.STBLG AS preced_doc,    
      VBRP.VGPOS AS orig_item,    
      VBRP.FKIMG AS quantity,   
      VBRP.VRKME AS unit,     
      VBRP.NETWR AS ref_value,     
      VBRK.WAERK AS curr,         
      VBRK.ERDAT AS created_on,   
      VBRP.MATNR AS material,       
      VBRP.ARKTX AS description,  
      VBRK.GBSTK AS status,
      'N' AS doc_code          
    FROM VBRK
    JOIN VBRP ON VBRP.VBELN = VBRK.VBELN
    WHERE VBRK.VBELN = %s
    """
    bind_values = (doc_no,)
    return execute_sql(conn, cancel_sql, bind_values)
  

def get_journal_entry_details(conn, doc_no):
    """Fetch the journal-entry lines posted for a billing document (doc_code 'E').

    Joins BSEG with BKPF on BELNR and keeps the BSEG lines whose VBELN equals
    ``doc_no`` and whose KOART is 'D'. The status column is 'Not Cleared' when
    AUGBL is NULL or blank and 'Cleared' otherwise.

    Args:
        conn: Open DB connection passed on to ``execute_sql``.
        doc_no: Billing document number, compared to BSEG.VBELN.

    Returns:
        Whatever ``execute_sql`` returns for the query.
    """
    journal_sql = """
    SELECT
      'Journal Entry' AS document_type,
      BSEG.BELNR AS doc_no,
      BSEG.VBELN as preced_doc,
      BSEG.WRBTR AS ref_value,
      BKPF.WAERS AS curr,
      BKPF.CPUDT AS created_on,
      CASE 
        WHEN BSEG.AUGBL IS NULL OR TRIM(BSEG.AUGBL) = '' 
        THEN 'Not Cleared' 
        ELSE 'Cleared' 
      END AS status,
      'E' AS doc_code
    FROM BSEG
    JOIN BKPF ON BKPF.BELNR = BSEG.BELNR
    WHERE BSEG.VBELN = %s AND BSEG.KOART = 'D'
    """
    bind_values = (doc_no,)
    return execute_sql(conn, journal_sql, bind_values)

In [46]:
# Notebook-level connection for ad-hoc calls to the helpers above.
# NOTE(review): no cell visible here closes this connection - confirm it is closed elsewhere.
conn = get_db_connection()

데이터베이스에 성공적으로 연결되었습니다.


In [47]:
# get_cancel_invoice_details(conn, '0090000024', '0080000019')

In [48]:
def get_document_details(sales_order_number, document_path, conn):
    """Collect the detail rows of every document in a document-flow path.

    The sales order itself is always looked up first. Then, for every row of
    ``document_path``, the following document (``post_doc_no`` /
    ``post_doc_type``) is fetched with the helper for its type. Invoices
    ('M') and cancellation invoices ('N') are followed by their journal
    entries. Lookups that return no rows are skipped, and unknown document
    types are ignored.

    Args:
        sales_order_number: Sales order number the flow starts from.
        document_path: Flow rows, as a list of dicts or a DataFrame, each
            with ``post_doc_no`` and ``post_doc_type``.
        conn: Open DB connection passed to the lookup helpers.

    Returns:
        One DataFrame with all collected rows in flow order, or an empty
        DataFrame when nothing was found.
    """
    final_data = []
    delivery_doc_no = None

    # The sales order is looked up regardless of what the path contains.
    so_df = get_sales_order_details(conn, sales_order_number)
    if not so_df.empty:
        final_data.append(so_df)

    print("2단계: 경로에 따라 후속 문서 상세 데이터 조회 중...")

    # Accept the path either as a DataFrame or as a list of row dicts.
    if isinstance(document_path, pd.DataFrame):
        rows = document_path.to_dict(orient='records')
    else:
        rows = document_path

    # Helpers that share the (conn, doc_no, pre_doc_no=...) call shape.
    handlers = {
        'Q': get_picking_request_details,
        'R': get_goods_issue_details,
        'h': get_re_goods_delivery_details,
        'M': get_invoice_details,
        'N': get_cancel_invoice_details,
    }

    for row in rows:
        doc_no = row.get('post_doc_no')
        doc_type = row.get('post_doc_type')

        if doc_type == 'J':
            result = get_outbound_delivery_details(conn, doc_no)
            # Guard against an empty lookup: .iloc[0] on an empty frame
            # raises IndexError.
            if not result.empty:
                final_data.append(result)
                # Remembered so later rows can reference their delivery.
                delivery_doc_no = result['doc_no'].iloc[0]
            continue

        handler = handlers.get(doc_type)
        if handler is None:
            continue

        result = handler(conn, doc_no, pre_doc_no=delivery_doc_no)
        if result.empty:
            continue
        final_data.append(result)

        if doc_type in ('M', 'N'):
            # Billing documents are followed by their accounting lines.
            billing_doc_no = result['doc_no'].iloc[0]
            journal_result = get_journal_entry_details(conn, billing_doc_no)
            if journal_result is not None and not journal_result.empty:
                final_data.append(journal_result)

    if final_data:
        return pd.concat(final_data, ignore_index=True)
    return pd.DataFrame()

실행

In [49]:
def run_documentflow_pipeline(sales_no):
    """Run the whole document-flow lookup for one sales order.

    Opens a connection, reads the document-flow path and then the detail rows
    of every document on it. The connection is closed on every path,
    including errors.

    Args:
        sales_no: Sales order number to look up.

    Returns:
        The detail DataFrame for the flow. When the order has no document
        flow, the sales-order rows alone are returned. None when the order
        does not exist or no connection could be opened.
    """
    conn = get_db_connection()
    if not conn:
        print("프로그램을 종료합니다.")
        return None

    try:
        doc_path = get_document_flow_path(sales_no, conn)

        if not doc_path:
            # No follow-on documents: fall back to the order itself.
            sales_doc = get_sales_order_details(conn, sales_no)
            if sales_doc.empty:
                print(f"\nDocument Flow가 없습니다. (Sales Order: {sales_no})")
                return None
            print("\n최종결과:")
            print(pd.DataFrame(sales_doc))
            # Hand the rows back as well, so callers get the same kind of
            # result as in the full-flow case instead of None.
            return sales_doc

        print("\n성공적으로 Document Flow 경로를 조회했습니다.")
        print(pd.DataFrame(doc_path))

        final_result = get_document_details(sales_no, doc_path, conn)

        print("\n최종결과:")
        return final_result
    finally:
        # Previously unreachable after the early returns, leaking the connection.
        conn.close()
In [73]:
# Prompt for a sales order number, run the pipeline and display the result.
sales_no = input("조회할 Sales Order 번호를 입력하세요: ")
final_df = run_documentflow_pipeline(sales_no)
final_df

데이터베이스에 성공적으로 연결되었습니다.
1단계: VBFA 테이블에서 Document Flow 경로를 조회 중입니다... (Sales Order: 21)

성공적으로 Document Flow 경로를 조회했습니다.
   pre_doc_no post_doc_no pre_doc_type post_doc_type
0  0000000021  0080000019            C             J
1  0080000019    20240423            J             Q
2  0080000019  4900000219            J             R
3  0080000019  0090000024            J             M
4  0080000019  0090000025            J             N
5  0080000019  4900000225            J             h
6  0080000019  4900000226            J             R
7  0080000019  0090000026            J             M
2단계: 경로에 따라 후속 문서 상세 데이터 조회 중...

최종결과:


Unnamed: 0,document_type,doc_no,item,preced_doc,orig_item,quantity,unit,ref_value,curr,created_on,material,description,status,doc_code
0,Sales Order,21,10.0,,0.0,10.0,EA,250000000.0,KRW,2024-04-23,FERT301,KONA,C,C
1,Outbound Delivery,80000019,10.0,21.0,10.0,10.0,EA,,,2024-04-23,FERT301,KONA,C,J
2,Picking Request,20240423,10.0,80000019.0,10.0,10.0,EA,,,2024-04-23,FERT301,KONA,C,Q
3,GD Goods Issue,4900000219,1.0,80000019.0,10.0,10.0,EA,199095990.0,KRW,2024-04-23,FERT301,KONA,C,R
4,Invoice,90000024,10.0,80000019.0,10.0,10.0,EA,250000000.0,KRW,2024-04-23,FERT301,KONA,C,M
5,Journal Entry,90000024,,90000024.0,,,,250000000.0,KRW,2024-04-23,,,Cleared,E
6,Cancel Invoice,90000025,10.0,,10.0,10.0,EA,250000000.0,KRW,2024-04-24,FERT301,KONA,C,N
7,Journal Entry,100000330,,90000025.0,,,,250000000.0,KRW,2024-04-24,,,Cleared,E
8,RE Goods Delivery,4900000225,1.0,80000019.0,10.0,10.0,EA,199095990.0,KRW,2024-04-24,FERT301,KONA,C,h
9,GD Goods Issue,4900000226,1.0,80000019.0,10.0,10.0,EA,199095990.0,KRW,2024-04-24,FERT301,KONA,C,R


전체 결과 json 형태 변환 - along with edges information

In [51]:
# Group definitions: `document_type` labels belonging to each layout lane
# (looked up by get_group).
SALES_ORDER = ['Sales Order']
DELIVERY = ['Outbound Delivery', 'Picking Request', 'GD Goods Issue', 'RE Goods Delivery']
BILLING = ['Invoice', 'Cancel Invoice']
ACCOUNTING = ['Journal Entry']

In [52]:
# x coordinate of each lane, keyed by the group names returned by get_group.
X_BY_GROUP = {
    "SALES_ORDER": 0,
    "DELIVERY": 150,
    "BILLING": 300,
    "ACCOUNTING": 450,
}
# Vertical step added when a node is placed below another one in the same lane.
Y_GAP = 150

In [53]:
# For each doc_code, the doc_codes that may precede it. build_flow_edges links
# a row to the most recently seen row carrying one of these codes.
RULES: Dict[str, List[str]] = {
        "J": ["C"],            # Outbound Delivery <- Sales Order
        "Q": ["J"],            # Picking Request <- Outbound Delivery
        "R": ["Q", "h"],       # GD Goods Issue <- Picking Request or RE Goods Delivery
        "M": ["R"],            # Invoice <- GD Goods Issue
        "h": ["R"],            # RE Goods Delivery <- GD Goods Issue
        "N": ["M"],            # Cancel Invoice <- Invoice
        "E": ["M", "N"],       # Journal Entry <- Invoice or Cancel Invoice
    }

In [54]:
def get_group(doc_type: str) -> str:
    """Map a document-type label to the name of its layout lane.

    Args:
        doc_type: Label such as 'Sales Order' or 'Invoice'.

    Returns:
        'SALES_ORDER', 'DELIVERY', 'BILLING' or 'ACCOUNTING'. Labels that are
        in none of the group lists fall back to 'DELIVERY'.
    """
    lanes = (
        ("SALES_ORDER", SALES_ORDER),
        ("DELIVERY", DELIVERY),
        ("BILLING", BILLING),
        ("ACCOUNTING", ACCOUNTING),
    )
    for lane, members in lanes:
        if doc_type in members:
            return lane
    return "DELIVERY"  # fallback for unknown labels

In [61]:
# edges 정의
def build_flow_edges(df) -> List[Dict[str, Any]]:
    """Derive the graph edges between the rows of a document-flow frame.

    Rows are visited in order. Each row whose ``doc_code`` has an entry in
    RULES is linked to the most recently visited row carrying one of the
    allowed predecessor codes; rows without such a predecessor get no edge.
    Node ids have the form ``<doc_code>-<row position>``.

    Args:
        df: Frame with a ``doc_code`` column; its index is ignored.

    Returns:
        A list of edge dicts with id, source, target, type and markerEnd.
    """
    latest_by_code: Dict[str, int] = {}
    flow_edges: List[Dict[str, Any]] = []

    for pos, record in df.reset_index(drop=True).iterrows():
        code = str(record["doc_code"]).strip()

        # Allowed predecessor codes that have already appeared, with the row
        # position at which each was last seen.
        seen_parents = {
            prev: latest_by_code[prev]
            for prev in RULES.get(code, ())
            if prev in latest_by_code
        }
        # The predecessor seen last (largest row position) wins.
        parent = max(seen_parents, key=seen_parents.get) if seen_parents else None

        if parent:
            flow_edges.append({
                "id": f"{parent}-{code}-{pos}",
                "source": f"{parent}-{seen_parents[parent]}",
                "target": f"{code}-{pos}",
                "type": "lrstep",
                "markerEnd": {"type": "ArrowClosed"}
            })

        latest_by_code[code] = pos

    return flow_edges

In [74]:
# Build the edge list for the frame returned by the pipeline and display it.
edges = build_flow_edges(final_df)
edges

[{'id': 'C-J-1',
  'source': 'C-0',
  'target': 'J-1',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'J-Q-2',
  'source': 'J-1',
  'target': 'Q-2',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'Q-R-3',
  'source': 'Q-2',
  'target': 'R-3',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'R-M-4',
  'source': 'R-3',
  'target': 'M-4',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'M-E-5',
  'source': 'M-4',
  'target': 'E-5',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'M-N-6',
  'source': 'M-4',
  'target': 'N-6',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'N-E-7',
  'source': 'N-6',
  'target': 'E-7',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'R-h-8',
  'source': 'R-3',
  'target': 'h-8',
  'type': 'lrstep',
  'markerEnd': {'type': 'ArrowClosed'}},
 {'id': 'h-R-9',
  'source': 'h-8',
  'target': 'R-9',
  'type': 'lrstep

In [83]:
# position 계산
def compute_positions(df, edges) -> Dict[str, Dict[str, Any]]:
    """Assign an (x, y) position to every document node of the flow.

    x is fixed by the node's lane (X_BY_GROUP). y follows the edge order: a
    target in the same lane as its source is placed one Y_GAP below the
    lowest row used so far, while a target in a different lane is placed on
    the same row as its source. Nodes that appear on no edge are stacked
    below everything else in their own lane so that every node ends up with
    a position.

    Args:
        df: Frame with ``doc_code`` and ``document_type`` columns; its index
            is ignored and rows are numbered by position.
        edges: Edge dicts with ``source`` and ``target`` node ids of the form
            ``<doc_code>-<row position>``.

    Returns:
        A dict keyed by node id; each value holds doc_type, doc_code, group
        and position ({"x": ..., "y": ...}).

    Raises:
        KeyError: If an edge refers to a node id that is not in ``df``.
    """
    # Number rows by position, not by index label, so the node ids agree with
    # the "<doc_code>-<position>" ids used in the edges.
    frame = df.reset_index(drop=True)

    meta: Dict[str, Dict[str, Any]] = {}
    for i, row in frame.iterrows():
        doc_code = str(row["doc_code"]).strip()
        dtype = str(row["document_type"]).strip()
        meta[f"{doc_code}-{i}"] = {
            "doc_type": dtype,
            "doc_code": doc_code,
            "group": get_group(dtype),
        }

    latest_y = 0
    position: Dict[str, Dict[str, int]] = {}  # resulting positions

    # Anchor the first sales order at the origin of its lane.
    for nid, info in meta.items():
        if info["doc_type"] in SALES_ORDER:
            position[nid] = {"x": X_BY_GROUP["SALES_ORDER"], "y": 0}
            break

    # Walk the edges in order and place each target relative to its source.
    for e in edges:
        src, tgt = e["source"], e["target"]
        src_group, tgt_group = meta[src]["group"], meta[tgt]["group"]

        # A source that has not been placed yet goes on the current row.
        if src not in position:
            position[src] = {"x": X_BY_GROUP[src_group], "y": latest_y}

        if src_group == tgt_group:
            # Same lane: move one row down.
            latest_y += Y_GAP
            ty = latest_y
        else:
            # Different lane: stay on the source's row.
            ty = position[src]["y"]

        position[tgt] = {"x": X_BY_GROUP[tgt_group], "y": ty}

    # Nodes that no edge touched would otherwise have no position at all.
    for nid, info in meta.items():
        if nid not in position:
            latest_y += Y_GAP
            position[nid] = {"x": X_BY_GROUP[info["group"]], "y": latest_y}

    # Attach the positions to the node metadata.
    for nid, p in position.items():
        meta[nid]["position"] = p

    return meta

In [86]:
# Compute the node metadata and positions for the current frame and edges.
positions = compute_positions(final_df, edges)
print(positions)

{'C-0': {'doc_type': 'Sales Order', 'doc_code': 'C', 'group': 'SALES_ORDER', 'position': {'x': 0, 'y': 0}}, 'J-1': {'doc_type': 'Outbound Delivery', 'doc_code': 'J', 'group': 'DELIVERY', 'position': {'x': 150, 'y': 0}}, 'Q-2': {'doc_type': 'Picking Request', 'doc_code': 'Q', 'group': 'DELIVERY', 'position': {'x': 150, 'y': 150}}, 'R-3': {'doc_type': 'GD Goods Issue', 'doc_code': 'R', 'group': 'DELIVERY', 'position': {'x': 150, 'y': 300}}, 'M-4': {'doc_type': 'Invoice', 'doc_code': 'M', 'group': 'BILLING', 'position': {'x': 300, 'y': 300}}, 'E-5': {'doc_type': 'Journal Entry', 'doc_code': 'E', 'group': 'ACCOUNTING', 'position': {'x': 450, 'y': 300}}, 'N-6': {'doc_type': 'Cancel Invoice', 'doc_code': 'N', 'group': 'BILLING', 'position': {'x': 300, 'y': 450}}, 'E-7': {'doc_type': 'Journal Entry', 'doc_code': 'E', 'group': 'ACCOUNTING', 'position': {'x': 450, 'y': 450}}, 'h-8': {'doc_type': 'RE Goods Delivery', 'doc_code': 'h', 'group': 'DELIVERY', 'position': {'x': 150, 'y': 600}}, 'R-9':

In [88]:
#node json 구성
def build_nodes(positions):
    """Turn the node metadata into a list of node dicts for the flow graph.

    Args:
        positions: Mapping of node id to a dict holding at least ``position``
            and ``doc_type``.

    Returns:
        One dict per node with id, type ('doc_flow'), position and
        data.label (the document type), in the mapping's iteration order.

    Raises:
        KeyError: If an entry has no ``position`` or ``doc_type``.
    """
    return [
        {
            "id": node_id,
            "type": "doc_flow",
            "position": details["position"],
            "data": {"label": details["doc_type"]},
        }
        for node_id, details in positions.items()
    ]

In [89]:
# Display the node list built from the computed positions.
build_nodes(positions)

[{'id': 'C-0',
  'type': 'doc_flow',
  'position': {'x': 0, 'y': 0},
  'data': {'label': 'Sales Order'}},
 {'id': 'J-1',
  'type': 'doc_flow',
  'position': {'x': 150, 'y': 0},
  'data': {'label': 'Outbound Delivery'}},
 {'id': 'Q-2',
  'type': 'doc_flow',
  'position': {'x': 150, 'y': 150},
  'data': {'label': 'Picking Request'}},
 {'id': 'R-3',
  'type': 'doc_flow',
  'position': {'x': 150, 'y': 300},
  'data': {'label': 'GD Goods Issue'}},
 {'id': 'M-4',
  'type': 'doc_flow',
  'position': {'x': 300, 'y': 300},
  'data': {'label': 'Invoice'}},
 {'id': 'E-5',
  'type': 'doc_flow',
  'position': {'x': 450, 'y': 300},
  'data': {'label': 'Journal Entry'}},
 {'id': 'N-6',
  'type': 'doc_flow',
  'position': {'x': 300, 'y': 450},
  'data': {'label': 'Cancel Invoice'}},
 {'id': 'E-7',
  'type': 'doc_flow',
  'position': {'x': 450, 'y': 450},
  'data': {'label': 'Journal Entry'}},
 {'id': 'h-8',
  'type': 'doc_flow',
  'position': {'x': 150, 'y': 600},
  'data': {'label': 'RE Goods Deliver