- 2025년 9월 9일(화)

In [1]:
import base64
import json

# Sample Base64 string (part of an AUTHN_INFO value excerpted from a log).
b64_string = "eyJ4dG0iOiIyMDI1LTA4LTIxVDAwOjAxOjM1LjEwMyIsInhpZCI6IklEUC0xNmI0NmVlMzIzM2ZkODM1NjIxMDBkMzJkYjYwYjIyOCIsImlkcHNlc3Npb24iOiJDQjI2MkI5OUZEMDE2MDVEN0YxMUQxNjJDQTVCRDk5RCIsInRva2VuIjoiW1VTRVJdXG5QV19DSEFOR0VfV0FSTj03XG5OT1RfQUZURVI9MjAyNTA4MjExMDAxMzVcbk5PV19MT0dJTl9JUD0xMjcuMC4wLjFcblNFU1NJT05fVElNRT0xMFxuTk9XX0xPR0lOX1RJTUU9MjAyNTA4MjEwOTAxMzVcbk5BTUU97ZmN6ri464+ZXG5QV19WQUxJREFURT05MFxuUE9MTElOR19USU1FPTMwXG5MT0dJTl9UWVBFPUlEX1BXXG5MQVNUX0xPR0lOX1RJTUU9MjAyNTA4MjEwOTAxMTNcblBXX01JU01BVENIX0FMTE9XPTVcblBXX1VQREFURV9EQVlTPTBcblRJTUVTVEFNUD0yMDI1MDgyMTA5MDEzNTA1OVxuTEFTVF9MT0dJTl9JUD0xMjcuMC4wLjFcbklEPXNzb3VzZXJcblxuW0FQUExERUZBVUxUXSJ9"

# Step 1: Base64 text -> raw bytes.
decoded_bytes = base64.b64decode(b64_string)

# Step 2: raw bytes -> UTF-8 text (expected to hold a JSON document).
decoded_str = decoded_bytes.decode("utf-8")

# Step 3: parse the text as JSON; if it is not valid JSON, show the decoded
# text unchanged so it can still be inspected.
try:
    data = json.loads(decoded_str)
except json.JSONDecodeError:
    print("JSON으로 파싱 불가. 원문 출력:")
    print(decoded_str)
else:
    print("디코딩된 JSON:", data)

디코딩된 JSON: {'xtm': '2025-08-21T00:01:35.103', 'xid': 'IDP-16b46ee3233fd83562100d32db60b228', 'idpsession': 'CB262B99FD01605D7F11D162CA5BD99D', 'token': '[USER]\nPW_CHANGE_WARN=7\nNOT_AFTER=20250821100135\nNOW_LOGIN_IP=127.0.0.1\nSESSION_TIME=10\nNOW_LOGIN_TIME=20250821090135\nNAME=홍길동\nPW_VALIDATE=90\nPOLLING_TIME=30\nLOGIN_TYPE=ID_PW\nLAST_LOGIN_TIME=20250821090113\nPW_MISMATCH_ALLOW=5\nPW_UPDATE_DAYS=0\nTIMESTAMP=20250821090135059\nLAST_LOGIN_IP=127.0.0.1\nID=ssouser\n\n[APPLDEFAULT]'}


In [None]:
# Absolute path of the SSO agent log file to analyse.
# NOTE(review): this cell has no execution count and `data_path` is not read by
# any of the cells visible here (a later cell defines its own `log_file` with
# the same value) — confirm whether this cell is still needed.
data_path = "/home/kongju/DEV/dream/DATA/LOGS/sp2__ssoagent_20250821.log"



In [2]:
import re
import base64
import json
import pandas as pd

def extract_authn_info_from_log(filepath: str) -> list:
    """
    Extract every AUTHN_INFO Base64 payload from a log file, then decode and
    JSON-parse each one.

    Args:
        filepath: Path of the log file. It is read as UTF-8 and undecodable
            bytes are dropped.

    Returns:
        A list with one dict per AUTHN_INFO match, in file order:
          - "index":  1-based position of the match.
          - "base64": the captured Base64 text, stripped of surrounding
                      whitespace.
          - "parsed": the value produced by ``json.loads`` on success;
                      otherwise ``{"error": <message>, "raw_decoded": <text>}``
                      where ``raw_decoded`` is the Base64-decoded text, or
                      ``None`` when the Base64 decoding itself failed.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
        content = f.read()

    # Locate each AUTHN_INFO attribute and capture the text inside its
    # AttributeValue element (DOTALL lets the element span several lines).
    matches = re.findall(
        r'<saml2:Attribute Name="AUTHN_INFO">.*?<saml2:AttributeValue.*?>(.*?)</saml2:AttributeValue>',
        content,
        re.DOTALL
    )

    results = []
    for idx, b64_str in enumerate(matches, 1):
        # Reset per iteration: if b64decode raises, the handler below must not
        # hit an unbound name (first match) or reuse the previous match's text.
        decoded_str = None
        try:
            decoded_str = base64.b64decode(b64_str).decode("utf-8", errors="ignore")
            parsed_json = json.loads(decoded_str)
        except Exception as e:
            # Best effort: record the failure for this entry and keep going.
            parsed_json = {"error": str(e), "raw_decoded": decoded_str}

        results.append({"index": idx, "base64": b64_str.strip(), "parsed": parsed_json})

    return results


def flatten_authn_info(parsed_json: dict):
    """
    Flatten an AUTHN_INFO JSON object into a (scope, key, value) DataFrame.

    Every top-level entry except "token" becomes a row with scope "top".
    The "token" text is scanned line by line, and each ``Key=Value`` line
    inside its ``[USER]`` section becomes a row with scope "token.USER".
    Scanning stops at the first blank line or the first other ``[...]``
    header met after ``[USER]`` has been entered.
    """
    # Top-level fields first, so they come before the token rows.
    records = []
    for name, val in parsed_json.items():
        if name != "token":
            records.append({"scope": "top", "key": name, "value": val})

    user_pairs = {}
    inside_user = False
    for raw_line in parsed_json.get("token", "").splitlines():
        text = raw_line.strip()

        if text == "":
            # A blank line ends the USER section; before it, it is just skipped.
            if inside_user:
                break
            continue

        if text.startswith("[") and text.endswith("]"):
            # Section header: enter on [USER], and leave on any other header
            # once already inside.
            if text.strip("[]").upper() == "USER":
                inside_user = True
            elif inside_user:
                break
            continue

        if not inside_user or "=" not in text:
            continue

        # Split on the first '=' only, so values may themselves contain '='.
        name, _, val = text.partition("=")
        user_pairs[name.strip()] = val.strip()

    for name, val in user_pairs.items():
        records.append({"scope": "token.USER", "key": name, "value": val})

    return pd.DataFrame(records, columns=["scope", "key", "value"])



In [3]:
log_file = "/home/kongju/DEV/dream/DATA/LOGS/sp2__ssoagent_20250821.log"

# Extract every AUTHN_INFO entry found in the log.
results = extract_authn_info_from_log(log_file)

# Print a short report per entry: a Base64 preview, then either the flattened
# table or the raw parse result when the entry has no usable "token".
for r in results:
    parsed = r["parsed"]
    print(f"\n=== AUTHN_INFO #{r['index']} ===")
    print("Base64 snippet:", r["base64"][:80], "...")
    if not (isinstance(parsed, dict) and "token" in parsed):
        print("Parsing 실패:", parsed)
        continue
    df = flatten_authn_info(parsed)
    print(df)


=== AUTHN_INFO #1 ===
Base64 snippet: eyJ4dG0iOiIyMDI1LTA4LTIxVDA1OjI0OjU4LjkwOSIsInhpZCI6IklEUC1hNWY4MWIwMzRmMzFjZGUz ...
         scope                key                                 value
0          top                xtm               2025-08-21T05:24:58.909
1          top                xid  IDP-a5f81b034f31cde385fdd91cec985bec
2          top         idpsession      7CA150B783D3829CB734219083500681
3   token.USER     PW_CHANGE_WARN                                     7
4   token.USER          NOT_AFTER                        20250821152436
5   token.USER       NOW_LOGIN_IP                             127.0.0.1
6   token.USER       SESSION_TIME                                    10
7   token.USER     NOW_LOGIN_TIME                        20250821142436
8   token.USER               NAME                                   홍길동
9   token.USER        PW_VALIDATE                                    90
10  token.USER       POLLING_TIME                                    30
11  token.US

In [6]:
# Write the current `df` to an Excel file in the working directory.
# In the cells visible here, `df` is only assigned inside the reporting loop,
# so it holds the table of the last entry that was flattened (earlier entries
# are overwritten) and is undefined if no entry had a "token".
# NOTE(review): the execution count jumps from 3 to 6, so `df` may have been
# reassigned by cells that are not shown — confirm which frame is exported.
df.to_excel("sp2__ssoagent_20250821_authn_info.xlsx")