### 실제로 사기가 얼마나 발생하는가?

- 코스피 200 기준

In [None]:
import pandas as pd

# Load the listed-company roster and keep only the name and ticker columns.
# Forward slashes are used because "\상" and "\k" are invalid escape sequences
# (a SyntaxWarning today, an error in a future Python) and because a backslash
# path only resolves on Windows; "/" works on every platform.
file_path = 'data/상장법인목록.csv'
df = pd.read_csv(file_path, encoding='cp949')

df_simplified = df[['회사명', '종목코드']].copy()
df_simplified.columns = ['company_name', 'stock_code']

# The ticker column may be parsed as an integer, which drops leading zeros;
# restore the fixed six-character form.
df_simplified['stock_code'] = df_simplified['stock_code'].astype(str).str.zfill(6)

print(df_simplified.head())

# utf-8-sig writes a BOM so spreadsheet tools detect the encoding of the Korean text.
df_simplified.to_csv('data/kospi200_company_code.csv', index=False, encoding='utf-8-sig')


  company_name stock_code
0         한화비전     489790
1    HD현대마린솔루션     443060
2         에이피알     278470
3         엘앤에프     066970
4        포스코DX     022100


In [None]:
import requests
import zipfile
import io

import os

# NOTE(security): the key used to be hard-coded only. Prefer the DART_API_KEY
# environment variable; the literal is kept solely as a backward-compatible
# fallback and should be revoked and removed once the variable is configured.
api_key = os.environ.get("DART_API_KEY", "a3e8c0f55e2d783460b8337e65f89c08c66b9bc0")
url = "https://opendart.fss.or.kr/api/corpCode.xml"

# Send the key as a query parameter (properly encoded, not baked into the URL
# string) and bound the wait so a stalled connection cannot hang the notebook.
response = requests.get(url, params={"crtfc_key": api_key}, timeout=30)

if response.status_code == 200:
    try:
        with zipfile.ZipFile(io.BytesIO(response.content)) as z:
            z.extractall("corpcode_data")
        print("압축 풀림 완료: corpcode_data 폴더 확인")
    except zipfile.BadZipFile:
        # A 200 response whose body is not a ZIP archive (for example an error
        # document returned for a rejected key) is reported, not raised.
        print("에러 발생:", response.text)
else:
    print("에러 발생:", response.text)


압축 풀림 완료: corpcode_data 폴더 확인


코스피 200 종류, 기업 고유 코드, 종목코드 연결

In [None]:
import pandas as pd
import xml.etree.ElementTree as ET

# (1) Load the listed-company roster (name and ticker only).
# Forward slashes replace the backslash paths: "\상" and "\C" are invalid escape
# sequences (the latter produced the SyntaxWarning seen when this cell ran) and
# backslash separators only resolve on Windows.
df_kospi = pd.read_csv("data/상장법인목록.csv", encoding="cp949")[["회사명", "종목코드"]]
df_kospi.columns = ["corp_name", "stock_code"]
# Restore leading zeros lost if the ticker column was parsed as an integer.
df_kospi["stock_code"] = df_kospi["stock_code"].astype(str).str.zfill(6)

# (2) Parse CORPCODE.xml from the folder the archive was extracted into.
tree = ET.parse("corpcode_data/CORPCODE.xml")
root = tree.getroot()

# (3) Collect name, corp_code and stock_code from every child element of the root.
corp_info = [
    {
        "corp_name": child.findtext("corp_name"),
        "corp_code": child.findtext("corp_code"),
        "stock_code": child.findtext("stock_code"),
    }
    for child in root
]

df_corp_code = pd.DataFrame(corp_info)

# (4) Attach corp_code to each roster company by ticker. Both frames carry a
# corp_name column, so the merge suffixes them; the roster's name (_x) is kept.
df_merged = pd.merge(df_kospi, df_corp_code, on="stock_code", how="inner")
df_merged = df_merged[["corp_name_x", "stock_code", "corp_code"]]
df_merged.columns = ["corp_name", "stock_code", "corp_code"]

# Display the result.
df_merged


  tree = ET.parse("corpcode_data\CORPCODE.xml")


Unnamed: 0,corp_name,stock_code,corp_code
0,한화비전,489790,01867758
1,HD현대마린솔루션,443060,01194689
2,에이피알,278470,01190568
3,엘앤에프,066970,00398701
4,포스코DX,022100,00155212
...,...,...,...
195,대한전선,001440,00113207
196,한국앤컴퍼니,000240,00160047
197,대한항공,003490,00113526
198,유한양행,000100,00145109


In [15]:
# Show the mapped row(s) whose company name is exactly "삼성물산".
df_merged.loc[df_merged["corp_name"] == "삼성물산"]

Unnamed: 0,corp_name,stock_code,corp_code
45,삼성물산,28260,149655


In [11]:
import pandas as pd
import requests
import time

import os

# Assumes df_merged has already been created.

# DART API key.
# NOTE(security): prefer the DART_API_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be revoked and removed
# once the variable is configured.
API_KEY = os.environ.get("DART_API_KEY", "a3e8c0f55e2d783460b8337e65f89c08c66b9bc0")

# 키워드 기반 이벤트 탐색 함수
# Keyword-based event search.
def search_events(corp_code, api_key, keywords=("횡령", "배임"), start_date="20190101", end_date="20240630"):
    """Count a company's DART filings whose title contains any keyword.

    All result pages of the ``list.json`` endpoint are walked, not just the
    first 100 filings, so matches on later pages are no longer missed.

    Args:
        corp_code: DART corp code. It is converted to ``str`` and left-padded
            with zeros to eight characters, so a code that lost its leading
            zeros (e.g. ``149655``) is still sent as ``00149655``.
        api_key: OpenDART key, sent as ``crtfc_key``.
        keywords: Substrings looked for in each filing's ``report_nm``. The
            defaults are the Korean terms for embezzlement and breach of trust.
        start_date: Sent as ``bgn_de`` (YYYYMMDD).
        end_date: Sent as ``end_de`` (YYYYMMDD).

    Returns:
        ``(count, links)`` where ``links`` are viewer URLs of the matching
        filings. A non-"000" API status ends the search and returns what was
        collected so far (``(0, [])`` on the first page). If the request fails
        or the response cannot be decoded, ``("에러", [message])`` is returned.
    """
    url = "https://opendart.fss.or.kr/api/list.json"
    page_size = 100
    params = {
        "crtfc_key": api_key,
        "corp_code": str(corp_code).zfill(8),
        "bgn_de": start_date,
        "end_de": end_date,
        "page_no": 1,
        "page_count": page_size,
    }
    hit_links = []

    try:
        while True:
            # The timeout keeps one stalled request from hanging the whole batch.
            res = requests.get(url, params=params, timeout=10)
            data = res.json()

            if data["status"] != "000":
                break

            items = data["list"]
            for item in items:
                title = item.get("report_nm") or ""
                if any(kw in title for kw in keywords):
                    rcp_no = item.get("rcept_no")
                    hit_links.append(f"https://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcp_no}")

            # Stop on a short page, or once the reported last page is reached.
            if len(items) < page_size:
                break
            total_page = data.get("total_page")
            if total_page is not None and params["page_no"] >= int(total_page):
                break

            params["page_no"] += 1
            time.sleep(0.3)  # stay under the DART request-rate limit

    except (requests.RequestException, ValueError, KeyError) as e:
        return "에러", [str(e)]

    return len(hit_links), hit_links

# Query every company in df_merged and collect one summary row per company.
results = []
for corp_name, corp_code in zip(df_merged["corp_name"], df_merged["corp_code"]):
    count, links = search_events(corp_code, API_KEY)

    # Links are flattened into a single cell; "없음" marks companies with none.
    link_text = "; ".join(links) if links else "없음"
    results.append(
        {
            "기업명": corp_name,
            "이벤트 발생 횟수": count,
            "관련 링크": link_text,
        }
    )

    time.sleep(0.3)  # pause between companies to respect the DART rate limit

# Assemble the per-company summary table.
df_result = pd.DataFrame(results)

# Persist as CSV (BOM-prefixed UTF-8), then display the table.
df_result.to_csv("event_result.csv", index=False, encoding="utf-8-sig")
df_result


Unnamed: 0,기업명,이벤트 발생 횟수,관련 링크
0,한화비전,0,없음
1,HD현대마린솔루션,0,없음
2,에이피알,0,없음
3,엘앤에프,0,없음
4,포스코DX,0,없음
...,...,...,...
195,대한전선,0,없음
196,한국앤컴퍼니,2,https://dart.fss.or.kr/dsaf001/main.do?rcpNo=2...
197,대한항공,0,없음
198,유한양행,0,없음


In [23]:
### 페이지 반복 코드

import pandas as pd
import requests
import time

import os

# Assumes df_merged is a DataFrame that contains 'corp_name' and 'corp_code' columns.
# e.g. df_merged = pd.DataFrame([...])

# NOTE(security): prefer the DART_API_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be revoked and removed
# once the variable is configured.
API_KEY = os.environ.get("DART_API_KEY", "a3e8c0f55e2d783460b8337e65f89c08c66b9bc0")

def search_events(corp_code, api_key, keywords=("횡령", "배임"), start_date="20200624", end_date="20250624"):
    """Count a company's DART filings whose title contains any keyword.

    Walks every result page of the ``list.json`` endpoint, 100 filings at a
    time, pausing 0.3 s between pages.

    Args:
        corp_code: DART corp code. It is converted to ``str`` and left-padded
            with zeros to eight characters, so a code that lost its leading
            zeros (e.g. ``149655``) is still sent as ``00149655``.
        api_key: OpenDART key, sent as ``crtfc_key``.
        keywords: Substrings looked for in each filing's ``report_nm``. The
            defaults are the Korean terms for embezzlement and breach of trust.
        start_date: Sent as ``bgn_de`` (YYYYMMDD).
        end_date: Sent as ``end_de`` (YYYYMMDD).

    Returns:
        ``(count, links)`` where ``links`` are viewer URLs of the matching
        filings. If a request fails or a response cannot be decoded, returns
        ``(0, ["요청 실패: ..."])``; matches gathered from earlier pages are
        discarded in that case.
    """
    url = "https://opendart.fss.or.kr/api/list.json"
    page_size = 100
    corp_code = str(corp_code).zfill(8)
    page_no = 1
    hit_links = []

    while True:
        params = {
            "crtfc_key": api_key,
            "corp_code": corp_code,
            "bgn_de": start_date,
            "end_de": end_date,
            "page_no": page_no,
            "page_count": page_size,
        }

        try:
            res = requests.get(url, params=params, timeout=10)
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            return 0, [f"요청 실패: {str(e)}"]

        status = data.get("status")
        if status != "000" or "list" not in data:
            # "013" is DART's "no data found" status and is an ordinary empty
            # result. Anything else (bad key, invalid field, quota exceeded...)
            # is surfaced so it is not mistaken for "no events".
            if status not in ("000", "013"):
                print(f"[경고] DART 응답 오류 (corp_code={corp_code}): {status} {data.get('message', '')}")
            break

        items = data["list"]
        for item in items:
            title = item.get("report_nm") or ""
            if any(kw in title for kw in keywords):
                rcp_no = item.get("rcept_no")
                hit_links.append(f"https://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcp_no}")

        # Last page reached: a short page, or the reported page count is hit
        # (the latter avoids one extra request when the total is a multiple of 100).
        if len(items) < page_size:
            break
        total_page = data.get("total_page")
        if total_page is not None and page_no >= int(total_page):
            break

        page_no += 1
        time.sleep(0.3)

    return len(hit_links), hit_links

# Example run: search every company in df_merged and gather one row per company.
results = []
for corp_name, corp_code in zip(df_merged["corp_name"], df_merged["corp_code"]):
    count, links = search_events(corp_code, API_KEY)

    # Links are flattened into a single cell; "없음" marks companies with none.
    link_text = "; ".join(links) if links else "없음"
    results.append(
        {
            "기업명": corp_name,
            "이벤트 발생 횟수": count,
            "관련 링크": link_text,
        }
    )
    time.sleep(0.3)

# Save the per-company summary as CSV (BOM-prefixed UTF-8).
df_result = pd.DataFrame(results)
df_result.to_csv("event_result3.csv", index=False, encoding="utf-8-sig")


In [None]:
import requests
import time

import os

# DART authentication key.
# NOTE(security): prefer the DART_API_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be revoked and removed
# once the variable is configured.
API_KEY = os.environ.get("DART_API_KEY", "a3e8c0f55e2d783460b8337e65f89c08c66b9bc0")

# Samsung Electronics corp_code (e.g. 00126380)
CORP_CODE = "00126380"

def search_events_single(corp_code, api_key, keywords=("횡령", "배임"), start_date="20190101", end_date="20240630"):
    """Print and return the DART filings of one company that match a keyword.

    Walks every result page of the ``list.json`` endpoint, 100 filings at a
    time, pausing 0.2 s between pages. Each match is printed as it is found
    and a final count line is printed before returning.

    Args:
        corp_code: DART corp code. It is converted to ``str`` and left-padded
            with zeros to eight characters, so a code that lost its leading
            zeros is still sent in the eight-digit form.
        api_key: OpenDART key, sent as ``crtfc_key``.
        keywords: Substrings looked for in each filing's ``report_nm``. The
            defaults are the Korean terms for embezzlement and breach of trust.
        start_date: Sent as ``bgn_de`` (YYYYMMDD).
        end_date: Sent as ``end_de`` (YYYYMMDD).

    Returns:
        List of viewer URLs for the matching filings. A failed request or an
        API error status is printed and ends the search; matches found up to
        that point are still returned.
    """
    url = "https://opendart.fss.or.kr/api/list.json"
    page_size = 100
    corp_code = str(corp_code).zfill(8)
    page_no = 1
    hit_links = []

    while True:
        params = {
            "crtfc_key": api_key,
            "corp_code": corp_code,
            "bgn_de": start_date,
            "end_de": end_date,
            "page_no": page_no,
            "page_count": page_size,
        }

        try:
            res = requests.get(url, params=params, timeout=10)
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            print(f"에러 발생: {str(e)}")
            break

        status = data.get("status")
        if status != "000" or "list" not in data:
            # "013" is DART's "no data found" status and is an ordinary empty
            # result. Any other status is reported instead of being silently
            # treated as "nothing found".
            if status not in ("000", "013"):
                print(f"에러 발생: status={status} {data.get('message', '')}")
            break

        items = data["list"]
        for item in items:
            # Titles can carry trailing padding; strip it so the log line is tidy.
            title = (item.get("report_nm") or "").strip()
            if any(kw in title for kw in keywords):
                rcp_no = item.get("rcept_no")
                link = f"https://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcp_no}"
                print(f"[HIT] {title} → {link}")
                hit_links.append(link)

        # Last page reached: a short page, or the reported page count is hit.
        if len(items) < page_size:
            break
        total_page = data.get("total_page")
        if total_page is not None and page_no >= int(total_page):
            break

        page_no += 1
        time.sleep(0.2)

    print(f"\n총 발견된 보고서 수: {len(hit_links)}")
    return hit_links

# Run the search for the configured company; the notebook echoes the returned links.
search_events_single(corp_code=CORP_CODE, api_key=API_KEY)


[HIT] 횡령ㆍ배임사실확인 → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20210120800650
[HIT] 횡령ㆍ배임사실확인 → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20190903800354

총 발견된 보고서 수: 2


['https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20210120800650',
 'https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20190903800354']

In [21]:

import requests
import time

import os

# DART authentication key.
# NOTE(security): prefer the DART_API_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be revoked and removed
# once the variable is configured.
API_KEY = os.environ.get("DART_API_KEY", "a3e8c0f55e2d783460b8337e65f89c08c66b9bc0")

# Samsung C&T corp_code
CORP_CODE = "00149655"

def search_events_single(corp_code, api_key, keywords=("횡령", "배임"), start_date="20200101", end_date="20250630"):  # end_date is configurable
    """Print and return the DART filings of one company that match a keyword.

    Walks every result page of the ``list.json`` endpoint, 100 filings at a
    time, pausing 0.2 s between pages. Each match is printed as it is found
    and a final count line is printed before returning.

    Args:
        corp_code: DART corp code. It is converted to ``str`` and left-padded
            with zeros to eight characters, so a code that lost its leading
            zeros (e.g. ``149655``) is still sent as ``00149655``.
        api_key: OpenDART key, sent as ``crtfc_key``.
        keywords: Substrings looked for in each filing's ``report_nm``. The
            defaults are the Korean terms for embezzlement and breach of trust.
        start_date: Sent as ``bgn_de`` (YYYYMMDD).
        end_date: Sent as ``end_de`` (YYYYMMDD).

    Returns:
        List of viewer URLs for the matching filings. A failed request or an
        API error status is printed and ends the search; matches found up to
        that point are still returned.
    """
    url = "https://opendart.fss.or.kr/api/list.json"
    page_size = 100
    corp_code = str(corp_code).zfill(8)
    page_no = 1
    hit_links = []

    while True:
        params = {
            "crtfc_key": api_key,
            "corp_code": corp_code,
            "bgn_de": start_date,
            "end_de": end_date,
            "page_no": page_no,
            "page_count": page_size,
        }

        try:
            res = requests.get(url, params=params, timeout=10)
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            print(f"에러 발생: {str(e)}")
            break

        status = data.get("status")
        if status != "000" or "list" not in data:
            # "013" is DART's "no data found" status and is an ordinary empty
            # result. Any other status is reported instead of being silently
            # treated as "nothing found".
            if status not in ("000", "013"):
                print(f"에러 발생: status={status} {data.get('message', '')}")
            break

        items = data["list"]
        for item in items:
            # Titles can carry trailing padding; strip it so the log line is tidy.
            title = (item.get("report_nm") or "").strip()
            if any(kw in title for kw in keywords):
                rcp_no = item.get("rcept_no")
                link = f"https://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcp_no}"
                print(f"[HIT] {title} → {link}")
                hit_links.append(link)

        # Last page reached: a short page, or the reported page count is hit.
        if len(items) < page_size:
            break
        total_page = data.get("total_page")
        if total_page is not None and page_no >= int(total_page):
            break

        page_no += 1
        time.sleep(0.2)

    print(f"\n총 발견된 보고서 수: {len(hit_links)}")
    return hit_links

# Run the search for the configured company; the notebook echoes the returned links.
search_events_single(corp_code=CORP_CODE, api_key=API_KEY)

[HIT] [기재정정]횡령ㆍ배임사실확인               → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20250207801436
[HIT] 횡령ㆍ배임사실확인               → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20250203801077
[HIT] [기재정정]횡령ㆍ배임사실확인               → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20240208801439
[HIT] 횡령ㆍ배임사실확인               → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20240205801064
[HIT] 횡령ㆍ배임혐의발생 → https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20200904800573

총 발견된 보고서 수: 5


['https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20250207801436',
 'https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20250203801077',
 'https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20240208801439',
 'https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20240205801064',
 'https://dart.fss.or.kr/dsaf001/main.do?rcpNo=20200904800573']

In [25]:
import pandas as pd

# Load the saved per-company results and add up the event-count column.
# NOTE(review): the batch cell writes event_result3.csv to the working directory,
# while this reads it from event_result_data/ — confirm the file was moved there.
df = pd.read_csv("event_result_data/event_result3.csv")

event_counts = df["이벤트 발생 횟수"]
total_event = event_counts.sum()

print(total_event)

72
