In [1]:
import os
from time import sleep

import dart_fss as dart
import FinanceDataReader as fdr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
from coredotfinance.data import KrxReader
from plotly.subplots import make_subplots
from tqdm.auto import tqdm

krx = KrxReader()



In [None]:
# Load the listed-company table from the KRX reader. Later cells read its
# '종목코드', '업종코드' and '업종명' columns.
corp_df = krx.listed_company()
# Preview the first rows only instead of rendering the whole listing.
corp_df.head()

In [None]:
def get_report(key, corp_code, year, rpt_code, fs_div='OFS'):
    """Fetch one company's financial statement accounts from OpenDART.

    Sends a GET request to the ``fnlttSinglAcntAll.json`` endpoint and
    returns the decoded JSON body without further processing.

    Args:
        key: OpenDART API key, sent as ``crtfc_key``.
        corp_code: DART corporation code, sent as ``corp_code``.
        year: Business year, sent as ``bsns_year``.
        rpt_code: Report code, sent as ``reprt_code``.
        fs_div: Statement division, sent as ``fs_div``. Defaults to
            ``'OFS'``, the value that was previously hard-coded, so existing
            four-argument calls behave as before.

    Returns:
        The parsed JSON response. Callers in this notebook read its
        ``'status'`` and ``'list'`` keys.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.Timeout: If the server does not respond within 3 seconds.
    """
    # Imported here so the function does not rely on another cell's imports.
    import requests

    url = 'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json'
    params = {
        'crtfc_key': key,
        'corp_code': corp_code,
        'bsns_year': year,
        'reprt_code': rpt_code,
        'fs_div': fs_div,
    }
    response = requests.get(url, params=params, timeout=3)
    # Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
    response.raise_for_status()

    return response.json()

In [None]:
def get_corp_list(key):
    """Download the OpenDART corporation-code archive and index it by ticker.

    The ``corpCode.xml`` endpoint answers with a zip archive. The XML inside
    is parsed directly from memory; nothing is written to disk.

    Args:
        key: OpenDART API key, sent as ``crtfc_key``.

    Returns:
        A dict mapping each non-blank ``stock_code`` to its ``corp_code``.
        Entries whose ``stock_code`` is missing, empty or whitespace-only
        are left out.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        zipfile.BadZipFile: If the response body is not a zip archive.
        ValueError: If the archive holds no ``corpCode.xml`` member.
    """
    # Imported here so the function does not rely on another cell's imports.
    import requests
    from io import BytesIO
    from zipfile import ZipFile
    from xml.etree.ElementTree import parse

    url = 'https://opendart.fss.or.kr/api/corpCode.xml'
    response = requests.get(url, params={'crtfc_key': key}, timeout=30)
    response.raise_for_status()

    with ZipFile(BytesIO(response.content)) as archive:
        # Match the member name case-insensitively so the lookup does not
        # depend on the exact capitalisation used inside the archive.
        xml_name = next(
            (name for name in archive.namelist() if name.lower() == 'corpcode.xml'),
            None,
        )
        if xml_name is None:
            raise ValueError('corpCode.xml not found in the OpenDART archive')
        with archive.open(xml_name) as xml_file:
            root = parse(xml_file).getroot()

    corp_list = {}
    for entry in root.findall('list'):
        stock_code = (entry.findtext('stock_code') or '').strip()
        # Ignore companies without a stock code.
        if not stock_code:
            continue
        corp_list[stock_code] = entry.findtext('corp_code')

    return corp_list

In [None]:
# The OpenDART key is a credential, so it is read from the environment
# instead of being stored in the notebook.
# NOTE(review): the key that used to be hard-coded in this cell should be
# treated as leaked and rotated.
API_KEY = os.environ.get('DART_API_KEY')
if not API_KEY:
    raise RuntimeError(
        'Set the DART_API_KEY environment variable to your OpenDART API key.'
    )
YEAR = '2021'       # business year passed to get_report
RPT_CODE = '11011'  # report code passed to get_report

# Mapping of stock code -> DART corporation code.
corp_list = get_corp_list(API_KEY)

In [None]:
print(len(corp_list))

# Account ids to pull out of every report, in the same order as the
# financial columns of fs_df built at the end of this cell.
item_list = [
    'ifrs-full_Revenue',
    'ifrs-full_CostOfSales',
    'ifrs-full_ProfitLoss',
    'ifrs-full_Equity',
    'ifrs-full_EquityAndLiabilities',
]

fs_list = []
ticker_field_pairs = zip(corp_df['종목코드'], corp_df['업종명'])
for ticker, field_name in tqdm(ticker_field_pairs, total=len(corp_df)):
    # A listed ticker may have no entry in the DART mapping; skip it rather
    # than aborting the whole crawl with a KeyError.
    corp_code = corp_list.get(ticker)
    if corp_code is None:
        continue

    try:
        report = get_report(API_KEY, corp_code, YEAR, RPT_CODE)
    except requests.exceptions.RequestException as exc:
        # One timeout or connection error should not discard the crawl.
        print(f'{ticker}: request failed ({exc}); skipped')
        continue

    if report['status'] == '020':
        print('Request exceed!')
        break
    if report['status'] != '000':
        # Any other non-success status: no usable report for this company.
        continue

    # Index the report once, keeping the first entry seen per account id.
    first_by_account = {}
    for entry in report['list']:
        first_by_account.setdefault(entry.get('account_id'), entry)

    fs_item_list = [field_name]
    for item in item_list:
        if item in first_by_account:
            fs_item_list.append(first_by_account[item]['thstrm_amount'])
        else:
            # Account missing from this report: recorded as 0.
            fs_item_list.append(0)
    fs_list.append(fs_item_list)

fs_df = pd.DataFrame(fs_list, columns=[
    '업종명',
    '매출액',
    '매출원가',
    '당기순이익',
    '자본총계',
    '부채 및 자본 총계'
])
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print().
fs_df.info()
fs_df.head()

In [None]:
# Financial columns of fs_df. They hold the raw 'thstrm_amount' values (or 0
# where an account was missing), so they are converted to numbers before
# aggregating.
convert_list = ['매출액', '매출원가', '당기순이익', '자본총계', '부채 및 자본 총계']
for item in convert_list:
    fs_df[item] = pd.to_numeric(fs_df[item], downcast='integer')

# to_csv does not create missing directories, so make sure ./data exists.
os.makedirs('./data', exist_ok=True)

# Group once by sector name and export both the per-sector sum and mean.
fields_grouped = fs_df.groupby('업종명')[convert_list]
fields_grouped.sum().to_csv('./data/fields_sum.csv')
fields_grouped.mean().to_csv('./data/fields_mean.csv')

In [None]:
# Ten rows of fs_df with the largest '매출액', drawn as bars labelled by '업종명'.
df = fs_df.sort_values(by=['매출액'], ascending=False).head(10)

fig = go.Figure()
fig.add_trace(go.Bar(x=df['업종명'], y=df['매출액']))
# Pin the y axis to a fixed 0 .. 1e15 window.
fig.update_layout(yaxis=dict(range=[0, 1e15]))
fig.show()

In [None]:
# Ten rows of fs_df with the largest '부채 및 자본 총계', drawn as bars
# labelled by '업종명'.
ranked = fs_df.sort_values(by=['부채 및 자본 총계'], ascending=False)
df = ranked.iloc[:10]

bars = go.Bar(x=df['업종명'], y=df['부채 및 자본 총계'])
fig = go.Figure(data=bars)
# Same fixed 0 .. 1e15 y-axis window as the chart's original layout call.
fig.update_layout(yaxis={'range': [0, 1e15]})
fig.show()

In [None]:
# Single date used as both start and end of every price query.
DAY = '20220701'

# NOTE(review): the value stored as '시가총액' is column 3 times column 4 of
# the returned price frame. If that frame follows an
# Open/High/Low/Close/Volume layout this is close x volume (traded value),
# not market capitalisation - confirm the intended metric.
mc_rows = []
ticker_field_pairs = zip(corp_df['종목코드'], corp_df['업종명'])
for ticker, field_name in tqdm(ticker_field_pairs, total=len(corp_df)):
    series = fdr.DataReader(ticker, DAY, DAY)
    # No row came back for DAY: skip the ticker instead of letting
    # iloc[0, ...] raise IndexError and abort the loop.
    if series.empty:
        continue
    market_cap = series.iloc[0, 3] * series.iloc[0, 4]
    mc_rows.append([field_name, market_cap])

mc_df = pd.DataFrame(mc_rows, columns=['업종명', '시가총액'])
mc_df.head()

In [None]:
# Sum '시가총액' per sector and keep the ten largest sectors. The column is
# selected explicitly, so mc_df itself is not modified by this cell.
df = (
    mc_df.groupby('업종명')[['시가총액']]
    .sum()
    .sort_values(by=['시가총액'], ascending=False)
    .head(10)
)

fig = go.Figure(data=go.Bar(
    x=df.index,
    y=df['시가총액'],
))
fig.update_layout(title='시가총액')
fig.show()