# 公開資訊觀測站｜三大財務報表

資產負債表：https://mops.twse.com.tw/mops/web/t164sb03<br>
綜合損益表：https://mops.twse.com.tw/mops/web/t164sb04<br>
現金流量表：https://mops.twse.com.tw/mops/web/t164sb05<br>

In [4]:
import requests
import pandas as pd
import os

# Stock code and reporting period to request.
stock_id = 1101
year = 2019
season = 1

# AJAX endpoint behind the balance-sheet page (t164sb03).
url = 'https://mops.twse.com.tw/mops/web/ajax_t164sb03'

# Form fields posted to the endpoint.
form_data = {
    'encodeURIComponent': 1,
    'step': 1,
    'firstin': 1,
    'off': 1,
    'queryName': 'co_id',
    'inpuType': 'co_id',
    'TYPEK': 'all',
    'isnew': 'false',
    'co_id': stock_id,
    'year': year - 1911,  # the form takes the year with 1911 subtracted (2019 -> 108)
    'season': season,
}

# Browser-like request headers; the Referer points back at the endpoint itself.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7,zh-CN;q=0.6',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': '*/*',
    'Referer': url,
}

# Without a timeout requests waits indefinitely, so a stalled connection
# would hang the cell; give up after 30 seconds instead.
response = requests.post(url, headers=headers, data=form_data, timeout=30)

In [9]:
response.text

'\r\n\r\n<html>\r\n<head>\r\n\t<title>公開資訊觀測站</title>\r\n<!--\t<link href="css/css1.css" rel="stylesheet" type="text/css" Media="Screen"/> -->\r\n<!--\t<script type="text/javascript" src="js/mops1.js"></script> -->\r\n</head>\r\n\r\n<body>\r\n<center>\n<h2 align=\'center\'><font color=\'blue\'>合併資產負債表</font></h2>\n<h4 align=\'center\'><font color=\'blue\'>本資料由台泥公司提供</font></h4>\n<table class=\'noBorder\' align=\'center\'\'>\n<tr><td><font color=\'red\' size=\'2\'>「投資人若需了解更詳細資訊可至<a target=\'_blank\' style=\'color:violet;font-size:small;font-weight:bold;font-family:Arial, Helvetica, Sans-serif,"宋體";text-decoration:underline;\' href=\'/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2019&SSEASON=1&REPORT_ID=C\'>XBRL資訊平台</a>或<a target=\'_blank\' style=\'color:violet;font-size:small;font-weight:bold;font-family:Arial, Helvetica, Sans-serif,"宋體";text-decoration:underline;\' href=\'http://doc.twse.com.tw/server-java/t57sb01?step=1&colorchg=1&co_id=1101&year=108&seamon=&mtype=A&\'>電子書查詢</a>」</fon

In [10]:
len(response.text)

41172

In [11]:
# Save the response as a local HTML page.  The leading <meta> tag declares the
# UTF-8 charset, matching the encoding the file is written with.
with open('1101-bs.html', 'w', encoding='utf-8') as file:
    file.writelines((
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
        response.text,
    ))

In [12]:
# AJAX endpoints of the three financial statements, keyed by a short name.
urls = {
    # balance sheet
    'balance_sheet': 'https://mops.twse.com.tw/mops/web/ajax_t164sb03',
    # comprehensive income statement
    'income_statement': 'https://mops.twse.com.tw/mops/web/ajax_t164sb04',
    # cash flow statement
    'cash_flow': 'https://mops.twse.com.tw/mops/web/ajax_t164sb05',
}

# Print every name next to its endpoint, in insertion order.
for folder_name in urls:
    url = urls[folder_name]
    print(folder_name, url)

balance_sheet https://mops.twse.com.tw/mops/web/ajax_t164sb03
income_statement https://mops.twse.com.tw/mops/web/ajax_t164sb04
cash_flow https://mops.twse.com.tw/mops/web/ajax_t164sb05


In [5]:
import requests
import pandas as pd
import os
import time

def retry_post_requests(url, headers, form_data, timeout=30):
    """POST ``form_data`` to ``url``, making up to three attempts.

    Args:
        url: Address the POST request is sent to.
        headers: HTTP headers passed to ``requests.post``.
        form_data: Form fields passed as the request body (``data=``).
        timeout: Seconds before a single attempt is abandoned, so a stalled
            connection counts as a failed attempt instead of blocking forever.

    Returns:
        The response of the first attempt that completes, or ``None`` when
        all three attempts raised a request error.
    """
    attempts = 3

    for attempt in range(attempts):
        try:
            return requests.post(url, headers=headers, data=form_data, timeout=timeout)
        except requests.exceptions.RequestException:
            # Only request-level failures are retried; KeyboardInterrupt and
            # programming errors propagate to the caller.
            if attempt + 1 < attempts:
                # No retry follows the last attempt, so neither announce one
                # nor wait for it.
                print('發生錯誤，等待1分鐘後嘗試')
                time.sleep(60)

    return None

def download_financial_statements(stock_id, year, season):
    """Download the three quarterly statements of one stock as HTML files.

    Each statement is requested separately and written to
    ``data/financial_statement/<statement>/<year>S<season>/<stock_id>.html``.
    A file that already exists and is larger than 10000 bytes is treated as
    already downloaded and skipped.  When a statement cannot be fetched, comes
    back with an HTTP error status, or is shorter than 1000 characters, a
    message is printed and the remaining statements of this stock are skipped.

    Args:
        stock_id: Stock code, sent as the ``co_id`` form field and used as
            the file name.
        year: Year of the report; 1911 is subtracted before it is sent.
        season: Quarter number, sent as the ``season`` form field.

    Returns:
        None.
    """
    urls = {
        'balance_sheet': 'https://mops.twse.com.tw/mops/web/ajax_t164sb03',  # balance sheet
        'income_statement': 'https://mops.twse.com.tw/mops/web/ajax_t164sb04',  # comprehensive income statement
        'cash_flow': 'https://mops.twse.com.tw/mops/web/ajax_t164sb05',  # cash flow statement
    }

    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'queryName': 'co_id',
        'inpuType': 'co_id',
        'TYPEK': 'all',
        'isnew': 'false',
        'co_id': stock_id,
        'year': year - 1911,
        'season': season,
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7,zh-CN;q=0.6',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': '*/*',
    }

    # The period folder is the same for all three statements.
    period = '{}S{}'.format(year, season)

    for folder_name, url in urls.items():

        # Each request names its own endpoint as the referring page.
        headers['Referer'] = url

        filename = os.path.join('data',
                                'financial_statement',
                                folder_name,
                                period,
                                '{}.html'.format(stock_id))

        os.makedirs(os.path.dirname(filename), exist_ok=True)

        # A sufficiently large existing file counts as already downloaded.
        if os.path.exists(filename) and os.stat(filename).st_size > 10000:
            continue

        response = retry_post_requests(url, headers, form_data)

        # An HTTP error status is a failed fetch too; otherwise a long error
        # page would be stored as if it were the statement.
        if response is None or not response.ok:
            print('{}S{} Stock {} 抓取失敗'.format(year, season, stock_id))
            time.sleep(3)
            break

        # A very short body means the page carries no statement.
        if len(response.text) < 1000:
            print('{}S{} Stock {} 找不到內容'.format(year, season, stock_id))
            time.sleep(3)
            break

        with open(filename, 'w', encoding='utf-8') as file:
            # Declare the charset so the saved page matches the file encoding.
            file.write('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">')
            file.write(response.text)

        # Pause between downloads (presumably to go easy on the server).
        time.sleep(15)

    return None
        

In [14]:
download_financial_statements(1101, 2019, 1)

In [15]:
def update_financial_statement(year, season):
    """Download one quarter's statements for a fixed list of three stocks.

    Args:
        year: Year passed through to ``download_financial_statements``.
        season: Quarter passed through to ``download_financial_statements``.
    """
    for code in ('1101', '1102', '1103'):
        # Announce the stock before its download starts.
        status = '{}S{} stock {} 下載中...'.format(year, season, code)
        print(status)
        download_financial_statements(code, year, season)

In [16]:
update_financial_statement(2019, 1)

2019S1 stock 1101 下載中...
2019S1 stock 1102 下載中...
2019S1 stock 1103 下載中...


In [12]:
from tqdm import tqdm_notebook

def update_financial_statement(year, season):
    """Download one quarter's statements for three stocks with a progress bar.

    Args:
        year: Year passed through to ``download_financial_statements``.
        season: Quarter passed through to ``download_financial_statements``.
    """
    bar = tqdm_notebook(['1101', '1102', '1103'])
    for code in bar:
        # Show the stock being processed next to the bar.
        label = '{}S{} stock {} 下載中...'.format(year, season, code)
        bar.set_description(label)
        download_financial_statements(code, year, season)

In [18]:
update_financial_statement(2019, 1)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [10]:
import datetime 

def season_to_date(year, season):
    """Return the date this notebook associates with a quarter of ``year``.

    Quarters 1-3 map to 14 May, 13 August and 13 November of ``year``;
    quarter 4 maps to 30 March of the following year.

    Args:
        year: Calendar year of the quarter.
        season: Quarter number, 1 to 4.

    Returns:
        The matching ``datetime.date``.

    Raises:
        KeyError: If ``season`` is not one of the four quarters.
    """
    # (quarter, years to add, month, day)
    schedule = (
        (1, 0, 5, 14),
        (2, 0, 8, 13),
        (3, 0, 11, 13),
        (4, 1, 3, 30),
    )
    by_quarter = {
        quarter: datetime.date(year + offset, month, day)
        for quarter, offset, month, day in schedule
    }
    return by_quarter[season]


In [20]:
season_to_date(2019, 1)

datetime.date(2019, 5, 14)

In [8]:
def retry_requests(url, headers, timeout=30):
    """GET ``url``, making up to three attempts.

    Args:
        url: Address to fetch.
        headers: HTTP headers passed to ``requests.get``.
        timeout: Seconds before a single attempt is abandoned, so a stalled
            connection counts as a failed attempt instead of blocking forever.

    Returns:
        The response of the first attempt that completes, or ``None`` when
        all three attempts raised a request error.
    """
    attempts = 3

    for attempt in range(attempts):
        try:
            return requests.get(url, headers=headers, timeout=timeout)
        except requests.exceptions.RequestException:
            # Only request-level failures are retried; KeyboardInterrupt and
            # programming errors propagate to the caller.
            if attempt + 1 < attempts:
                # No retry follows the last attempt, so neither announce one
                # nor wait for it.
                print('發生錯誤，等待1分鐘後嘗試')
                time.sleep(60)

    return None

def get_monthly_reports(date):
    """Fetch the monthly revenue tables of the page selected by ``date``.

    Args:
        date: A date whose year (minus 1911) and month are inserted into the
            page address; it is also stored in the ``日期`` index level.

    Returns:
        A DataFrame indexed by (``證券代號``, ``日期``) holding the remaining
        columns of the page's 11-column tables, or ``None`` when the page
        cannot be downloaded, cannot be parsed, or has no 11-column table.
    """
    # Function-scope import keeps this block self-contained.
    from io import StringIO

    url = 'https://mops.twse.com.tw/nas/t21/sii/t21sc03_{year}_{month}_0.html'.format(year=date.year - 1911,
                                                                                      month=date.month)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    response = retry_requests(url, headers)

    # retry_requests gives None once every attempt has failed.
    if response is None:
        return None

    response.encoding = 'big5'

    try:
        # Newer pandas deprecates literal HTML strings; a file-like wrapper
        # is accepted by old and new versions alike.
        dfs = pd.read_html(StringIO(response.text))
    except Exception:
        # Best effort: any parsing failure means "no report available", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

    # Keep only the 11-column tables; pd.concat raises on an empty list.
    tables = [table for table in dfs if len(table.columns) == 11]
    if not tables:
        return None

    df = pd.concat(tables)

    # Column labels are expected to be multi-level; keep the lower level.
    df.columns = df.columns.droplevel(0)

    # Drop the company-name and remarks columns.
    df.drop(['公司名稱', '備註'], axis=1, inplace=True)

    df = df.rename(columns={'公司代號': '證券代號'})

    # Renumber rows so the label-based drop below removes exactly the rows
    # whose code cell reads '合計' (total).
    df.reset_index(drop=True, inplace=True)
    df.drop(df[df['證券代號'] == '合計'].index, inplace=True)

    df['日期'] = pd.to_datetime(date)

    df = df.set_index(['證券代號', '日期'])

    return df

In [22]:
df = get_monthly_reports(season_to_date(2019, 1))
df

Unnamed: 0_level_0,Unnamed: 1_level_0,當月營收,上月營收,去年當月營收,上月比較增減(%),去年同月增減(%),當月累計營收,去年累計營收,前期比較增減(%)
證券代號,日期,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1101,2019-05-14,10903967,10596314,11539982,2.90,-5.51,46856612,46742616,0.24
1102,2019-05-14,8435765,8434811,7698165,0.01,9.58,36261181,31460310,15.26
1103,2019-05-14,146479,160751,185856,-8.87,-21.18,759621,828000,-8.25
1104,2019-05-14,421251,418992,426170,0.53,-1.15,2027540,1977280,2.54
1108,2019-05-14,305274,323834,294581,-5.73,3.62,1463455,1407947,3.94
1109,2019-05-14,800417,753252,625561,6.26,27.95,2914103,2232150,30.55
1110,2019-05-14,132249,118229,142543,11.85,-7.22,667517,672790,-0.78
1201,2019-05-14,1809938,1695994,1693716,6.71,6.86,8046332,7094306,13.41
1203,2019-05-14,531311,469207,526103,13.23,0.98,2502699,2464575,1.54
1210,2019-05-14,6390612,6424220,6115699,-0.52,4.49,31092939,28872829,7.68


In [23]:
df.index.levels[0].tolist()

['1101',
 '1102',
 '1103',
 '1104',
 '1108',
 '1109',
 '1110',
 '1201',
 '1203',
 '1210',
 '1213',
 '1215',
 '1216',
 '1217',
 '1218',
 '1219',
 '1220',
 '1225',
 '1227',
 '1229',
 '1231',
 '1232',
 '1233',
 '1234',
 '1235',
 '1236',
 '1301',
 '1303',
 '1304',
 '1305',
 '1307',
 '1308',
 '1309',
 '1310',
 '1312',
 '1313',
 '1314',
 '1315',
 '1316',
 '1319',
 '1321',
 '1323',
 '1324',
 '1325',
 '1326',
 '1339',
 '1402',
 '1409',
 '1410',
 '1413',
 '1414',
 '1416',
 '1417',
 '1418',
 '1419',
 '1423',
 '1432',
 '1434',
 '1435',
 '1436',
 '1437',
 '1438',
 '1439',
 '1440',
 '1441',
 '1442',
 '1443',
 '1444',
 '1445',
 '1446',
 '1447',
 '1449',
 '1451',
 '1452',
 '1453',
 '1454',
 '1455',
 '1456',
 '1457',
 '1459',
 '1460',
 '1463',
 '1464',
 '1465',
 '1466',
 '1467',
 '1468',
 '1470',
 '1471',
 '1472',
 '1473',
 '1474',
 '1475',
 '1476',
 '1477',
 '1503',
 '1504',
 '1506',
 '1507',
 '1512',
 '1513',
 '1514',
 '1515',
 '1516',
 '1517',
 '1519',
 '1521',
 '1522',
 '1524',
 '1525',
 '1526',
 

In [14]:
def update_financial_statement(year, season):
    """Download one quarter's statements for every stock in the monthly report.

    The stock list is taken from the first index level of the table returned
    by ``get_monthly_reports`` for ``season_to_date(year, season)``.

    Args:
        year: Year passed through to ``download_financial_statements``.
        season: Quarter passed through to ``download_financial_statements``.

    Returns:
        None.  Nothing is downloaded when the monthly report is unavailable.
    """
    reports = get_monthly_reports(season_to_date(year, season))

    # get_monthly_reports returns None when no table could be read; without
    # this guard the attribute access below fails with an AttributeError.
    if reports is None:
        print('{}S{} 無法取得股票清單'.format(year, season))
        return

    stock_ids = reports.index.levels[0].tolist()

    progress = tqdm_notebook(stock_ids)

    for stock_id in progress:

        # Show the stock being processed next to the bar.
        progress.set_description('{}S{} stock {} 下載中...'.format(year, season, stock_id))

        download_financial_statements(stock_id, year, season)

In [None]:
update_financial_statement(2019, 1)