In [1]:
import pandas as pd
import requests
from io import StringIO
import time
from bs4 import BeautifulSoup

# Raise pandas' display limits so wide/long report tables are rendered in full.
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 2000
pd.options.display.width = 1000

def _report_url(roc_year, month):
    """Build the MOPS monthly-revenue page URL for a ROC year and a month."""
    stem = 'https://mops.twse.com.tw/nas/t21/sii/t21sc03_' + str(roc_year) + '_' + str(month)
    # Pages for ROC year 98 and earlier do not carry the "_0" suffix.
    if roc_year <= 98:
        return stem + '.html'
    return stem + '_0.html'


def _flatten_columns(df):
    """Return the concatenated revenue tables with a single-level header."""
    if isinstance(df.columns, pd.MultiIndex):
        # Two-level header: the second level holds the actual column names.
        df.columns = df.columns.get_level_values(1)
        return df

    # Headerless layout: keep the first ten columns and promote the row whose
    # first cell is '公司代號' to be the header. The position is looked up
    # positionally because the concatenated frame has repeated index labels.
    df = df[list(range(0, 10))].copy()
    header_pos = (df[0] == '公司代號').to_numpy().nonzero()[0][0]
    df.columns = df.iloc[header_pos]
    return df


def monthly_report(year, month, trace_month, companies, is_first):
    """Screen listed companies for consecutive months of year-over-year growth.

    Starting at ``year``/``month`` and walking backwards one month at a time,
    the monthly revenue page is downloaded and only companies whose
    '去年同月增減(%)' value is greater than 0 are kept. Each earlier month is
    restricted to the companies that survived the later months.

    Parameters
    ----------
    year : int
        Gregorian year (any value above 1990 is converted by subtracting 1911)
        or a year already expressed in the ROC calendar.
    month : int
        Month number of the first (latest) month to examine.
    trace_month : int
        How many months to examine, counting the starting month. Values of 1
        or less examine only the starting month.
    companies : sequence
        '公司代號' values to restrict the screen to. An empty sequence means
        "no restriction" on the first call only.
    is_first : bool
        True for the outermost call; recursive calls pass False.

    Returns
    -------
    pandas.DataFrame
        The rows of the *earliest* month examined for the companies that
        passed the screen in every examined month. If no company is left
        before an earlier month is examined, a one-row frame with a single
        ``result`` column containing "沒有資料" is returned instead.

    Raises
    ------
    requests.HTTPError
        If the revenue page responds with an error status.
    """
    # Nobody survived the later months: stop before issuing another request.
    if len(companies) == 0 and not is_first:
        return pd.DataFrame({"result": ["沒有資料"]})

    # Months still to examine after this one.
    trace_month = trace_month - 1

    # Convert a Gregorian year to the ROC calendar used in the URL.
    if year > 1990:
        year -= 1911

    # Present a browser-like User-Agent to the site.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # Download the page for this month; fail fast instead of hanging forever
    # or trying to parse an error page.
    r = requests.get(_report_url(year, month), headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = 'big5'

    dfs = pd.read_html(StringIO(r.text), encoding='big5')

    # Keep only the tables with more than 5 and at most 11 columns.
    df = pd.concat([table for table in dfs if 5 < table.shape[1] <= 11])
    df = _flatten_columns(df)

    # Drop rows without a numeric '當月營收' (e.g. repeated header rows) and
    # the '合計' total rows.
    df['當月營收'] = pd.to_numeric(df['當月營收'], errors='coerce')
    df = df[df['當月營收'].notnull()]
    df = df[df['公司代號'] != '合計']

    # Restrict to the companies that survived the later months, if any given.
    if len(companies) > 0:
        df = df[df['公司代號'].isin(companies)]

    # Keep only companies with positive year-over-year growth this month.
    df = df[pd.to_numeric(df['去年同月增減(%)'], errors='coerce') > 0]

    if trace_month >= 1:
        # Step back one month, rolling over into December of the prior year.
        month = month - 1
        if month == 0:
            month = 12
            year = year - 1
        return monthly_report(year, month, trace_month, df['公司代號'].values, False)

    return df

In [2]:
# Run the growth screen starting from 2023-07 and tracing back 6 months.
year, month, trace_month = 2023, 7, 6
companies = []  # empty list: no company restriction on the first call
df = monthly_report(year, month, trace_month, companies, is_first=True).reset_index(drop=True)
df

Unnamed: 0,公司代號,公司名稱,備註,上月比較增減(%),上月營收,去年同月增減(%),去年當月營收,當月營收,前期比較增減(%),去年累計營收,當月累計營收
0,1103,嘉泥,旅宿營運收入增加。,10.37,191217,57.42,134062,211050,37.7,292114,402267
1,1104,環泥,1月份適逢過年,17.27,504378,69.81,348338,591525,16.5,940660,1095903
2,1108,幸福,本月較去年同期營收增加逾50%，主係去年2月適逢年假，營業日數較短所致。,22.66,294901,71.74,210635,361748,22.66,535299,656649
3,1110,東泥,主要本期東南水泥(股)公司銷貨收入增加，係因去年同期遇春節年假，銷貨天數較少及售價調高所致,26.28,137873,78.77,97391,174115,18.06,264249,311988
4,1201,味全,-,-6.0,1585126,15.32,1291903,1489920,3.86,2960690,3075046
5,1216,統一,-,-11.43,45915517,19.2,34113196,40664831,5.37,82162681,86580348
6,1233,天仁,-,-26.6,209169,16.48,131798,153523,13.36,319921,362692
7,1234,黑松,主係去年基期低，疫情解封後，餐飲活動增加，加上節慶贈禮買氣，酒類代理事業，除既有產品成長，另...,-23.77,1044159,70.92,465642,795909,30.88,1405871,1840068
8,1235,興泰,本期銷貨收入較去年同期增加115.1萬,17.69,1560,168.02,685,1836,78.45,1903,3396
9,1702,南僑,-,-0.07,1645186,29.86,1265823,1643891,6.96,3074974,3289077


In [3]:
# Run the same growth screen starting one month earlier (2023-06), again
# tracing back 6 months.
year, month, trace_month = 2023, 6, 6
companies = []  # empty list: no company restriction on the first call
df2 = monthly_report(year, month, trace_month, companies, is_first=True).reset_index(drop=True)
df2

Unnamed: 0,公司代號,公司名稱,備註,上月比較增減(%),上月營收,去年同月增減(%),去年當月營收,當月營收,前期比較增減(%),去年累計營收,當月累計營收
0,1103,嘉泥,-,-16.36,228644,20.98,158052,191217,20.98,158052,191217
1,1233,天仁,-,8.07,193545,11.18,188123,209169,11.18,188123,209169
2,1234,黑松,-,21.96,856128,11.05,940229,1044159,11.05,940229,1044159
3,1235,興泰,-,-36.45,2455,28.07,1218,1560,28.07,1218,1560
4,1475,業旺,-,-15.92,122090,18.19,86843,102647,18.19,86843,102647
5,1514,亞力,-,-40.38,883938,23.94,425119,526927,23.94,425119,526927
6,2371,大同,依IFRS10規定綜合判斷對精英電腦(股)公司已具備實質控制力，故自111年10月起將其併入...,-20.02,4424394,29.75,2727030,3538518,29.75,2727030,3538518
7,4572,駐龍,"航太景氣可望逐步復甦,客戶需求增加,營收隨之成長",10.79,55771,69.59,36435,61792,69.59,36435,61792
8,8222,寶一,-,61.85,27910,25.73,35927,45173,25.73,35927,45173
9,8996,高力,112年1月營收較去年同期增加71.84%，主要係熱能產品營業額較去年同期大幅增加98%及板...,2.49,313394,71.83,186934,321223,71.83,186934,321223


In [4]:
# Rows of df2 whose '公司代號' does not appear in df, written out as CSV
# (UTF-8 with BOM).
df3 = df2.loc[~df2['公司代號'].isin(df['公司代號'])].reset_index(drop=True)
df3.to_csv('C:/Users/Skyluck/Desktop/差異減少.csv', encoding="utf_8_sig")

# Rows of df whose '公司代號' does not appear in df2, written out the same way.
df4 = df.loc[~df['公司代號'].isin(df2['公司代號'])].reset_index(drop=True)
df4.to_csv('C:/Users/Skyluck/Desktop/差異新增.csv', encoding="utf_8_sig")