# 月營收選股

參考資料：

* [超簡單用python抓取每月營收](https://goo.gl/3bqLou)
* [Python-簡單的月營收選股！](https://bit.ly/2Ly3nLo)

In [1]:
import pandas as pd
import requests
from io import StringIO
import time

In [2]:
def monthly_report(year, month):
    """Download and tidy one monthly revenue report from mops.twse.com.tw.

    The page for the requested year/month is fetched, its HTML tables are
    parsed with pandas, and the rows are reduced to one record per company.

    Parameters
    ----------
    year : int
        Gregorian year (e.g. 2017) or ROC year (e.g. 106). Any value greater
        than 1911 is treated as Gregorian and converted by subtracting 1911.
    month : int
        Month number as it appears in the URL (1-12, not zero-padded).

    Returns
    -------
    pandas.DataFrame
        The first ten columns of the report, named after the page's own
        header row (the row whose first cell is '公司代號'). The '當月營收'
        column is numeric; rows where it is not a number and rows whose
        '公司代號' is '合計' are removed. Row labels are left as produced by
        ``pd.read_html`` / ``pd.concat`` (they may repeat).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If no header row containing '公司代號' is found in the parsed tables.
    """
    # Convert a Gregorian year to the ROC calendar used in the URL.
    if year > 1911:
        year -= 1911

    # Pages for ROC year 98 and earlier do not carry the "_0" suffix.
    suffix = '' if year <= 98 else '_0'
    url = 'http://mops.twse.com.tw/nas/t21/sii/t21sc03_' + str(year) + '_' + str(month) + suffix + '.html'

    # Present a browser-like User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # `headers` has to be passed by keyword: the second positional parameter
    # of requests.get is `params`, so a positional dict would be sent as
    # query-string parameters and the User-Agent header would never be set.
    # The timeout keeps a stalled connection from blocking forever.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = 'big5'
    html_df = pd.read_html(StringIO(r.text))

    # Either the first table already holds everything (more than 500 rows),
    # or the report is split into many tables of at most 11 columns each.
    if html_df[0].shape[0] > 500:
        df = html_df[0]
    else:
        df = pd.concat([table for table in html_df if table.shape[1] <= 11])

    # Work on an explicit copy of the first ten columns so the assignments
    # below never write into a view of the parsed tables.
    df = df[list(range(0, 10))].copy()

    # Pick the header row through the boolean mask itself. Looking up its
    # index label and feeding that label to iloc is only correct while labels
    # equal positions, which no longer holds once pd.concat repeats labels.
    header_row = df[df[0] == '公司代號'].iloc[0]
    df.columns = header_row

    # Non-numeric revenue cells (repeated headers, notes) become NaN and are
    # dropped together with the '合計' (total) rows.
    df['當月營收'] = pd.to_numeric(df['當月營收'], errors='coerce')
    df = df[df['當月營收'].notnull()]
    df = df[df['公司代號'] != '合計']

    return df

In [3]:
# Fetch the monthly revenue report for January 2017.
df = monthly_report(2017, 1)

In [4]:
# Preview the first rows of df.
df.head()

4,公司代號,公司名稱,當月營收,上月營收,去年當月營收,上月比較增減(%),去年同月增減(%),當月累計營收,去年累計營收,前期比較增減(%)
5,1101,台泥,6131670.0,9418581,6575590,-34.89,-6.75,6131670,6575590,-6.75
6,1102,亞泥,4162130.0,6023954,5066062,-30.9,-17.84,4162130,5066062,-17.84
7,1103,嘉泥,232657.0,382981,197328,-39.25,17.9,232657,197328,17.9
8,1104,環球水泥,363865.0,463842,373311,-21.55,-2.53,363865,373311,-2.53
9,1108,幸福水泥,316773.0,345573,348266,-8.33,-9.04,316773,348266,-9.04


In [5]:
# Collect the 12 most recent monthly reports that can be downloaded,
# walking backwards from the current month.
import datetime
import pandas as pd
import time
data = {}
n_days = 12  # number of monthly reports to collect (months, despite the name)
max_attempts = 2 * n_days  # upper bound so repeated failures cannot loop forever
attempts = 0
now = datetime.datetime.now()
year = now.year
month = now.month
while len(data) < n_days and attempts < max_attempts:
    attempts += 1

    print('parsing', year, month)

    # Download this month with monthly_report; a failed month is skipped.
    # The real exception is printed as well, because not every failure is a
    # 404 for a report that has not been published yet.
    try:
        data['%d-%d-01'%(year, month)] = monthly_report(year, month)
    except Exception as e:
        print('get 404, please check if the revenues are not revealed', '(%r)' % (e,))

    # Step back one month, rolling over into the previous year.
    month -= 1
    if month == 0:
        month = 12
        year -= 1

    # Pause between requests, but not after the final one.
    if len(data) < n_days and attempts < max_attempts:
        time.sleep(10)

if len(data) < n_days:
    print('stopped after', attempts, 'attempts with only', len(data), 'reports')

parsing 2018 8
get 404, please check if the revenues are not revealed
parsing 2018 7
parsing 2018 6
parsing 2018 5
parsing 2018 4
parsing 2018 3
parsing 2018 2
parsing 2018 1
parsing 2017 12
parsing 2017 11
parsing 2017 10
parsing 2017 9
parsing 2017 8


In [6]:
# Consolidate the monthly reports into a single table.
# Each report is first re-labelled in place by its company code.
for k in data:
    data[k].index = data[k]['公司代號']

# One column of current-month revenue per report key, flipped so that rows
# are months and columns are company codes, then ordered by date.
df = pd.DataFrame({key: report['當月營收'] for key, report in data.items()}).T
df.index = pd.to_datetime(df.index)
df = df.sort_index()

In [7]:
# Preview the first rows of the month-by-company revenue table.
df.head()

Unnamed: 0,1101,1102,1103,1104,1108,1109,1110,1201,1203,1210,...,9939,9940,9941,9942,9943,9944,9945,9946,9955,9958
2017-08-01,6952639.0,5478165.0,174704.0,367333.0,293423.0,487484.0,110747.0,1785571.0,561578.0,6554870.0,...,1757672.0,710351.0,1606952.0,274935.0,243109.0,277898.0,737665.0,88308.0,174798.0,145720.0
2017-09-01,8071192.0,5806573.0,177734.0,403016.0,267312.0,521034.0,232565.0,1685062.0,543886.0,7052418.0,...,1506725.0,3851321.0,1648181.0,272291.0,227517.0,268175.0,1655423.0,80710.0,222351.0,104617.0
2017-10-01,8387381.0,5840273.0,167398.0,336159.0,259826.0,377596.0,170205.0,1559834.0,513465.0,6551300.0,...,1195736.0,885129.0,1656403.0,277848.0,251580.0,252592.0,788604.0,56498.0,195447.0,152063.0
2017-11-01,9985897.0,6639248.0,181868.0,381565.0,258913.0,673172.0,183382.0,1642814.0,493888.0,6490644.0,...,1141135.0,1083362.0,1756364.0,278968.0,216298.0,262227.0,898326.0,388892.0,201589.0,158651.0
2017-12-01,10988149.0,7070625.0,181112.0,399796.0,291571.0,581701.0,221910.0,1345880.0,571720.0,7129396.0,...,1128145.0,1328392.0,1758233.0,291623.0,251656.0,270272.0,1251114.0,406099.0,108519.0,186791.0


In [8]:
# Moving-average screen: keep companies whose mean revenue over the last
# 3 rows is greater than their mean over the last 12 rows.
method1 = df.tail(3).mean().gt(df.tail(12).mean())
method1[method1].index

Index(['1101', '1102', '1103', '1108', '1109', '1201', '1213', '1215', '1216',
       '1217',
       ...
       '9934', '9935', '9937', '9939', '9941', '9942', '9943', '9944', '9945',
       '9955'],
      dtype='object', length=550)

In [9]:
# Growth screen: the 4-row rolling mean (at least 2 observations) must be
# higher than the previous row's value in each of the last 5 rows.
method2 = df.rolling(window=4, min_periods=2).mean()
method2 = method2.gt(method2.shift()).tail(5).sum()
method2[method2.eq(5)]

1218    5
1323    5
1409    5
1536    5
1582    5
1614    5
1709    5
1773    5
1909    5
2002    5
2006    5
2010    5
2013    5
2014    5
2015    5
2049    5
2062    5
2101    5
2104    5
2327    5
2338    5
2340    5
2351    5
2373    5
2397    5
2401    5
2408    5
2431    5
2449    5
2456    5
       ..
3698    5
4119    5
4536    5
4540    5
4555    5
4919    5
4952    5
4976    5
5305    5
5434    5
5706    5
6215    5
6224    5
6225    5
6239    5
6412    5
6582    5
8016    5
8131    5
8150    5
8249    5
8341    5
8464    5
8926    5
9905    5
9914    5
9930    5
9935    5
9939    5
9941    5
Length: 87, dtype: int64

In [10]:
# New-high screen: the latest row's revenue equals the maximum over the
# last 12 rows.
method3 = df.iloc[-1].eq(df.tail(12).max())
method3[method3].index

Index(['1201', '1215', '1312', '1314', '1315', '1409', '1443', '1467', '1473',
       '1512',
       ...
       '8261', '8341', '8926', '9902', '9905', '9930', '9937', '9939', '9941',
       '9955'],
      dtype='object', length=132)