In [1]:
import requests
import os
from bs4 import BeautifulSoup as bs
import pandas as pd
from datetime import datetime, timedelta
import time
import numpy
# A notebook kernel does not define the __file__ global, so the quoted name is
# intentional: abspath() resolves the literal 'name' against the current working
# directory, and the directory part of that result is used as the project root.
parent = os.path.split(os.path.abspath('__file__'))[0]
# Every crawler below stores its cached files somewhere under this folder.
output_path = os.path.join(parent, 'Output')

In [2]:
from modules import Mongo
from BaseObj import CallOption, PutOption

In [3]:
def checkPath(pth):
    """Make sure the directory ``pth`` exists, creating missing parents.

    Args:
        pth: Path of the directory to create when it is not already present.

    Raises:
        FileExistsError: If ``pth`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and removes the window between
    # a separate isdir() check and the creation in which another process
    # could create the directory and make makedirs() fail.
    os.makedirs(pth, exist_ok=True)
        
def getNextTTM(td:datetime) -> datetime:
    """Return the next third-Wednesday date on or after ``td``.

    ``getThirdWendesday`` is defined outside this block; it is presumably
    returning the third Wednesday of the month containing its argument
    (TODO confirm against its definition).

    Args:
        td: Reference date.

    Returns:
        The value of ``getThirdWendesday`` for ``td``'s month, or for the
        following month when ``td``'s calendar date is already past it.
    """
    thirdWed = getThirdWendesday(td)
    if td.date() > thirdWed.date():
        # Move to the first day of the following month. Anchoring on day 1
        # before adding 32 days always lands inside the next month, whereas
        # ``td + 31 days`` overshoots for late-month dates (Jan 31 + 31 days
        # is already in March, Mar 31 + 31 days is May 1).
        first_of_next = (td.replace(day=1) + timedelta(days=32)).replace(day=1)
        thirdWed = getThirdWendesday(first_of_next)
    return thirdWed
        
def checkClosed2TTM(td:datetime) -> bool:
    """Tell whether ``td`` is within seven days of the date from ``getNextTTM``.

    Args:
        td: Reference date.

    Returns:
        True when the whole-day difference between ``getNextTTM(td)`` and
        ``td`` is at most 7, otherwise False.
    """
    remaining = getNextTTM(td) - td
    # Only the whole-day component of the difference is compared.
    return remaining.days <= 7

# 期交所資料收集

## 三大法人

In [56]:
def crawlInstitutionTraded(date:datetime = None):
    """Load the institutional-trader futures table for one day.

    The result is read from ``Output/三大法人/Fut_<YYYYMMDD>.txt`` when that
    file exists. Otherwise the TAIFEX ``futContractsDate`` page is requested,
    the first ``table_f`` table is parsed, and the parsed rows are cached to
    that file before being returned.

    Args:
        date: Day to query. Defaults to the current day, resolved at call
            time (a ``datetime.today()`` default in the signature would be
            frozen at the moment the cell defining the function was run).

    Returns:
        A DataFrame restricted to the columns 商品, 身分別, 差額口數(交易) and
        差額口數(OI), or an empty DataFrame when nothing could be loaded.
        Download and parsing errors are printed, not raised.
    """
    if date is None:
        date = datetime.today()
    url = 'https://www.taifex.com.tw/cht/3/futContractsDate'
    pay_load = {
            "queryType": "1",
            "goDay": "",
            "doQuery": "1",
            "dateaddcnt": "",
            "queryDate": date.strftime("%Y/%m/%d"),
            "commodityId": ""
        }
    df = pd.DataFrame()
    f_path = os.path.join(output_path, '三大法人', f'Fut_{date.strftime("%Y%m%d")}.txt')
    checkPath(os.path.dirname(f_path))
    if os.path.isfile(f_path):
        df = pd.read_csv(f_path, encoding='utf-8-sig',dtype=str)
    else:
        try:
            # A timeout keeps a stalled connection from hanging the kernel;
            # raise_for_status reports HTTP errors instead of parsing an
            # error page.
            res = requests.post(url, data=pay_load, timeout=30)
            res.raise_for_status()
            soup = bs(res.content, 'lxml')
            table = soup.find_all('table', {'class':"table_f"})[0]
            cols = '序號,商品,身分別,買方口數(交易),買方金額(交易),賣方口數(交易),賣方金額(交易),差額口數(交易),差額金額(交易)'.split(',')
            cols.extend('買方口數(OI),買方金額(OI),賣方口數(OI),賣方金額(OI),差額口數(OI),差額金額(OI)'.split(','))
            out = []
            last_tmp = []
            # The first three <tr> elements are skipped (header rows).
            for tr in table.find_all('tr')[3:]:
                if '期貨小計' in tr.text: break
                if '鉅額' in tr.text or '期貨合計' in tr.text:continue
                tmp = [cell.text.strip() for cell in tr.find_all('th')]+[cell.text.strip() for cell in tr.find_all('td')]
                # Skip cell-less rows before forward-filling; otherwise an
                # empty row would be replaced by a copy of the previous one.
                if not tmp:
                    continue
                # Short rows lack their leading cells; reuse the leading
                # values of the previous row for them.
                if len(tmp) < len(cols):
                    tmp = last_tmp[:len(cols)-len(tmp)] + tmp
                out.append(dict(zip(cols, tmp)))
                last_tmp = tmp
            if out:
                df = pd.DataFrame(out).drop(columns='序號')
                df.to_csv(f_path, index=False, encoding='utf-8-sig')
            else:
                # Nothing parsed: report it and do not write an empty cache file.
                print(f'No rows parsed from {url} for {date.strftime("%Y/%m/%d")}')
        except Exception as e:
            # Best effort: report the problem and fall through with whatever
            # ``df`` currently holds.
            print(e)
    if not df.empty:
        df = df['商品,身分別,差額口數(交易),差額口數(OI)'.split(',')]
    return df

In [57]:
# Load the institutional-trader table for the fixed sample day 2022-09-30
# (read from the Output cache when that day's file already exists).
df = crawlInstitutionTraded(datetime(2022,9,30))

In [58]:
# Display the frame returned by crawlInstitutionTraded.
df

Unnamed: 0,商品,身分別,差額口數(交易),差額口數(OI)
0,臺股期貨,自營商,181,5266
1,臺股期貨,投信,456,-9267
2,臺股期貨,外資,-1466,-1828
3,電子期貨,自營商,8,-2
4,電子期貨,投信,0,183
5,電子期貨,外資,-62,1215
6,金融期貨,自營商,89,-38
7,金融期貨,投信,0,63
8,金融期貨,外資,-26,154
9,小型臺指期貨,自營商,69,-4574


## 大額交易

In [62]:
def crawlLargeTraded(date:datetime = None):
    """Load the large-trader futures open-interest table for one day.

    The result is read from ``Output/大額交易人/Fut_<YYYYMMDD>.txt`` when that
    file exists. Otherwise the TAIFEX ``largeTraderFutQry`` page is
    requested, the first ``table_f`` table is parsed, and the parsed rows are
    cached to that file before being returned.

    Args:
        date: Day to query. Defaults to the current day, resolved at call
            time (a ``datetime.today()`` default in the signature would be
            frozen at the moment the cell defining the function was run).

    Returns:
        A DataFrame restricted to the columns 契約, 到期月份(週別),
        十大交易人(特法))買(OI) and 十大交易人(特法))賣(OI) with thousands
        separators removed, or an empty DataFrame when nothing could be
        loaded. Download and parsing errors are printed, not raised.
    """
    if date is None:
        date = datetime.today()
    url = "https://www.taifex.com.tw/cht/3/largeTraderFutQry"
    pay_load = {
        "datecount": "",
        "contractId2": "",
        "queryDate": date.strftime("%Y/%m/%d"),
        "contractId": "all"
    }

    df = pd.DataFrame()
    f_path = os.path.join(output_path, '大額交易人', f'Fut_{date.strftime("%Y%m%d")}.txt')
    checkPath(os.path.dirname(f_path))
    if os.path.isfile(f_path):
        df = pd.read_csv(f_path, encoding='utf-8-sig',dtype=str)
    else:
        try:
            # A timeout keeps a stalled connection from hanging the kernel;
            # raise_for_status reports HTTP errors instead of parsing an
            # error page.
            res = requests.post(url, data=pay_load, timeout=30)
            res.raise_for_status()
            soup = bs(res.content, 'lxml')
            table = soup.find_all('table', {'class':"table_f"})[0]
            # The column labels double as the header of the cached file, so
            # they are kept exactly as they were.
            cols = '契約,到期月份(週別),五大交易人(特法))買(OI),五大交易人(特法))買(Ratio),十大交易人(特法))買(OI),十大交易人(特法))買(Ratio)'.split(',')
            cols.extend("五大交易人(特法))賣(OI),五大交易人(特法))賣(Ratio),十大交易人(特法))賣(OI),十大交易人(特法))賣(Ratio),OI".split(','))
            out = []
            # Must exist before the loop: short rows borrow their leading
            # cells from the previous row.
            last_tmp = []
            # The first three <tr> elements are skipped (header rows).
            for tr in table.find_all('tr')[3:]:
                if '鉅額' in tr.text or '小計' in tr.text:continue
                tmp = [cell.text.strip().replace('\r','').replace('\n','').replace('\t','') for cell in tr.find_all('td')]
                # Skip cell-less rows before forward-filling; otherwise an
                # empty row would be replaced by a copy of the previous one.
                if not tmp:
                    continue
                if len(tmp) < len(cols):
                    tmp = last_tmp[:len(cols)-len(tmp)] + tmp
                out.append(dict(zip(cols, tmp)))
                last_tmp = tmp
            if out:
                # This table has no '序號' column. Deleting it raised a
                # KeyError that the old bare ``except`` hid, which skipped
                # the to_csv call so the cache file was never written.
                df = pd.DataFrame(out)
                df.to_csv(f_path, index=False, encoding='utf-8-sig')
            else:
                # Nothing parsed: report it and do not write an empty cache file.
                print(f'No rows parsed from {url} for {date.strftime("%Y/%m/%d")}')
        except Exception as e:
            # Best effort, but report the problem instead of hiding it.
            print(e)
    if not df.empty:
        for col in df.columns[2:]:
            # The .str accessor leaves missing cells (NaN from a cached
            # file) untouched instead of failing on a non-string value.
            df[col] = df[col].str.replace(',', '', regex=False)
        df = df['契約,到期月份(週別),十大交易人(特法))買(OI),十大交易人(特法))賣(OI)'.split(',')]
    return df

In [63]:
# Load the large-trader table for the fixed sample day 2022-09-30
# (read from the Output cache when that day's file already exists).
df_large = crawlLargeTraded(datetime(2022,9,30))

In [64]:
# Display the frame returned by crawlLargeTraded.
df_large

Unnamed: 0,契約,到期月份(週別),十大交易人(特法))買(OI),十大交易人(特法))賣(OI)
0,臺股期貨(TX+MTX/4),202210W1,101(14),126(32)
1,臺股期貨(TX+MTX/4),202210,34678(33398),45372(38298)
2,臺股期貨(TX+MTX/4),所有契約,37070(32782),47955(37099)
3,電子期貨(TE+ZEF/8),202210,1869(1692),2134(359)
4,電子期貨(TE+ZEF/8),所有契約,1932(1755),2137(362)
...,...,...,...,...
520,萬海期貨,所有契約,3641(2796),1857(0)
521,高端疫苗期貨,202210,261(137),195(23)
522,高端疫苗期貨,所有契約,264(140),195(23)
523,力積電期貨,202210,545(333),550(144)
