In [39]:
import pandas as pd
import requests
from datetime import datetime, timedelta
import time
import os
from utils import getSchema

parent = os.path.dirname(os.path.abspath('__file__'))
output_path = os.path.join(parent, 'OptionOI')
if not os.path.isdir(output_path):
    os.makedirs(output_path)

In [33]:
def getIndexInterDay(ticker='發行量加權股價指數', start_date:datetime=datetime.today(), end_date:datetime=datetime.today()):
    schema = getSchema("TWSE")
    table = schema['HistoricalPrice.Index.Interday']
    data = list(table.find({'Date':{"$gte":start_date.strftime("%Y-%m-%d"),
                                   "$lte":end_date.strftime("%Y-%m-%d")},
                           "IndexName":{"$in":ticker.split(',')}}))
    df = pd.DataFrame(data)
    del df['_id']
    return df

In [58]:
def getOptionOI(date:datetime=datetime.today()):
    df = pd.DataFrame()
    dtStr = date.strftime("%Y%m%d")
    if date.isocalendar()[-1] <= 5:
        file_path = os.path.join(output_path, f'{dtStr}_OI.csv')
        if os.path.isfile(file_path):
            df = pd.read_csv(file_path)
            for col in df.columns:
                df[col] = df[col].apply(lambda x: str(x).strip())
        else:
            url = f'https://www.taifex.com.tw/file/taifex/Dailydownload/LI/chinese/OPT/LI_{dtStr}.csv'
            try:
                df = pd.read_csv(url, encoding='cp950', skiprows=1)
            except:
                pass
            if not df.empty:
                if df.shape[1] == 1:
                    return pd.DataFrame()
                df.columns = 'Contract,C/P,TTM,Strike,OI,Volume,Liquitity'.split(',')
                for col in df.columns:
                    df[col] = df[col].apply(lambda x: str(x).strip())
                df.to_csv(file_path, index=False, encoding='utf-8')
    return df

In [29]:
Option_oi = {}

In [30]:
for date in pd.date_range(datetime(2022,5,1), datetime.today()):
    print(f"========= Read {date.strftime('%Y-%m-%d')} =========")
    Option_oi[date] = getOptionOI(date)
    time.sleep(3)



In [15]:
Option_oi

Unnamed: 0,Contract,C/P,TTM,Strike,OI,Volume,Liquitity
0,CAO,買權,202207,100.0,0,0,
1,CAO,買權,202207,67.5,0,0,
2,CAO,買權,202207,70.0,0,0,
3,CAO,買權,202207,72.5,0,0,
4,CAO,買權,202207,75.0,0,0,
...,...,...,...,...,...,...,...
5677,TXO,賣權,202303,18400.0,0,0,
5678,TXO,賣權,202303,18600.0,0,0,
5679,TXO,賣權,202303,18800.0,0,0,
5680,TXO,賣權,202303,19000.0,0,0,


In [194]:
def AggregateOI(date, df):
    df = df.copy(deep=True)
    df = df[df.Contract.apply(lambda x: x.strip()) == 'TXO']
    
    # Check TTM    
#     print(df.TTM.unique())
    TTMs = sorted(df.TTM.astype(str).unique(), key=lambda x: str(x)[:6])
#     print(TTMs)
    closed_ttm = str(TTMs[0]).strip()
    ttm_year = int(closed_ttm[:4])
    ttm_month = int(closed_ttm[4:6])
    
    start_date = datetime(ttm_year, ttm_month, 1)
    next_month_date = start_date + timedelta(31)
    end_date = next_month_date.replace(day=1) + timedelta(-1)
#     print(start_date, end_date)
    ttm_dates = pd.date_range(start_date, end_date, freq="W-WED")
    print(closed_ttm)
    print(ttm_dates)
    if "W" in closed_ttm:
        ttm_date = ttm_dates[int(closed_ttm[-1])-1]
    else:
        ttm_date = ttm_dates[2]
#     print(date.date(), ttm_date.date(), date.date() in [x.date() for x in ttm_dates], '\n')
    if date.date() == ttm_dates[0].date():#[x.date() for x in ]:
        closed_ttm = TTMs[1]
    tmp_df = df[df.TTM.astype(str) == closed_ttm]
    call_df = tmp_df[tmp_df['C/P']=='買權'].sort_values("OI", ascending=False).head(10)
    put_df = tmp_df[tmp_df['C/P']=='賣權'].sort_values("OI", ascending=False).head(10)
#     print(call_df)
#     print(put_df)
    output = {
        'Date':date,
        "TTM":closed_ttm,
        'MaxOIStrikeCall':call_df.loc[call_df.index[0], 'Strike'],
        'MaxOICall':call_df.loc[call_df.index[0], 'OI'],
        'TotalOICall':call_df.OI.sum(),
        'MaxOIStrikePut':put_df.loc[put_df.index[0], 'Strike'],
        'MaxOIPut':put_df.loc[put_df.index[0], 'OI'],
        'TotalOIPut':put_df.OI.sum(),
        "IsTTM":int(date.date() == ttm_date.date()),
    }
    return output
    
    
    

In [195]:
aggregate_data = []

In [196]:
for date, df in Option_oi.items():
    if df.empty:
        continue
    aggregate_data.append(AggregateOI(date, df))
#     break

202205W1
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205W2
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205W2
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205W2
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205W2
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205W2
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205
DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25'], dtype='datetime64[ns]', freq='W-WED')
202205
DatetimeIndex(['2022-05-04', '2022-05-11', '2

In [240]:
OI_df = pd.DataFrame(aggregate_data).set_index('Date')
OI_df['OICallDiff'] = OI_df['TotalOICall'].diff()
OI_df['OIPutDiff'] = OI_df['TotalOIPut'].diff()
OI_df['OI_Ratio'] = OI_df['TotalOICall'] / OI_df['TotalOIPut'] - 1
OI_df['OI_Ratio_diff'] = OI_df['OI_Ratio'].diff()

In [241]:
dates=sorted(Option_oi.keys())
idx_df = getIndexInterDay(start_date=dates[0])

In [242]:
idx_df = idx_df.set_index("Date")
idx_df.index = pd.to_datetime(idx_df.index)

In [243]:
for i in range(OI_df.shape[0]):
    if i < 2:
        continue
    if OI_df.loc[OI_df.index[i-1], 'TTM'] != OI_df.loc[OI_df.index[i], 'TTM']:
        continue
#     print(OI_df.index[i-1])
    if OI_df.loc[OI_df.index[i-1], 'OICallDiff'] > 0 and OI_df.loc[OI_df.index[i-1], 'OIPutDiff'] > 0:
        if OI_df.loc[OI_df.index[i-1], 'OICallDiff'] > OI_df.loc[OI_df.index[i-1], 'OIPutDiff']:
            idx_df.loc[OI_df.index[i], "Prediction"] = "C > O"
        elif OI_df.loc[OI_df.index[i-1], 'OICallDiff'] < OI_df.loc[OI_df.index[i-1], 'OIPutDiff']:
            idx_df.loc[OI_df.index[i], "Prediction"] = "C < O"
        if float(idx_df.loc[OI_df.index[i], "Close"]) > float(idx_df.loc[OI_df.index[i], "Open"]):
                idx_df.loc[OI_df.index[i], "Realized"] = "C > O"
        if float(idx_df.loc[OI_df.index[i], "Close"]) < float(idx_df.loc[OI_df.index[i], "Open"]):
            idx_df.loc[OI_df.index[i], "Realized"] = "C < O"
#         if float(idx_df.loc[OI_df.index[i], "Close"]) > float(idx_df.loc[OI_df.index[i-1], "Close"]):
#                 idx_df.loc[OI_df.index[i], "RealizedON"] = "C > O"
#         if float(idx_df.loc[OI_df.index[i], "Close"]) < float(idx_df.loc[OI_df.index[i-1], "Close"]):
#             idx_df.loc[OI_df.index[i], "RealizedON"] = "C < O"

In [244]:
com = pd.concat([OI_df,idx_df], axis=1)
com['Match'] = com['Prediction'] == com['Realized']
# com['MatchON'] = com['Prediction'] == com['RealizedON']
tmp_com = com[com["Prediction"].isnull()]
com.loc[tmp_com.index,"Match"] = float('nan')
# com.loc[tmp_com.index,"MatchON"] = float('nan')

In [246]:
com.to_csv(os.path.join(output_path, 'Summary.csv'), encoding='utf-8-sig')

In [245]:
com

Unnamed: 0_level_0,TTM,MaxOIStrikeCall,MaxOICall,TotalOICall,MaxOIStrikePut,MaxOIPut,TotalOIPut,IsTTM,OICallDiff,OIPutDiff,OI_Ratio,OI_Ratio_diff,IndexName,Open,High,Low,Close,Prediction,Realized,Match
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-05-03,202205W1,16700.0,28132,177400,16000.0,17538,138614,0,,,0.279813,,發行量加權股價指數,16592.18,16604.87,16465.99,16498.9,,,
2022-05-04,202205,17000.0,16302,123576,15800.0,16504,98620,0,-53824.0,-39994.0,0.253052,-0.026761,發行量加權股價指數,16498.9,16617.06,16498.9,16565.83,,,
2022-05-05,202205W2,17000.0,18378,111040,16200.0,8210,66940,0,-12536.0,-31680.0,0.658799,0.405747,發行量加權股價指數,16565.83,16783.78,16565.83,16696.12,,,
2022-05-06,202205W2,16800.0,25436,160334,16000.0,13954,97076,0,49294.0,30136.0,0.651634,-0.007165,發行量加權股價指數,16696.12,16696.12,16312.17,16408.2,,,
2022-05-09,202205W2,16400.0,22450,162588,16000.0,16706,115724,0,2254.0,18648.0,0.404964,-0.24667,發行量加權股價指數,16408.2,16408.2,16048.92,16048.92,C > O,C < O,0.0
2022-05-10,202205W2,16200.0,19180,145940,15700.0,19140,149184,0,-16648.0,33460.0,-0.021745,-0.426708,發行量加權股價指數,16048.92,16071.5,15734.44,16061.7,C < O,C > O,0.0
2022-05-11,202205,17000.0,21040,154664,15500.0,18056,123174,0,8724.0,-26010.0,0.255655,0.2774,發行量加權股價指數,16061.7,16081.15,15953.27,16006.25,,,
2022-05-12,202205,16500.0,27444,198678,15500.0,23074,162090,0,44014.0,38916.0,0.225726,-0.029928,發行量加權股價指數,16006.25,16006.25,15616.68,15616.68,,,
2022-05-13,202205,16500.0,27122,200142,15000.0,27566,193146,0,1464.0,31056.0,0.036221,-0.189505,發行量加權股價指數,15616.68,15860.69,15616.68,15832.54,C > O,C > O,1.0
2022-05-16,202205,16300.0,33948,238168,15500.0,31446,213042,0,38026.0,19896.0,0.117939,0.081718,發行量加權股價指數,15832.54,16032.85,15832.54,15901.04,C < O,C > O,0.0
