In [1]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re
import math
from datetime import date
import matplotlib.pyplot as plt

# 展開所有dataframe columns
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option ('display.max_colwidth',500)

# 不縮寫成科學記號
pd.set_option('display.float_format',lambda x : '%.2f' % x)

db = sqlite3.connect('./stock.db' , isolation_level=None)

In [2]:
# 下次換季時間： 8/14, 11/14, 3/31, 5/15
QUARTERS = ['2022q2','2022q3','2022q4','2023q1']
CURRENT_QUARTER = QUARTERS[-1][-2:]
CURRENT_QUARTERS = list(map(lambda q: q[-2:], QUARTERS))
CURRENT_YEAR = int(QUARTERS[-1][0:4])
LAST_YEAR_QUARTER = '2022q4'
CURRENT_YEARS = [2018, 2019, 2020, 2021, 2022]
print("QUARTERS:", QUARTERS)
print("CURRENT_QUARTER:", CURRENT_QUARTER)
print("CURRENT_QUARTERS:", CURRENT_QUARTERS)
print("CURRENT_YEAR:", CURRENT_YEAR)
print("LAST_YEAR_QUARTER:", LAST_YEAR_QUARTER)
print("CURRENT_YEARS:", CURRENT_YEARS)

QUARTERS: ['2022q2', '2022q3', '2022q4', '2023q1']
CURRENT_QUARTER: q1
CURRENT_QUARTERS: ['q2', 'q3', 'q4', 'q1']
CURRENT_YEAR: 2023
LAST_YEAR_QUARTER: 2022q4
CURRENT_YEARS: [2018, 2019, 2020, 2021, 2022]


In [3]:
# 查詢股本table
df_stock_id_name = pd.read_sql_query("select * from stockIdName",db)
print(len(df_stock_id_name))
df_stock_id_name.head()

1804


Unnamed: 0,id,name,listingDate,market,industry,capital
0,1101,台泥,1962/02/09,上市,水泥工業,73.56
1,1102,亞泥,1962/06/08,上市,水泥工業,35.46
2,1103,嘉泥,1969/11/14,上市,水泥工業,7.75
3,1104,環泥,1971/02/01,上市,水泥工業,6.54
4,1108,幸福,1990/06/06,上市,水泥工業,4.05


In [4]:
out = df_stock_id_name[['id', 'name', 'market', 'industry', 'capital']]
out.head()

Unnamed: 0,id,name,market,industry,capital
0,1101,台泥,上市,水泥工業,73.56
1,1102,亞泥,上市,水泥工業,35.46
2,1103,嘉泥,上市,水泥工業,7.75
3,1104,環泥,上市,水泥工業,6.54
4,1108,幸福,上市,水泥工業,4.05


In [5]:
ids = out['id'].tolist()

# 用的到的table(加速運算時間,不用每個id都查db)

In [38]:
# 月營收
monthly = pd.read_sql_query(f"select * from monthlyRevenue order by date" , db)
# 日收盤
daily = pd.read_sql_query("select * from daily",db)
daily = daily.sort_values(by=['date'])
# 現金流量表
cashflow = pd.read_sql_query("select * from cashflow order by date",db)
# 財報
df_financial_statement = pd.read_sql_query("select * from financialStatement order by date",db)
df_financial_statement = df_financial_statement.fillna(0)
df_financial_statement.replace('--', 0, inplace=True)
# 負債表
df_debt = pd.read_sql_query(f"select * from balanceSheet order by date",db)

# YoY > 0筆數

月營收yoy>0筆數

In [7]:
def countYoYGreaterThan0(id):
    df = monthly[(monthly['id']==id) & (monthly['YoY'] > 0)]
    
    try:
        return len(df)
    except:
        return np.nan
    
out['yoyIncreaseCount'] = [countYoYGreaterThan0(id) for id in ids]

# 月營收

In [8]:
# 近12月營收平均
def TTMRevenueCal(id):
    df = monthly[(monthly['id']==id)].tail(12)
    
    try:
        TTMRevenue = round((df['revenue'].sum()/100000),2)
        TTMYoY = round(df['sumYoY'].mean() ,2)
        return TTMRevenue, TTMYoY
    except:
        return np.nan

out['TTMRevenue'] = [TTMRevenueCal(id)[0] for id in ids]
out['TTMYoY'] = [TTMRevenueCal(id)[1] for id in ids]

In [9]:
out[out['id'].isin([2330,3037])]

Unnamed: 0,id,name,market,industry,capital,yoyIncreaseCount,TTMRevenue,TTMYoY
274,2330,台積電,上市,半導體業,259.32,96,22190.02,20.06
536,3037,欣興,上市,電子零組件業,15.24,80,1227.34,6.72


# 毛利率(Gross Profit Margin)
# 營業利益率(Operating profit Margin)
# 淨利率(Net profit margin)

In [10]:
df_financial_statement['grossProfitMargin'] = df_financial_statement['grossProfit'].div(df_financial_statement['revenue'])
df_financial_statement['operatingProfitMargin'] = df_financial_statement['operatingIncome'].div(df_financial_statement['revenue'])
df_financial_statement['netProfitMargin'] = df_financial_statement['income'].div(df_financial_statement['revenue'])

In [11]:
df_financial_statement[df_financial_statement['id']==2454]

Unnamed: 0,date,id,name,revenue,grossProfit,operatingIncome,incomeBeforeTax,income,eps,qeps,grossProfitMargin,operatingProfitMargin,netProfitMargin
371,2022q1,2454,聯發科,142710849,71783375,36467204.0,37970104.0,33413257.0,21.02,21.02,0.5,0.26,0.23
1340,2022q2,2454,聯發科,298440670,148547660,75648167.0,78907419.0,69025258.0,43.4,22.38,0.5,0.25,0.23
2316,2022q3,2454,聯發科,440601946,218643069,108702197.0,114503907.0,100110619.0,62.95,19.55,0.5,0.25,0.23
3294,2022q4,2454,聯發科,548796030,270904435,126788452.0,135561243.0,118625021.0,74.59,11.64,0.49,0.23,0.22
7515,2023q1,2454,聯發科,95651513,45912242,14368762.0,19103771.0,16890491.0,10.64,10.64,0.48,0.15,0.18
9306,2021q1,2454,聯發科,108032982,48520384,20197898.0,29075654.0,25777244.0,16.21,16.21,0.45,0.19,0.24
10255,2021q2,2454,聯發科,233686385,106558821,49032212.0,60662756.0,53363882.0,33.65,17.44,0.46,0.21,0.23
11229,2021q3,2454,聯發科,364760705,167774928,78319103.0,92852732.0,81725033.0,51.57,17.92,0.46,0.21,0.22
12190,2021q4,2454,聯發科,493414582,231604595,108040234.0,126852053.0,111872533.0,70.56,18.99,0.47,0.22,0.23
16364,2020q1,2454,聯發科,60862975,26237103,5802289.0,6730250.0,5804475.0,3.64,3.64,0.43,0.1,0.1


In [12]:
# 近四季毛利率、營業利益率、淨利率
def writeToOut(id, date, col):
    df = df_financial_statement[(df_financial_statement['date']==date) & (df_financial_statement['id']==id)]
    try: 
        return round(df[col].iloc[0], 2)
    except:
        return np.nan

cols = ['grossProfitMargin', 'operatingProfitMargin', 'netProfitMargin']

# for col in cols:
#     for idx, quarter in enumerate(QUARTERS):
#         out[f'{col}{CURRENT_QUARTERS[idx]}'] = [writeToOut(id, quarter, col) for id in ids]

# writeToOut(2330, '2023q1', 'grossProfitMargin')

In [13]:
# 近五年毛利率,營業利益率,淨利率 (too slow !!!)
def findYearFinancialStatement(id, year, col):
    try:
        return round(df_financial_statement[(df_financial_statement['id']==id) & (df_financial_statement['date'].str.contains(str(year)+'q4'))][col].iloc[0], 2)
    except:
        return np.nan

# for col in cols:
#     for year in CURRENT_YEARS:
#         out[f"{year}{col}"] = [findYearFinancialStatement(id, year, col) for id in ids]

In [14]:
out[out['id'].isin([2330,2454,3037])]

Unnamed: 0,id,name,market,industry,capital,yoyIncreaseCount,TTMRevenue,TTMYoY
274,2330,台積電,上市,半導體業,259.32,96,22190.02,20.06
360,2454,聯發科,上市,半導體業,16.0,86,4350.15,-14.63
536,3037,欣興,上市,電子零組件業,15.24,80,1227.34,6.72


# EPS

In [15]:
# 近四季eps
def nearFourQuarterEPS(id):
    try:
        fid = df_financial_statement['id'] == id
        return round(df_financial_statement[fid].sort_values(by=['date']).tail(4)['qeps'].sum(), 2)
    except:
        return np.nan
    
out['eps'] = [nearFourQuarterEPS(id) for id in ids]

In [16]:
# 上年eps
def lastYearEPS(id):
    try:
        isId = df_financial_statement['id'] == id
        isLastYear = df_financial_statement['date'].str.contains(LAST_YEAR_QUARTER)
        return df_financial_statement[isId & isLastYear]['eps'].iloc[0]
    except:
        return np.nan 
    
out['last_eps'] = [lastYearEPS(id) for id in ids]

# 收盤價

In [17]:
def recentClose(id):
    try:
        return daily[daily['id']==id].tail(1).iloc[0]['close']
    except:
        return np.nan
    
out['收盤價'] = [recentClose(id) for id in ids]

# 現金流量

In [20]:
# 近四季自由現金
def calFreeCashFlow(id):
    try:
        df = cashflow[cashflow['id']==id].tail(4)
        return df['qcashflow'].sum() / 1e5
    except:
        return np.nan

out['freeCashFlow'] = [calFreeCashFlow(id) for id in ids]

In [22]:
# 近5年自由現金流平均
def FiveYearsFreeCashFlowAvg(id):
    try:
        df = cashflow[(cashflow['id']==id) & (cashflow['date'].str.contains('q4'))].tail(5)
        freeCash = df['operating'] + df['investing']
        return round(freeCash.mean() / 1e5, 2)
    except:
        return np.nan
    
out['freeCashFlow5Y'] = [FiveYearsFreeCashFlowAvg(id) for id in ids]

# 負債

In [23]:
def debt(id):
    df = df_debt[df_debt['id']==id].tail(1)
    df['debtRate'] = df['liabilities'] / df['asset']
    try:
        return round(df['debtRate'].iloc[0] ,2)
    except:
        return np.nan

out['debt'] = [debt(id) for id in ids]

# 市值

In [24]:
out['marketCap'] = out['收盤價'] * out['capital']
df_marketCap = out.sort_values(by=['marketCap'], ascending=False).head(50)
df_marketCap[['id', 'name', 'industry', 'capital', 'marketCap', 'eps', '收盤價', ]].head(10)

Unnamed: 0,id,name,industry,capital,marketCap,eps,收盤價
274,2330,台積電,半導體業,259.32,141588.72,39.36,546.0
267,2317,鴻海,其他電子業,138.63,15041.35,9.02,108.5
360,2454,聯發科,半導體業,16.0,10864.0,64.21,679.0
262,2308,台達電,電子零組件業,25.98,9274.86,12.91,357.0
329,2412,中華電,通信網路業,77.57,9153.26,4.77,118.0
477,2881,富邦金,金融保險業,139.95,9152.73,0.73,65.4
311,2382,廣達,電腦及週邊設備業,38.63,8633.81,7.44,223.5
826,6505,台塑化,油電燃氣業,95.26,7801.79,0.55,81.9
478,2882,國泰金,金融保險業,162.03,7526.29,0.46,46.45
260,2303,聯電,半導體業,125.05,5671.02,6.79,45.35


# MA

In [25]:
def ma(id, day):
    df_close = daily[daily['id'] == id]
    try:
        return round(df_close.tail(day)['close'].mean(), 2)
    except:
        return np.nan
out['ma20'] = [ma(id, 20) for id in ids]
out['ma60'] = [ma(id, 60) for id in ids]
out['ma120'] = [ma(id, 120) for id in ids]

# PE, EPS_INCREASE, PEG

In [26]:
# pe
series_pe = round(out['收盤價']/out['eps'] ,2)
out['pe'] = series_pe

# eps_increase
out['eps_increase'] = round(100*(out['eps']-out['last_eps'])/out['last_eps'], 2)

# peg
out['peg'] = round(out['pe']/out['eps_increase'], 2)

In [27]:
def cal_avg_pe(id):
    df = daily[daily['id']==id]
    try:
        return round(df['pe'].mean(), 2)
    except:
        return np.nan

out['avg_pe'] = [cal_avg_pe(id) for id in ids]

## 筆數

In [28]:
def count_daily(id):
    df = daily[daily['id']==id]
    return len(df)

out['count'] = [count_daily(id) for id in ids]

## DCF

In [29]:
def epsDcf(eps, cagr = 0.02):
    r = 0.08   # 折現率, 希望報酬, 大盤平均報酬, WACC
    g = 0.02  # 永續成長率
    COUNT_YEAR = 5
    EV = 0     # 企業價值(每年eps折現加總+最終價值tv)

    for year in range(1, COUNT_YEAR + 1):
        FCF = round(eps * pow(1 + cagr, year), 2)
        discountFCF = round(FCF / pow((1 + r), year), 2)
        EV += discountFCF

        if(year == COUNT_YEAR):
            TV = (discountFCF * (1 + g)) / (r - g)
            TV = round(TV / (pow((1 + r), COUNT_YEAR)) , 2)
            EV += TV
    
    return EV

out['epsDcf'] = epsDcf(out['eps'])

In [30]:
def cashflowDcf(cashflow, cagr = 0.02):
    r = 0.08   # 折現率, 希望報酬, 大盤平均報酬, WACC
    g = 0.02  # 永續成長率
    COUNT_YEAR = 5
    EV = 0     # 企業價值(每年eps折現加總+最終價值tv)

    for year in range(1, COUNT_YEAR + 1):
        FCF = round(cashflow * pow(1 + cagr, year), 2)
        discountFCF = round(FCF / pow((1 + r), year), 2)
        EV += discountFCF

        if(year == COUNT_YEAR):
            TV = (discountFCF * (1 + g)) / (r - g)
            TV = round(TV / (pow((1 + r), COUNT_YEAR)) , 2)
            EV += TV
    return EV

def calCashflowDcf(id, cagr = 0.02):
    df = out[out['id']==id]
    freeCashFlow = df['freeCashFlow'].iloc[0]
    
    df_ids = df_stock_id_name[df_stock_id_name['id']==id]
    capital = df_ids['capital'].iloc[0]
    
    return round(cashflowDcf(freeCashFlow, cagr)/capital, 2)

out['cashflowDcf'] = [calCashflowDcf(id) for id in ids]

  return round(cashflowDcf(freeCashFlow, cagr)/capital, 2)
  return round(cashflowDcf(freeCashFlow, cagr)/capital, 2)


In [31]:
out[out['id'].isin([2454,2330])]

Unnamed: 0,id,name,market,industry,capital,yoyIncreaseCount,TTMRevenue,TTMYoY,eps,last_eps,收盤價,freeCashFlow,freeCashFlow5Y,debt,marketCap,ma20,ma60,ma120,pe,eps_increase,peg,avg_pe,count,epsDcf,cashflowDcf
274,2330,台積電,上市,半導體業,259.32,96,22190.02,20.06,39.36,39.2,546.0,4485.88,2856.75,0.39,141588.72,566.0,565.7,540.55,13.87,0.41,33.83,21.38,1364,508.58,223.49
360,2454,聯發科,上市,半導體業,16.0,86,4350.15,-14.63,64.21,74.59,679.0,761.11,541.82,0.45,10864.0,686.3,709.73,719.0,10.57,-13.92,-0.76,21.91,1364,829.58,614.58


# ROE

In [49]:
def roe(id):
    try:
        debt = df_debt[df_debt['id']==id].tail(1)
        shareholderEquity = debt['shareholderEquity'].iloc[0]

        financial_statement = df_financial_statement[df_financial_statement['id']==id].tail(1)
        income = financial_statement['income'].iloc[0]
        return round(income*100/shareholderEquity , 2)
    except:
        return np.nan

out['roe'] = [roe(id) for id in ids]

In [55]:
features = out[['id','name','market','industry','count','capital','marketCap','debt','ma20','ma60','ma120','TTMRevenue','TTMYoY','freeCashFlow','freeCashFlow5Y','roe','last_eps','eps','eps_increase','yoyIncreaseCount','pe','avg_pe','peg','收盤價','epsDcf','cashflowDcf']]
features.to_csv(f"features.csv", index = False)

# 統計數字

In [None]:
today = '20230811'
print('總家數:', len(out))
print('市值大於50億家數:', len(out[out['marketCap'] > 50]))
print(f'市值加總: {round(out["marketCap"].sum()/1e4, 2)}兆')

In [None]:
# 在季線上家數
upMa60 = len(out[out['收盤價'] > out['ma60']])
percent = round(len(out[out['收盤價'] > out['ma60']])*100/len(out), 2)
print(f'在季線上家數: {upMa60} ({percent}%)')
upMa120 = len(out[out['收盤價'] > out['ma120']])
percent120 = round(len(out[out['收盤價'] > out['ma120']])*100/len(out), 2)
print(f'在半年線上家數: {upMa120} ({percent120}%)')


# 選股
* 本益比 < 20
* eps > 3
* marketCap(市值) > 50億
* yoyCount(yoy>0次數) > 0
* industry(產業) 排除生技醫療業、航運業
* grossmargin(毛利率) > 0.2
* profit(淨利率), operating(營業利益率) > 0
* cashflowSum(現金流) > 0
* 自由現金流為正機率 > 80%
* count至少有一年資料 > 240

In [None]:
filter = out
industry = (filter['industry'] != '生技醫療業') & (filter['industry'] != '航運業') & (filter['industry'] != '文化創意業')
eps = (filter['eps'] > 3) 
pe = (filter['pe'] <= 20)
marketCap = (filter['marketCap'] > 50) 
cashflowSum = (filter['cashflowSum'] > 0)
yoyCount = (filter['yoy>0次數'] > 0)
grossmargin = (filter['毛利率q1'] > 0.2) & (filter['毛利率q2'] > 0.2) & (filter['毛利率q3'] > 0.2) & (filter['毛利率q4'] > 0.2)
operating = (filter['營業利益率q1'] > 0) & (filter['營業利益率q2'] > 0) & (filter['營業利益率q3'] > 0) & (filter['營業利益率q4'] > 0)
profit = (filter['淨利率q1'] > 0) & (filter['淨利率q2'] > 0) & (filter['淨利率q3'] > 0) & (filter['淨利率q4'] > 0)
cashflowPositiveRate = (filter['cashflowPositiveRate'] >= 80)
count = (filter['count'] > 240)

filter1 = filter[count&pe&industry&eps&marketCap&cashflowSum&profit&operating&grossmargin&cashflowPositiveRate] 

print(f'{len(filter1)} 筆')

In [None]:
# 產業分布
group = filter1.groupby(by=['industry']).agg({'id': len, 'name': ', '.join}).sort_values(by=['id'], ascending=False)
group

In [None]:
filter1.to_csv(f"filter1.csv", index = False)

In [None]:
ma20 = filter1['ma20']
ma60 = filter1['ma60']
ma120 = filter1['ma120']
ma20_60 = ma20 >= ma60
ma60_120 = ma60 >= ma120
ma20_120 = ma20 >= ma120
print('多頭結構：')
filter_long = filter1[ma20_60 & ma60_120 & ma20_120]
long_ids = filter_long['id'].tolist()
filter_long

In [None]:
dev = 2
diff20_60 = abs((ma20-ma60)*100/ma60) <= dev
diff60_120 = abs((ma60-ma120)*100/ma120) <= dev
diff20_120 = abs((ma20-ma120)*100/ma120) <= dev

print('均線糾結:')
filter_torn = filter1[diff20_60 & diff60_120 & diff20_120]
torn_ids = filter_torn['id'].tolist()
filter_torn

In [None]:
# filter在季線上機率
print(f"{round(len(filter1[filter1['收盤價'] >= filter1['ma60']])*100/len(filter1), 2)}%")

## MA分佈機率

In [None]:
# ma乖離機率
max_days = 120
def dev_ma(id, days):
    df_close = daily[daily['id'] == id][['date','id','name','close']]
    series = df_close.close
    ma = series.rolling(days).mean().tolist()[max_days - 1:]
    return ma

def cal_probability(list, threshold, now):
    p = round(np.sum(list > threshold)/len(list) * 100, 2) if threshold >= 0 else round(np.sum(list < threshold)/len(list) * 100, 2) 
    isInThreshold = ((threshold + 0.05) > now/100 > threshold)
    return isInThreshold, p
#     return isInThreshold, f'{p}% ←' if isInThreshold else f'{p}%'

In [None]:
df_prob = pd.DataFrame(columns=['id', 'name', '週期', '30%','25%','20%','15%', '10%', '5%', '0%', '-5%', '-10%', '-15%', '-20%', '-25%', '-30%'])

def cal_prob(id):
    df_id = out[out['id']==id]
    name = df_id['name'].iloc[0]
    new_row = {'id': id, 'name': name }
    ma20 = dev_ma(id, 20)
    ma60 = dev_ma(id, 60)
    ma120 = dev_ma(id, 120)
    dev_ma20_ma60 = np.divide(ma20, ma60) - 1
    dev_ma20_ma120 = np.divide(ma20, ma120) - 1
    now_ma20 = out[out['id']==id]['ma20'].iloc[0]
    now_ma60 = out[out['id']==id]['ma60'].iloc[0]
    now_ma120 = out[out['id']==id]['ma120'].iloc[0]
    now_ma20_ma60 = round(100 * (np.divide(now_ma20, now_ma60) - 1), 2)
    now_ma20_ma120 = round(100 * (np.divide(now_ma20, now_ma120) - 1), 2)
    
    for type in ['季線']:
        flag = False
        for percent in range(30, -30-1, -5):
            new_row['週期'] = type
            now_ma = now_ma20_ma60 if type == '季線' else now_ma20_ma120
            dev_ma1 = dev_ma20_ma60 if type == '季線' else dev_ma20_ma120
            isInThreshold, prob = cal_probability(dev_ma1, percent/100, now_ma)
            new_row[f'{percent}%'] = f"{prob}% ←" if isInThreshold else f"{prob}%"
            if isInThreshold and prob < 10:
                flag = True
        if flag:
            df_prob.loc[len(df_prob)] = new_row

# 在filter1裡面ma機率分佈
[cal_prob(id) for id in filter1['id'].tolist()]
print(len(df_prob))
df_prob

# 本益比分佈機率

In [None]:
PE_MIN = 0
PE_MAX = 100

def peDistribution(id):
    pe = daily[daily['id']==id][['date','id','name','pe']]
    pe = pe[(pe['pe'] > PE_MIN) & (pe['pe'] < PE_MAX)]
    return pe['pe'].tolist()

id = 1723
pe = peDistribution(id)
y, x, _ = plt.hist(pe, 100)
df_id = out[out['id']==id]
name = df_id['name'].iloc[0]
now_pe = df_id['pe'].iloc[0]
most_pe = round(x[np.where(y == y.max())][0], 2)
eps = df_id['eps'].iloc[0]

print(f"[{len(pe)}]", name)
print('[PE] :', '目前:', now_pe, 'most:', most_pe)
print('[EPS]:','目前:', eps)
plt.axvline(now_pe, color='k', linestyle='dashed', linewidth=1)

plt.show()

In [None]:
df_target = pd.DataFrame(columns=['count', 'id', 'name', 'now_pe','most_pe', 'avg_pe', 'eps', '收盤價', '偏離平均程度'])

def countTarget(id):
    pe = peDistribution(id)
    y, x, _ = plt.hist(pe, 100)
    df_id = out[out['id']==id]
    name = df_id['name'].iloc[0]
    now_pe = df_id['pe'].iloc[0]
    most_pe = round(x[np.where(y == y.max())][0], 2)
    avg_pe = df_id['avg_pe'].iloc[0]
    eps = df_id['eps'].iloc[0]
    close = df_id['收盤價'].iloc[0]
    cal_pe = min(most_pe, avg_pe)
    

    new_row = {'count': len(pe), 
               'id':id, 
               'name':name, 
               'now_pe':now_pe, 
               'most_pe':most_pe, 
               'avg_pe': avg_pe,
               'eps':eps, 
               '收盤價':close,
               '偏離平均程度':round(100*(cal_pe/now_pe - 1), 2)
              }
    
    df_target.loc[len(df_target)] = new_row
    

[countTarget(id) for id in filter1['id'].tolist()]

In [None]:
id = df_target['id'].isin(filter1['id'].tolist())

df_target_filter = (df_target[(df_target['count'] > 0)]) # 過濾pe數量>0
df_target_filter = df_target_filter.sort_values(by=['偏離平均程度'],ascending=False)
df_target_filter = df_target_filter.reset_index(drop=True)

df_target_filter

In [None]:
# 偏離程度>20
bias = df_target_filter['偏離平均程度'] > 20
isLong = df_target_filter['id'].isin(long_ids)
isTorn = df_target_filter['id'].isin(torn_ids)
df_target_filter[bias & isLong].to_csv(f'{today}_long.csv')
df_target_filter[bias & isTorn].to_csv(f'{today}_torn.csv')
df_target_filter[bias].to_csv(f'{today}_filter.csv')
print('、'.join(df_target_filter[bias]['name'].tolist()))
df_target_filter[bias]

In [None]:
bias_ids = df_target_filter[bias]['id'].tolist()
# df_prob[df_prob['id'].isin(bias_ids)]
df_prob[df_prob['id'].isin(bias_ids)].to_csv(f'{today}_probility.csv', index=False)

# 更新每月filter

In [None]:
# 需更新local的filter.csv檔案,再依據檔案新增新的
old = pd.read_csv('filter.csv') # 20230811
new = pd.merge(old, df_target_filter[bias][['id','name','收盤價']], how='outer')
new.rename(columns = {'收盤價': today}, inplace = True)
print(len(old), len(new))
new.to_csv(f"filter.csv", index = False)
new

In [56]:
db.close()