In [None]:
import sys
import os
import shutil
import time
import datetime
import sqlite3
from loguru import logger
from io import StringIO
import shioaji as sj
from pathlib import Path
import requests
import pandas as pd
import numpy as np
from typing import List, Dict, Set, Optional
import json
from io import StringIO

""" Crawler """
from trader.pipeline.crawlers.stock_price_crawler import StockPriceCrawler
from trader.pipeline.crawlers.stock_chip_crawler import StockChipCrawler
from trader.pipeline.crawlers.stock_tick_crawler import StockTickCrawler
from trader.pipeline.crawlers.stock_info_crawler import StockInfoCrawler
from trader.pipeline.crawlers.financial_statement_crawler import (
    FinancialStatementCrawler,
)
from trader.pipeline.crawlers.monthly_revenue_report_crawler import (
    MonthlyRevenueReportCrawler,
)

""" Cleaner """
from trader.pipeline.cleaners.stock_chip_cleaner import StockChipCleaner
from trader.pipeline.cleaners.stock_price_cleaner import StockPriceCleaner
from trader.pipeline.cleaners.stock_tick_cleaner import StockTickCleaner
from trader.pipeline.cleaners.financial_statement_cleaner import (
    FinancialStatementCleaner,
)
from trader.pipeline.cleaners.monthly_revenue_report_cleaner import (
    MonthlyRevenueReportCleaner,
)

""" Loader """
from trader.pipeline.loaders.stock_chip_loader import StockChipLoader
from trader.pipeline.loaders.stock_price_loader import StockPriceLoader
from trader.pipeline.loaders.financial_statement_loader import FinancialStatementLoader
from trader.pipeline.loaders.monthly_revenue_report_loader import (
    MonthlyRevenueReportLoader,
)

""" Updater """
from trader.pipeline.updaters.stock_tick_updater import StockTickUpdater
from trader.pipeline.updaters.stock_chip_updater import StockChipUpdater
from trader.pipeline.updaters.stock_price_updater import StockPriceUpdater
from trader.pipeline.updaters.financial_statement_updater import (
    FinancialStatementUpdater,
)
from trader.pipeline.updaters.monthly_revenue_report_updater import (
    MonthlyRevenueReportUpdater,
)

""" Data API """
from trader.api.stock_chip_api import StockChipAPI
from trader.api.stock_tick_api import StockTickAPI
from trader.api.stock_price_api import StockPriceAPI
from trader.api.financial_statement_api import FinancialStatementAPI
from trader.api.monthly_revenue_report_api import MonthlyRevenueReportAPI


""" Others """
from trader.pipeline.crawlers.utils.payload import Payload
from trader.pipeline.crawlers.utils.request_utils import RequestUtils
from trader.pipeline.utils.data_utils import DataUtils
from trader.pipeline.utils import (
    URLManager,
    DataType,
    MarketType,
    FinancialStatementType,
    FileEncoding,
)
from trader.utils import ShioajiAccount, Units, TimeUtils
from trader.config import (
    PIPELINE_DOWNLOADS_PATH,
    PRICE_DOWNLOADS_PATH,
    TICK_DOWNLOADS_PATH,
    FINANCIAL_STATEMENT_DOWNLOADS_PATH,
    CHIP_DOWNLOADS_PATH,
    DOWNLOADS_METADATA_DIR_PATH,
    FINANCIAL_STATEMENT_META_DIR_PATH,
    DB_PATH,
    PRICE_TABLE_NAME,
    CHIP_TABLE_NAME,
    BALANCE_SHEET_TABLE_NAME,
    COMPREHENSIVE_INCOME_TABLE_NAME,
    CASH_FLOW_TABLE_NAME,
    LOGS_DIR_PATH,
)

### API

In [None]:
"""Monthly Revenue Report API"""

# Query parameters for the monthly revenue lookup.
year, month = 2024, 1

mrr_api = MonthlyRevenueReportAPI()

# `year` is passed for both year arguments; `month` and the literal 2 fill the
# two remaining arguments.
# NOTE(review): presumably the order is (start_year, end_year, start_month,
# end_month) — confirm against MonthlyRevenueReportAPI.get_range.
df = mrr_api.get_range(year, year, month, 2)

# Bare trailing expression so the notebook shows the result as the cell output.
df

In [None]:
"""Financial Statement API"""

# Choose which statement table to query; swap in one of the commented
# alternatives to read a different table.
# table_name = BALANCE_SHEET_TABLE_NAME
# table_name = COMPREHENSIVE_INCOME_TABLE_NAME
table_name = CASH_FLOW_TABLE_NAME
year = 2024
season = 1
fs_api = FinancialStatementAPI()

# Pass `year` for both year arguments rather than repeating the literal 2024,
# so editing `year` above cannot leave the second argument stale. This mirrors
# how the monthly revenue cell calls its get_range with `year, year`.
# NOTE(review): presumably the order is (table_name, start_year, end_year,
# start_season, end_season) — confirm against FinancialStatementAPI.get_range.
df = fs_api.get_range(table_name, year, year, season, 2)

In [None]:
"""Stock Price API"""

price = StockPriceAPI()

# Date window handed to the price API.
start_date = datetime.date(year=2025, month=7, day=1)
end_date = datetime.date(year=2025, month=7, day=5)

# Not referenced by the query below; kept because it is a notebook-level name.
stock_id = "2330"

df = price.get_range(start_date, end_date)

In [None]:
"""Stock Tick API"""

# Both ends of the window are the same calendar day.
start_date = end_date = datetime.date(year=2023, month=7, day=5)

tick = StockTickAPI()
df = tick.get_ordered_ticks(start_date, end_date)

# Bare trailing expression so the notebook shows the result as the cell output.
df

In [None]:
"""Stock Chip API"""

chip = StockChipAPI()

# `end_date` is only used by the commented-out range query below.
start_date = datetime.date(year=2014, month=12, day=1)
end_date = datetime.date(year=2015, month=1, day=15)

# Range alternative (disabled):
# df = chip.get_range(start_date, end_date)

# Active query: single lookup keyed by `start_date`.
df = chip.get(start_date)

### Update

In [None]:
"""Stock Chip"""

# Candidate dates; only `date_1` is live, and only the disabled crawl/clean
# steps below refer to it.
# date = datetime.date(2014, 4, 20)
# date_2 = datetime.date(2018, 1, 15)
date_1 = datetime.date(year=2019, month=12, day=3)

# --- Disabled: update through the updater ---
# chip_updater = StockChipUpdater()
# chip_updater.update(start_date, end_date)

# --- Disabled: manual crawl -> clean steps ---
# chip_crawler = StockChipCrawler()
# chip_cleaner = StockChipCleaner()
# twse_df = chip_crawler.crawl_twse_chip(date_1)
# tpex_df = chip_crawler.crawl_tpex_chip(date_1)
# twse_df = chip_cleaner.clean_twse_chip(twse_df, date_1)
# tpex_df = chip_cleaner.clean_tpex_chip(tpex_df, date)

# --- Disabled: other loader calls ---
# chip_loader.create_db()
# chip_loader.add_to_db()

# Active step: run the loader with remove_files=False.
# NOTE(review): presumably this keeps the downloaded source files on disk —
# confirm against StockChipLoader.add_to_db.
chip_loader = StockChipLoader()
chip_loader.add_to_db(remove_files=False)

In [None]:
"""Stock Price"""

# Only two statements in this cell are live: constructing the loader and
# calling add_to_db(remove_files=False). Everything else is disabled scratch
# code for the crawl / clean / update paths.

# Disabled date window for the crawl/update steps below.
# start_date = datetime.date(2013, 1, 2)
# end_date = datetime.date(2013, 1, 3)

# price_crawler = StockPriceCrawler()
# price_cleaner = StockPriceCleaner()
price_loader = StockPriceLoader()
# price_updater = StockPriceUpdater()
# price_updater.update(start_date, end_date)

# Disabled: crawl one market for `start_date`.
# df = price_crawler.crawl_twse_price(start_date)
# df = price_crawler.crawl_tpex_price(start_date)

# NOTE(review): presumably remove_files=False keeps the downloaded source
# files on disk — confirm against StockPriceLoader.add_to_db.
price_loader.add_to_db(remove_files=False)

# Disabled: clean the crawled frame when the crawl returned something.
# if df is not None:
# df = price_cleaner.clean_twse_price(df, start_date)
# df = price_cleaner.clean_tpex_price(df, start_date)

In [None]:
"""Stock Tick"""

# Every statement in this cell is commented out, so running it performs no
# tick update.
# tick_crawler = StockTickCrawler()
# tick_cleaner = StockTickCleaner()
# tick_updater = StockTickUpdater()

# `start_date` / `end_date` are not defined in this cell; if re-enabled, the
# call below would use whatever values an earlier cell left in the kernel.
# tick_updater.update(start_date, end_date)

In [None]:
"""Monthly Revenue Report Crawler"""

# Period bounds. Only the start values feed the live crawl/clean calls; the
# end values are referenced solely by the disabled updater call.
start_year, end_year = 2018, 2025
start_month, end_month = 5, 6

mrr_crawler = MonthlyRevenueReportCrawler()
mrr_cleaner = MonthlyRevenueReportCleaner()

# --- Disabled: update through the updater ---
# mrr_updater = MonthlyRevenueReportUpdater()
# mrr_updater.update(
#     start_year=start_year,
#     end_year=end_year,
#     start_month=start_month,
#     end_month=end_month,
# )

# Crawl one (year, month) and clean what came back, passing the same period
# keywords to both calls.
target_period = {"year": start_year, "month": start_month}
df_list = mrr_crawler.crawl_twse_monthly_revenue(**target_period)
df = mrr_cleaner.clean_monthly_revenue(df_list=df_list, **target_period)

# --- Disabled: loader steps ---
# mrr_loader = MonthlyRevenueReportLoader()
# mrr_loader.create_db()
# mrr_loader.add_to_db()

In [None]:
"""Financial Statement"""

# Shared period settings and crawler/cleaner instances. The three statement
# cells that follow read `fs_crawler`, `fs_cleaner`, `start_year` and
# `start_season` from here.
start_year, end_year = 2013, 2025
start_season = end_season = 2

fs_crawler = FinancialStatementCrawler()
fs_cleaner = FinancialStatementCleaner()

# --- Disabled: loader / updater path ---
# fs_loader = FinancialStatementLoader()
# fs_updater = FinancialStatementUpdater()
# fs_updater.update_balance_sheet(start_year, end_year, start_season, end_season)
# fs_updater.update_comprehensive_income(start_year, end_year, start_season, end_season)
# fs_updater.update_cash_flow(start_year, end_year, start_season, end_season)

In [None]:
"""Balance Sheet"""

# Crawl the balance sheet for one (year, season).
# `fs_crawler`, `start_year` and `start_season` are not defined in this cell;
# they must already exist in the kernel (the "Financial Statement" cell sets them).
df_list = fs_crawler.crawl_balance_sheet(year=start_year, season=start_season)
# Disabled cleaning step for the crawled result.
# df = fs_cleaner.clean_balance_sheet(df_list, year=start_year, season=start_season)

In [None]:
"""Comprehensive Income"""

# Crawl the comprehensive income statement for one (year, season).
# `fs_crawler`, `start_year` and `start_season` are not defined in this cell;
# they must already exist in the kernel (the "Financial Statement" cell sets them).
df_list = fs_crawler.crawl_comprehensive_income(start_year, start_season)
# NOTE(review): the disabled line below passes `season`, not `start_season`;
# `season` is not defined in this cell, so re-enabling it would use whatever
# value another cell left behind — confirm which one is intended.
# df = fs_cleaner.clean_comprehensive_income(df_list, start_year, season)

In [None]:
"""Cash Flow"""

# Period for this crawl. This rebinds the notebook-level `year` / `season`
# names used by earlier cells.
year, season = 2013, 4

# Crawl, then clean, the cash flow statement for that period.
# `fs_crawler` and `fs_cleaner` are not defined in this cell; they must
# already exist in the kernel.
df_list = fs_crawler.crawl_cash_flow(year, season)
df = fs_cleaner.clean_cash_flow(df_list, year, season)

In [None]:
import pandas as pd

# Mock report records with several columns. The third row repeats the key
# fields of the first row but carries a different "上月營收" value, so it is
# the duplicate that the de-duplication step below should drop.
record_fields = ("stock_id", "公司名稱", "year", "month", "當月營收", "上月營收")
record_rows = [
    (1101, "台泥", 2024, 5, 1000, 950),
    (1102, "亞泥", 2024, 5, 2000, 2100),
    (1101, "台泥", 2024, 5, 1000, 1950),  # duplicate key
    (1103, "嘉泥", 2024, 5, 1500, 1600),
]
data = [dict(zip(record_fields, row)) for row in record_rows]

df = pd.DataFrame(data)
print("✅ 原始資料：")
print(df)

# Drop rows that repeat the key columns (first occurrence wins), then
# renumber the index from zero in the same chain.
dedup_keys = ["stock_id", "公司名稱", "year", "month"]
df_dedup = df.drop_duplicates(subset=dedup_keys, keep="first").reset_index(drop=True)

print("\n✅ 去重後並重排 index 的結果：")
print(df_dedup)