In [2]:
import pandas as pd
from typing import *

In [3]:
# Break an underscore-separated "YYYY_MM_DD" stamp into integer fields.
# The pieces are only converted to int; nothing checks they form a real date.
y, m, d = (int(field) for field in "2015_08_35".split("_"))
m

8

In [4]:
from typing import Sequence
from pandas import DataFrame


class StockDataBasis:
    """Base wrapper around a DataFrame that must contain a set of key columns.

    Subclasses pass the column names they rely on; construction fails if any
    of them is absent from the frame.
    """

    def __init__(self, data_frame: pd.DataFrame, column_names: Sequence[str] = ()) -> None:
        """Check the required columns and keep a reference to the frame.

        Args:
            data_frame: Frame to wrap. It is stored as-is, not copied.
            column_names: Names that must be present in ``data_frame.columns``.
                Defaults to an empty tuple, i.e. no required columns.

        Raises:
            AssertionError: If a required column is missing.
        """
        existed_columns = data_frame.columns
        for column_name in column_names:
            # Raised explicitly (rather than via `assert`) so the check is not
            # stripped under `python -O`; the exception type is kept for callers.
            if column_name not in existed_columns:
                raise AssertionError(f"Key column {column_name} not found in data_frame")
        self.data_frame = data_frame

    def value_pairs(self, key, key_data_frame: pd.DataFrame, value_data_frame: pd.DataFrame) -> Dict:
        """Abstract hook that subclasses must override.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclass must implement abstract method")

class AdjustFactor(StockDataBasis):
    """Adjust-factor table whose three factor columns are coerced to numeric."""

    def __init__(self, data_frame: DataFrame) -> None:
        """Validate the expected columns, then convert the factor columns.

        The conversion is written back into the wrapped frame, so the frame
        object passed in is modified.

        Args:
            data_frame: Frame that must contain ``dividOperateDate`` and the
                three factor columns.
        """
        factor_columns = ["foreAdjustFactor", "backAdjustFactor", "adjustFactor"]
        required_columns = ["dividOperateDate", *factor_columns]
        super().__init__(data_frame, required_columns)

        # Convert each factor column with pd.to_numeric, then store the result
        # back under the same column names.
        numeric_factors = self.data_frame[factor_columns].apply(pd.to_numeric)
        self.data_frame[factor_columns] = numeric_factors

    def get_divid_operate_date(self):
        """Return the ``dividOperateDate`` column."""
        frame = self.data_frame
        return frame["dividOperateDate"]

    def get_before_adjust_factor(self):
        """Return the ``foreAdjustFactor`` column."""
        frame = self.data_frame
        return frame["foreAdjustFactor"]

    def get_after_adjust_factor(self):
        """Return the ``backAdjustFactor`` column."""
        frame = self.data_frame
        return frame["backAdjustFactor"]

    def get_adjust_factor(self):
        """Return the ``adjustFactor`` column."""
        frame = self.data_frame
        return frame["adjustFactor"]

In [5]:
class StockHDF5Reader:
    """Reader for a stock HDF5 file that must contain an ``adjust_factor`` key.

    The constructor only validates the file; tables are read on first access
    and cached on the instance.
    """

    def __init__(self, file_path) -> None:
        """Check that the file opens and holds the required keys.

        The store is opened read-only for validation and is always closed
        again, whether validation succeeds or fails.

        Args:
            file_path: Path handed to ``pd.HDFStore`` and later ``pd.read_hdf``.

        Raises:
            Exception: If the store cannot be opened.
            AssertionError: If a required key is missing from the store.
        """
        try:
            hdf = pd.HDFStore(file_path, mode='r')
        except Exception as e:
            # Chain explicitly so the original failure stays attached as the cause.
            raise Exception("Failed to open file: "+str(e)) from e
        try:
            existed_keys = hdf.keys()
            for key in ["adjust_factor"]:
                # Raised explicitly (rather than via `assert`) so the check is
                # not stripped under `python -O`; the exception type is kept.
                if "/"+key not in existed_keys:
                    raise AssertionError(f"Key {key} not found in file {file_path}")
        finally:
            # Release the file handle even when a required key is missing.
            hdf.close()
        self.file_path = file_path
        self._adjust_factor = None

    def get_adjust_factor(self) -> AdjustFactor:
        """Return the ``adjust_factor`` table wrapped in ``AdjustFactor``.

        The table is read from ``self.file_path`` on the first call and the
        same wrapper object is returned on later calls.
        """
        if self._adjust_factor is None:
            self._adjust_factor = AdjustFactor(pd.read_hdf(self.file_path, key="adjust_factor"))
        return self._adjust_factor
    

In [6]:
# Inspect the sample store: list its keys, then print the stock_basic table.
# The context manager closes the file even if one of the lookups raises.
with pd.HDFStore("../../sh.600556.h5", mode='r') as test_hdf5:
    print(test_hdf5.keys())
    print(test_hdf5["/stock_basic"])

['/adjust_factor', '/balance_data', '/cash_flow_data', '/day_data', '/dividend_data', '/dupont_data', '/forecast_report', '/growth_data', '/month_data', '/operation_data', '/performance_express_report', '/profit_data', '/stock_basic', '/week_data']
        code code_name     ipoDate outDate type status
0  sh.600556       天下秀  2001-08-07            1      1


In [12]:
# Read the stock_basic table directly into a DataFrame and display it.
df = pd.read_hdf(
    path_or_buf="../../sh.600556.h5",
    key='/stock_basic',
)
df

Unnamed: 0,code,code_name,ipoDate,outDate,type,status
0,sh.600556,天下秀,2001-08-07,,1,1


In [17]:
# Entry 0 of the outDate column; per the recorded output it is stored as an
# empty string rather than a missing value.
df["outDate"][0]

''

In [52]:
# NOTE(review): relies on hidden kernel state. This needs `test_hdf5` to expose
# get_adjust_factor() (as StockHDF5Reader does), but the only visible assignment
# binds `test_hdf5` to a pd.HDFStore that is closed by then. Confirm which
# object was live when this ran.
test_hdf5.get_adjust_factor().data_frame.index

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], dtype='int64')

In [15]:
# NOTE(review): `read_hdf` is not assigned in any visible cell; presumably it is
# a DataFrame loaded from the adjust_factor key (the columns shown match) - confirm.
# Only the last expression is displayed; the head() and describe() results are
# computed and then discarded.
read_hdf.head()
read_hdf.describe()
read_hdf.columns

Index(['code', 'dividOperateDate', 'foreAdjustFactor', 'backAdjustFactor',
       'adjustFactor'],
      dtype='object')

In [14]:
# Entry 10 of dividOperateDate; per the recorded output it is a plain string.
# NOTE(review): depends on `read_hdf`, which no visible cell assigns.
read_hdf.dividOperateDate[10]

'2020-07-09'

In [22]:
from enum import StrEnum

class StockDataKey(StrEnum):
    """String-valued identifiers for stock datasets.

    Each member's value is the lowercase form of its name.

    NOTE(review): presumably these are meant to be HDF5 key names. The store
    listing printed earlier in this notebook contains '/adjust_factor' and
    '/stock_basic', but the period tables appear there as '/day_data',
    '/week_data' and '/month_data'; none of the STOCK_* period values below
    occur in that listing. Confirm the intended names.
    """
    ADJUST_FACTOR = "adjust_factor"
    STOCK_BASIC = "stock_basic"
    STOCK_DAILY = "stock_daily"
    STOCK_WEEKLY = "stock_weekly"
    STOCK_MONTHLY = "stock_monthly"
    STOCK_5MIN = "stock_5min"
    STOCK_15MIN = "stock_15min"
    STOCK_30MIN = "stock_30min"
    STOCK_60MIN = "stock_60min"
    STOCK_1MIN = "stock_1min"
    
# A StrEnum member prints as its plain string value.
print(StockDataKey.ADJUST_FACTOR)

adjust_factor
