In [1]:
# To create abstract classes, import the ABC class and the abstractmethod decorator from Python's built-in abc module
from abc import ABC, abstractmethod
# 为了整理数据，导入第三方库pandas
import pandas as pd

# 公共清洗函数
def clean_sales_data(df):
    """Return a cleaned copy of a raw sales DataFrame.

    The result holds exactly the columns ``date``, ``order_id``, ``money``
    and ``province``, in that order. Columns absent from ``df`` are created
    and filled with missing values; any other columns are discarded.

    ``date`` is converted with ``pd.to_datetime`` and ``money`` with
    ``pd.to_numeric``; values that cannot be parsed become NaT/NaN
    (``errors="coerce"``). Rows whose ``date`` or ``money`` is missing after
    conversion are dropped. The surviving rows keep their original index
    labels.

    Args:
        df: DataFrame to clean. It is not modified, because ``reindex``
            produces a new object.

    Returns:
        A new DataFrame containing only the rows with a valid date and a
        valid amount.
    """
    required = ["date", "money"]

    # Fix the column layout first, then convert both typed columns in one chain.
    normalized = df.reindex(
        columns=["date", "order_id", "money", "province"]
    ).assign(
        date=lambda frame: pd.to_datetime(frame["date"], errors="coerce"),
        money=lambda frame: pd.to_numeric(frame["money"], errors="coerce"),
    )

    # A row is unusable when either converted field is missing.
    return normalized.dropna(subset=required)
    
# 创建一个ABC的子类，定义为数据读取的抽象类
class FileReader(ABC):
    """Abstract base class for data readers.

    The class cannot be instantiated directly: a subclass must override
    ``read_data`` before instances of it can be created.
    """

    @abstractmethod
    def read_data(self):
        """Read the data; concrete subclasses provide the implementation."""

# 定义一个读取txt文件的子类,通过read_data方法实现txt的读取功能
class TxtFileReader(FileReader):
    """Reader for delimited text files that have no header row."""

    def __init__(self, data_path):
        """Store the path of the text file to read.

        Args:
            data_path: Location of the file handed to ``pd.read_csv``.
        """
        self.data_path = data_path

    def read_data(self):
        """Load the text file and return its cleaned contents.

        The file is parsed by ``pd.read_csv`` with ``header=None``, so the
        column labels are supplied here. The parsed frame is then passed
        through ``clean_sales_data``.

        Returns:
            The DataFrame returned by ``clean_sales_data``.
        """
        # The file carries no header line; label the columns explicitly.
        column_names = ["date", "order_id", "money", "province"]
        raw_frame = pd.read_csv(self.data_path, header=None, names=column_names)
        return clean_sales_data(raw_frame)

# 定义一个读取json文件的子类,通过read_data方法实现json文件的读取功能
class JsonFileReader(FileReader):
    """Reader for JSON files."""

    def __init__(self, data_path):
        """Store the path of the JSON file to read.

        Args:
            data_path: Location of the file handed to ``pd.read_json``.
        """
        self.data_path = data_path

    def read_data(self):
        """Load the JSON file and return its cleaned contents.

        Returns:
            The DataFrame returned by ``clean_sales_data`` for the frame
            parsed by ``pd.read_json``.
        """
        return clean_sales_data(pd.read_json(self.data_path))

if __name__ == "__main__":
    # One reader per sample file; the constructors only store the path.
    txt_file_reader = TxtFileReader("data/january_sales.txt")
    json_file_reader = JsonFileReader("data/february_sales.json")

    # Read and print each file in turn. A failure on one file is reported
    # and does not prevent the next file from being processed.
    for reader in (txt_file_reader, json_file_reader):
        try:
            print(reader.read_data())
        except Exception as e:
            print("读取文件失败：", e)
   
    
    

          date  order_id    money  province
0   2025-01-01  ORD93810   645.52   Jiangsu
1   2025-01-01  ORD46048  1299.97  Shanghai
2   2025-01-01  ORD23434  3415.83   Sichuan
3   2025-01-01  ORD21395  2993.41   Beijing
4   2025-01-01  ORD13905   559.11  Shanghai
..         ...       ...      ...       ...
305 2025-01-31  ORD21959  2403.05  Zhejiang
306 2025-01-31  ORD53687  1673.19   Beijing
307 2025-01-31  ORD31079  1716.04   Jiangsu
308 2025-01-31  ORD74942  1512.27  Zhejiang
309 2025-01-31  ORD82102   279.82   Beijing

[310 rows x 4 columns]
          date  order_id    money  province
0   2025-02-01  ORD51224  1336.75   Beijing
1   2025-02-01  ORD62976  4338.06   Beijing
2   2025-02-01  ORD96200  4359.90  Zhejiang
3   2025-02-01  ORD64165   365.60   Sichuan
4   2025-02-01  ORD57419  3150.91  Zhejiang
..         ...       ...      ...       ...
275 2025-02-28  ORD41147  1575.64   Beijing
276 2025-02-28  ORD41252  2157.15   Sichuan
277 2025-02-28  ORD69914   407.76  Zhejiang
278 2025