In [None]:
import pandas as pd
import psycopg2 as pg
import os

In [None]:
# Filenames of log files that the import loop skipped because their "dateTime"
# column was not parsed as datetimes; written to failed.csv at the end.
failed_log = []

In [None]:
# Columns loaded from each log file (passed to read_toplog as `filter_col`,
# which forwards it to pandas.read_csv as `usecols`).
colfilter = [
    "sCode", "eType", "dateTime", "funcArea", "sDescription", "sFilename",
    "nSubCode", "eCPU"
]

In [None]:
# Chinese -> English phrase map used by replace_desp() to translate
# status-change descriptions. Replacements are applied one after another in
# the order the entries are listed here.
replace_dic = {
    "开机": "Power up",
    "初始化": "Initializing",
    "维护": "Maintenance",
    "错误": "Error",
    "紧急停止": "Emergency stop",
    "忙": "Busy",
    "诊断": "Diagnostics",
    "准备": "Ready",
    "受控停机": "Controlled stop",
    "未连接": "Not connected",
    "温度调整": "Adjusting thermal",
    "分析仪状态从": "Analyzer Status changed from",
    "变为": "to",
    "样品": "SAMPLE",
    "试剂1": "REAGENT 1",
    "试剂2": "REAGENT 2"
}

In [None]:
# Exact sDescription values that logaddsq() keeps even when the row's eType is
# not "ERROR" (matched after replace_desp() has translated the description).
Filter_List_sDescription = [
    "Analyzer Status changed from Busy to Emergency stop.",
    "Analyzer Status changed from Busy to Controlled stop.",
    "Analyzer Status changed from Controlled stop to Error.",
    "Analyzer Status changed from Controlled stop to Emergency stop.",
    "Analyzer Status changed from Initializing to Emergency stop.",
    "Analyzer Status changed from Initializing to Error.",
    "Analyzer Status changed from Maintenance to Emergency stop.",
    "Analyzer Status changed from Maintenance to Error.",
    "timeFlag"
]

In [None]:
# sCode values that logaddsq() discards. The leading apostrophe is part of the
# compared value, so it must match the sCode text exactly as read from the file.
# NOTE(review): presumably the log files store codes with that apostrophe
# prefix -- confirm against a sample file.
Unselect_List_sCode = [
    "'03218", "'03004", "'02083", "'02025", "'03215", "'03188", "'02055",
    "'03184", "'01285", "'01336", "'02077", "'03007", "'03014", "'00056",
    "'03013", "'03084", "'03016", "'03085", "'03211", "'03020", "'03019",
    "'03005", "'03011", "'03018", "'03015", "'03017", "'03009", "'03010",
    "'03021"
]

In [None]:
# First-pass whitelist in logaddsq(): a row is kept only if its funcArea AND
# its eType appear in the respective list.
Filter_List_funcArea = ["Analyzer", "Materials"]
Filter_List_eType = ["ERROR", "INFORMATION"]

In [None]:
def file_filter(filedir, keyword):
    """Return the entry names in `filedir` that contain `keyword`.

    Names are the bare entries reported by `os.listdir` (not joined with
    `filedir`) and keep the order in which `os.listdir` returned them.
    """
    matches = []
    for entry in os.listdir(filedir):
        if keyword in entry:
            matches.append(entry)
    return matches

In [None]:
def replace_desp(desp):
    """Translate a Chinese analyzer status-change description into English.

    Text that does not contain the Chinese status-change marker checked below
    is returned unchanged. Otherwise every key of `replace_dic` occurring in
    the text is replaced by its value, in the dict's iteration order.
    """
    if "分析仪状态从" in desp:
        for chinese, english in replace_dic.items():
            desp = desp.replace(chinese, english)
    return desp

In [None]:
# log_start_time, log_last_time = 0, 0

In [None]:
def read_toplog(logfullpath, filter_col, code):
    """Load one tab-separated log file into a DataFrame.

    Prints `logfullpath`, then reads only the columns named in `filter_col`
    using text encoding `code`, asking pandas to parse the "dateTime" column
    as dates.
    """
    print(logfullpath)
    read_options = {
        "sep": "\t",
        "encoding": code,
        "usecols": filter_col,
        "parse_dates": ["dateTime"],
    }
    return pd.read_csv(logfullpath, **read_options)

In [None]:
def logaddsq(tlog0):
    """Filter a raw log and pair every kept event with the event that follows it.

    Steps:
      1. Drop rows missing sCode, dateTime, eType, funcArea or sDescription.
      2. Remember the first and last remaining rows (taken before filtering).
      3. Keep rows whose funcArea and eType are whitelisted, translate
         status-change descriptions with replace_desp(), keep ERROR rows plus
         rows whose description is in Filter_List_sDescription, and discard
         rows whose sCode is in Unselect_List_sCode.
      4. Join each kept row with the next kept row; the follower's sCode,
         dateTime and sDescription appear with an "SQ" suffix.

    Parameters
    ----------
    tlog0 : pandas.DataFrame
        Raw log with at least the columns sCode, eType, dateTime, funcArea,
        sDescription, sFilename, nSubCode and eCPU.

    Returns
    -------
    pandas.DataFrame
        Empty when no row survives step 1 or when fewer than two rows survive
        step 3. Otherwise the first raw row, the paired rows and the last raw
        row concatenated in that order; the two bookend rows carry NaN in the
        "SQ" columns.
    """
    tlog0 = tlog0.dropna(
        subset=["sCode", "dateTime", "eType", "funcArea", "sDescription"])
    if tlog0.empty:
        # iloc[[0]] / iloc[[-1]] below raise IndexError on an empty frame;
        # callers already treat an empty result as "nothing to insert".
        return tlog0
    first_line = tlog0.iloc[[0]]
    last_line = tlog0.iloc[[-1]]

    # Drop rows that are out of scope for the analysis.
    in_scope = (tlog0.funcArea.isin(Filter_List_funcArea)
                & tlog0.eType.isin(Filter_List_eType))
    # .copy() so the column assignment below writes to an independent frame
    # instead of a slice of tlog0 (avoids SettingWithCopyWarning).
    events = tlog0[in_scope].copy()
    events["sDescription"] = events["sDescription"].map(replace_desp)
    wanted = ((events.eType == "ERROR")
              | events.sDescription.isin(Filter_List_sDescription))
    events = events[wanted]
    events = events[~events.sCode.isin(Unselect_List_sCode)]
    events = events.reset_index(drop=True)

    # Follower side of the pairing: only the columns that get the "SQ" suffix.
    followers = events.drop(
        columns=["eType", "funcArea", "sFilename", "nSubCode", "eCPU"])
    # Shifting the index by one makes row k line up with follower row k + 1 in
    # the index join below.
    shifted = events.copy()
    shifted.index = shifted.index + 1
    logwithsq = pd.merge(shifted, followers, left_index=True,
                         right_index=True, suffixes=("", "SQ"))
    if logwithsq.empty:
        return logwithsq
    return pd.concat([first_line, logwithsq, last_line])

In [None]:
# Directory containing the log files to import. The trailing "/" is required:
# the import loop builds each file path as work_path + filename.
work_path = "D:/DataWork/AllWerfenChinaTop/posgres_source/topsta_generalLog/Data/GeneralLogs/"

In [None]:
# Every entry in work_path is treated as a log file; no filtering by
# extension or entry type happens here.
top_list = os.listdir(work_path)

In [None]:
# One row per filename; the single column gets the default label 0.
df_toplist= pd.DataFrame(top_list)

In [None]:
# Split each filename at its FIRST underscore into serial_num and dt.
# n=1 guarantees at most two result columns; an unbounded split produces a
# third column as soon as one filename contains a second underscore, and the
# two-column assignment then raises ValueError.
df_toplist[["serial_num", "dt"]] = df_toplist[0].str.split("_", n=1, expand=True)

In [None]:
# Give the filename column (default label 0) a descriptive name.
df_toplist = df_toplist.rename(columns={0: "filename"})

In [None]:
# Order the files by serial_num, then by dt, so the import loop visits them
# in that order.
df_toplist = df_toplist.sort_values(by=["serial_num", "dt"])

In [None]:
# Connection settings can be overridden with GENLOG_DB_* environment variables;
# the literals are fallbacks that keep the notebook running exactly as before
# when none of them is set.
# NOTE(review): a password literal in a notebook ends up wherever the notebook
# is shared -- set GENLOG_DB_PASSWORD in the environment and remove the
# fallback once that is in place.
conn = pg.connect(
    dbname=os.environ.get("GENLOG_DB_NAME", "mydb"),
    user=os.environ.get("GENLOG_DB_USER", "sirius"),
    password=os.environ.get("GENLOG_DB_PASSWORD", "biicf"),
    host=os.environ.get("GENLOG_DB_HOST", "localhost"),
)
cur = conn.cursor()

In [None]:
# Import every listed log file into service.genlog_standalone, in the order of
# df_toplist. Nothing is committed here; the commit happens after the loop.

INSERT_GENLOG_SQL = (
    "INSERT INTO service.genlog_standalone "
    "(topsn, scode, etype, datetime, funcarea, sdescription, sfilename, "
    "nsubcode, ecpu, scodesq, sdescriptionsq) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
# DataFrame columns in the order of the SQL column list that follows topsn.
GENLOG_INSERT_COLUMNS = [
    "sCode", "eType", "dateTime", "funcArea", "sDescription",
    "sFilename", "nSubCode", "eCPU", "sCodeSQ", "sDescriptionSQ",
]
# If a file starts more than this long after the newest stored row of the same
# analyzer, the stored rows are deleted and replaced by the file's rows.
MAX_LOG_GAP = pd.Timedelta(days=5)


def _insert_genlog_rows(cursor, serial_num, rows):
    """Insert each row of `rows` into service.genlog_standalone for `serial_num`.

    NOTE(review): missing values (e.g. the "SQ" columns of the first/last
    bookend rows produced by logaddsq) reach the driver as float NaN, not
    None -- confirm that this is what the table expects.
    """
    for _, rw in rows.iterrows():
        values = [rw[col] for col in GENLOG_INSERT_COLUMNS]
        cursor.execute(INSERT_GENLOG_SQL, (serial_num, *values))


for _, row in df_toplist.iterrows():
    serial_num = row["serial_num"]
    df_raw = read_toplog(work_path + row["filename"], colfilter, "utf_16_le")
    # A file is unusable when it has no rows or when "dateTime" did not come
    # back as a (timezone-naive) datetime column. The dtype test accepts any
    # datetime64 resolution rather than only the exact string 'datetime64[ns]'.
    if df_raw.empty or not pd.api.types.is_datetime64_dtype(df_raw["dateTime"]):
        print("************" + row["filename"] + "**************")
        failed_log.append(row["filename"])
        continue

    # Read by column name: the positional index of "dateTime" depends on the
    # column order inside the file, not on the order of `colfilter`.
    log_start_time = df_raw["dateTime"].iloc[0]

    cur.execute(
        "SELECT MAX(datetime) FROM service.genlog_standalone WHERE topsn = %s",
        (serial_num,))
    last_stored = cur.fetchone()[0]  # None when this analyzer has no rows yet

    dfsq = logaddsq(df_raw)
    if dfsq.empty:
        continue

    if last_stored is not None:
        if log_start_time > last_stored + MAX_LOG_GAP:
            # Gap too large to append: replace this analyzer's stored rows.
            cur.execute(
                "DELETE FROM service.genlog_standalone WHERE topsn = %s",
                (serial_num,))
        else:
            # Overlapping or adjacent file: append only rows newer than what
            # is already stored.
            dfsq = dfsq[dfsq["dateTime"] > last_stored]
            if dfsq.empty:
                continue

    _insert_genlog_rows(cur, serial_num, dfsq)

In [None]:
# Persist everything executed on `cur` so far; the notebook issues no commit
# before this point.
conn.commit()

In [None]:
# Release the cursor first, then the database connection it belongs to.
cur.close()
conn.close()

In [None]:
# Write the names of the skipped files to disk for later inspection.
failed_series = pd.Series(failed_log)
failed_series.to_csv(r"D:\DataWork\AllWerfenChinaTop\posgres_source\topsta_generalLog\failed.csv")