In [1]:
import pandas as pd
import glob
import os
import datetime
import re
import json

In [11]:
# location folders
# NOTE: folder_importbackup is not referenced by any of the visible cells.
folder_importbackup = r'D:\FEWSProjecten\DMS\ImportBackup\STA'
# Folder with the *.json status files; each one is read for its 'fn' and 'ts' entries.
folder_ftpstatus_status = r'D:\FEWSProjecten\DMS\ftpStatusFiles'
# Root folder below which the STA data files named by 'fn' are looked up.
folder_ftpstatus_data = r'D:\FEWSProjecten\DMS\asFromFtpPrimaryroot\STA'
# Target file of the (currently commented-out) JSON export in the run cell.
file_monitor_out = r'D:\FEWSProjecten\DMS\prtg_out.json'

# Local dummy-data paths; uncomment to override the production folders above.
# folder_ftpstatus_status = r"D:\MyProjects\PR3916_Vincent\Werkmap\Python\Check_Data_Import\dummy_data\ftpStatusFiles"
# folder_ftpstatus_data = r"D:\MyProjects\PR3916_Vincent\Werkmap\Python\Check_Data_Import\dummy_data\asFromFtpPrimaryroot\STA"
# file_monitor_out = r'D:\MyProjects\PR3916_Vincent\Werkmap\Python\prtg_out.json'

In [12]:
# %% Function to read _STA files from ftp and check if data is available by reading STA files from ftp (from Mattijn)
def timestamp_status_data(folder_ftpstatus_status, folder_ftpstatus_data):
    """Report per sensor whether the STA data file flags it with status -1.

    The ``*STA_*.json`` status files in ``folder_ftpstatus_status`` are listed
    and the first and last of them (in sorted order) are processed. From each
    status file the entries ``fn`` (name of the STA data file) and ``ts``
    (timestamp) are read. The data file is then opened below
    ``folder_ftpstatus_data`` in a sub-folder derived from the location in the
    status file name (``FL65_A`` -> ``FL65/A``) and every ``<sensor>_Status``
    column is checked for the value ``-1``.

    Parameters
    ----------
    folder_ftpstatus_status : str
        Folder containing the ``*STA_*.json`` status files.
    folder_ftpstatus_data : str
        Root folder of the STA data files.

    Returns
    -------
    pandas.DataFrame
        One row per sensor and processed status file with the columns
        ``data_sensor``, ``data_available`` (True when the sensor's status
        column contains ``-1`` at least once), ``file_ftpstatus``,
        ``key_ftpstatus`` and ``ts_ftpstatus``. The frame is empty, but has
        these columns, when no status file is found.

    Raises
    ------
    ValueError
        If a status file name does not consist of 4 or 5 ``_``-separated parts,
        so that no location key can be derived from it.
    """
    out_columns = ['data_sensor', 'data_available', 'file_ftpstatus',
                   'key_ftpstatus', 'ts_ftpstatus']
    sensors = ['ETRO_Status', 'ISA_Status', 'YSI_Status',
               'AQD_Status', 'VEC_Status', 'RDI_Status']

    # glob returns files in arbitrary order; sort so "first"/"last" is stable
    status_files = sorted(glob.glob(os.path.join(folder_ftpstatus_status, '*STA_*.json')))
    if not status_files:
        return pd.DataFrame(columns=out_columns)

    # NOTE(review): only the first and last status file are processed, exactly
    # as before; this looks like a debugging subset - confirm whether all files
    # should be read. dict.fromkeys drops the duplicate when only one file exists.
    selected_files = list(dict.fromkeys([status_files[0], status_files[-1]]))

    frames = []
    for status_file in selected_files:
        name_parts = os.path.basename(status_file).split('_')

        if len(name_parts) == 4:
            key_status_loc = name_parts[2]
        elif len(name_parts) == 5:
            key_status_loc = '{}_{}'.format(name_parts[2], name_parts[3])
        else:
            # previously this fell through and reused a stale or undefined key
            raise ValueError(
                'Cannot derive location from status file name: {}'.format(status_file))

        # status file holds the data file name ('fn') and its timestamp ('ts')
        df_status_status = pd.read_json(status_file, typ='series', orient='values')
        status_file_ts = pd.to_datetime(df_status_status['ts'])
        status_data_file = df_status_status['fn']

        # location 'FL65_A' maps to sub-folder FL65/A (platform-independent)
        path_parts = key_status_loc.split('_') + [status_data_file]
        path_status_data_file = os.path.join(folder_ftpstatus_data, *path_parts)

        # rows 0, 2 and 3 are skipped so that row 1 supplies the column names
        df_status_data = pd.read_csv(path_status_data_file, skiprows=[0, 2, 3])

        # presumably -1 marks an active sensor - TODO confirm with data supplier
        s_status_sensors = df_status_data[sensors].isin([-1]).any()
        s_status_sensors.index = [sensor.split('_')[0] for sensor in sensors]

        df_status_sensors = s_status_sensors.to_frame().reset_index()
        df_status_sensors.columns = ['data_sensor', 'data_available']
        df_status_sensors['file_ftpstatus'] = status_file
        df_status_sensors['key_ftpstatus'] = key_status_loc
        df_status_sensors['ts_ftpstatus'] = status_file_ts

        frames.append(df_status_sensors)

    # single concat instead of DataFrame.append in the loop (removed in pandas 2.0)
    return pd.concat(frames)

# %% Rename certain parameters in dataframe

def rename_df(df_status_data_sensor_loc_ts):
    """Translate STA sensor and location names to the names used on the FTP.

    The values in ``data_sensor`` and ``key_ftpstatus`` are replaced according
    to fixed lookup tables and the column ``key_ftpstatus`` is renamed to
    ``location`` so the frame can be merged with the instrument frame later.

    Note that the frame passed in is modified in place; the returned frame is
    an independent copy of that modified frame.
    """
    # STA name -> FTP name, per instrument and per location
    sensor_lookup = {
        'ETRO': 'STB',
        'ISA': 'ALT',
        'YSI': 'MPP',
        'AQD': 'AQD',
        'VEC': 'ADV',
        'RDI': 'ADC',
    }
    location_lookup = {
        'FL65_A': 'FL65A',
        'FL65_B': 'FL65B',
        'FL65_C': 'FL65C',
        'FL69_A': 'FL69A',
        'FL69_B': 'FL69B',
        'FL69_C': 'FL69C',
    }

    frame = df_status_data_sensor_loc_ts
    for column, lookup in (('data_sensor', sensor_lookup),
                           ('key_ftpstatus', location_lookup)):
        frame[column] = frame[column].replace(lookup, regex=True)

    # same column name as in the instrument frame (easier to merge later)
    frame.rename(columns={'key_ftpstatus': 'location'}, inplace=True)

    return frame.copy()

# %% Dataframe of the timestamps of the _INSTR files from the ftp

def timestamp_status_instrument(folder_ftpstatus_status):
    """Collect instrument, location and timestamp from the status files.

    Every ``*.json`` file in ``folder_ftpstatus_status`` is read for its
    entries ``fn`` (file name) and ``ts`` (timestamp). The instrument is taken
    from the first three characters of ``fn`` and the location from the first
    ``FL...`` token in ``fn`` that is followed by an underscore.

    Parameters
    ----------
    folder_ftpstatus_status : str
        Folder containing the status files.

    Returns
    -------
    pandas.DataFrame
        Columns ``data_sensor``, ``location`` and ``ts``, one row per status
        file whose ``fn`` contains a location token. Status files without such
        a token are skipped. The frame is empty, but has these columns, when
        nothing qualifies.
    """
    ftp_status = []
    # sorted for a reproducible row order; os.listdir order is arbitrary
    for file_name in sorted(os.listdir(folder_ftpstatus_status)):
        if not file_name.endswith('.json'):
            continue

        # read json file where fn = filename and ts = timestamp
        ftp = pd.read_json(os.path.join(folder_ftpstatus_status, file_name),
                           typ='series', orient='values')

        location_match = re.search(r'(FL.*?)_', ftp['fn'])
        if location_match is None:
            # no location in the file name: not an instrument status file
            continue

        ftp_status.append({
            'data_sensor': ftp['fn'][0:3],
            'location': location_match.group(1),
            'ts': pd.to_datetime(ftp['ts']),
        })

    if not ftp_status:
        # keep the schema (including a datetime 'ts') so later merges still work
        return pd.DataFrame({
            'data_sensor': pd.Series(dtype=object),
            'location': pd.Series(dtype=object),
            'ts': pd.Series(dtype='datetime64[ns]'),
        })

    # Create dataframe from the records with instrument, location, timestamp
    return pd.DataFrame(ftp_status, columns=['data_sensor', 'location', 'ts'])

# %% Merge dataframes and compute timedelta of _STA and _INSTR

# column ts_ftpstatus originates from the timestamp of the status file of the STA file
# column ts originates from the timestamp of the status file of the instrument & location combination

def compute_timedelta(df_status, df_ftp):
    """Join STA status rows with instrument rows and compute their time lag.

    Parameters
    ----------
    df_status : pandas.DataFrame
        Needs the columns ``data_sensor``, ``location`` and ``ts_ftpstatus``.
    df_ftp : pandas.DataFrame
        Needs the columns ``data_sensor``, ``location`` and ``ts``.

    Returns
    -------
    pandas.DataFrame
        Inner join of both frames on sensor and location, with the extra
        column ``ts_delta`` = ``ts_ftpstatus`` - ``ts``.
    """
    # Both frames describe a sensor at a location, so join on the pair.
    # (Joining data_sensor against location never matched and produced
    # suffixed *_x / *_y columns.)
    df = df_status.merge(right=df_ftp, on=['data_sensor', 'location'])

    # Compute timedelta between status import and data import
    df['ts_delta'] = df['ts_ftpstatus'] - df['ts']

    return df
            
# %% Build the PRTG messages: flag location/sensor pairs where the sensor is active but the timestamp of the data import is more than 2 hours behind the timestamp of the status import
            
def error_message(df, max_delay_hours=2):
    """Build the per-channel PRTG result table from the merged status frame.

    A row is flagged when ``data_available`` is True and ``ts_delta`` exceeds
    ``max_delay_hours``. Flagged rows get value 2 and a warning text; all
    other rows get value 0 and the default text.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``data_sensor``, ``location``, ``data_available``,
        ``ts_delta`` and ``ts_ftpstatus`` (datetime). The frame is not modified.
    max_delay_hours : float, optional
        Allowed lag in hours before a row is flagged. Defaults to 2.

    Returns
    -------
    pandas.DataFrame
        Columns ``channel``, ``value``, ``limitmaxerror``, ``limitmode`` and
        ``limiterrormsg``, one row per row of ``df``.
    """
    # Check if timedelta is more than the allowed delay and data is available
    is_late = (df['data_available'] == True) & (df['ts_delta'] > pd.Timedelta(hours=max_delay_hours))

    alert_text = ('Melding: Aanlevering data op ftp van ' + df['data_sensor']
                  + ' op locatie ' + df['location']
                  + ' is zorgelijk rond ' + df['ts_ftpstatus'].dt.strftime('%Y-%m-%d %H:%M'))

    # work on a copy so the caller's frame does not silently gain columns
    df_msg = df.copy()

    # add columns for prtg
    df_msg['channel'] = df_msg['location'] + ' - ' + df_msg['data_sensor']
    df_msg['value_error'] = 0
    df_msg.loc[is_late, 'value_error'] = 2
    df_msg['limitmaxerror'] = 1
    df_msg['limitmode'] = 1
    df_msg['limiterrormsg'] = alert_text.where(is_late, 'Actieve sensor levert data op de FTP')

    # filter columns to output; PRTG expects the column to be called 'value'
    df_msg_out = df_msg[['channel', 'value_error', 'limitmaxerror', 'limitmode', 'limiterrormsg']]
    return df_msg_out.rename(columns={'value_error': 'value'})

In [10]:
# %% Run functions and save result to file

# 1) sensor availability per location from the STA status/data files
df_status_data_sensor_loc_ts = timestamp_status_data(folder_ftpstatus_status, folder_ftpstatus_data)
# 2) translate STA sensor/location names to the FTP names
df_status = rename_df(df_status_data_sensor_loc_ts)
# 3) timestamps of the instrument status files
df_ftp = timestamp_status_instrument(folder_ftpstatus_status)
# 4) merge on the renamed frame returned by rename_df, instead of relying on
#    rename_df having modified df_status_data_sensor_loc_ts in place
df = compute_timedelta(df_status, df_ftp)
# 5) PRTG result table
df_msg_out = error_message(df)

# # save to file
# error_out = {"prtg":{"result": df_msg_out.to_dict(orient='records')}}
# with open(file_monitor_out, 'w') as f:
#     print(json.dumps(error_out), file=f)  # Python 3.x

In [7]:
# Show the merged frame that compute_timedelta produced in the run cell
df

Unnamed: 0,data_sensor,data_available,file_ftpstatus,location,ts_ftpstatus,ts,ts_delta
0,ALT,True,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL65_...,FL65A,2019-05-23 12:59:58,2019-05-23 12:19:58,0 days 00:40:00
1,AQD,False,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL65_...,FL65A,2019-05-23 12:59:58,2019-02-11 15:52:15,100 days 21:07:43
2,ADV,True,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL65_...,FL65A,2019-05-23 12:59:58,2019-05-23 13:00:16,-1 days +23:59:42
3,STB,True,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL70_...,FL70,2019-05-23 12:09:55,2019-05-23 13:10:15,-1 days +22:59:40
4,MPP,False,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL70_...,FL70,2019-05-23 12:09:55,2019-02-13 11:01:30,99 days 01:08:25
5,AQD,False,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL70_...,FL70,2019-05-23 12:09:55,2019-02-13 11:01:23,99 days 01:08:32
6,ADV,False,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL70_...,FL70,2019-05-23 12:09:55,2019-04-06 23:10:04,46 days 12:59:51
7,ADC,True,D:\FEWSProjecten\DMS\ftpStatusFiles\_STA_FL70_...,FL70,2019-05-23 12:09:55,2019-05-23 12:39:57,-1 days +23:29:58
