In [25]:
import glob
import pandas as pd
from datetime import datetime
from datetime import timedelta
import csv

# Largest allowed jump between a group's latest timestamp and the next read;
# process() and process2() close the current group and start a new one when
# a read arrives more than this long after the group's latest timestamp.
TIME_GAP = timedelta(minutes=30)

In [26]:
def load_file_to_df(filename):
    """Load one reader log file into a DataFrame.

    Every line of the file is split on the " : " separator into three
    columns named "Reader_IP", "Tag ID" and "TEMP". The file is read
    without a header row.

    Args:
        filename: Path of the log file to read.

    Returns:
        The loaded DataFrame, or None when the file does not exist (a
        message is printed in that case instead of raising).
    """
    try:
        # A multi-character separator is only supported by the python engine.
        return pd.read_csv(
            filename,
            sep=" : ",
            header=None,
            names=["Reader_IP", "Tag ID", "TEMP"],
            engine="python",
        )
    except FileNotFoundError:
        print(f"{filename} not found. Please check the folder selection and try again")
        return None


In [27]:
def process(df, time_gap=None):
    """Collapse consecutive reads into one summary row per tag/reader visit.

    Rows are walked in order, so ``df`` is expected to be sorted by
    "Tag ID", "Reader_IP" and "TimeStamp". A new group starts whenever
    the tag changes, the reader changes, or a read arrives more than
    ``time_gap`` after the latest timestamp of the current group.

    Args:
        df: DataFrame with the columns "Tag ID", "Reader_IP", "TimeStamp"
            and "RSSI" (RSSI must already be numeric).
        time_gap: Largest allowed jump between the group's latest
            timestamp and the next read. Defaults to the module-level
            TIME_GAP.

    Returns:
        A list of ``[tag_id, reader_id, min_timestamp, max_timestamp,
        max_rssi]`` lists, one per group, in the order encountered. An
        empty frame yields an empty list.
    """
    if time_gap is None:
        time_gap = TIME_GAP

    tag_reader_group = []

    # State of the group currently being accumulated.
    group_open = False
    tag_id = None
    reader_id = None
    min_timestamp = None
    max_timestamp = None
    max_rssi = None

    for _, row in df.iterrows():
        timestamp = row["TimeStamp"]
        rssi = row["RSSI"]

        same_group = (
            group_open
            and row["Tag ID"] == tag_id
            and row["Reader_IP"] == reader_id
            and timestamp - max_timestamp <= time_gap
        )

        if same_group:
            if timestamp < min_timestamp:
                min_timestamp = timestamp
            if timestamp > max_timestamp:
                max_timestamp = timestamp
            if rssi > max_rssi:
                max_rssi = rssi
            continue

        # Close the previous group (nothing to close before the first row).
        if group_open:
            tag_reader_group.append(
                [tag_id, reader_id, min_timestamp, max_timestamp, max_rssi]
            )

        # Seed the new group from this row. The RSSI starts from the row's
        # own value rather than 0.0 so that negative readings are kept.
        group_open = True
        tag_id = row["Tag ID"]
        reader_id = row["Reader_IP"]
        min_timestamp = timestamp
        max_timestamp = timestamp
        max_rssi = rssi

    # The last group is still open when the loop ends; emit it too.
    if group_open:
        tag_reader_group.append(
            [tag_id, reader_id, min_timestamp, max_timestamp, max_rssi]
        )

    print(f"Total unique rows found: {len(tag_reader_group)}")
    return tag_reader_group

In [28]:
def process2(df, time_gap=None):
    """Collapse consecutive reads into one summary row per tag visit.

    Variant of the per-reader grouping that ignores reader changes: rows
    are walked in order and a new group starts only when the tag changes
    or a read arrives more than ``time_gap`` after the latest timestamp
    of the current group. ``df`` is expected to be sorted so that all
    reads of a tag are contiguous and ordered by time.

    Args:
        df: DataFrame with the columns "Tag ID", "Reader_IP", "TimeStamp"
            and "RSSI" (RSSI must already be numeric).
        time_gap: Largest allowed jump between the group's latest
            timestamp and the next read. Defaults to the module-level
            TIME_GAP.

    Returns:
        A list of ``[tag_id, reader_id, min_timestamp, max_timestamp,
        max_rssi]`` lists, one per group, in the order encountered.
        ``reader_id`` is the reader of the first read in the group. An
        empty frame yields an empty list.
    """
    if time_gap is None:
        time_gap = TIME_GAP

    tag_reader_group = []

    # State of the group currently being accumulated.
    group_open = False
    tag_id = None
    reader_id = None
    min_timestamp = None
    max_timestamp = None
    max_rssi = None

    for _, row in df.iterrows():
        timestamp = row["TimeStamp"]
        rssi = row["RSSI"]

        same_group = (
            group_open
            and row["Tag ID"] == tag_id
            and timestamp - max_timestamp <= time_gap
        )

        if same_group:
            if timestamp < min_timestamp:
                min_timestamp = timestamp
            if timestamp > max_timestamp:
                max_timestamp = timestamp
            if rssi > max_rssi:
                max_rssi = rssi
            continue

        # Close the previous group (nothing to close before the first row).
        if group_open:
            tag_reader_group.append(
                [tag_id, reader_id, min_timestamp, max_timestamp, max_rssi]
            )

        # Seed the new group from this row. The RSSI starts from the row's
        # own value rather than 0.0 so that negative readings are kept.
        group_open = True
        tag_id = row["Tag ID"]
        reader_id = row["Reader_IP"]
        min_timestamp = timestamp
        max_timestamp = timestamp
        max_rssi = rssi

    # The last group is still open when the loop ends; emit it too.
    if group_open:
        tag_reader_group.append(
            [tag_id, reader_id, min_timestamp, max_timestamp, max_rssi]
        )

    print(f"Total unique rows found: {len(tag_reader_group)}")
    return tag_reader_group

In [29]:
def df_preprocess(df):
    """Turn the raw three-column log frame into a sorted, typed-timestamp frame.

    The "TEMP" column is split on single spaces into four tokens (RSSI,
    date, time and an AM/PM marker). Date and time are combined and
    parsed with the format "%m/%d/%Y %H:%M:%S" into a "TimeStamp"
    column; the helper columns and "TEMP" are then dropped.

    Args:
        df: DataFrame with the columns "Reader_IP", "Tag ID" and "TEMP".
            It is not modified.

    Returns:
        A new DataFrame with the columns "Reader_IP", "Tag ID", "RSSI"
        (still a string) and "TimeStamp", sorted by "Tag ID",
        "Reader_IP" and "TimeStamp" with a fresh 0..n-1 index.
    """
    # Work on a copy so the caller's frame does not gain helper columns.
    df = df.copy()

    df[["RSSI", "Date", "Time", "AMPM"]] = df["TEMP"].str.split(" ", expand=True)
    # NOTE(review): the AM/PM token is discarded and the time is parsed as a
    # 24-hour clock (%H) -- confirm the readers really log 24-hour times.
    df["TimeStamp"] = pd.to_datetime(
        df["Date"] + " " + df["Time"], format="%m/%d/%Y %H:%M:%S"
    )
    df = df.drop(labels=["Date", "Time", "AMPM", "TEMP"], axis=1)

    # sort_values / reset_index return new frames; keep the results.
    df = df.sort_values(by=["Tag ID", "Reader_IP", "TimeStamp"])
    df = df.reset_index(drop=True)

    return df

In [31]:
# Root folder that is searched recursively for reader log files (*.txt).
folder_path = '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/'
# Summary CSV, written relative to the current working directory.
output_file = "results-7Feb.csv"

res = glob.glob(f"{folder_path}/**/*.txt", recursive=True)
print(res)

# Load every file first and concatenate once; files that could not be
# loaded (load_file_to_df returned None) are skipped.
frames = []
for file in res:
    print(f"Start importing file {file}")
    df_file = load_file_to_df(file)
    if df_file is not None:
        frames.append(df_file)

df_all_files = (
    pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
)

if df_all_files.empty:
    # Without any rows there is no "TEMP" column to preprocess.
    print(f"No log data found under {folder_path}; nothing to process")
else:
    df_processed = df_preprocess(df_all_files)
    df_processed.info()
    df_processed["RSSI"] = df_processed["RSSI"].astype(float)
    # process() walks the rows in order, so they must be sorted by group keys.
    df_processed = df_processed.sort_values(["Tag ID", "Reader_IP", "TimeStamp"])

    result_list = process(df_processed)

    fields = ["Tag ID", "Reader IP", "Min Timestamp", "Max Timestamp", "Max RSSI"]
    # newline="" lets the csv module control line endings itself.
    with open(output_file, 'w', newline="") as f:
        write = csv.writer(f, delimiter=',',
                           quotechar='|', quoting=csv.QUOTE_MINIMAL, escapechar="\\")
        write.writerow(fields)
        write.writerows(result_list)

    print("Processing completed")
    print(f"filename: {output_file}")


['/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane3/Rfid_Reader_Data_06-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane3/Rfid_Reader_Data_07-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane5/Rfid_Reader_Data_06-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane5/Rfid_Reader_Data_07-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane2/Rfid_Reader_Data_06-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane2/Rfid_Reader_Data_07-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane1/Rfid_Reader_Data_06-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/7Feb/toProcess/Lane1/Rfid_Reader_Data_07-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/read