In [1]:
import glob
import pandas as pd

In [2]:
folder_path = '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files'
res = glob.glob(f"{folder_path}/**/*.txt", recursive=True)
print(res)

['/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane3/Rfid_Reader_Data_03-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane5/Rfid_Reader_Data_03-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane2/Rfid_Reader_Data_03-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane1/Rfid_Reader_Data_03-02-2024.txt', '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane4/Rfid_Reader_Data_03-02-2024.txt']


In [3]:
def load_file_to_df(filename):
    """This function takes a file name as an input and loads it into a dataframe
    and returns a dataframe"""
    try:
        df_file = pd.read_csv(filename,
                              sep=" : ",
                              header=None,
                              names=["Reader_IP", "Tag ID", "TEMP"], engine="python")
    except FileNotFoundError as fnfe:
        df_file = None
        print(f"{filename} not found. Please check the folder selection and try again")

    if df_file is not None:
        df_file.head()

    return df_file


In [5]:
df_all_files = pd.DataFrame()
for file in res:
    print(f"Start importing file {file}")
    df_all_files = pd.concat([df_all_files, load_file_to_df(file)],
                             axis=0,
                             ignore_index=True)


Start importing file /home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane3/Rfid_Reader_Data_03-02-2024.txt
Start importing file /home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane5/Rfid_Reader_Data_03-02-2024.txt
Start importing file /home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane2/Rfid_Reader_Data_03-02-2024.txt
Start importing file /home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane1/Rfid_Reader_Data_03-02-2024.txt
Start importing file /home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files/Lane4/Rfid_Reader_Data_03-02-2024.txt


df_all_files.head()

df_all_files.info()

In [6]:
def df_preprocess(df):
    # processing the dataframe to get the relevant data in the format that can be processed
    df[["RSSI", "Date", "Time", "AMPM"]] = df["TEMP"].str.split(" ", expand=True)
    df["TimeStamp"] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format="%m/%d/%Y %H:%M:%S")
    df = df.drop(labels=["Date", "Time", "AMPM", "TEMP"], axis=1)
    
    df.sort_values(by=["Tag ID", "Reader_IP", "TimeStamp"])
    df.reset_index()

    # df_by_group = df.groupby(by=["Tag ID", "Reader_IP"], sort=True, group_keys=True)
    # df_by_group = df.groupby(by=["Tag ID", "Reader_IP"], sort=True, group_keys=True)

    # return df_by_group
    return df

In [7]:
df_processed = df_preprocess(df_all_files)

df_processed.info()

df_processed.head(150)

df_processed.tail()

In [8]:
df_processed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728340 entries, 0 to 728339
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   Reader_IP  728340 non-null  object        
 1   Tag ID     728340 non-null  object        
 2   RSSI       728340 non-null  object        
 3   TimeStamp  728340 non-null  datetime64[ns]
dtypes: datetime64[ns](1), object(3)
memory usage: 22.2+ MB


In [9]:
df_processed["RSSI"] = df_processed["RSSI"].astype(float)
df_processed['TimeStamp'] = pd.to_datetime(df_processed['TimeStamp'])

In [10]:
df_processed['batch'] = (df_processed['Reader_IP'] != df_processed['Reader_IP'].shift(1)) | (df_processed['Tag ID'] != df_processed['Tag ID'].shift(1)) | (df_processed['TimeStamp'].shift(1) - df_processed['TimeStamp'] > pd.Timedelta(1,'h'))

In [17]:
df_processed.head()

Unnamed: 0,Reader_IP,Tag ID,RSSI,TimeStamp,batch
0,192.168.15.232,34161FA820328EE8092E7020,61.6,2024-02-03 00:00:05,True
1,192.168.15.232,34161FA820328EE8092E7020,60.8,2024-02-03 00:00:05,False
2,192.168.15.232,34161FA820328EE8092E7020,60.0,2024-02-03 00:00:05,False
3,192.168.15.232,34161FA820328EE8092E7020,62.4,2024-02-03 00:00:05,False
4,192.168.15.232,34161FA820328EE8092E7020,64.0,2024-02-03 00:00:05,False


In [16]:
# Create a batch number for each set of consecutive rows with the same 'sensor_id' and 'reader_id'

df_processed['batch_number'] = df_processed.groupby(['Tag ID', 'Reader_IP', 'batch']).cumsum()

TypeError: datetime64 type does not support cumsum operations

In [None]:
df_grouped = df_processed.groupby(["Tag ID", "Reader_IP", "batch_number"])

In [None]:
agg_functions = {
    'TimeStamp': ['min', 'max'],
    'RSSI': 'max'
}

In [None]:
result_df = df_grouped.agg(agg_functions).reset_index()

In [None]:
print(result_df)

In [None]:
result_df.to_csv