In [None]:
import glob
import pandas as pd

In [None]:
# Root folder holding the reader log files; it is searched recursively for *.txt.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
folder_path = '/home/ketan/repos/ibTrack/VCTPL/LogProcessing/src/reader_logs/log_files'
# glob returns matches in a filesystem-dependent order; sort them so the import
# order (and everything derived from row order) is reproducible between runs.
res = sorted(glob.glob(f"{folder_path}/**/*.txt", recursive=True))
print(res)

In [None]:
def load_file_to_df(filename):
    """Load one reader log file into a DataFrame.

    The file is read without a header row and every line is split on " : "
    into three columns named "Reader_IP", "Tag ID" and "TEMP".

    Parameters
    ----------
    filename : str or path-like
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed file, or None when the file does not exist (a message is
        printed in that case).
    """
    try:
        # A multi-character separator is only supported by the python engine.
        return pd.read_csv(filename,
                           sep=" : ",
                           header=None,
                           names=["Reader_IP", "Tag ID", "TEMP"],
                           engine="python")
    except FileNotFoundError:
        print(f"{filename} not found. Please check the folder selection and try again")
        return None


In [None]:
# Load every discovered file first and concatenate once at the end: calling
# pd.concat inside the loop re-copies all previously loaded rows on each pass.
loaded_frames = []
for file in res:
    print(f"Start importing file {file}")
    df_file = load_file_to_df(file)
    # load_file_to_df returns None when the file is missing; skip those.
    if df_file is not None:
        loaded_frames.append(df_file)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_all_files = (pd.concat(loaded_frames, axis=0, ignore_index=True)
                if loaded_frames else pd.DataFrame())

# info() prints its summary; head() is the last expression so the notebook
# renders it as the cell output.
df_all_files.info()

df_all_files.head()

In [None]:
def df_preprocess(df):
    """Split the raw "TEMP" column into fields and order the readings.

    "TEMP" is split on single spaces into four tokens that are unpacked as
    RSSI, date, time and an AM/PM marker. Date and time are combined into a
    datetime "TimeStamp" column; the helper columns and "TEMP" are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns "Reader_IP", "Tag ID" and "TEMP".
        It is left unmodified.

    Returns
    -------
    pandas.DataFrame
        New frame in which "TEMP" is replaced by "RSSI" (still strings) and
        "TimeStamp", sorted by "Tag ID", "Reader_IP", "TimeStamp" and
        re-indexed 0..n-1.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    df[["RSSI", "Date", "Time", "AMPM"]] = df["TEMP"].str.split(" ", expand=True)
    # NOTE(review): the AM/PM token is dropped and the hour is parsed with %H
    # (24-hour). If the logs use a 12-hour clock, afternoon readings are parsed
    # as morning — confirm the log format.
    df["TimeStamp"] = pd.to_datetime(df["Date"] + " " + df["Time"],
                                     format="%m/%d/%Y %H:%M:%S")
    df = df.drop(columns=["Date", "Time", "AMPM", "TEMP"])

    # sort_values / reset_index return new frames; their results must be kept,
    # otherwise the rows stay in file order.
    return (df.sort_values(by=["Tag ID", "Reader_IP", "TimeStamp"])
              .reset_index(drop=True))

In [None]:
# Run the preprocessing step on the combined frame.
df_processed = df_preprocess(df_all_files)

# info() prints the column/dtype summary. Only the last expression of a cell is
# rendered, so a bare head(...) placed before it would be computed and
# discarded; tail() is the displayed output here.
df_processed.info()

df_processed.tail()

In [None]:
# Print column names, non-null counts and dtypes of the preprocessed frame.
df_processed.info()

In [None]:
# Give the parsed columns their working dtypes: RSSI as float, TimeStamp as
# datetime (pd.to_datetime leaves an already-datetime column unchanged).
df_processed = df_processed.assign(
    RSSI=df_processed["RSSI"].astype(float),
    TimeStamp=pd.to_datetime(df_processed["TimeStamp"]),
)

In [None]:
# Flag the first row of every batch. A new batch starts when the reader or the
# tag differs from the previous row, or when the reading is more than one hour
# apart from the previous row.
reader_changed = df_processed['Reader_IP'] != df_processed['Reader_IP'].shift(1)
tag_changed = df_processed['Tag ID'] != df_processed['Tag ID'].shift(1)
# Compare the magnitude of the gap: (previous - current) is negative for
# chronologically ordered rows and would never exceed the threshold.
time_gap = df_processed['TimeStamp'].diff().abs() > pd.Timedelta(hours=1)
df_processed['batch'] = reader_changed | tag_changed | time_gap

In [None]:
# Preview the first rows, which now include the boolean 'batch' flag column.
df_processed.head()

In [None]:
# Number the batches: each row flagged in 'batch' starts a new batch, so the
# running sum of the flag gives one id per run of consecutive rows belonging to
# the same 'Tag ID' / 'Reader_IP' batch.
# Integer representation of the timestamps; int64 is spelled out because plain
# `int` can resolve to a 32-bit type on some platforms, which datetime64 values
# cannot be cast to.
df_processed['timestamp_int'] = df_processed['TimeStamp'].astype('int64')
df_processed['batch_number'] = df_processed['batch'].cumsum()


In [None]:
# Group the readings per tag, reader and batch for the aggregation step.
group_keys = ["Tag ID", "Reader_IP", "batch_number"]
df_grouped = df_processed.groupby(by=group_keys)

In [None]:
# Per-group aggregations: first and last timestamp, and the maximum RSSI.
agg_functions = dict(
    TimeStamp=['min', 'max'],
    RSSI='max',
)

In [None]:
# Aggregate each group, then turn the group keys back into ordinary columns.
df_aggregated = df_grouped.agg(agg_functions)
result_df = df_aggregated.reset_index()

In [None]:
# Preview the first rows of the aggregated result.
result_df.head()

In [None]:
# result_df['TimeStamp'] = pd.to_datetime(result_df['TimeStamp'])

In [None]:
# Leave the frame as the cell's last expression so the notebook renders it with
# its rich table display instead of plain printed text.
result_df

In [None]:
# Persist the aggregated result. `to_csv` has to be called: referencing the
# attribute alone only shows the bound method and writes nothing.
# NOTE(review): the output location is a placeholder — point it at the desired file.
output_csv_path = "batch_summary.csv"
result_df.to_csv(output_csv_path, index=False)