In [546]:
import numpy as np
import pandas as pd

print('[INFO] numpy version:', np.version.full_version)
print('[INFO] pandas version:', pd.__version__)

numpy version: 1.23.3
pandas version: 1.5.0


In [548]:
# Settings
pd.set_option('display.max_columns', None)

In [588]:
# import the necessary packages
import argparse

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--file", type=str, default="./data/detections023 2.txt", help="path to input file")
ap.add_argument("-o", "--output", type=str, required=False, help="output file (must include full path)")

args = vars(ap.parse_args(""))

In [589]:
def get_input_file_path():
    print("[INFO] Get input/output parameters")
    
    from pathlib import Path

    file_path = args['file']
    path = Path(file_path)

    if path.is_file() == False:
        raise Exception("File does not exist!")

    print(f"[INFO] File: {file_path}")

    filename = path.stem
    suffix = path.suffix
    dir = path.parent

    return {
        'full_file_path': file_path,
        'filename': filename,
        'suffix': suffix,
        'path': dir
    }


In [590]:
def format_date_time():
    print('[INFO] Converting Date/Time to EPOCH')
    
    df['Date-epoch'] = (pd.to_datetime(df["Date"] + " " + df["Time"]).astype('int64') // 1e6).astype('int')
    
def add_markers_length():
    df['markers-length'] = df['Markers'].str.len()
    
def split_markers():
    print('split')
    
    # count number of max markers
    biggest_markers = df.sort_values(by=['markers-length'], inplace=False, ascending=False)[0]['Markers']
    
    print(biggest_markers)

def formar_local_manager():
    df['Analyzer-Instance'] = df['Analyzer-Instance'].str.replace('.*_', '', regex=True)
    
def split_markers(df):
    print('[INFO] Splitting "Markers" in fields')
    
    mm = df['Markers'].str.strip(to_strip='()').str.split('\)\(', expand=True)

    df1 = df
    
    for col in mm.columns:
        cols_marker = mm[col].str.strip(to_strip=']').str.split('\[|,', expand=True, regex=True).rename(columns={0:f'Marker-{col}', 1:f'Marker-{col}-X', 2:f'Marker-{col}-Y'})

        df1 = df1.join(cols_marker)
        
    return df1

def save_to_csv(full_file_path, filename, suffix, path):
    
    output = args['output']
    if(output is None):
        output = f'{path}/{filename}.csv'

    print('[INFO] Saving CSV in', output)

    selected_columns = df.columns.to_series().filter(regex='^(?!Field).*$')

    df[selected_columns].to_csv(output)    
        

In [592]:
try:
    print('[INFO] Starting parsing file')
    
    fields = ["Analyzer-Instance", "Field-2", "Date", "Time", "Field-5", "Field-6", "Field-7", "Field-8",
              "Field-9", "Field-10", "Field-11", "Field-12", "Field-13", "Field-14", "JobId", 
              "Markers", "Field-17", "Field-18"]
    
    input_file_path = get_input_file_path()
    
    df = pd.read_csv(input_file_path['full_file_path'], sep='\s+', header=None, index_col=None, names=fields)

    # Format fields
    format_date_time()
    formar_local_manager()    
    df = split_markers(df)
    save_to_csv(**input_file_path)
except Exception as inst:
    print("[ERROR]", inst)

[INFO] Starting parsing file
[INFO] Get input/output parameters
[INFO] File: ./data/detections023 2.txt
[INFO] Converting Date/Time to EPOCH
[INFO] Splitting "Markers" in fields
[INFO] Saving CSV in data/detections023 2.csv


In [535]:
# df.sort_values(by=['markers-length'], inplace=False, ascending=False)
df

Unnamed: 0,Analyzer-Instance,Field-2,Date,Time,Field-5,Field-6,Field-7,Field-8,Field-9,Field-10,Field-11,Field-12,Field-13,Field-14,JobId,Markers,Field-17,Field-18,Date-epoch,Marker-0,Marker-0-X,Marker-0-Y,Marker-1,Marker-1-X,Marker-1-Y,Marker-2,Marker-2-X,Marker-2-Y,Marker-3,Marker-3-X,Marker-3-Y
0,1,|[0m,2023-01-03,11:46:38.223,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,9cd010af-5dd7-4a7a-8ca2-2d74b8d3fa15:,"(33126453626[1765,824])",,,1672746398223,33126453626,1765,824,,,,,,,,,
1,1,|[0m,2023-01-03,11:46:42.549,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,831450b6-0be9-4ba5-9731-816859dd1b7b:,"(33126453626[1765,826])",,,1672746402548,33126453626,1765,826,,,,,,,,,
2,1,|[0m,2023-01-03,11:46:59.571,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,0de9fc2d-ca20-433f-9a1b-570671901a70:,"(33126453261[2476,1537])(33126453262[3670,1209])",,,1672746419571,33126453261,2476,1537,33126453262,3670,1209,,,,,,
3,1,|[0m,2023-01-03,11:47:08.072,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,30d0c8de-c095-43dd-831e-4a31f0e3710d:,"(33126454632[18,601])(33126453126[607,1165])(3...",,,1672746428072,33126454632,18,601,33126453126,607,1165,33126454312,2918,1469,,,
4,1,|[0m,2023-01-03,11:47:12.425,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,1d6ebb81-40a1-4a2d-bb89-752dcdb956ee:,"(33126454632[18,602])(33126453126[606,1168])(3...",,,1672746432424,33126454632,18,602,33126453126,606,1168,33126454312,2918,1469,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69912,4,|[0m,2023-01-09,09:33:10.004,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,fc667d97-6991-4d9b-9594-7995154c7951:,"(33126454632[22,603])(33126453126[608,1168])(3...",,,1673256790004,33126454632,22,603,33126453126,608,1168,33126454312,2921,1466,,,
69913,4,|[0m,2023-01-09,09:33:29.858,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,c7e661e9-57b6-4832-99af-45f39d0e9487:,"(33126453626[1768,825])",,,1673256809857,33126453626,1768,825,,,,,,,,,
69914,4,|[0m,2023-01-09,09:33:55.520,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,77baab7e-7f50-4ab6-a3d4-81772755f551:,"(33126454632[21,604])(33126453126[608,1170])(3...",,,1673256835520,33126454632,21,604,33126453126,608,1170,33126454312,2919,1469,,,
69915,4,|[0m,2023-01-09,09:34:14.421,INFO,1,---,[,main],c.z.a.worker.service.WorkerService,:,Results,for,job,eb948fd6-9fb2-4242-8ab4-9f5e1e7347da:,"(33126453626[1769,825])",,,1673256854420,33126453626,1769,825,,,,,,,,,
