### Testing the parsing of some KPIs from a log file.

In [48]:
import pandas as pd
import re
import datetime

def extract_box_name(line):
    """Return the first box-name token found in *line*, or ``None``.

    A box-name token is one or more uppercase ``A-Z`` letters, an
    underscore, and one or more digits (e.g. ``"ABC_123"``).  *line* is
    passed through ``str()`` before searching, so non-string values are
    accepted; they simply yield ``None`` when their text form holds no token.
    """
    found = re.search(r'([A-Z]+_\d+)', str(line))
    return found.group(1) if found is not None else None

def extract_line(line):
    """Return the text that follows the box-name token in *line*, or ``None``.

    The value must contain a box-name token (uppercase ``A-Z`` letters,
    underscore, digits) followed by another underscore and at least one more
    character; everything after that second underscore, up to the end of the
    text line, is returned.  *line* is converted with ``str()`` first, so
    non-string input is accepted.
    """
    found = re.search(r'([A-Z]+_\d+)_(.+)', str(line))
    if found is None:
        return None
    # Group 1 is the box name itself; group 2 is the remainder we want.
    return found.group(2)

# Build a per-(box, line) summary of logging sessions from the survey log:
# first log-on, last log-off, elapsed time between them, and the operator
# recorded on the first log-on.  The summary is written to 'output.xlsx'.

# read data (skiprows=2 skips the first two rows of the 'Survey' sheet)
df = pd.read_excel('Onlinelog.xlsm', sheet_name='Survey', skiprows=2)

# handle date and time: keep only the part of the time text before the first
# '.', then combine it with the calendar date into a single timestamp column
df['Time'] = df['Time'].astype(str).str.split('.').str[0]
df['Date_Time'] = pd.to_datetime(df['Date'].dt.strftime('%Y-%m-%d') + ' ' + df['Time'])

# extract box_name and line from "Run Line/Target" using regex
df['box_name'] = df['Run Line/Target'].apply(extract_box_name)
df['line'] = df['Run Line/Target'].apply(extract_line)

# filter Log on and Log off events
df = df[df['Event'].isin(['Log on', 'Log off', 'Log on DNP'])]

# A row with a missing 'Date' ends up with a NaT timestamp.  NaT sorts last,
# so it would be picked as the "last log off" and make int()/strftime() below
# raise ValueError.  Such rows carry no usable time, so leave them out.
df = df.dropna(subset=['Date_Time'])

# sort the values so that, inside every (box, line) group, the first row is
# the earliest event and the last row is the latest one
df = df.sort_values(['box_name', 'line', 'Date_Time'])

# list to store rows
rows = []

# One pass over the (box_name, line) groups.  groupby skips rows where either
# key is missing, and sort=False keeps the groups in the order established by
# sort_values above, with the rows inside each group still in time order.
for (box_name, line), line_df in df.groupby(['box_name', 'line'], sort=False):
    log_on = line_df[line_df['Event'].isin(['Log on', 'Log on DNP'])]
    log_off = line_df[line_df['Event'] == 'Log off']

    # a session needs at least one log-on and one log-off event
    if log_on.empty or log_off.empty:
        continue

    first_on = log_on.iloc[0]
    last_off = log_off.iloc[-1]

    # whole seconds from the first log-on to the last log-off, rendered by
    # datetime.timedelta (e.g. '1:23:45')
    duration_seconds = int((last_off['Date_Time'] - first_on['Date_Time']).total_seconds())
    duration = str(datetime.timedelta(seconds=duration_seconds))

    # flag groups that contain more than one log-on event
    multiple_runs = f"Multiple runs: {len(log_on)}" if len(log_on) > 1 else ""

    rows.append({
        'box_name': box_name,
        'line': line,
        'operator': first_on['Op.'],
        'start_time': first_on['Date_Time'].strftime('%Y-%m-%d %H:%M:%S'),
        'end_time': last_off['Date_Time'].strftime('%Y-%m-%d %H:%M:%S'),
        'duration': duration,
        'multiple_runs': multiple_runs,
    })

# create dataframe from rows
result = pd.DataFrame(rows)

# save to excel
result.to_excel('output.xlsx', index=False)
