In [182]:
#Importing the required Libraries

import os
import pandas as pd # type: ignore
from datetime import datetime

In [183]:
# Column headers shared by the report CSV
columns = [
    'Start Time',
    'End Time',
    'Duration',
    'Connection Status',
]
log_df = pd.DataFrame(columns=columns)  # Empty frame that carries only the header row

In [184]:
# Write a header-only CSV so the report file exists before the analysis runs

output_file = 'Log-Analysis-Report.csv'
log_df.to_csv(path_or_buf=output_file, index=False)

In [185]:
# Function to analyze connection and disconnection times
def loganalysis(et, st):
    """Pair connection and disconnection timestamps into intervals.

    Each end time (in the order given) closes the earliest still-unused start
    time that precedes it, producing one "Connected" row. If any start times
    remain after that end time, the earliest of them marks the reconnection and
    produces one "Disconnected" row. A synthetic end time at 23:59:59 of the
    last end time's day is processed last so that a connection still open at
    the end of the logs is closed.

    Parameters
    ----------
    et : list of datetime
        Disconnection timestamps. The list is not modified.
    st : list of datetime
        Connection timestamps. The list is not modified.

    Returns
    -------
    tuple of (list, list)
        ``(connection_times, no_connection_times)``; every row is
        ``[start, end, end - start, status]`` with status ``"Connected"`` or
        ``"Disconnected"`` respectively. Both lists are empty when both inputs
        are empty.
    """
    # Work on copies: appending the sentinel to the caller's list would make
    # a second call on the same data see an extra end time.
    remaining_starts = list(st)
    ends = list(et)

    if not ends and not remaining_starts:
        return [], []

    # Sentinel end time: end of the day of the last disconnection, or of the
    # latest connection when no disconnection was logged at all.
    last_seen = ends[-1] if ends else max(remaining_starts)
    ends.append(datetime(last_seen.year, last_seen.month, last_seen.day, 23, 59, 59))

    connection_times = []  # Rows describing connected intervals
    no_connection_times = []  # Rows describing disconnected intervals

    for end_time in ends:
        # Split the unused start times around this end time in a single pass
        earlier = [s for s in remaining_starts if s < end_time]
        later = [s for s in remaining_starts if not s < end_time]
        remaining_starts = later  # Everything before end_time is now used up

        # No start before this end time (e.g. the log finishes on a
        # disconnection): there is no connection to close, so emit no row
        # instead of failing on min() of an empty list.
        if earlier:
            connected_from = min(earlier)
            connection_times.append(
                [connected_from, end_time, end_time - connected_from, "Connected"]
            )

        # A later start time means the link came back: record the gap
        if later:
            reconnected_at = min(later)
            no_connection_times.append(
                [end_time, reconnected_at, reconnected_at - end_time, "Disconnected"]
            )

    return connection_times, no_connection_times

In [186]:
# Function to merge connection and no connection dataframes and sort them
def dfmerging(connection_times, no_connection_times):
    """Combine connected and disconnected interval rows into one sorted table.

    Parameters
    ----------
    connection_times : list of list
        Rows of ``[start, end, duration, status]`` for connected intervals.
    no_connection_times : list of list
        Rows of the same layout for disconnected intervals.

    Returns
    -------
    pandas.DataFrame
        Columns 'Start Time', 'End Time', 'Duration' and 'Connection Status',
        sorted by 'Start Time', with zero-length rows (start equal to end)
        removed and a fresh 0..n-1 index.
    """
    # Use the same headers as the report CSV and the downstream groupby
    columns = ['Start Time', 'End Time', 'Duration', 'Connection Status']

    # Build a single frame from all rows; concatenating with an empty frame
    # would degrade the datetime/timedelta dtypes.
    records = list(connection_times) + list(no_connection_times)
    combined_df = pd.DataFrame(records, columns=columns)

    # Drop zero-length intervals, order chronologically, renumber the rows.
    # Chaining returns new frames instead of mutating a filtered slice in place.
    final_csv = (
        combined_df[combined_df['Start Time'] != combined_df['End Time']]
        .sort_values(by='Start Time', kind='stable')
        .reset_index(drop=True)
    )

    return final_csv

In [187]:
# Where the daily log files live
log_dir = r'D:/WHMS/UBANLOG/'
dateformat = "%Y-%m-%d %H:%M:%S"  # Timestamp layout used inside the logs

st = []  # Timestamps taken from lines containing "Connected"
et = []  # Timestamps taken from lines containing "Disconnected"

# Visit the log of every day of December 2019 in calendar order
for i in range(1, 32):
    file_name = f"OperaWater_Edge.log.2019-12-{i:02d}.txt"
    file_path = os.path.join(log_dir, file_name)

    if os.path.exists(file_path):
        with open(file_path, 'r') as logfile:
            for line in logfile:
                # Characters 1-19 of a matching line are parsed as its timestamp
                if "Connected" in line:
                    start_time = line[1:20]
                    st.append(datetime.strptime(start_time, dateformat))

                if "Disconnected" in line:
                    end_time = line[1:20]
                    et.append(datetime.strptime(end_time, dateformat))
    else:
        # A missing day is reported and skipped
        print(f'File {file_name} not found')

# Turn the raw events into intervals, then into one sorted table
ct, nct = loganalysis(et, st)
result = dfmerging(ct, nct)

File OperaWater_Edge.log.2019-12-01.txt not found
File OperaWater_Edge.log.2019-12-02.txt not found
File OperaWater_Edge.log.2019-12-03.txt not found
File OperaWater_Edge.log.2019-12-04.txt not found
File OperaWater_Edge.log.2019-12-08.txt not found
File OperaWater_Edge.log.2019-12-09.txt not found
File OperaWater_Edge.log.2019-12-10.txt not found
File OperaWater_Edge.log.2019-12-11.txt not found
File OperaWater_Edge.log.2019-12-12.txt not found
File OperaWater_Edge.log.2019-12-13.txt not found
File OperaWater_Edge.log.2019-12-14.txt not found
File OperaWater_Edge.log.2019-12-15.txt not found
File OperaWater_Edge.log.2019-12-16.txt not found
File OperaWater_Edge.log.2019-12-17.txt not found
File OperaWater_Edge.log.2019-12-18.txt not found
File OperaWater_Edge.log.2019-12-19.txt not found
File OperaWater_Edge.log.2019-12-20.txt not found
File OperaWater_Edge.log.2019-12-21.txt not found
File OperaWater_Edge.log.2019-12-22.txt not found
File OperaWater_Edge.log.2019-12-23.txt not found


In [188]:
result  # Display the merged DataFrame returned by dfmerging

Unnamed: 0,Start Time,End Time,Duration,Connection Status
0,2024-10-05 00:00:00,2024-10-05 04:00:20,0 days 04:00:20,Connected
1,2024-10-05 04:00:20,2024-10-05 06:00:00,0 days 01:59:40,Disconnected
2,2024-10-05 06:00:00,2024-10-05 13:00:50,0 days 07:00:50,Connected
3,2024-10-05 13:00:50,2024-10-05 15:00:00,0 days 01:59:10,Disconnected
4,2024-10-05 15:00:00,2024-10-06 04:15:45,0 days 13:15:45,Connected
5,2024-10-06 04:15:45,2024-10-06 06:00:00,0 days 01:44:15,Disconnected
6,2024-10-06 06:00:00,2024-10-06 16:00:00,0 days 10:00:00,Connected
7,2024-10-06 16:00:00,2024-10-06 18:00:10,0 days 02:00:10,Disconnected
8,2024-10-06 18:00:10,2024-10-07 04:05:30,0 days 10:05:20,Connected
9,2024-10-07 04:05:30,2024-10-07 06:00:00,0 days 01:54:30,Disconnected


In [189]:
# Persist the result DataFrame to the report CSV, without the index column
result.to_csv(path_or_buf=output_file, index=False)

In [190]:
# Basic analysis: total duration per connection status across the analysed logs
(
    result
    .groupby('Connection Status')['Duration']
    .sum()
)

Connection Status
Connected      2 days 12:52:34
Disconnected   0 days 11:07:25
Name: Duration, dtype: timedelta64[ns]