In [27]:
import pandas as pd
from datetime import datetime
import os

In [37]:
def parse_log(log_path):
    with open(log_path, 'r') as file:
        logs = file.readlines()

    data = []
    current_model = {}

    for log in logs:
        parts = log.split(' - ')
        timestamp_str, log_level, message = parts[:3]
        timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S,%f")

        if 'Training model' in message:
            if current_model:
                if all(key in current_model for key in ['Fake Images', 'Real Images', 'Epochs', 'Accuracy', 'Loss', 'Time Taken']):
                    data.append(current_model)
            current_model = {'Timestamp': timestamp}
            if 'fake and' in message and 'real faces' in message:
                try:
                    fake_images = int(message.split(' ')[3])
                    real_images = int(message.split(' ')[6])
                except ValueError:
                    print("ji")
                    continue  # Ignore incomplete logs
                current_model.update({'Fake Images': fake_images, 'Real Images': real_images})

            if 'epochs' in message:
                try:
                    epochs = int(message.split(' ')[-1])
                except ValueError:
                    epochs = 30
                current_model['Epochs'] = epochs

        elif 'Test Accuracy' in message:
            accuracy = float(message.split(':')[-1])
            current_model['Accuracy'] = accuracy
            if "Epochs" not in current_model:
                current_model['Epochs'] = 30 
            if current_model and all(key in current_model for key in ['Fake Images', 'Real Images', 'Epochs', 'Accuracy', 'Loss', 'Time Taken']):
                data.append(current_model)

        elif 'Test Loss' in message:
            loss = float(message.split(':')[-1])
            current_model['Loss'] = loss

        elif 'Model trained' in message:
            time_taken = timestamp - current_model['Timestamp']
            current_model['Time Taken'] = time_taken.total_seconds()/60

    if current_model and all(key in current_model for key in ['Fake Images', 'Real Images', 'Epochs', 'Accuracy', 'Loss', 'Time Taken']):
        data.append(current_model)

    return pd.DataFrame(data)

In [38]:
log_file_path = r'model_training\final\mainlogs'

finaldf = pd.DataFrame()
for log_files in os.listdir(log_file_path):
    if log_files.endswith('.log'):
        df = parse_log(os.path.join(log_file_path, log_files))
        finaldf = pd.concat([finaldf, df], ignore_index=True)

if 'Epochs' in finaldf.columns:
    finaldf['Epochs'].fillna(30, inplace=True)

finaldf.tail(10)

Unnamed: 0,Timestamp,Fake Images,Real Images,Time Taken,Loss,Accuracy,Epochs
16,2024-02-25 12:53:44.735,400,1201,1.757567,0.474072,0.823285,5
17,2024-02-25 12:53:44.735,400,1201,1.757567,0.474072,0.823285,5
18,2024-02-25 13:34:48.260,4000,1201,176.898083,0.43694,0.821268,50
19,2024-02-25 13:34:48.260,4000,1201,176.898083,0.43694,0.821268,50
20,2024-02-25 18:21:14.959,4000,1201,191.212967,0.445995,0.801409,50
21,2024-02-25 18:21:14.959,4000,1201,191.212967,0.445995,0.801409,50
22,2024-02-26 13:31:11.516,4000,1201,163.58625,0.281651,0.887892,30
23,2024-02-26 13:31:11.516,4000,1201,163.58625,0.281651,0.887892,30
24,2024-02-26 21:21:50.686,4000,1201,65.4576,0.260957,0.896861,35
25,2024-02-26 21:21:50.686,4000,1201,65.4576,0.260957,0.896861,35


In [39]:
finaldf.to_csv('model_training/final/Logs-Analysis.csv', index=False)

In [40]:
finaldf.describe()

Unnamed: 0,Timestamp,Fake Images,Real Images,Time Taken,Loss,Accuracy,Epochs
count,26,26.0,26.0,26.0,26.0,26.0,26.0
mean,2024-02-24 06:16:10.393461760,3492.307692,1201.0,86.995297,0.358264,0.852421,36.153846
min,2024-02-21 14:03:59.303000,400.0,1201.0,1.757567,0.244192,0.801409,5.0
25%,2024-02-22 07:32:59.251000064,3000.0,1201.0,22.10165,0.281651,0.823285,30.0
50%,2024-02-25 00:27:01.417999872,4000.0,1201.0,88.9074,0.347759,0.848174,30.0
75%,2024-02-25 13:34:48.260000,4000.0,1201.0,146.33435,0.43694,0.887892,50.0
max,2024-02-26 21:21:50.686000,6000.0,1201.0,191.212967,0.474072,0.90274,50.0
std,,1512.857714,0.0,64.086264,0.077236,0.034003,13.138317


In [41]:
finaldf.shape

(26, 7)

In [42]:
# sum of all the time taken
finaldf['Time Taken'].sum()

2261.8777333333337