# Hard-coded Version

In [21]:
import pandas as pd
import re

#Step 1: Load the three inputs - the list of export fields (an Excel sheet
#that can be amended at any time), the planned test cases and the raw logs.
#NOTE: every path below is machine-specific; point it at your own copy of the file.

#Desired output fields: first column of the sheet, blank cells skipped
fields_path = "C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/Data Extraction Fields.xlsx"
fields_df = pd.read_excel(fields_path, header=None)
desired_fields = fields_df[0].dropna().tolist()

#Planned test cases
planned_path = "C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/refined_security_test_cases.csv"
planned_df = pd.read_csv(planned_path)

#Raw logs: read the whole file, then cut it into blocks at the separator line
logs_path = "C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/final_updated_logs.txt"
log_separator = "======================================================"
with open(logs_path, "r", encoding="utf-8") as f:
    raw_log_text = f.read()
log_lines = raw_log_text.split(log_separator)

#Step 2: Parsing Logs in Pairs (Request + Response)
#
#The separator-split blocks in log_lines are consumed two at a time: the first
#block of each pair is read as the request, the second as its response. An
#entry is kept only when the request supplied a Test Case ID, a Project ID and
#a User ID, because those three values are the keys used to merge with the
#planned test cases.

#A request line is one of the HTTP methods as a whole word, optionally followed
#by the requested source. Requiring whitespace (or end of line) after the
#method stops lines that merely begin with those letters (e.g. "GETTING ...")
#from being read as a request line, and making the source optional avoids an
#IndexError on a request line that carries only the method.
REQUEST_LINE_RE = re.compile(
    r"^(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH)(?:\s+(\S+))?(?:\s|$)"
)

#A log file that starts with the separator yields a blank first block, which
#would shift every request/response pair by one. Skip that blank block.
if log_lines and not log_lines[0].strip():
    blocks_to_pair = log_lines[1:]
else:
    blocks_to_pair = log_lines

paired_log_entries = []
for pair_start in range(0, len(blocks_to_pair) - 1, 2):
    req_block = blocks_to_pair[pair_start].strip()
    res_block = blocks_to_pair[pair_start + 1].strip()

    #Every field defaults to an empty string so all entries share one shape
    log_info = {
        "Date of Test Executed": "",
        "Project ID": "",
        "User ID": "",
        "Test Case ID": "",
        "User Agent": "",
        "Request Method": "",
        "Requested Source": "",
        "Test Case Outcome Message": "",
        "Error Message": "",
        "Response Size": ""
    }

    #Parse the request block (blank lines are ignored)
    request_lines = [line.strip() for line in req_block.splitlines() if line.strip()]
    for line in request_lines:
        request_line = REQUEST_LINE_RE.match(line)
        if request_line:
            log_info["Request Method"] = request_line.group(1)
            log_info["Requested Source"] = request_line.group(2) or ""
        elif "User-Agent:" in line:
            log_info["User Agent"] = line.split("User-Agent:")[-1].strip()
        elif "Project ID:" in line:
            log_info["Project ID"] = line.split("Project ID:")[-1].strip()
        elif "User ID:" in line:
            log_info["User ID"] = line.split("User ID:")[-1].strip()
        elif "Test Case ID:" in line:
            log_info["Test Case ID"] = line.split("Test Case ID:")[-1].strip()

    #Parse the corresponding response block
    response_lines = [line.strip() for line in res_block.splitlines() if line.strip()]
    for line in response_lines:
        if line.startswith("HTTP"):
            #Only a status line containing "200 OK" counts as a pass; any other
            #status line is kept verbatim as the error message
            passed = "200 OK" in line
            log_info["Test Case Outcome Message"] = "Pass" if passed else "Fail"
            log_info["Error Message"] = "" if passed else line
        elif line.lower().startswith("date:"):
            log_info["Date of Test Executed"] = line.split(":", 1)[-1].strip()
        elif "Content-Length:" in line:
            log_info["Response Size"] = line.split("Content-Length:")[-1].strip()

    if log_info["Test Case ID"] and log_info["Project ID"] and log_info["User ID"]:
        paired_log_entries.append(log_info)

#Step 3: Merge Logs with Planned Test Cases
#
#Left merge: every planned test case is kept, with the parsed log fields
#attached where Project ID, User ID and Test Case ID all match.

merge_keys = ["Project ID", "User ID", "Test Case ID"]

#Naming the columns explicitly keeps the merge keys present even when no log
#entry was kept; a DataFrame built from an empty list has no columns at all
#and the merge would then fail with a KeyError.
log_columns = [
    "Date of Test Executed",
    "Project ID",
    "User ID",
    "Test Case ID",
    "User Agent",
    "Request Method",
    "Requested Source",
    "Test Case Outcome Message",
    "Error Message",
    "Response Size",
]
logs_df = pd.DataFrame(paired_log_entries, columns=log_columns)

#IDs parsed from the logs are always text. read_csv infers an integer dtype for
#an all-digit ID column, and pandas refuses to merge integer keys with text
#keys, so such columns are converted to text on a copy (planned_df itself is
#left untouched).
planned_for_merge = planned_df.copy()
for key in merge_keys:
    if pd.api.types.is_integer_dtype(planned_for_merge[key]):
        planned_for_merge[key] = planned_for_merge[key].astype(str)

merged_df = pd.merge(
    planned_for_merge, logs_df,
    on=merge_keys,
    how="left"
)

#Add manual input columns (left blank, to be filled in by hand after export)
merged_df["Actual Test Case Outcome"] = ""
merged_df["Reason for failure/success"] = ""

#Step 4: Match Column Names Case-Insensitive
#
#Desired field names are matched against the merged columns ignoring case and
#surrounding whitespace, so a stray space in the Excel sheet does not make a
#field disappear. str() guards against cells that were read as numbers.

#Lookup from normalised name to the real column name in merged_df
column_mapping = {str(col).strip().lower(): col for col in merged_df.columns}
normalized_desired = [str(col).strip().lower() for col in desired_fields]
final_columns = [column_mapping[col] for col in normalized_desired if col in column_mapping]

#Desired fields with no matching column are left out of the export; report
#them instead of dropping them silently so typos in the sheet are noticed
missing_fields = [
    field
    for field, key in zip(desired_fields, normalized_desired)
    if key not in column_mapping
]
if missing_fields:
    print("Warning: desired fields not found in the merged data and skipped:", missing_fields)

#Filter only final export columns, in the order given by the desired fields
final_output = merged_df[final_columns]

#Step 5: Write the selected columns to CSV without the row index.
#NOTE: the output path is machine-specific; point it at your own folder.
output_path = "C:/Users/zolyn/OneDrive/Desktop/BT4103/Output Files from Logs Standardisation System/hardcode_logs_output.csv"
final_output.to_csv(output_path, index=False)

# More Dynamic Version of the code

In [23]:
import pandas as pd
import re

#STEP 1: Load Files
#NOTE: every path below is machine-specific; point it at your own copy of the file.

#Excel: list of export fields. The first column is the field name, the second
#(optional) column flags fields that are filled in manually.
field_df = pd.read_excel("C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/Data Extraction Fields.xlsx", header=None)
#Keep exactly the first two columns: a sheet with only the field-name column
#gets an empty flag column, and extra columns are ignored, so the rename below
#cannot fail with a length mismatch
field_df = field_df.reindex(columns=[0, 1])
field_df.columns = ["Field Name", "Manual Input"]
#Blank rows would otherwise leave NaN field names that break the later
#.strip()/.lower() calls; astype(str) also covers cells read as numbers
field_df = field_df.dropna(subset=["Field Name"]).copy()
field_df["Field Name"] = field_df["Field Name"].astype(str).str.strip()
field_df = field_df[field_df["Field Name"] != ""].copy()
field_df["Manual Input"] = field_df["Manual Input"].fillna("").astype(str).str.strip().str.lower()

#Extract desired fields and which ones are manual input fields
#(a field is manual when its flag is "yes", "true" or "manual", ignoring case)
desired_fields = field_df["Field Name"].tolist()
manual_fields = field_df[field_df["Manual Input"].isin(["yes", "true", "manual"])]["Field Name"].tolist()

#CSV: planned test cases
planned_df = pd.read_csv("C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/refined_security_test_cases.csv")

#TXT: raw logs, cut into blocks at the separator line
with open("C:/Users/zolyn/OneDrive/Desktop/BT4103/Files to Parse In/final_updated_logs.txt", "r", encoding="utf-8") as f:
    log_blocks = f.read().split("======================================================")

In [25]:
#STEP 2: Parse Logs in Pairs
#
#The separator-split blocks in log_blocks are consumed two at a time: the first
#block of each pair is read as the request, the second as its response. An
#entry is kept only when the request supplied a Project ID, a User ID and a
#Test Case ID, because those three values are the keys used to merge with the
#planned test cases.

#A request line is one of the HTTP methods as a whole word, optionally followed
#by the requested source. Requiring whitespace (or end of line) after the
#method stops lines that merely begin with those letters (e.g. "GETTING ...")
#from being read as a request line, and making the source optional avoids an
#IndexError on a request line that carries only the method.
REQUEST_LINE_RE = re.compile(
    r"^(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH)(?:\s+(\S+))?(?:\s|$)"
)

#A log file that starts with the separator yields a blank first block, which
#would shift every request/response pair by one. Skip that blank block.
if log_blocks and not log_blocks[0].strip():
    blocks_to_pair = log_blocks[1:]
else:
    blocks_to_pair = log_blocks

parsed_logs = []
for pair_start in range(0, len(blocks_to_pair) - 1, 2):
    req = blocks_to_pair[pair_start].strip()
    res = blocks_to_pair[pair_start + 1].strip()

    #Every field defaults to an empty string so all entries share one shape
    entry = {
        "Date of Test Executed": "",
        "Project ID": "",
        "User ID": "",
        "Test Case ID": "",
        "User Agent": "",
        "Request Method": "",
        "Requested Source": "",
        "Test Case Outcome Message": "",
        "Error Message": "",
        "Response Size": ""
    }

    #Request block: method/source plus the identifying headers
    for line in req.splitlines():
        line = line.strip()
        if not line:
            continue
        request_line = REQUEST_LINE_RE.match(line)
        if request_line:
            entry["Request Method"] = request_line.group(1)
            entry["Requested Source"] = request_line.group(2) or ""
        elif "User-Agent:" in line:
            entry["User Agent"] = line.split("User-Agent:")[-1].strip()
        elif "Project ID:" in line:
            entry["Project ID"] = line.split("Project ID:")[-1].strip()
        elif "User ID:" in line:
            entry["User ID"] = line.split("User ID:")[-1].strip()
        elif "Test Case ID:" in line:
            entry["Test Case ID"] = line.split("Test Case ID:")[-1].strip()

    #Response block: status line, date and size
    for line in res.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith("HTTP"):
            #Only a status line containing "200 OK" counts as a pass; any other
            #status line is kept verbatim as the error message
            passed = "200 OK" in line
            entry["Test Case Outcome Message"] = "Pass" if passed else "Fail"
            entry["Error Message"] = "" if passed else line
        elif line.lower().startswith("date:"):
            entry["Date of Test Executed"] = line.split(":", 1)[-1].strip()
        elif "Content-Length:" in line:
            entry["Response Size"] = line.split("Content-Length:")[-1].strip()

    if entry["Project ID"] and entry["User ID"] and entry["Test Case ID"]:
        parsed_logs.append(entry)

#Naming the columns explicitly keeps the merge keys present even when no log
#entry was kept; a DataFrame built from an empty list has no columns at all
#and the merge below would then fail with a KeyError.
log_columns = [
    "Date of Test Executed",
    "Project ID",
    "User ID",
    "Test Case ID",
    "User Agent",
    "Request Method",
    "Requested Source",
    "Test Case Outcome Message",
    "Error Message",
    "Response Size",
]
logs_df = pd.DataFrame(parsed_logs, columns=log_columns)

#STEP 3: Merge Logs with Planned Cases
#
#Left merge: every planned test case is kept, with the parsed log fields
#attached where Project ID, User ID and Test Case ID all match.

merge_keys = ["Project ID", "User ID", "Test Case ID"]

#IDs parsed from the logs are always text. read_csv infers an integer dtype for
#an all-digit ID column, and pandas refuses to merge integer keys with text
#keys, so such columns are converted to text on a copy (planned_df itself is
#left untouched).
planned_for_merge = planned_df.copy()
for key in merge_keys:
    if pd.api.types.is_integer_dtype(planned_for_merge[key]):
        planned_for_merge[key] = planned_for_merge[key].astype(str)

merged_df = pd.merge(
    planned_for_merge,
    logs_df,
    on=merge_keys,
    how="left"
)

#STEP 4: Add Manual Input Fields if Missing
#
#Each manual field gets an empty column unless a column with the same name
#(ignoring case and surrounding whitespace) is already present.

for manual_name in manual_fields:
    #Rebuilt on every pass so a column added for an earlier field is seen too
    existing_names = {name.strip().lower() for name in merged_df.columns}
    if manual_name.strip().lower() in existing_names:
        continue
    merged_df[manual_name] = ""

#STEP 5: Normalise and Align Columns
#
#Column names are compared in stripped, lower-case form; the export then uses
#the field names exactly as written in the Excel sheet, in the sheet's order.

# Normalize for case-insensitive column matching
merged_df.columns = [col.strip().lower() for col in merged_df.columns]
#Two columns that differed only in case/whitespace now share one name.
#Selecting that name would return both and the header assignment below would
#fail with a length mismatch, so only the first occurrence is kept.
#NOTE(review): "first wins" favours the planned-cases column over a same-named
#log column - confirm this is the intended precedence.
merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()].copy()
#Not read anywhere in this step; kept in case a later cell relies on it
normalized_columns_map = {col.lower(): col for col in desired_fields}
final_cols_lower = [col.strip().lower() for col in desired_fields]

#Ensure all desired fields exist (missing ones become empty columns)
for col in final_cols_lower:
    if col not in merged_df.columns:
        merged_df[col] = ""

#Reorder and restore original column names; work on an explicit copy so the
#renaming never touches merged_df
final_output = merged_df[final_cols_lower].copy()
final_output.columns = desired_fields  # Pretty headers

#STEP 6: Write the aligned columns to CSV without the row index.
#NOTE: the output path is machine-specific; point it at your own folder.
output_path = "C:/Users/zolyn/OneDrive/Desktop/BT4103/Output Files from Logs Standardisation System/dynamic_logs_output.csv"
final_output.to_csv(output_path, index=False)