In [None]:
import re
import csv
import pandas as pd
from datetime import datetime
from google.colab import files

In [None]:
pattern = re.compile(
    r"^(?P<month>\w{3})\s+(?P<day>\d{1,2})\s+(?P<time>\d{2}:\d{2}:\d{2})\s+(?P<host>\S+)\s+(?P<process>[^\[:]+)(?:\[(?P<pid>\d+)\])?:\s+(?P<message>.*)$"
)
ALL_FIELDS = [
    "_time", "hostname", "process", "pid", "message",
    "user", "file", "severity", "action", "command", "source_ip", "service"
]

def parse_timestamp(month, day, time_str):
    try:
        year = datetime.now().year
        dt = datetime.strptime(f"{year} {month} {day} {time_str}", "%Y %b %d %H:%M:%S")
        return dt.isoformat()
    except Exception as e:
        #print(f"Failed to parse timestamp: {month} {day} {time_str} → {e}")
        return ""
def extract_additional_fields(message):
    fields = {}

    #action
    for action in ['Started', 'Stopped', 'Failed', 'Finished', 'Created', 'Mounted']:
        if message.startswith(action):
            fields['action'] = action
            break

    # User detection
    user_match = re.search(r"user(?:name)?=([^\s]+)", message)
    if user_match:
        fields['user'] = user_match.group(1)

    # File detection
    file_match = re.search(r"(/\S+)", message)
    if file_match:
        fields['file'] = file_match.group(1)

    # Source IP
    ip_match = re.search(r"from (\d{1,3}(?:\.\d{1,3}){3})", message)
    if ip_match:
        fields['source_ip'] = ip_match.group(1)

    # Severity
    if "error" in message.lower():
        fields["severity"] = "error"
    elif "warn" in message.lower():
        fields["severity"] = "warning"
    elif "fail" in message.lower():
        fields["severity"] = "critical"

    return fields


matchedCsv = "structuredLogs.csv"
unmatchedCsv = "unmathedLogs.csv"

with open("logs.txt", "r") as infile, \
     open(matchedCsv, "w", newline="") as outfile, \
     open(unmatchedCsv, "w") as unmatched_file:

    writer = csv.DictWriter(outfile, fieldnames=ALL_FIELDS)
    writer.writeheader()

    for line in infile:

        match = pattern.match(line)
        if match:
            data = match.groupdict()
            row = dict.fromkeys(ALL_FIELDS, "")
            #print("RAW LINE:", line)
           # print("MATCHED:", data)
            # Basic fields
            row["_time"] = parse_timestamp(data["month"], data["day"], data["time"])
            row["hostname"] = data["host"]
            row["process"] = data["process"]
            row["pid"] = data.get("pid") or ""
            row["message"] = data["message"]

            # Additional fields
            extra = extract_additional_fields(data["message"])
            row.update(extra)

            writer.writerow(row)
        else:
            unmatched_file.write(line)


In [3]:

df = pd.read_csv("structuredLogs.csv")


df["_time"] = pd.to_datetime(df["_time"], errors="coerce")


target_time = datetime.strptime("2025-07-28 15:41:44", "%Y-%m-%d %H:%M:%S")
delta = timedelta(minutes=10)

filtered = df[(df["_time"] >= target_time - delta) & (df["_time"] <= target_time + delta)]

filtered.to_csv("filtered_logs.csv", index=False)
files.download("filtered_logs.csv")


  df = pd.read_csv("structuredLogs.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>