#### Name : Pranjal Keshav Patil
#### Roll No. : 45

#### Assignment No. 02

##### You are given a large log file containing various system events. Each line in the log file follows this format:
##### [YYYY-MM-DD HH:MM:SS] [LOG_LEVEL] [MODULE] Message
#### where:
##### • YYYY-MM-DD HH:MM:SS is a timestamp.
##### • LOG_LEVEL can be INFO, WARN, ERROR, or DEBUG.
##### • MODULE represents the system module name (alphanumeric, can contain underscores).
##### • Message is the actual log message (it may contain any characters).
#### Task:
##### Write a function extract_critical_errors(log_data: str) -> list[tuple] that takes a multiline string log_data (containing log entries) and returns a list of tuples containing:
##### 1. The timestamp
##### 2. The module name
##### 3. The error message
#### BUT only if:
##### • The LOG_LEVEL is ERROR.
##### • The message contains at least one IP address in IPv4 format (xxx.xxx.xxx.xxx, where xxx is in the range 0-255).
##### • The message contains a hexadecimal error code, formatted as 0x followed by exactly 8 hexadecimal digits (0-9, A-F).

# In [14]:
import re

# The patterns below are compiled once at import time instead of on every call.

# One complete log line: "[timestamp] [ERROR] [module] message".
# - Anchored with ^ (plus optional indentation) so that an ERROR-looking
#   fragment quoted inside another entry's message is not mistaken for an entry.
# - "\r?$" keeps a Windows line ending out of the captured message.
_ERROR_LINE_RE = re.compile(
    r'^[ \t]*'
    r'\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\] '  # Timestamp
    r'\[ERROR\] '  # Log level must be ERROR
    r'\[(?P<module>[A-Za-z0-9_]+)\] '  # Module name
    r'(?P<message>.*?)\r?$',  # Message: rest of the line
    re.MULTILINE,  # ^ and $ match at every line boundary
)

# A single IPv4 octet in the range 0-255.
_OCTET = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'

# Four dot-separated octets delimited by word boundaries.
_IPV4_RE = re.compile(r'\b' + _OCTET + r'(?:\.' + _OCTET + r'){3}\b')

# "0x" followed by exactly 8 hex digits; the negative lookahead rejects longer
# digit runs such as 0x123456789.  Lowercase digits are accepted as well.
_HEX_CODE_RE = re.compile(r'0x[0-9A-Fa-f]{8}(?![0-9A-Fa-f])')


def extract_critical_errors(log_data: str) -> list[tuple[str, str, str]]:
    """Return the critical ERROR entries found in *log_data*.

    Each line is expected to look like
    ``[YYYY-MM-DD HH:MM:SS] [LOG_LEVEL] [MODULE] Message``.  An entry is
    reported only when all of the following hold:

    * the log level is ``ERROR``;
    * the message contains at least one IPv4 address whose octets are 0-255;
    * the message contains a hexadecimal code of the form ``0x`` followed by
      exactly 8 hexadecimal digits.

    Args:
        log_data: Multi-line string of log entries.  Both ``\\n`` and
            ``\\r\\n`` line endings are handled.

    Returns:
        A list of ``(timestamp, module, message)`` tuples in the order the
        entries appear in *log_data*; empty if nothing qualifies.
    """
    critical_errors = []

    for entry in _ERROR_LINE_RE.finditer(log_data):
        message = entry.group('message')

        # Both markers must be present in the message text itself.
        if not _IPV4_RE.search(message):
            continue
        if not _HEX_CODE_RE.search(message):
            continue

        critical_errors.append(
            (entry.group('timestamp'), entry.group('module'), message)
        )

    return critical_errors


# Sample log: two ERROR entries that carry both an IPv4 address and a hex
# error code, plus one INFO and one WARN entry that must be ignored.
log_data = "\n".join([
    "[2025-02-10 14:23:01] [INFO] [Auth_Module] User login successful.",
    "[2025-02-10 15:45:32] [ERROR] [Net_Module] Connection timeout from 192.168.1.10. Error Code: 0xAB12CD34",
    "[2025-02-10 16:01:10] [WARN] [Disk_Module] Low disk space warning.",
    "[2025-02-10 17:12:05] [ERROR] [Security_Module] Unauthorized access detected from 10.0.0.5. Error Code: 0xDEADBEEF",
])

# Run the extractor over the sample log.
critical_errors = extract_critical_errors(log_data)

# Show each (timestamp, module, message) tuple on its own line.
for error in critical_errors:
    print(error)


# Output:
# ('2025-02-10 15:45:32', 'Net_Module', 'Connection timeout from 192.168.1.10. Error Code: 0xAB12CD34')
# ('2025-02-10 17:12:05', 'Security_Module', 'Unauthorized access detected from 10.0.0.5. Error Code: 0xDEADBEEF')
