In [1]:
from collections import defaultdict
import csv
import os

In [2]:
# Define constants
# process_log_file flags an IP as suspicious only when its failed-login count
# is strictly greater than this value (a count equal to it is not flagged).
FAILED_LOGIN_THRESHOLD = 10

In [3]:
def process_log_file(log_file, failed_login_threshold=None):
    """Parse an access log and summarise traffic and failed logins.

    Each non-blank line is split on single spaces and must yield at least
    nine fields to count as a log entry: field 0 is used as the client IP,
    field 6 as the requested endpoint and field 8 as the HTTP status code.
    (Presumably the common/combined access-log layout -- confirm against the
    log producer.)  Lines containing 'Log file created.' and blank lines are
    ignored silently; other short lines are reported on stdout and skipped.

    Args:
        log_file: Path of the log file to read.
        failed_login_threshold: An IP is reported as suspicious when its
            failed-login count is strictly greater than this value.  When
            omitted (None), the module-level FAILED_LOGIN_THRESHOLD is used.

    Returns:
        A tuple ``(sorted_ip_counts, most_accessed_endpoint, suspicious_activity)``:

        * sorted_ip_counts -- list of ``(ip, request_count)`` pairs, highest
          count first; ties keep the order in which the IPs first appeared.
        * most_accessed_endpoint -- ``(endpoint, count)`` for the endpoint
          with the most hits, or ``("N/A", 0)`` if no valid entry was found.
        * suspicious_activity -- dict mapping IP to failed-login count for
          every IP above the threshold.

    Raises:
        OSError: If ``log_file`` cannot be opened (propagated from ``open``).
    """
    if failed_login_threshold is None:
        failed_login_threshold = FAILED_LOGIN_THRESHOLD

    ip_counts = defaultdict(int)
    endpoint_counts = defaultdict(int)
    failed_logins = defaultdict(int)

    # errors='replace' keeps one undecodable byte (e.g. in a URL or user-agent)
    # from aborting the whole analysis; the fields used here are plain ASCII.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            entry = line.strip()

            # Blank lines carry no data and are not worth reporting.
            if not entry:
                continue

            # Skip non-log lines (meta lines like 'Log file created.')
            if 'Log file created.' in entry:
                continue

            parts = entry.split(" ")

            # Indices 0, 6 and 8 are read below, so at least nine fields are needed.
            if len(parts) <= 8:
                print(f"Skipping line: '{entry}' - insufficient elements.")
                continue

            ip_address = parts[0]
            endpoint = parts[6]
            status_code = parts[8]

            ip_counts[ip_address] += 1
            endpoint_counts[endpoint] += 1

            # A failed login is either an HTTP 401 or an explicit
            # 'Invalid credentials' message anywhere on the line.
            if status_code == '401' or "Invalid credentials" in entry:
                failed_logins[ip_address] += 1

    sorted_ip_counts = sorted(ip_counts.items(), key=lambda x: x[1], reverse=True)

    # max() raises on an empty sequence, so fall back to a placeholder.
    if endpoint_counts:
        most_accessed_endpoint = max(endpoint_counts.items(), key=lambda x: x[1])
    else:
        most_accessed_endpoint = ("N/A", 0)

    suspicious_activity = {
        ip: count
        for ip, count in failed_logins.items()
        if count > failed_login_threshold
    }

    return sorted_ip_counts, most_accessed_endpoint, suspicious_activity

In [4]:
# Function to display and save results
def display_and_save_results(sorted_ip_counts, most_accessed_endpoint, suspicious_activity,
                             output_path='log_analysis_results.csv'):
    """Print the analysis summary and write it to a CSV report.

    The CSV is written as three titled sections, each with its own header
    row and separated by a blank row, so that the endpoint and failed-login
    figures cannot be mistaken for per-IP request counts:

        Requests per IP         -> IP Address, Request Count
        Most Accessed Endpoint  -> Endpoint, Access Count
        Suspicious Activity     -> IP Address, Failed Login Count

    Args:
        sorted_ip_counts: Iterable of ``(ip, request_count)`` pairs, printed
            and written in the order given.
        most_accessed_endpoint: ``(endpoint, access_count)`` pair.
        suspicious_activity: Mapping of IP to failed-login count; may be empty.
        output_path: Destination of the CSV report.  Defaults to
            'log_analysis_results.csv' in the current working directory.

    Raises:
        OSError: If ``output_path`` cannot be opened for writing.
    """
    # Displaying results
    print("IP Address\t\tRequest Count")
    for ip, count in sorted_ip_counts:
        print(f"{ip}\t\t{count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    if suspicious_activity:
        print("\nSuspicious Activity Detected:")
        print("IP Address           Failed Login Attempts")
        for ip, count in suspicious_activity.items():
            print(f"{ip}        {count}")
    else:
        print("\nNo suspicious activity detected.")

    # Save results to CSV.  newline='' is required by the csv module so it can
    # control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        writer.writerow(['Requests per IP'])
        writer.writerow(['IP Address', 'Request Count'])
        for ip, count in sorted_ip_counts:
            writer.writerow([ip, count])
        writer.writerow([])  # blank separator row between sections

        writer.writerow(['Most Accessed Endpoint'])
        writer.writerow(['Endpoint', 'Access Count'])
        writer.writerow([most_accessed_endpoint[0], most_accessed_endpoint[1]])
        writer.writerow([])

        # The header is written even when there is nothing suspicious, so the
        # report always has the same three sections.
        writer.writerow(['Suspicious Activity'])
        writer.writerow(['IP Address', 'Failed Login Count'])
        for ip, count in suspicious_activity.items():
            writer.writerow([ip, count])


In [5]:
# Main function to run the script
def main():
    """Analyse ``sample.log`` and hand the results to the reporting step."""
    # process_log_file returns the three values in exactly the positional
    # order display_and_save_results expects, so they can be forwarded as-is.
    analysis = process_log_file("sample.log")
    display_and_save_results(*analysis)

# Run the analysis only when this code executes as the top-level program
# (where __name__ is "__main__"); importing it as a module does not call main().
if __name__ == "__main__":
    main()

IP Address		Request Count
203.0.113.5		8
198.51.100.23		8
192.168.1.1		7
10.0.0.2		6
192.168.1.100		5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

No suspicious activity detected.
