In [3]:
import csv
from collections import defaultdict

# Path of the access log that main() opens and analyses.
LOG_FILE = 'sample.log'
# An IP is reported as suspicious only when its failed-login count is
# strictly greater than this value.
SUSPICIOUS_THRESHOLD = 10

def parse_log_line(line):
    """Extract the fields of interest from one access-log line.

    The line is expected to hold at least nine whitespace-separated tokens,
    with the client IP as the first token and the request
    (``METHOD endpoint PROTOCOL``) as the first double-quoted field. The
    first token after the closing quote is taken as the status code.

    Args:
        line: One raw line of the log, with or without a trailing newline.

    Returns:
        A 4-tuple ``(ip, endpoint, status_code, invalid_credentials_flag)``:

        * ``(None, None, None, False)`` when the line has fewer than nine
          tokens.
        * ``(ip, None, None, False)`` when no quoted request with at least
          two tokens can be found.
        * ``(ip, endpoint, None, False)`` when nothing follows the request.
        * Otherwise ``status_code`` is an ``int`` (or ``None`` if the token
          is not numeric) and the flag tells whether the text
          ``"Invalid credentials"`` occurs anywhere in the line.
    """
    parts = line.strip().split()
    if len(parts) < 9:
        return None, None, None, False
    ip = parts[0]

    # Locate the first double-quoted field; it carries the request.
    request_start = line.find('"')
    if request_start == -1:
        return ip, None, None, False
    request_end = line.find('"', request_start + 1)
    if request_end == -1:
        return ip, None, None, False

    request_parts = line[request_start + 1:request_end].split()
    if len(request_parts) < 2:
        return ip, None, None, False
    endpoint = request_parts[1]

    # Only the status token is needed from the tail, so do not insist on a
    # second field (the response size): a line that ends right after the
    # status code still yields a usable status.
    remainder = line[request_end + 1:].split()
    if not remainder:
        return ip, endpoint, None, False

    try:
        status_code = int(remainder[0])
    except ValueError:
        # e.g. "-" in place of a numeric status.
        status_code = None

    invalid_credentials_flag = "Invalid credentials" in line
    return ip, endpoint, status_code, invalid_credentials_flag

def _collect_stats(lines):
    """Tally requests per IP, hits per endpoint and failed logins per IP.

    Lines for which parse_log_line() yields no IP or no endpoint are skipped.
    A failed login is a line flagged with invalid credentials whose status is
    401 and whose endpoint contains "/login".
    """
    ip_counts = defaultdict(int)
    endpoint_counts = defaultdict(int)
    failed_logins = defaultdict(int)

    for line in lines:
        ip, endpoint, status_code, invalid_creds = parse_log_line(line)
        if ip is None or endpoint is None:
            continue
        ip_counts[ip] += 1
        endpoint_counts[endpoint] += 1
        if invalid_creds and status_code == 401 and "/login" in endpoint:
            failed_logins[ip] += 1

    return ip_counts, endpoint_counts, failed_logins


def _print_report(sorted_ip_counts, most_accessed_endpoint, max_count, suspicious_ips):
    """Print the three report sections to standard output."""
    print("IP Address           Request Count")
    for ip, count in sorted_ip_counts:
        print(f"{ip:<20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    if most_accessed_endpoint:
        print(f"{most_accessed_endpoint} (Accessed {max_count} times)")
    else:
        print("No endpoints found.")

    print("\nSuspicious Activity Detected:")
    if suspicious_ips:
        print("IP Address           Failed Login Attempts")
        for ip, count in suspicious_ips:
            print(f"{ip:<20} {count}")
    else:
        print("No suspicious activity detected.")


def _write_csv(path, sorted_ip_counts, most_accessed_endpoint, max_count, suspicious_ips):
    """Write the same three sections to a CSV file, separated by blank rows."""
    # newline='' lets the csv module control line endings itself; UTF-8 keeps
    # the output independent of the platform's default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        writer.writerow(["IP Address", "Request Count"])
        for ip, count in sorted_ip_counts:
            writer.writerow([ip, count])

        writer.writerow([])
        writer.writerow(["Endpoint", "Access Count"])
        if most_accessed_endpoint:
            writer.writerow([most_accessed_endpoint, max_count])
        else:
            writer.writerow(["None", "0"])

        writer.writerow([])
        writer.writerow(["IP Address", "Failed Login Count"])
        if suspicious_ips:
            for ip, count in suspicious_ips:
                writer.writerow([ip, count])
        else:
            writer.writerow(["No suspicious activity detected", "0"])


def main():
    """Analyse LOG_FILE, print a report and save it as a CSV file.

    The report lists request counts per IP (highest first), the single most
    frequently accessed endpoint, and every IP whose failed-login count is
    strictly greater than SUSPICIOUS_THRESHOLD. The same data is written to
    'log_analysis_results.csv' in the current working directory.

    Raises:
        OSError: if LOG_FILE cannot be opened or the CSV cannot be written.
    """
    # Decode explicitly as UTF-8 and replace undecodable bytes so that one
    # malformed line cannot abort the whole analysis, and so the result does
    # not depend on the platform's default encoding.
    with open(LOG_FILE, 'r', encoding='utf-8', errors='replace') as f:
        ip_counts, endpoint_counts, failed_logins = _collect_stats(f)

    sorted_ip_counts = sorted(ip_counts.items(), key=lambda x: x[1], reverse=True)

    if endpoint_counts:
        most_accessed_endpoint, max_count = max(endpoint_counts.items(), key=lambda x: x[1])
    else:
        most_accessed_endpoint, max_count = None, 0

    suspicious_ips = [
        (ip, count)
        for ip, count in failed_logins.items()
        if count > SUSPICIOUS_THRESHOLD
    ]

    _print_report(sorted_ip_counts, most_accessed_endpoint, max_count, suspicious_ips)
    _write_csv(
        'log_analysis_results.csv',
        sorted_ip_counts,
        most_accessed_endpoint,
        max_count,
        suspicious_ips,
    )

# Run the analysis only when this file is executed as the main module.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8
192.168.1.100        5
