In [87]:
import re
from collections import Counter
import csv


# Step 1: Parse the log file
def parse_log_file(file_path):
    """Parse an access log into a list of request records.

    Every line is searched for a dotted-quad address that is followed, later
    on the same line, by a quoted request (``"METHOD endpoint HTTP/..."``)
    and a numeric status code. Lines that do not match are skipped.

    The file is decoded as UTF-8. Bytes that are not valid UTF-8 are kept as
    backslash escapes instead of raising, so one malformed request line
    cannot abort the whole analysis.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of dicts, in file order, each with the string-valued keys
        ``ip``, ``method``, ``endpoint`` and ``status``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Regular expression to parse IP, HTTP method, endpoint, and status
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+).+ "(?P<method>[A-Z]+) (?P<endpoint>[^ ]+) HTTP/.+" (?P<status>\d+)'
    )
    # Explicit encoding keeps results independent of the platform locale;
    # 'backslashreplace' preserves undecodable bytes in a readable form.
    with open(file_path, 'r', encoding='utf-8', errors='backslashreplace') as file:
        matches = (log_pattern.search(line) for line in file)
        # The list is fully built before the file is closed.
        return [match.groupdict() for match in matches if match]


# Step 2: Count requests by IP
def count_requests_by_ip(logs):
    """Tally requests per client IP, busiest address first.

    Args:
        logs: Iterable of records, each providing an ``'ip'`` key.

    Returns:
        A list of ``(ip, request_count)`` tuples sorted by descending count.
        Addresses with equal counts keep the order in which they were first
        seen in ``logs``.
    """
    per_ip = Counter()
    for entry in logs:
        per_ip[entry['ip']] += 1
    # most_common() with no argument returns every item, highest count first.
    return per_ip.most_common()


# Step 3: Find the most accessed endpoint
def find_most_accessed_endpoint(logs):
    """Return the endpoint that appears most often, with its hit count.

    Args:
        logs: Iterable of records, each providing an ``'endpoint'`` key.

    Returns:
        An ``(endpoint, access_count)`` tuple. When several endpoints share
        the highest count, the one seen first in ``logs`` is returned.

    Raises:
        ValueError: If ``logs`` contains no records.
    """
    hits = Counter()
    for entry in logs:
        hits[entry['endpoint']] += 1
    # max() keeps the first maximal key, i.e. the earliest-seen endpoint.
    busiest = max(hits, key=hits.get)
    return busiest, hits[busiest]


# Step 4: Detect suspicious activity
def detect_suspicious_activity(logs, threshold=1):
    """Report IPs whose number of 401 responses exceeds ``threshold``.

    Args:
        logs: Iterable of records providing ``'status'`` and ``'ip'`` keys.
            Only records whose status equals the string ``'401'`` count.
        threshold: An IP is reported only when its count of 401 responses
            is strictly greater than this value.

    Returns:
        A dict mapping each reported IP to its count of 401 responses.
    """
    failures = Counter()
    for entry in logs:
        if entry['status'] == '401':
            failures[entry['ip']] += 1

    flagged = {}
    for address, attempts in failures.items():
        if attempts > threshold:
            flagged[address] = attempts
    return flagged


# Step 5: Save results to a CSV file
def save_results_to_csv(ip_counts, most_accessed, suspicious_ips, output_file='log_analysis_results.csv'):
    """Write the three analysis sections to a single CSV file.

    The file holds, in order: requests per IP, the most accessed endpoint,
    and suspicious activity. Each section is a title row, a column-header
    row and its data rows; sections are separated by one blank row.

    Args:
        ip_counts: Iterable of ``(ip, request_count)`` rows.
        most_accessed: A single ``(endpoint, access_count)`` row.
        suspicious_ips: Mapping of IP to failed-login count.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, 'w', newline='') as handle:
        emit = csv.writer(handle).writerow

        def emit_section(title, columns, rows):
            # Title row, column-header row, then one row per record.
            emit([title])
            emit(columns)
            for row in rows:
                emit(row)

        emit_section('Requests per IP', ['IP Address', 'Request Count'], ip_counts)
        emit([])  # Blank row for separation

        emit_section('Most Accessed Endpoint', ['Endpoint', 'Access Count'], [most_accessed])
        emit([])  # Blank row for separation

        emit_section(
            'Suspicious Activity',
            ['IP Address', 'Failed Login Count'],
            suspicious_ips.items(),
        )


# Step 6: Main function
def main():
    """Run the full analysis on ``sample.log`` and report the results.

    Parses the log, prints the per-IP request counts, the most frequently
    accessed endpoint and the IPs with more than one failed login, then
    writes the same data to ``log_analysis_results.csv``.

    If no line of the log could be parsed, a short notice is printed and
    nothing else is reported or written.
    """
    log_file = 'sample.log'  # Specify the log file name
    logs = parse_log_file(log_file)

    # Without this guard the endpoint lookup below would fail with an
    # unhelpful "max() arg is an empty sequence" error.
    if not logs:
        print(f"No parsable requests found in {log_file}; nothing to report.")
        return

    # Count requests by IP
    ip_counts = count_requests_by_ip(logs)
    print("IP Address           Request Count")
    for ip, count in ip_counts:
        print(f"{ip:<20}{count}")

    # Find the most accessed endpoint
    most_accessed = find_most_accessed_endpoint(logs)
    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed[0]} (Accessed {most_accessed[1]} times)")

    # Detect suspicious activity
    suspicious_ips = detect_suspicious_activity(logs, threshold=1)
    print("\nSuspicious Activity Detected:")
    print("IP Address           Failed Login Attempts")
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20}{count}")

    # Save results to CSV
    save_results_to_csv(ip_counts, most_accessed, suspicious_ips)
    print("\nResults saved to log_analysis_results.csv")


# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5         8
192.168.1.100       5

Results saved to log_analysis_results.csv
