In [7]:
import re
from collections import Counter
import csv


FAILED_LOGIN_THRESHOLD = 10


LOG_FILE = "sample.log"
OUTPUT_FILE = "log_analysis_results.csv"

def parse_log_file(log_file):
    """Parses the log file and extracts relevant information."""
    ip_addresses = []
    endpoints = []
    failed_logins = []

    log_pattern = r'(?P<ip>\d+\.\d+\.\d+\.\d+).*\s"(?P<method>\w+)\s(?P<endpoint>.*?)\sHTTP.*"\s(?P<status>\d+).*'
    failed_login_pattern = r'"POST /login HTTP/1.1"\s401'

    with open(log_file, 'r') as file:
        for line in file:
            match = re.match(log_pattern, line)
            if match:
                ip = match.group('ip')
                endpoint = match.group('endpoint')
                status = match.group('status')

                ip_addresses.append(ip)
                endpoints.append(endpoint)


                if re.search(failed_login_pattern, line):
                    failed_logins.append(ip)

    return ip_addresses, endpoints, failed_logins

def count_requests(data):

    return Counter(data)

def detect_suspicious_activity(failed_logins):

    failed_login_counts = count_requests(failed_logins)
    suspicious_ips = {ip: count for ip, count in failed_login_counts.items() if count > FAILED_LOGIN_THRESHOLD}
    return suspicious_ips

def save_to_csv(ip_counts, most_accessed_endpoint, suspicious_ips, output_file):

    with open(output_file, 'w', newline='') as file:
        writer = csv.writer(file)

        # Write Requests per IP
        writer.writerow(["Requests per IP"])
        writer.writerow(["IP Address", "Request Count"])
        for ip, count in ip_counts.items():
            writer.writerow([ip, count])

        # Write Most Accessed Endpoint
        writer.writerow([])
        writer.writerow(["Most Accessed Endpoint"])
        writer.writerow(["Endpoint", "Access Count"])
        writer.writerow([most_accessed_endpoint[0], most_accessed_endpoint[1]])

        # Write Suspicious Activity
        writer.writerow([])
        writer.writerow(["Suspicious Activity"])
        writer.writerow(["IP Address", "Failed Login Count"])
        for ip, count in suspicious_ips.items():
            writer.writerow([ip, count])

def main():

    ip_addresses, endpoints, failed_logins = parse_log_file(LOG_FILE)


    ip_counts = count_requests(ip_addresses)

    # Identify the most frequently accessed endpoint
    endpoint_counts = count_requests(endpoints)
    most_accessed_endpoint = endpoint_counts.most_common(1)[0]

    # Detect suspicious activity
    suspicious_ips = detect_suspicious_activity(failed_logins)

    # Display results
    print("Requests per IP:")
    for ip, count in ip_counts.items():
        print(f"{ip:<20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    print("\nSuspicious Activity Detected:")
    if suspicious_ips:
        for ip, count in suspicious_ips.items():
            print(f"{ip:<20} {count}")
    else:
        print("No suspicious activity detected.")

    # Save results to a CSV file
    save_to_csv(ip_counts, most_accessed_endpoint, suspicious_ips, OUTPUT_FILE)
    print(f"\nResults saved to {OUTPUT_FILE}")

if __name__ == "__main__":
    main()


Requests per IP:
192.168.1.1          7
203.0.113.5          8
10.0.0.2             6
198.51.100.23        8
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv


In [8]:
import pandas as pd
df= pd.read_csv("/content/log_analysis_results.csv")
print(df)

                           Requests per IP
IP Address                   Request Count
192.168.1.1                              7
203.0.113.5                              8
10.0.0.2                                 6
198.51.100.23                            8
192.168.1.100                            5
Most Accessed Endpoint                 NaN
Endpoint                      Access Count
/login                                  13
Suspicious Activity                    NaN
IP Address              Failed Login Count
