# Parse the Log File

In [None]:
import csv
import re
from collections import Counter, defaultdict

# Function to parse the log file and extract required data
def parse_log(file_path):
    """Parse an access log and collect IPs, endpoints and failed logins.

    Each line is examined independently:

    * a dotted-quad IPv4 address at the very start of the line is appended
      to the IP list;
    * the path that follows a quoted upper-case HTTP method
      (e.g. ``"GET /home ``) is appended to the endpoint list;
    * a line containing both `` 401 `` and ``Invalid credentials`` counts as
      one failed login for the IP found on that same line.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_addresses, endpoints, failed_logins)``. The first two
        are lists in file order (one entry per matching line);
        ``failed_logins`` is a ``defaultdict(int)`` mapping IP -> count.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compile once instead of looking the patterns up for every line.
    ip_pattern = re.compile(r"(\d+\.\d+\.\d+\.\d+)")
    endpoint_pattern = re.compile(r'"[A-Z]+\s(\/[^\s]*)\s')

    ip_addresses = []
    endpoints = []
    failed_logins = defaultdict(int)

    # errors='replace' keeps a single undecodable byte (e.g. in a user agent)
    # from aborting the whole analysis; iterating the handle streams the file
    # instead of loading every line into memory first.
    with open(file_path, 'r', errors='replace') as log_file:
        for line in log_file:
            # Extract IP address (must be at the start of the line)
            ip_match = ip_pattern.match(line)
            ip = ip_match.group(1) if ip_match else None
            if ip is not None:
                ip_addresses.append(ip)

            # Extract endpoint
            endpoint_match = endpoint_pattern.search(line)
            if endpoint_match:
                endpoints.append(endpoint_match.group(1))

            # Detect failed login attempts (status 401). Only count the line
            # when it carries its own IP: otherwise the failure would be
            # charged to the previous line's IP (or crash on the first line).
            if ip is not None and ' 401 ' in line and "Invalid credentials" in line:
                failed_logins[ip] += 1

    return ip_addresses, endpoints, failed_logins


# Count Requests per IP Address

---



In [None]:
# Count requests per IP and sort in descending order
def count_requests_per_ip(ip_addresses):
    """Count requests per IP address, most active first.

    Args:
        ip_addresses: Iterable of IP address strings, one entry per request.

    Returns:
        A list of ``(ip, request_count)`` tuples sorted by count in
        descending order; IPs with equal counts keep first-seen order.
    """
    # most_common() without an argument returns every entry ordered from the
    # highest count to the lowest, which is exactly the ranking needed here.
    return Counter(ip_addresses).most_common()


# Find the Most Frequently Accessed Endpoint

In [None]:
# Identify the most accessed endpoint
def find_most_accessed_endpoint(endpoints):
    """Return the endpoint that appears most often together with its count.

    Args:
        endpoints: Iterable of endpoint path strings, one entry per request.

    Returns:
        An ``(endpoint, access_count)`` tuple. When several endpoints share
        the highest count, the one seen first wins. For empty input the
        placeholder ``(None, 0)`` is returned so callers can still index the
        result instead of hitting ``ValueError`` from ``max()``.
    """
    endpoint_count = Counter(endpoints)
    if not endpoint_count:
        # max() on an empty sequence raises; report "nothing accessed" instead.
        return (None, 0)
    return max(endpoint_count.items(), key=lambda item: item[1])


# Detect Suspicious Activity

In [None]:
def display_suspicious_activity(failed_logins, threshold=2):
    """Print every IP whose failed-login count is strictly above *threshold*.

    Args:
        failed_logins: Mapping of IP address -> number of failed logins.
        threshold: A count must exceed this value to be reported.

    A two-line header is always printed; when no IP exceeds the threshold a
    fallback message follows it. Nothing is returned.
    """
    print("\nSuspicious Activity Detected:")
    print(f"{'IP Address':<20}{'Failed Login Attempts'}")

    # Collect the offenders first so the "nothing found" case is a simple
    # emptiness check rather than a flag toggled inside the loop.
    flagged = [
        (address, attempts)
        for address, attempts in failed_logins.items()
        if attempts > threshold
    ]

    if not flagged:
        print("No suspicious activity detected.")
        return

    for address, attempts in flagged:
        print(f"{address:<20}{attempts}")


# Save Results to a CSV File

In [None]:
# Save the results to a CSV file
def save_to_csv(ip_data, endpoint_data, suspicious_data, output_file):
    """Write the three analysis sections to a single CSV file.

    The file contains, separated by blank rows:

    1. ``IP Address, Request Count`` followed by one row per entry of
       *ip_data*;
    2. ``Endpoint, Access Count`` followed by *endpoint_data* as one row;
    3. ``IP Address, Failed Login Count`` followed by one row per item of
       *suspicious_data*.

    Args:
        ip_data: Iterable of ``(ip, request_count)`` rows.
        endpoint_data: A single ``(endpoint, access_count)`` row.
        suspicious_data: Mapping of IP address -> failed login count.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # newline='' lets the csv module control line endings; the explicit UTF-8
    # encoding keeps non-ASCII endpoint paths from failing under a
    # non-UTF-8 default locale.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Write IP request count
        writer.writerow(["IP Address", "Request Count"])
        writer.writerows(ip_data)

        # Write most accessed endpoint (an empty row separates the sections)
        writer.writerow([])
        writer.writerow(["Endpoint", "Access Count"])
        writer.writerow(endpoint_data)

        # Write suspicious activity
        writer.writerow([])
        writer.writerow(["IP Address", "Failed Login Count"])
        writer.writerows(suspicious_data.items())


# Main Function to Integrate All Components

In [None]:
def main():
    """Run the full log analysis: parse, summarize, print and export to CSV.

    Reads ``sample.log`` and writes ``log_analysis_results.csv``; both paths
    are relative to the current working directory.
    """
    # File paths
    log_file_path = "sample.log"
    output_file_path = "log_analysis_results.csv"
    # An IP is flagged once its failed logins exceed this count.
    failed_login_threshold = 2

    # Parse the log file
    ip_addresses, endpoints, failed_logins = parse_log(log_file_path)

    # Analyze the log data
    ip_data = count_requests_per_ip(ip_addresses)
    most_accessed_endpoint = find_most_accessed_endpoint(endpoints)
    # No detect_suspicious_activity() helper exists in this file, so the
    # filtering is done here, using the same strict "count > threshold" rule
    # as display_suspicious_activity().
    suspicious_activity = {
        ip: count
        for ip, count in failed_logins.items()
        if count > failed_login_threshold
    }

    # Display results
    print("IP Address           Request Count")
    for ip, count in ip_data:
        print(f"{ip:<20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    print("\nSuspicious Activity Detected:")
    print("IP Address           Failed Login Attempts")

    for ip, count in suspicious_activity.items():
        print(f"{ip:<20} {count}")

    # Save results to CSV
    save_to_csv(ip_data, most_accessed_endpoint, suspicious_activity, output_file_path)
    # Report the path actually used rather than a second hard-coded copy.
    print(f"\nResults saved to {output_file_path}")

# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


IP Address           Resquest Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8
192.168.1.100        5

Results saved to log_analysis_results.csv
