<a href="https://colab.research.google.com/github/swetha-sundarams/LOG_ANALYSIS_SCRIPT_USING_PYTHON/blob/main/Log_Analysis_Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from collections import Counter
import re
import csv

def count_requests_by_ip(log_file_path):
    """Count requests per IP address in a log file and print a summary table.

    The first IPv4-looking token on each line is taken as that line's IP
    address; lines without one are ignored.

    Args:
        log_file_path: Path of the log file to analyse.

    Returns:
        A list of ``(ip, count)`` tuples sorted by count in descending order
        (ties keep first-seen order). An empty list is returned when the file
        cannot be read, so callers can always iterate over the result.
    """
    # Compiled once, outside the per-line loop.
    ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')

    ip_counts = Counter()
    try:
        # Stream the file line by line instead of loading it all into memory.
        with open(log_file_path, 'r') as file:
            for line in file:
                match = ip_pattern.search(line)
                if match:
                    ip_counts[match.group()] += 1
    except FileNotFoundError:
        print(f"Error: The file '{log_file_path}' was not found.")
        return []
    except Exception as e:
        # Best-effort reporting: any other read/decoding problem is printed
        # and treated as "no data" rather than propagated.
        print(f"An error occurred: {e}")
        return []

    # most_common() with no argument sorts every entry by descending count.
    sorted_ip_counts = ip_counts.most_common()

    # Display results
    print(f"{'IP Address':<20} {'Request Count':<15}")
    print("-" * 35)
    for ip, count in sorted_ip_counts:
        print(f"{ip:<20} {count:<15}")

    return sorted_ip_counts

def most_frequent_endpoint(log_file_path):
    """Find and print the most frequently requested endpoint in a log file.

    An endpoint is the path that follows an upper-case HTTP method inside the
    quoted request string (e.g. ``"GET /home ...``). Only the first such
    occurrence on each line is counted.

    Args:
        log_file_path: Path of the log file to analyse.

    Returns:
        A ``(endpoint, access_count)`` tuple. ``(None, 0)`` is returned when
        no endpoint is found or when the file cannot be read, so callers can
        always index the result.
    """
    # Compiled once, outside the per-line loop.
    endpoint_pattern = re.compile(r'"[A-Z]+\s(/[^\s]*)')

    endpoint_counts = Counter()
    try:
        # Stream the file line by line instead of loading it all into memory.
        with open(log_file_path, 'r') as file:
            for line in file:
                match = endpoint_pattern.search(line)
                if match:
                    endpoint_counts[match.group(1)] += 1
    except FileNotFoundError:
        print(f"Error: The file '{log_file_path}' was not found.")
        return None, 0
    except Exception as e:
        # Best-effort reporting: any other read/decoding problem is printed
        # and treated as "no data" rather than propagated.
        print(f"An error occurred: {e}")
        return None, 0

    if not endpoint_counts:
        print("No endpoints found in the log file.")
        return None, 0

    most_common_endpoint, access_count = endpoint_counts.most_common(1)[0]
    print("Most Frequently Accessed Endpoint:")
    print(f"{most_common_endpoint} (Accessed {access_count} times)")
    return most_common_endpoint, access_count

def detect_suspicious_activity(log_file_path, threshold=10):
    """Report IP addresses with more failed login attempts than a threshold.

    A line counts as a failed login when its first IPv4-looking token is
    followed, later on the same line, by a standalone ``401`` or by the text
    ``Invalid credentials``.

    Args:
        log_file_path: Path of the log file to analyse.
        threshold: An IP is reported only when its number of failed attempts
            is strictly greater than this value.

    Returns:
        A dict mapping each suspicious IP to its failed-attempt count. An
        empty dict is returned when nothing exceeds the threshold or when the
        file cannot be read, so callers can always call ``.items()`` on it.
    """
    # The IP is captured directly so each line is scanned only once.
    # ``\b401\b`` keeps the status code from matching inside longer numbers
    # (for example a response size of 4012).
    failed_login_pattern = re.compile(
        r'\b((?:\d{1,3}\.){3}\d{1,3})\b.*(?:\b401\b|Invalid credentials)'
    )

    failed_counts = Counter()
    try:
        # Stream the file line by line instead of loading it all into memory.
        with open(log_file_path, 'r') as file:
            for line in file:
                match = failed_login_pattern.search(line)
                if match:
                    failed_counts[match.group(1)] += 1
    except FileNotFoundError:
        print(f"Error: The file '{log_file_path}' was not found.")
        return {}
    except Exception as e:
        # Best-effort reporting: any other read/decoding problem is printed
        # and treated as "no data" rather than propagated.
        print(f"An error occurred: {e}")
        return {}

    # Keep only the IPs that strictly exceed the threshold.
    suspicious_ips = {
        ip: count for ip, count in failed_counts.items() if count > threshold
    }

    if not suspicious_ips:
        print("No suspicious activity detected.")
        return {}

    print("Suspicious Activity Detected:")
    print(f"{'IP Address':<20} {'Failed Login Attempts':<15}")
    print("-" * 40)
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20} {count:<15}")
    return suspicious_ips

def save_results_to_csv(requests_by_ip, most_accessed_endpoint, suspicious_activity, output_file="log_analysis_results.csv"):
    """Write the three analysis results to a single CSV file.

    The file has three sections separated by blank rows: requests per IP,
    the most accessed endpoint, and suspicious activity. Each section has a
    title row and a column-header row.

    Args:
        requests_by_ip: Iterable of ``(ip, count)`` pairs, or ``None``.
        most_accessed_endpoint: ``(endpoint, access_count)`` pair, or
            ``None``. No data row is written when the endpoint is falsy.
        suspicious_activity: Mapping of IP address to failed-login count, or
            ``None``.
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        None. Success or failure is reported with a printed message.
    """
    try:
        # Treat a missing result (None) as an empty section so one failed
        # analysis does not abort the whole export.
        ip_rows = requests_by_ip if requests_by_ip else []
        endpoint_result = most_accessed_endpoint if most_accessed_endpoint else (None, 0)
        suspicious = suspicious_activity if suspicious_activity else {}

        # Build every row before opening the file, so malformed input cannot
        # leave a half-written CSV behind.
        rows = [["Requests per IP"], ["IP Address", "Request Count"]]
        rows.extend([ip, count] for ip, count in ip_rows)
        rows.append([])  # Blank separator row

        rows.append(["Most Accessed Endpoint"])
        rows.append(["Endpoint", "Access Count"])
        if endpoint_result[0]:
            rows.append([endpoint_result[0], endpoint_result[1]])
        rows.append([])  # Blank separator row

        rows.append(["Suspicious Activity"])
        rows.append(["IP Address", "Failed Login Count"])
        rows.extend([ip, count] for ip, count in suspicious.items())

        # newline='' lets the csv module control line endings itself.
        with open(output_file, mode='w', newline='') as csvfile:
            csv.writer(csvfile).writerows(rows)

        print(f"Results saved to {output_file}")

    except Exception as e:
        print(f"An error occurred while saving to CSV: {e}")

# Example usage: run the three analyses on one log file, then export them.
# The relative path is resolved against the current working directory.
log_file_path = 'example.log'
# Each analysis prints its own report and returns its result for the export.
requests_by_ip = count_requests_by_ip(log_file_path)
most_accessed_endpoint = most_frequent_endpoint(log_file_path)
# Uses the default threshold of 10 failed attempts.
suspicious_activity = detect_suspicious_activity(log_file_path)
# Writes to the default output file, log_analysis_results.csv.
save_results_to_csv(requests_by_ip, most_accessed_endpoint, suspicious_activity)


IP Address           Request Count  
-----------------------------------
203.0.113.5          8              
198.51.100.23        8              
192.168.1.1          7              
10.0.0.2             6              
192.168.1.100        5              
Most Frequently Accessed Endpoint:
/login (Accessed 13 times)
No suspicious activity detected.
Results saved to log_analysis_results.csv
