In [16]:
# This is output file where all the outputs will be written in the requested format
OUTPUT_FILE = "log_analysis_results.csv"


In [12]:
import re
from collections import Counter

# Using a regular Expression to extract all the IPs present in the file
ip_pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'

# Maintaining a counter to keep track of their occurences
ip_counts = Counter()

with open("Sample.log", "r") as file:
    for line in file:
        # Find all IP addresses in the line
        ips = re.findall(ip_pattern, line)
        # Update the counter with found IPs
        ip_counts.update(ips)

# Sorting to display in Descending Order
sorted_ips = sorted(ip_counts.items(), reverse=True)

for ip, count in sorted_ips:
    print(f"{ip}: {count}")


203.0.113.5: 8
198.51.100.23: 8
192.168.1.100: 5
192.168.1.1: 7
10.0.0.2: 6


In [13]:
# Regular Exp to extract endpoints from the file
endpoint_pattern = r'"(GET|POST|PUT|DELETE|PATCH)\s+(\S+)\s+HTTP/\d\.\d"'

# Counter to keep track of the most frequent endpoint
endpoint_counts = Counter()

with open("Sample.log", "r") as file:
    for line in file:
        # Searching for endpoints using the above String RE
        match = re.search(endpoint_pattern, line)
        if match:
            endpoint = match.group(2)  # Extract the URL/resource path
            endpoint_counts[endpoint] += 1

# Find the most frequently accessed endpoint
most_frequent_endpoint = max(endpoint_counts.items(), key=lambda x: x[1])

# Display the results
print("Most Frequently Accessed Endpoint:")
print(f"Endpoint: {most_frequent_endpoint[0]}")
print(f"Access Count: {most_frequent_endpoint[1]}")

Most Frequently Accessed Endpoint:
Endpoint: /login
Access Count: 13


In [14]:
FAILED_LOGIN_THRESHOLD = 5  # Set the threshold for suspicious activity
                    # The threshold can be altered based on our requirements

# Patterns to identify failed login attempts and extract IP addresses
failed_login_pattern = r'"(?:GET|POST|PUT|DELETE|PATCH)\s+\S+\s+HTTP/\d\.\d"\s+401'  # HTTP 401 status code
failed_logins = Counter()

# Read the log file and detect failed logins
with open("Sample.log", "r") as file:
    for line in file:
        if re.search(failed_login_pattern, line):
            ip_match = re.search(ip_pattern, line)
            if ip_match:
                ip = ip_match.group()  # Extract the IP address
                failed_logins[ip] += 1

# Filter IPs exceeding the failed login threshold
suspicious_ips = {ip: count for ip, count in failed_logins.items() if count > FAILED_LOGIN_THRESHOLD}

# Display the results
if suspicious_ips:
    print("Suspicious Activity Detected:")
    print(f"{'IP Address':<20} {'Failed Login Attempts':<5}")
    print("-" * 30)
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20} {count:<5}")
else:
    print("No suspicious activity detected.")

Suspicious Activity Detected:
IP Address           Failed Login Attempts
------------------------------
203.0.113.5          8    


In [21]:
import csv
with open(OUTPUT_FILE, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)

    # Write Requests per IP
    csv_writer.writerow(["Requests per IP"])
    csv_writer.writerow(["IP Address", "Request Count"])
    for ip, count in ip_counts.items():
        csv_writer.writerow([ip, count])

    # Write Most Accessed Endpoint
    csv_writer.writerow([])
    csv_writer.writerow(["Most Accessed Endpoint"])
    csv_writer.writerow(["Endpoint", "Access Count"])
    csv_writer.writerow([most_frequent_endpoint[0],most_frequent_endpoint[1]])

    # Write Suspicious Activity
    csv_writer.writerow([])
    csv_writer.writerow(["Suspicious Activity"])
    csv_writer.writerow(["IP Address", "Failed Login Count"])
    for ip, count in suspicious_ips.items():
        csv_writer.writerow([ip, count])


In [22]:
from google.colab import files  # For Google Colab

# Ensure the file is created by running the main script first
files.download("log_analysis_results.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>