In [None]:
from google.colab import files

# Upload the log file through Colab's file-upload helper.
uploaded = files.upload()

# `uploaded` is a dict keyed by the name each file was saved under (Colab may
# rename a duplicate, e.g. "sample.log (6).txt"). If nothing was uploaded
# (for instance the dialog was dismissed) the dict is empty, so stop with a clear
# message instead of an opaque IndexError from indexing an empty list.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a log file.")

# Only the first uploaded file is analysed.
LOG_FILE = next(iter(uploaded))
print(f"Log file uploaded: {LOG_FILE}")


Saving sample.log.txt to sample.log (6).txt
Log file uploaded: sample.log (6).txt


In [None]:
import re
import csv
from collections import defaultdict

# Configurable parameters
OUTPUT_FILE = "log_analysis_results.csv"  # path of the CSV report
FAILED_LOGIN_THRESHOLD = 10  # failed-login count an IP must exceed to be flagged

# Per-run tallies; re-created whenever this cell executes, so re-running the
# cell never double-counts.
ip_request_counts = defaultdict(int)       # ip -> number of parsed requests
endpoint_access_counts = defaultdict(int)  # endpoint -> number of parsed requests
failed_login_attempts = defaultdict(int)   # ip -> number of 401 responses

# Regex for one access-log line, e.g.
#   192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512
#
# * The timestamp is matched with [^\]]* rather than a greedy .* so the match
#   cannot run past the first closing bracket. With .* a later quoted field
#   (referrer / user-agent) containing `] "GET /x HTTP/1.1" 200` would replace
#   the real request's endpoint and status.
# * Any upper-case HTTP method and any HTTP/<major>[.<minor>] version is accepted,
#   so PUT/DELETE/HEAD or HTTP/2.0 requests are counted instead of dropped.
# * The version dot is matched literally (it used to be an unescaped `.`).
log_pattern = re.compile(
    r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[[^\]]*\] '
    r'"(?P<method>[A-Z]+) (?P<endpoint>\S+) HTTP/\d+(?:\.\d+)?" (?P<status>\d+)'
)

# Process the log file line by line, tallying every line the pattern recognises.
# The encoding is pinned and undecodable bytes are replaced, so a single stray
# byte in a request path cannot abort the whole analysis with UnicodeDecodeError
# and the result does not depend on the platform's default encoding.
with open(LOG_FILE, "r", encoding="utf-8", errors="replace") as log:
    for line in log:
        match = log_pattern.search(line)
        if match is None:
            continue  # line is not in the expected access-log format; skip it

        ip = match.group("ip")
        endpoint = match.group("endpoint")
        status = int(match.group("status"))

        # One request seen from this IP and for this endpoint
        ip_request_counts[ip] += 1
        endpoint_access_counts[endpoint] += 1

        # A 401 response is counted as a failed login attempt for the IP
        if status == 401:
            failed_login_attempts[ip] += 1

# Determine the most frequently accessed endpoint as an (endpoint, count) pair.
# `default` covers a log in which no line matched the pattern: without it max()
# raises ValueError on the empty mapping and the cell stops before any report is
# produced. The placeholder keeps the pair shape the report code indexes into.
# On ties max() returns the first maximal item, i.e. the endpoint seen first.
most_accessed_endpoint = max(
    endpoint_access_counts.items(),
    key=lambda item: item[1],
    default=("N/A", 0),
)

# Filter suspicious activity: keep only IPs whose failed-login count is strictly
# greater than the configured threshold.
suspicious_ips = {
    ip: count
    for ip, count in failed_login_attempts.items()
    if count > FAILED_LOGIN_THRESHOLD
}

# Console report, in three sections.

# Section 1: every client IP, busiest first. sorted() is stable, so IPs with the
# same count stay in the order they were first seen.
print("\nIP Address Request Count:")
print("IP Address           Request Count")
requests_by_ip = sorted(
    ip_request_counts.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
for ip, count in requests_by_ip:
    print(f"{ip:<20} {count}")

# Section 2: the single most requested endpoint and its hit count.
top_endpoint, top_hits = most_accessed_endpoint
print("\nMost Frequently Accessed Endpoint:")
print(f"{top_endpoint} (Accessed {top_hits} times)")

# Section 3: IPs flagged for failed logins; only the header is printed when
# nothing was flagged.
print("\nSuspicious Activity Detected:")
print("IP Address           Failed Login Attempts")
for ip, count in suspicious_ips.items():
    print(f"{ip:<20} {count}")

# Save results to a CSV file: three titled sections, each with its own header
# row, separated by blank rows. The rows are assembled first and written in one
# writerows() call.
ip_rows = sorted(ip_request_counts.items(), key=lambda pair: pair[1], reverse=True)
report_rows = [
    # Requests per IP, busiest first
    ["Requests per IP"],
    ["IP Address", "Request Count"],
    *ip_rows,
    # Most accessed endpoint
    [],
    ["Most Accessed Endpoint"],
    ["Endpoint", "Access Count"],
    most_accessed_endpoint,
    # Suspicious activity
    [],
    ["Suspicious Activity"],
    ["IP Address", "Failed Login Count"],
    *suspicious_ips.items(),
]

# newline="" lets the csv module control line endings itself.
with open(OUTPUT_FILE, "w", newline="") as csvfile:
    csv.writer(csvfile).writerows(report_rows)

print(f"\nResults saved to {OUTPUT_FILE}")



IP Address Request Count:
IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts

Results saved to log_analysis_results.csv


In [None]:
from google.colab import files

# Download the CSV report through Colab's file helper.
# OUTPUT_FILE is the path set in the analysis cell's configuration, so that cell
# has to run first (in a fresh kernel the name would otherwise be undefined).
files.download(OUTPUT_FILE)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>