Import Libraries (standard library plus Colab's `files` helper; nothing needs to be installed)





In [1]:
import re
import csv
from collections import defaultdict
from google.colab import files

Upload Log File

In [2]:
# Prompt the user for a log file. The code below treats the result as a
# mapping whose keys are the names of the uploaded files.
uploaded = files.upload()

# Fail with a clear message when nothing was uploaded, instead of an opaque
# IndexError/StopIteration on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a log file.")

# Analyse the first uploaded file; its name is whatever the user selected
# (e.g. 'sample.log'), not a fixed value.
LOG_FILE = next(iter(uploaded))
FAILED_LOGIN_THRESHOLD = 10  # An IP is flagged when its failed-login count exceeds this value
OUTPUT_FILE = "log_analysis_results.csv"


Saving sample.log to sample.log


Analysis Script

In [5]:
def parse_log_file(log_file):
    """Read the log file and return its lines as a list.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A list of strings, one per line, each keeping its line ending.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 instead of the platform default, and replace
    # undecodable bytes with U+FFFD: a single stray byte in a request path or
    # user agent should not abort the whole analysis with UnicodeDecodeError.
    with open(log_file, "r", encoding="utf-8", errors="replace") as file:
        return file.readlines()

def count_requests_per_ip(lines):
    """Tally log lines by the first IPv4-looking token found on each line.

    Lines without such a token are skipped. The returned dict is ordered from
    the highest count to the lowest; addresses with equal counts keep the
    order in which they were first seen.
    """
    ipv4_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')  # Match IPv4 addresses
    tally = {}
    for entry in lines:
        found = ipv4_pattern.search(entry)
        if found is None:
            continue
        address = found.group(1)
        tally[address] = tally.get(address, 0) + 1
    # Negating the count gives a descending, still-stable ordering.
    ranked = sorted(tally.items(), key=lambda pair: -pair[1])
    return dict(ranked)

def most_frequent_endpoint(lines):
    """Return the most requested endpoint as an ``(endpoint, count)`` pair.

    An endpoint is the first non-space token after a quoted HTTP method
    (GET, POST, PUT, DELETE, HEAD, OPTIONS or PATCH). When several endpoints
    share the top count, the one seen first wins. If no line contains a
    request, ``(None, 0)`` is returned.
    """
    request_pattern = re.compile(r'"(?:GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH)\s+(\S+)')
    hits = {}
    for entry in lines:
        found = request_pattern.search(entry)
        if found:
            path = found.group(1)
            hits[path] = hits.get(path, 0) + 1

    # Guard clause: nothing matched, so there is no winner to report.
    if not hits:
        return None, 0

    top_path = max(hits, key=hits.get)
    return top_path, hits[top_path]

def detect_suspicious_activity(lines, threshold):
    """Detect IPs with an excessive number of failed login attempts.

    A line counts as a failed login when its HTTP status field is 401 or it
    contains the text 'Invalid credentials'. The status is recognised only as
    the number that follows the closing quote of the request string, so a
    '401' that merely appears inside another field (for example a response
    size of 1401 or 4010) is no longer counted as a failure.

    Args:
        lines: Iterable of log lines.
        threshold: An IP is reported only if its failed-attempt count is
            strictly greater than this value.

    Returns:
        A dict mapping each flagged IP address to its failed-attempt count.
    """
    # Status code = the token right after the quoted request, e.g. ..." 401 128
    failed_status = re.compile(r'"\s+401\b')
    ip_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')

    failed_attempts = defaultdict(int)
    for line in lines:
        if failed_status.search(line) or 'Invalid credentials' in line:
            match = ip_pattern.search(line)
            if match:
                failed_attempts[match.group(1)] += 1
    return {ip: count for ip, count in failed_attempts.items() if count > threshold}

def save_to_csv(ip_requests, most_accessed, suspicious_activity, output_file):
    """Write the three result sections to a single CSV file.

    The sections appear in this order, separated by an empty row: requests
    per IP, the most accessed endpoint, and suspicious activity. Each section
    starts with a title row followed by a column-header row.

    Args:
        ip_requests: Mapping of IP address to request count.
        most_accessed: Sequence whose first two items are the endpoint and
            its access count.
        suspicious_activity: Mapping of IP address to failed login count.
        output_file: Path of the CSV file to create or overwrite.
    """
    with open(output_file, mode='w', newline='') as handle:
        out = csv.writer(handle)

        # Section 1: request totals, one row per address
        out.writerows([["Requests per IP"], ["IP Address", "Request Count"]])
        out.writerows([address, total] for address, total in ip_requests.items())

        # Section 2: the single busiest endpoint (leading [] is the blank separator row)
        out.writerows([[], ["Most Accessed Endpoint"], ["Endpoint", "Access Count"]])
        out.writerow([most_accessed[0], most_accessed[1]])

        # Section 3: addresses flagged for failed logins
        out.writerows([[], ["Suspicious Activity"], ["IP Address", "Failed Login Count"]])
        out.writerows([address, failures] for address, failures in suspicious_activity.items())

def main():
    """Run the whole analysis: read the log, build the reports, print and save them.

    Uses the module-level LOG_FILE, FAILED_LOGIN_THRESHOLD and OUTPUT_FILE
    settings. Results are printed to standard output and also written to
    OUTPUT_FILE as CSV.
    """
    log_lines = parse_log_file(LOG_FILE)

    # Compute the three reports first, then present them.
    requests_by_ip = count_requests_per_ip(log_lines)
    most_accessed = most_frequent_endpoint(log_lines)
    flagged_ips = detect_suspicious_activity(log_lines, FAILED_LOGIN_THRESHOLD)

    # Report 1: request totals, busiest address first
    print("\nRequests per IP:")
    for ip, count in requests_by_ip.items():
        print(f"{ip:<20}{count}")

    # Report 2: the single most requested endpoint
    print(f"\nMost Frequently Accessed Endpoint:\n{most_accessed[0]} (Accessed {most_accessed[1]} times)")

    # Report 3: addresses whose failed logins exceeded the threshold
    print("\nSuspicious Activity Detected:")
    if not flagged_ips:
        print("No suspicious activity detected.")
    else:
        for ip, count in flagged_ips.items():
            print(f"{ip:<20}{count}")

    # Persist the same three reports as CSV
    save_to_csv(requests_by_ip, most_accessed, flagged_ips, OUTPUT_FILE)
    print(f"\nResults saved to {OUTPUT_FILE}")

# Run the full analysis when this cell executes; it prints the reports and writes OUTPUT_FILE.
main()


Requests per IP:
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv


Download Results

* Run this cell after the analysis cell above has finished, so that the CSV file (`log_analysis_results.csv`) exists and can be downloaded.



In [4]:
# Download the CSV report named by OUTPUT_FILE (written by main() in the analysis cell).
files.download(OUTPUT_FILE)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>