In [2]:
from google.colab import files

# Prompt for a file upload through Colab's `files` helper and keep the call's
# return value in `uploaded`. The analysis below reads 'sample.log', so the
# uploaded file needs to have that name.
uploaded = files.upload()


Saving sample.log to sample.log


In [3]:
import re

from collections import Counter

# File path to the log file
log_file = 'sample.log'

# Pattern for one access-log line: group 1 is the client IP, group 2 the
# requested endpoint. Any upper-case HTTP method and any HTTP version are
# accepted, so PUT/DELETE/HEAD or HTTP/2 requests are not silently left out
# of the totals.
log_pattern = r'(\d+\.\d+\.\d+\.\d+) - - \[\S+ \S+\] "[A-Z]+ (\S+) HTTP/\d+(?:\.\d+)?" \d+'
log_regex = re.compile(log_pattern)  # compiled once, reused for every line

# Request totals keyed by client IP and by endpoint. Counter is a dict
# subclass, so code that treats these as plain dicts keeps working.
ip_request_counts = Counter()
endpoint_counts = Counter()

# errors='replace' keeps a stray undecodable byte in the log from aborting
# the whole run; the affected character is substituted instead.
with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
    for line in file:
        match = log_regex.search(line)
        if match is None:
            # Lines that do not look like an access-log entry are skipped.
            continue

        ip_address, endpoint = match.groups()
        ip_request_counts[ip_address] += 1
        endpoint_counts[endpoint] += 1

# Display results so far (in first-seen order)
print("IP Address Request Counts:")
for ip, count in ip_request_counts.items():
    print(f"{ip}: {count} requests")

print("\nEndpoint Access Counts:")
for endpoint, count in endpoint_counts.items():
    print(f"{endpoint}: {count} accesses")


IP Address Request Counts:
192.168.1.1: 7 requests
203.0.113.5: 8 requests
10.0.0.2: 6 requests
198.51.100.23: 8 requests
192.168.1.100: 5 requests

Endpoint Access Counts:
/home: 5 accesses
/login: 13 accesses
/about: 5 accesses
/contact: 2 accesses
/register: 2 accesses
/dashboard: 3 accesses
/profile: 2 accesses
/feedback: 2 accesses


In [4]:
# Rank the per-IP totals from busiest to quietest. list.sort is stable, so
# IPs with equal totals keep the relative order they have in ip_request_counts.
sorted_ip_requests = list(ip_request_counts.items())
sorted_ip_requests.sort(key=lambda entry: entry[1], reverse=True)

print("\nIP Address Request Counts (Sorted):")
for address, total in sorted_ip_requests:
    print(f"{address}: {total} requests")



IP Address Request Counts (Sorted):
203.0.113.5: 8 requests
198.51.100.23: 8 requests
192.168.1.1: 7 requests
10.0.0.2: 6 requests
192.168.1.100: 5 requests


In [5]:
# Pick the endpoint with the highest access count and keep it as an
# (endpoint, count) pair. On a tie, the endpoint that comes first in
# endpoint_counts wins.
top_endpoint = max(endpoint_counts, key=endpoint_counts.get)
most_accessed_endpoint = (top_endpoint, endpoint_counts[top_endpoint])

top_hits = most_accessed_endpoint[1]
print(f"\nMost Frequently Accessed Endpoint: {top_endpoint} (Accessed {top_hits} times)")



Most Frequently Accessed Endpoint: /login (Accessed 13 times)


In [15]:
import re

from collections import Counter

# An IP is reported only when its failed-login count is strictly greater
# than this value. It is set to 3 for this run; raise it (for example to 10)
# to report only heavier offenders.
threshold = 3

# A failed login is a POST to /login answered with status 401. Group 1
# captures the client IP. Only the status code is checked; the response
# message text is not inspected. Any HTTP version is accepted, and the
# trailing \b keeps "401" from matching the start of a longer number.
failed_login_pattern = r'(\d+\.\d+\.\d+\.\d+) - - \[\S+ \S+\] "POST /login HTTP/\d+(?:\.\d+)?" 401\b'
failed_login_regex = re.compile(failed_login_pattern)  # compiled once, reused per line

# Failed login attempts keyed by client IP. Counter is a dict subclass, so
# code that treats it as a plain dict keeps working.
failed_login_attempts = Counter()

# Re-read the log file to detect failed login attempts.
# errors='replace' keeps a stray undecodable byte from aborting the scan.
with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
    for line in file:
        match = failed_login_regex.search(line)
        if match:
            # Take the IP from the same match that identified the failed
            # login, rather than searching the line a second time for the
            # first IP-looking token.
            ip_address = match.group(1)
            failed_login_attempts[ip_address] += 1

# Display suspicious activity: IP addresses with failed login attempts exceeding the threshold
print("\nSuspicious Activity Detected:")
print("IP Address           Failed Login Attempts")
for ip, count in failed_login_attempts.items():
    if count > threshold:
        print(f"{ip:20} {count}")



Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8
192.168.1.100        5


In [16]:
import csv

# Export the three result sections to a single CSV file, with an empty row
# between sections.
with open('log_analysis_results.csv', 'w', newline='') as report:
    out = csv.writer(report)

    # Section 1: per-IP request totals, in their sorted order
    out.writerow(['IP Address', 'Request Count'])
    out.writerows([ip, count] for ip, count in sorted_ip_requests)

    # Section 2: the single most accessed endpoint
    out.writerows([
        [],
        ['Most Accessed Endpoint', 'Access Count'],
        [most_accessed_endpoint[0], most_accessed_endpoint[1]],
    ])

    # Section 3: IPs whose failed-login count is above the threshold
    out.writerows([
        [],
        ['IP Address', 'Failed Login Count'],
    ])
    out.writerows(
        [ip, count]
        for ip, count in failed_login_attempts.items()
        if count > threshold
    )

print("\nResults saved to 'log_analysis_results.csv'")



Results saved to 'log_analysis_results.csv'


In [17]:
# Hand the CSV report 'log_analysis_results.csv' to Colab's `files` helper for download.

files.download('log_analysis_results.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>