In [29]:
# Importing required libraries
import pandas as pd
import re

# Read the whole log file into a single string.
# The encoding is pinned so the result does not depend on the platform's
# default locale encoding, and undecodable bytes are replaced instead of
# aborting the whole analysis on one malformed line.
with open("Sample.log", "r", encoding="utf-8", errors="replace") as file:
    sample_log = file.read()

# Extracting necessary information from log data using regular expressions.
#
# Every request line is parsed with ONE pattern so that the IP address,
# endpoint and status code of a record always come from the same line.
# Three independent findall() calls can return lists of different lengths
# (or lists that are out of step) as soon as a line matches only some of the
# patterns -- e.g. a request for "/" or a line carrying an authenticated user
# instead of "-" -- which then breaks or corrupts the DataFrame built from them.
#
# NOTE(review): the pattern assumes Common Log Format style lines
# (host ident authuser ... "METHOD target protocol" status ...) -- confirm
# against the real log file. Lines that do not fit are skipped as a whole.
LOG_LINE_PATTERN = re.compile(
    r'^(\S+) \S+ \S+ '   # client address, then the ident and authuser fields
    r'.*?"[^"\s]+ '      # anything up to the opening quote, then the HTTP method
    r'([^"\s]+)'         # requested endpoint
    r'[^"\n]*" '         # rest of the request line up to its closing quote
    r'(\d+)',            # status code
    re.MULTILINE,
)

# One (ip, endpoint, status) tuple per successfully parsed line.
log_records = LOG_LINE_PATTERN.findall(sample_log)

IP_Address = [record[0] for record in log_records]  # Extract IP addresses
endpoint = [record[1] for record in log_records]  # Extract endpoints
status_code = [record[2] for record in log_records]  # Extract status codes (as strings)

# Build the "message" column: one label per extracted status code.
# Only the string "200" is labelled "Valid"; every other status code
# (whatever it is) receives the label "Invalid Credentials".
message = [
    "Valid" if status == "200" else "Invalid Credentials"
    for status in status_code
]

# Assemble the extracted fields into one table, one row per log entry.
# pandas requires all columns to have the same length and raises ValueError
# otherwise.
column_names = ("IP address", "endpoint", "status_code", "message")
column_values = (IP_Address, endpoint, status_code, message)
Data = pd.DataFrame(dict(zip(column_names, column_values)))

# Requests per IP address, busiest client first
# (value_counts() sorts in descending order by default).
requests_by_ip = Data["IP address"].value_counts()
IP_counts = requests_by_ip.rename_axis("IP address").reset_index(name="Request count")
print(IP_counts, "\n")

# Analyzing most frequently accessed endpoint.
# value_counts() sorts descending, so the first row is the most requested endpoint.
endpoint_access_count = Data["endpoint"].value_counts(ascending=False).reset_index()
endpoint_access_count.columns = ["Endpoint", "Access Count"]
if endpoint_access_count.empty:
    # No request lines were parsed, so there is no first row to report;
    # indexing row 0 here would raise instead of printing a result.
    print("Most Frequently Accessed Endpoint:\nNone (no requests found)\n")
else:
    # Positional access: take the first row regardless of index labels.
    top_endpoint = endpoint_access_count.iloc[0]
    print(f"Most Frequently Accessed Endpoint:\n{top_endpoint['Endpoint']} (Accessed {top_endpoint['Access Count']} times)\n")

# Identifying suspicious activity (failed login attempts).
# Rows whose status code is the string "401" are counted per IP address,
# highest count first.
failed_attempts = Data[Data["status_code"] == "401"]["IP address"].value_counts(ascending=False).reset_index()
failed_attempts.columns = ["IP address", "Failed Login Count"]
# Print the heading and the table separately: passing both to a single
# print() inserts the separator space right after "\n", which shifts the
# table's header row one column to the right of its data rows.
print("Suspicious Activity Detected:")
print(failed_attempts)

# Write each analysis table to its own sheet of one Excel workbook.
# The mapping keeps the sheets in the order they are listed here.
sheets = {
    "Requests per IP": IP_counts,
    "Most Accessed Endpoint": endpoint_access_count,
    "Suspicious Activity": failed_attempts,
}
with pd.ExcelWriter("log_analysis_results.xlsx") as writer:
    for sheet_name, frame in sheets.items():
        # index=False: the default integer row index is not exported.
        frame.to_excel(writer, sheet_name=sheet_name, index=False)


      IP address  Request count
0    203.0.113.5              8
1  198.51.100.23              8
2    192.168.1.1              7
3       10.0.0.2              6
4  192.168.1.100              5 

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
       IP address  Failed Login Count
0    203.0.113.5                   8
1  192.168.1.100                   5
