In [1]:
# Importing the necessary libraries for the task

In [2]:
import os
import re
from collections import Counter

import pandas as pd


In [3]:
# A function to parse logs and extract key details like IP, endpoint, and status code

In [4]:
def parse_logs(file_path):
    """Parse an access log and extract the IP, endpoint and status of each request.

    Each line is expected to look like::

        192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512

    Lines that do not match this layout are skipped.

    Args:
        file_path: Path of the log file to read.

    Returns:
        list[dict[str, str]]: One dict per matched line with the keys
        ``'ip'``, ``'endpoint'`` and ``'status'`` (all values are strings).
        If the file cannot be opened or read, an error message is printed
        and the entries collected so far (possibly none) are returned.
    """
    # Compiled once, outside the loop. The pattern accepts any upper-case
    # HTTP method and any HTTP version (e.g. HTTP/1.0, HTTP/2.0), not only
    # GET/POST over HTTP/1.1. The lazy `.*?` makes the timestamp stop at the
    # first `] "` instead of the last one on the line.
    line_pattern = re.compile(
        r'(?P<ip>[\d.]+) - - \[.*?\] "[A-Z]+ (?P<endpoint>[^ ]*) '
        r'HTTP/\d+(?:\.\d+)?" (?P<status>\d+)\b'
    )
    logs = []
    try:
        # Explicit encoding so parsing does not depend on the platform
        # default; undecodable bytes are replaced rather than aborting the
        # whole read.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                match = line_pattern.match(line)
                if match:
                    logs.append(match.groupdict())  # Save matched groups
    except OSError as e:
        # Best effort: a missing or unreadable file is reported, not raised.
        print(f"Error while reading the file: {e}")
    return logs



In [5]:
# A function to count and rank requests by IP address

In [6]:
def analyze_requests_per_ip(logs):
    """Count requests per IP address and rank them from most to least active.

    Args:
        logs: Iterable of dicts, each holding the client address under the
            key ``'ip'``.

    Returns:
        pandas.DataFrame: Columns ``'IP Address'`` and ``'Request Count'``,
        sorted by ``'Request Count'`` in descending order. IPs with equal
        counts keep the order in which they first appear in ``logs``. The
        frame keeps the index assigned before sorting. If the input cannot
        be processed, an error message is printed and an empty frame with
        the same two columns is returned.
    """
    columns = ['IP Address', 'Request Count']
    try:
        ip_counts = Counter(log['ip'] for log in logs)
        # A stable sort keeps tied IPs in first-seen order, so the ranking
        # is reproducible regardless of input size.
        return pd.DataFrame(
            list(ip_counts.items()), columns=columns
        ).sort_values(by='Request Count', ascending=False, kind='mergesort')
    except Exception as e:
        print(f"Error analyzing requests per IP: {e}")
        # Same schema as the success path so callers can rely on the columns.
        return pd.DataFrame(columns=columns)


In [7]:
# Find the most frequently accessed endpoint

In [8]:
def most_accessed_endpoint(logs):
    """Return the endpoint requested most often together with its hit count.

    Args:
        logs: Iterable of dicts, each holding the requested path under the
            key ``'endpoint'``.

    Returns:
        tuple: ``(endpoint, count)`` for the most frequent endpoint, or
        ``("N/A", 0)`` when ``logs`` yields no entries.
    """
    tally = Counter(entry['endpoint'] for entry in logs)

    # Guard clause: nothing was counted, so fall back to the placeholder.
    if not tally:
        return "N/A", 0

    (top_endpoint, hits), = tally.most_common(1)
    return top_endpoint, hits


In [9]:
# Detect Suspicious Activity (Failed Login Attempts)

In [10]:
def detect_suspicious_activity(logs, threshold=1, failed_status='401'):
    """Flag IP addresses with more failed requests than ``threshold``.

    Args:
        logs: Iterable of dicts, each holding the keys ``'ip'`` and
            ``'status'``.
        threshold: An IP is reported only when its number of failed requests
            is strictly greater than this value. Defaults to 1.
        failed_status: Status code that counts as a failed login. Compared
            as text, so ``401`` and ``'401'`` are treated the same.
            Defaults to ``'401'``.

    Returns:
        pandas.DataFrame: Columns ``'IP Address'`` and
        ``'Failed Login Count'``, one row per flagged IP in first-seen
        order. If the input cannot be processed, an error message is printed
        and an empty frame with the same two columns is returned.
    """
    columns = ['IP Address', 'Failed Login Count']
    wanted_status = str(failed_status)
    try:
        # Normalise the status to text so integer status values also match.
        failed_logins = Counter(
            log['ip'] for log in logs if str(log['status']) == wanted_status
        )
        # Filter for IPs that exceed the threshold
        suspicious_ips = [
            (ip, count) for ip, count in failed_logins.items() if count > threshold
        ]
        return pd.DataFrame(suspicious_ips, columns=columns)
    except Exception as e:
        print(f"Error detecting suspicious activity: {e}")
        # Same schema as the success path so callers can rely on the columns.
        return pd.DataFrame(columns=columns)


In [11]:
# Main script: parse the log file, run each analysis, and save the results to CSV files

In [12]:
if __name__ == "__main__":
    # Log file to analyse. Set the LOG_FILE_PATH environment variable to point
    # at a different file; otherwise the original local sample path is used.
    log_file_path = os.environ.get(
        "LOG_FILE_PATH",
        r"C:\Users\md yasirn\OneDrive\Desktop\InternsElite\VRV_Project\sample.log",
    )

    # Step 1: Parse logs
    print("Reading and parsing the logs...")
    logs = parse_logs(log_file_path)
    if not logs:
        # Nothing was parsed; say so, because the steps below would
        # otherwise report and save empty results without any hint.
        print(f"Warning: no log entries were parsed from {log_file_path}")

    # Step 2: Analyze requests per IP
    print("Analyzing the number of requests per IP...")
    requests_per_ip = analyze_requests_per_ip(logs)
    print("\nRequests Per IP Address:")
    print(requests_per_ip)

    # Step 3: Find the most accessed endpoint
    print("Finding the most frequently accessed endpoint...")
    endpoint, count = most_accessed_endpoint(logs)
    print(f"\nMost Accessed Endpoint: {endpoint} (Accessed {count} times)")

    # Step 4: Detect suspicious activity
    print("Checking for suspicious activity...")
    suspicious_ips = detect_suspicious_activity(logs)
    print("\nSuspicious IP Addresses:")
    print(suspicious_ips)

    # Step 5: Save results to CSV files (written to the current working directory)
    print("Saving the results to CSV files...")
    try:
        requests_per_ip.to_csv('requests_per_ip.csv', index=False)
        suspicious_ips.to_csv('suspicious_ips.csv', index=False)
        # Save the most accessed endpoint as a one-row table
        pd.DataFrame(
            [[endpoint, count]], columns=['Endpoint', 'Access Count']
        ).to_csv('most_accessed_endpoint.csv', index=False)
        print("Results saved successfully!")
    except Exception as e:
        print(f"Error saving results: {e}")


Reading and parsing the logs...
Analyzing the number of requests per IP...

Requests Per IP Address:
      IP Address  Request Count
1    203.0.113.5              8
3  198.51.100.23              8
0    192.168.1.1              7
2       10.0.0.2              6
4  192.168.1.100              5
Finding the most frequently accessed endpoint...

Most Accessed Endpoint: /login (Accessed 13 times)
Checking for suspicious activity...

Suspicious IP Addresses:
      IP Address  Failed Login Count
0    203.0.113.5                   8
1  192.168.1.100                   5
Saving the results to CSV files...
Results saved successfully!
