In [4]:
import re
import csv
from collections import Counter

# Set the target date. It is compared verbatim against the date captured by
# LOG_PATTERN, so it must use the same dd/Mon/yyyy form as the log timestamps.
TARGET_DATE = "30/Jan/2024"

# File paths (update accordingly)
LOG_FILE = "server.log"  # Replace with your actual log file path

# Pattern to extract, in order: client IP, date (dd/Mon/yyyy) and hour (HH)
# from lines shaped like `<ip> - - [dd/Mon/yyyy:HH:...`.
LOG_PATTERN = re.compile(r'(\d+\.\d+\.\d+\.\d+) - - \[(\d{2}/\w{3}/\d{4}):(\d{2}):')

# Data storage: number of matching log lines per IP and per hour of the day
ip_counter = Counter()
hour_counter = Counter()

# Read and process the log file.
# The encoding is explicit and undecodable bytes are replaced so that a stray
# non-UTF-8 byte elsewhere in a line cannot abort the whole run; the fields
# extracted by LOG_PATTERN are plain ASCII and are unaffected by replacement.
with open(LOG_FILE, "r", encoding="utf-8", errors="replace") as file:
    for line in file:
        match = LOG_PATTERN.search(line)
        if match:
            ip, date, hour = match.groups()
            # Only lines stamped with the target date are counted.
            if date == TARGET_DATE:
                ip_counter[ip] += 1
                hour_counter[hour] += 1

print(f"\nLog Data for {TARGET_DATE}\n")

# **Display & Save Top 10 IPs**
top_10_ips = ip_counter.most_common(10)
print("Top 10 IPs by Occurrences:")
print("IP Address            | Occurrences")
print("-----------------------------------")
for ip, count in top_10_ips:
    print(f"{ip:20} | {count}")

# "/" is not usable inside a file name, so the date is written with "-".
top_10_ips_file = f"ips_{TARGET_DATE.replace('/', '-')}.csv"
with open(top_10_ips_file, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["IP Address", "Occurrences"])
    # The CSV holds every IP seen on the target date, not only the 10 printed.
    writer.writerows(ip_counter.items())  # Store all IPs


# **Display & Save Hourly Traffic**
print("\nHourly Traffic:")
print("Hour  | Visitors")
print("--------------------")
# Hours are two-digit strings, so a plain sort yields chronological order.
for hour, count in sorted(hour_counter.items()):
    print(f"{hour:2}    | {count}")

hourly_traffic_file = f"hourly_traffic_{TARGET_DATE.replace('/', '-')}.csv"
with open(hourly_traffic_file, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Hour", "Visitors"])
    writer.writerows(sorted(hour_counter.items()))


# **Get Top IPs contributing to 85% of traffic**
def get_top_contributors(counter, percentage, limit=10):
    """Return the most frequent items that together reach a share of the total.

    Items are taken in ``counter.most_common()`` order and accumulated until
    their combined count is at least ``percentage`` of the sum of all counts.

    Args:
        counter: A ``collections.Counter`` mapping items to their counts.
        percentage: Fraction of the total to cover, e.g. ``0.85`` for 85%.
        limit: Maximum number of entries to return. Defaults to 10, which is
            the cap this function has always applied; pass ``None`` to get
            every contributor needed to reach the threshold.

    Returns:
        A list of ``(item, count)`` tuples, most frequent first. The list is
        empty when ``counter`` is empty.
    """
    total = sum(counter.values())
    threshold = total * percentage
    running_total = 0
    top_contributors = []

    for item, count in counter.most_common():
        running_total += count
        top_contributors.append((item, count))
        # Stop as soon as the accumulated count covers the requested share.
        if running_total >= threshold:
            break

    if limit is None:
        return top_contributors
    return top_contributors[:limit]

# Busiest IPs (at most 10, as returned by get_top_contributors) that together
# account for 85% of the counted requests.
top_10_ips_85 = get_top_contributors(ip_counter, 0.85)
print("\nTop 10 IPs Contributing to 85% of Traffic:")
print("IP Address            | Occurrences")
print("-----------------------------------")
for ip, count in top_10_ips_85:
    print(f"{ip:20} | {count}")

top_10_ips_85_file = f"ips_85_percent_{TARGET_DATE.replace('/', '-')}.csv"
with open(top_10_ips_85_file, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["IP Address", "Occurrences"])
    # Write the contributor rows shown above. Dumping ip_counter.items() here
    # would just duplicate the full per-IP CSV under a misleading file name.
    writer.writerows(top_10_ips_85)


# **Get Hours contributing to 70% of traffic**
top_hours_70 = get_top_contributors(hour_counter, 0.70)
print("\nHours Contributing to 70% of Traffic:")
print("Hour  | Visitors")
print("--------------------")
# Rows are ordered busiest hour first, not chronologically.
for hour, count in top_hours_70:
    print(f"{hour:2}    | {count}")

top_hours_70_file = f"Traffic_70_percent_Overall_Traffic_{TARGET_DATE.replace('/', '-')}.csv"
with open(top_hours_70_file, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Hour", "Visitors"])
    # Write the contributing hours shown above rather than every hour, so the
    # file content matches its "70 percent" name.
    writer.writerows(top_hours_70)

print("\nAll data successfully saved as CSV files.")



Log Data for 30/Jan/2024

Top 10 IPs by Occurrences:
IP Address            | Occurrences
-----------------------------------
10.50.108.12         | 57
10.22.174.100        | 53
10.54.51.147         | 52
10.200.235.145       | 52
10.54.126.70         | 51
10.157.192.12        | 51
10.66.79.126         | 48
10.71.58.65          | 47
10.31.73.85          | 47
10.63.204.75         | 47

Hourly Traffic:
Hour  | Visitors
--------------------
00    | 643
01    | 635
02    | 617
03    | 635
04    | 686
05    | 659
06    | 592
07    | 621
08    | 610
09    | 600
10    | 615
11    | 638
12    | 606
13    | 631
14    | 580
15    | 633
16    | 622
17    | 645
18    | 617
19    | 627
20    | 603
21    | 603
22    | 647
23    | 635

Top 10 IPs Contributing to 85% of Traffic:
IP Address            | Occurrences
-----------------------------------
10.50.108.12         | 57
10.22.174.100        | 53
10.54.51.147         | 52
10.200.235.145       | 52
10.54.126.70         | 51
10.157.192.12        | 51