In [1]:

# Sample access log, one request per line, comma-separated as:
# Date,Time,IP,Method,URL,Status,ResponseSize
sample_logs = """2025-10-10,12:01:32,192.168.1.2,GET,/index.html,200,1024
2025-10-10,12:01:33,192.168.1.3,GET,/products.html,200,850
2025-10-10,12:01:34,192.168.1.4,POST,/cart.html,500,0
2025-10-10,12:01:35,192.168.1.5,GET,/contact.html,404,512
2025-10-10,12:01:36,192.168.1.2,GET,/index.html,200,1024
2025-10-10,12:01:37,192.168.1.6,GET,/admin.html,403,256
2025-10-10,12:01:38,192.168.1.7,GET,/products.html,200,850
2025-10-10,12:01:39,192.168.1.8,GET,/index.html,200,1024
2025-10-10,12:01:40,192.168.1.9,GET,/notfound.html,404,128
2025-10-10,12:01:41,192.168.1.10,GET,/index.html,200,1024
2025-10-10,12:01:42,192.168.1.11,GET,/secret.html,403,0
2025-10-10,12:01:43,192.168.1.12,GET,/products.html,500,0
2025-10-10,12:01:44,192.168.1.13,GET,/index.html,404,64
2025-10-10,12:01:45,192.168.1.14,GET,/products.html,200,850
2025-10-10,12:01:46,192.168.1.15,GET,/api/data,500,0
"""

# Write the sample to disk; the analysis code reads "weblogs.txt" back line by line.
with open("weblogs.txt", mode="w", encoding="utf-8") as log_file:
    log_file.write(sample_logs)



def mapper(line):
    """
    Map one access-log line to a ``(status, 1)`` pair for counting.

    Expected comma-separated layout:
    Date,Time,IP,Method,URL,Status,ResponseSize

    Args:
        line: One raw text line from the log (a trailing newline is fine).

    Returns:
        ``(status, 1)`` where ``status`` is the status-code string, or
        ``None`` when the line is blank, has fewer than six fields, or its
        status field is not a plain run of ASCII digits (e.g. a header row).
    """
    # A blank line splits into [""], so the length check below rejects it too.
    fields = line.strip().split(",")
    if len(fields) < 6:
        return None

    status = fields[5].strip()

    # The reduce step sorts keys with int(); emitting a non-numeric status
    # (empty field, header text, garbage) would make that sort raise.
    if not (status.isascii() and status.isdigit()):
        return None
    return (status, 1)



from collections import defaultdict

# Map phase: turn every usable log line into a (status, 1) pair.
with open("weblogs.txt", "r", encoding="utf-8") as log_file:
    mapped = (mapper(raw_line) for raw_line in log_file)
    pairs = [pair for pair in mapped if pair is not None]

# Group + reduce phase: sum the emitted values per status code.
counts = defaultdict(int)   # status -> total count
for status_code, increment in pairs:
    counts[status_code] += increment

# Report in ascending numeric order of the status code.
for status in sorted(counts, key=int):
    print(f"HTTP {status}: {counts[status]} requests")


HTTP 200: 7 requests
HTTP 403: 2 requests
HTTP 404: 3 requests
HTTP 500: 3 requests


In [2]:
# Map: (url, 1)
def mapper_url(line):
    """
    Map one access-log line to a ``(url, 1)`` pair for counting.

    The URL is read from the fifth comma-separated field (index 4).

    Args:
        line: One raw text line from the log.

    Returns:
        ``(url, 1)``, or ``None`` when the line is blank, has fewer than
        six fields, or the URL field is empty after stripping whitespace.
    """
    # A blank line splits into [""], which the length check rejects as well.
    fields = line.strip().split(",")
    if len(fields) < 6:
        return None
    url = fields[4].strip()
    return (url, 1) if url != "" else None

# Map phase: one (url, 1) pair per usable log line.
with open("weblogs.txt", "r", encoding="utf-8") as log_file:
    mapped_urls = (mapper_url(raw_line) for raw_line in log_file)
    pairs_url = [pair for pair in mapped_urls if pair]

# Reduce phase: total hits per URL.
counts_url = defaultdict(int)
for url_key, increment in pairs_url:
    counts_url[url_key] += increment

# Print the top URLs (at most 20), most requested first; the sort is stable,
# so URLs with equal counts keep their first-seen order.
ranked_urls = sorted(counts_url.items(), key=lambda item: item[1], reverse=True)
for url, cnt in ranked_urls[:20]:
    print(f"{url}: {cnt} requests")


/index.html: 5 requests
/products.html: 4 requests
/cart.html: 1 requests
/contact.html: 1 requests
/admin.html: 1 requests
/notfound.html: 1 requests
/secret.html: 1 requests
/api/data: 1 requests


In [3]:
# Map: (status, response_size)
def mapper_status_size(line):
    """
    Map one access-log line to a ``(status, response_size)`` pair.

    Expected comma-separated layout:
    Date,Time,IP,Method,URL,Status,ResponseSize

    Args:
        line: One raw text line from the log.

    Returns:
        ``(status, size)`` where ``status`` is the status-code string and
        ``size`` is the response size as an int (0 when the size field is
        not a valid integer), or ``None`` when the line is blank, has fewer
        than seven fields, or its status field is not a run of ASCII digits.
    """
    # A blank line splits into [""], so the length check below rejects it too.
    fields = line.strip().split(",")
    if len(fields) < 7:
        return None

    status = fields[5].strip()
    # The reduce step sorts keys with int(); a non-numeric status (empty
    # field, header text) would make that sort raise, so drop the line here.
    if not (status.isascii() and status.isdigit()):
        return None

    try:
        size = int(fields[6].strip())
    except ValueError:
        # Best-effort: an unparsable size contributes nothing to the total.
        # Only ValueError is caught so interrupts and real bugs still surface.
        size = 0
    return (status, size)

# Map phase: one (status, response_size) pair per usable log line.
with open("weblogs.txt", "r", encoding="utf-8") as log_file:
    mapped_sizes = (mapper_status_size(raw_line) for raw_line in log_file)
    pairs_status_size = [pair for pair in mapped_sizes if pair]

# Reduce phase: add up the response sizes emitted for each status code.
total_size_per_status = defaultdict(int)
for status_code, size in pairs_status_size:
    total_size_per_status[status_code] += size

# Report in ascending numeric order of the status code.
for status in sorted(total_size_per_status, key=int):
    print(f"HTTP {status}: total response size = {total_size_per_status[status]} bytes")


HTTP 200: total response size = 6646 bytes
HTTP 403: total response size = 256 bytes
HTTP 404: total response size = 704 bytes
HTTP 500: total response size = 0 bytes


In [4]:
# Count only the non-200 statuses. Every status other than the exact string
# "200" is tallied, whatever its class.
errors_count = defaultdict(int)
with open("weblogs.txt", "r", encoding="utf-8") as log_file:
    for raw_line in log_file:
        mapped = mapper(raw_line)
        if not mapped:
            continue  # line was rejected by the mapper
        status_code, increment = mapped
        if status_code == "200":
            continue  # successful request, not counted here
        errors_count[status_code] += increment

# Report in ascending numeric order of the status code.
for status, cnt in sorted(errors_count.items(), key=lambda item: int(item[0])):
    print(f"HTTP {status}: {cnt} error requests")


HTTP 403: 2 error requests
HTTP 404: 3 error requests
HTTP 500: 3 error requests
