### 1. Wczytaj plik CSV z logami logowania

In [10]:
import pandas as pd
from pathlib import Path

file_a = Path("../example-files/server_A.csv")
file_b = Path("../example-files/server_B.csv")

server_a = pd.read_csv(file_a, parse_dates=['timestamp'])
server_b = pd.read_csv(file_b, parse_dates=['timestamp'])

# data unification
server_b = server_b.rename(columns={'username': 'user', 'event': 'action'})
server_b['action'] = server_b['action'].apply(lambda x: 'LOGIN_FAIL' if x == 'LoginFailed' else 'LOGIN_OK')
server_b['user'] = server_b['user'].str.lower()

In [11]:
server_a.head()

Unnamed: 0,timestamp,IP,user,action
0,2025-09-28 09:00:00,192.168.0.6,edward,LOGIN_FAIL
1,2025-09-28 09:00:14,192.168.0.5,celina,LOGIN_OK
2,2025-09-28 09:00:26,192.168.0.8,damian,LOGIN_FAIL
3,2025-09-28 09:00:33,192.168.0.5,anna,LOGIN_OK
4,2025-09-28 09:01:20,10.0.0.3,anna,LOGIN_OK


In [12]:
server_b.head()

Unnamed: 0,timestamp,IP,user,action
0,2025-09-28 09:00:00,192.168.0.6,bartek,LOGIN_FAIL
1,2025-09-28 09:00:31,192.168.0.6,anna,LOGIN_OK
2,2025-09-28 09:00:46,192.168.0.6,celina,LOGIN_OK
3,2025-09-28 09:01:06,192.168.0.8,celina,LOGIN_FAIL
4,2025-09-28 09:01:44,192.168.0.9,damian,LOGIN_OK


### 2. Znajdź wspólne adresy IP występujące w obu logach

In [13]:
common_ips = set(server_a['IP']).intersection(set(server_b['IP']))
common_ips

{'10.0.0.3', '192.168.0.5', '192.168.0.6', '192.168.0.8', '192.168.0.9'}

### 3. Dla wspólnych adresów IP dopasuj zdarzenia logowania na podstawie czasu (±5 sekund)

In [14]:
a_common = (server_a[server_a["IP"].isin(common_ips)]).sort_values("timestamp")
b_common = (server_b[server_b["IP"].isin(common_ips)]).sort_values("timestamp")

merged = pd.merge_asof(
    a_common,
    b_common,
    on=['timestamp'],
    by=['IP'],
    direction="nearest",
    tolerance=pd.Timedelta('5s'),
    suffixes=("_A", "_B")
)

merged

Unnamed: 0,timestamp,IP,user_A,action_A,user_B,action_B
0,2025-09-28 09:00:00,192.168.0.6,edward,LOGIN_FAIL,bartek,LOGIN_FAIL
1,2025-09-28 09:00:14,192.168.0.5,celina,LOGIN_OK,,
2,2025-09-28 09:00:26,192.168.0.8,damian,LOGIN_FAIL,,
3,2025-09-28 09:00:33,192.168.0.5,anna,LOGIN_OK,,
4,2025-09-28 09:01:20,10.0.0.3,anna,LOGIN_OK,,
...,...,...,...,...,...,...
75,2025-09-28 09:29:42,10.0.0.3,celina,LOGIN_OK,,
76,2025-09-28 09:29:54,192.168.0.6,celina,LOGIN_FAIL,,
77,2025-09-28 09:31:30,192.168.0.6,anna,LOGIN_OK,,
78,2025-09-28 09:32:00,10.0.0.3,celina,LOGIN_FAIL,,


### 4. Wyznacz przypadki, gdy oba systemy zgłosiły błąd logowania dla tego samego IP


In [15]:
merged[(merged["action_A"] == "LOGIN_FAIL") & (merged["action_B"] == "LOGIN_FAIL")]

Unnamed: 0,timestamp,IP,user_A,action_A,user_B,action_B
0,2025-09-28 09:00:00,192.168.0.6,edward,LOGIN_FAIL,bartek,LOGIN_FAIL
16,2025-09-28 09:04:40,192.168.0.5,bartek,LOGIN_FAIL,damian,LOGIN_FAIL
26,2025-09-28 09:08:50,10.0.0.3,damian,LOGIN_FAIL,damian,LOGIN_FAIL


### 5. Sporządź statystyki błędów wg adresu IP

In [16]:
errors_a = server_a[server_a['action'] == 'LOGIN_FAIL'].groupby("IP").size().reset_index(name="error_count_A")
errors_b = server_b[server_b['action'] == 'LOGIN_FAIL'].groupby("IP").size().reset_index(name="error_count_B")

error_stats = pd.merge(errors_a, errors_b, on='IP', how='outer').fillna(0)
error_stats['error_sum'] = error_stats['error_count_A'] + error_stats['error_count_B']
error_stats

Unnamed: 0,IP,error_count_A,error_count_B,error_sum
0,10.0.0.3,11,10,21
1,192.168.0.5,7,10,17
2,192.168.0.6,7,7,14
3,192.168.0.8,10,4,14
4,192.168.0.9,8,7,15


### 6. Sprawdź, które adresy IP miały najwięcej korelacji błędów w obu systemach

In [17]:
error_stats.sort_values(['error_sum'], ascending=False)

Unnamed: 0,IP,error_count_A,error_count_B,error_sum
0,10.0.0.3,11,10,21
1,192.168.0.5,7,10,17
4,192.168.0.9,8,7,15
2,192.168.0.6,7,7,14
3,192.168.0.8,10,4,14


### 7. Podaj nazwy użytkowników z obu systemów dla tych samych IP i błędów logowania


In [18]:
merged[(merged["action_A"] == "LOGIN_FAIL") & (merged["action_B"] == "LOGIN_FAIL") & (merged['user_A'] == merged['user_B'])]

Unnamed: 0,timestamp,IP,user_A,action_A,user_B,action_B
26,2025-09-28 09:08:50,10.0.0.3,damian,LOGIN_FAIL,damian,LOGIN_FAIL
