In [44]:
import pandas as pd

# Load the raw log export into `df`, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer="dataset/cybersecurity_logs.csv")

df.head(n=5)

Unnamed: 0,Log_ID,IP_Address,Request_Type,Response_Time_ms,Anomaly_Score,Is_Threat
0,1,192.168.251.112,PUT,216,0.271,0
1,2,192.168.92.71,POST,280,0.029,0
2,3,192.168.164.48,PUT,223,0.605,0
3,4,192.168.51.32,DELETE,265,0.396,0
4,5,192.168.21.113,PUT,171,0.299,0


In [45]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Log_ID            1000 non-null   int64  
 1   IP_Address        1000 non-null   object 
 2   Request_Type      1000 non-null   object 
 3   Response_Time_ms  1000 non-null   int64  
 4   Anomaly_Score     1000 non-null   float64
 5   Is_Threat         1000 non-null   int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 47.0+ KB


In [46]:
# Count how many log rows fall under each request type.

df.loc[:, 'Request_Type'].value_counts()


Request_Type
GET       271
POST      267
PUT       231
DELETE    231
Name: count, dtype: int64

In [47]:
# Summary statistics (count, mean, std, quartiles, min/max) for Response_Time_ms.

df.loc[:, 'Response_Time_ms'].describe()


count    1000.000000
mean      197.887000
std        53.130774
min        23.000000
25%       161.000000
50%       198.000000
75%       233.000000
max       362.000000
Name: Response_Time_ms, dtype: float64

In [48]:
# What does "Anomaly_Score" look like? Summary statistics for the column.

df.loc[:, 'Anomaly_Score'].describe()


count    1000.000000
mean        0.494765
std         0.279659
min         0.000000
25%         0.263750
50%         0.493500
75%         0.725000
max         0.997000
Name: Anomaly_Score, dtype: float64

In [49]:
# How many rows are flagged as threats (Is_Threat == 1) versus not (0)?

df.loc[:, 'Is_Threat'].value_counts()


Is_Threat
0    960
1     40
Name: count, dtype: int64

In [50]:
# Which IP addresses appear in the most log rows? Keep the top ten.

df['IP_Address'].value_counts().iloc[:10]


IP_Address
192.168.127.177    2
192.168.15.226     2
192.168.45.21      2
192.168.241.116    2
192.168.41.203     2
192.168.45.65      2
192.168.11.34      2
192.168.235.126    2
192.168.237.69     1
192.168.5.26       1
Name: count, dtype: int64

In [51]:
# Threats by IP: count flagged rows (Is_Threat == 1) per address, top ten.
# A single .loc call picks the rows and the column together instead of
# chaining two [] lookups through an intermediate filtered frame.

df.loc[df['Is_Threat'] == 1, 'IP_Address'].value_counts().head(10)


IP_Address
192.168.90.93      1
192.168.225.52     1
192.168.1.142      1
192.168.150.156    1
192.168.237.69     1
192.168.6.150      1
192.168.212.122    1
192.168.154.0      1
192.168.125.7      1
192.168.95.78      1
Name: count, dtype: int64

In [52]:
# Mean Response_Time_ms for non-threat (0) versus threat (1) rows.

df.groupby('Is_Threat')['Response_Time_ms'].agg('mean')


Is_Threat
0    197.719792
1    201.900000
Name: Response_Time_ms, dtype: float64

In [53]:
# Traffic volume: number of log rows recorded for each IP address.

ip_request_count = (
    df.groupby('IP_Address')
    .size()
    .rename('request_count')
    .reset_index()
)

# Show the five addresses with the most requests.
ip_request_count.sort_values(by='request_count', ascending=False).head(n=5)

Unnamed: 0,IP_Address,request_count
779,192.168.45.65,2
124,192.168.127.177,2
220,192.168.15.226,2
601,192.168.235.126,2
59,192.168.11.34,2


In [54]:
# Mean Anomaly_Score per IP address, one row per address.

ip_anomaly = df.groupby('IP_Address', as_index=False).agg(
    avg_anomaly_score=('Anomaly_Score', 'mean')
)

In [55]:
# Number of rows flagged as threats (sum of Is_Threat) for each IP address.

ip_threats = (
    df.groupby('IP_Address')['Is_Threat']
    .agg('sum')
    .rename('threat_count')
    .reset_index()
)

In [56]:
# Join the three per-IP tables on IP_Address into a single summary frame.

ip_summary = pd.merge(
    pd.merge(ip_request_count, ip_anomaly, on='IP_Address'),
    ip_threats,
    on='IP_Address',
)

ip_summary.head(n=5)

Unnamed: 0,IP_Address,request_count,avg_anomaly_score,threat_count
0,192.168.0.100,1,0.69,0
1,192.168.0.143,1,0.59,0
2,192.168.0.187,1,0.596,0
3,192.168.1.142,1,0.596,1
4,192.168.1.151,1,0.09,0


In [57]:
# Simple SOC rules: an IP is marked suspicious when ANY of these holds:
#   - it made more than 20 requests
#   - its mean anomaly score is above 0.7
#   - at least one of its rows is flagged as a threat
# NOTE(review): in the run shown in this notebook no IP has more than 2
# requests, so the request-volume rule never fires on this data -- confirm
# that the threshold of 20 is intended.

ip_summary['suspicious'] = (
    ip_summary['request_count'].gt(20)
    | ip_summary['avg_anomaly_score'].gt(0.7)
    | ip_summary['threat_count'].gt(0)
)

In [58]:
# Tally suspicious (True) versus non-suspicious (False) IPs.

ip_summary.loc[:, 'suspicious'].value_counts()

suspicious
False    686
True     306
Name: count, dtype: int64

In [59]:
# View suspicious IPs only. The 'suspicious' column is built from boolean
# comparisons, so it is used directly as the row mask rather than being
# compared with True.

suspicious_ips = ip_summary[ip_summary['suspicious']]
suspicious_ips.head()

Unnamed: 0,IP_Address,request_count,avg_anomaly_score,threat_count,suspicious
3,192.168.1.142,1,0.596,1,True
8,192.168.10.189,1,0.895,0,True
9,192.168.10.192,1,0.766,0,True
11,192.168.10.72,1,0.902,1,True
19,192.168.101.69,1,0.811,0,True


In [60]:
# Per-IP risk score: a weighted sum of three terms
#   request_count                  weighted 0.4
#   avg_anomaly_score scaled x100  weighted 0.4
#   threat_count scaled x10        weighted 0.2
# NOTE(review): request_count is not rescaled the way the other two terms
# are. With the values shown in this notebook (at most 2 requests per IP,
# anomaly scores below 1) the total appears to stay below roughly 45 --
# confirm the weighting is intended.
ip_summary['risk_score'] = (
    ip_summary['request_count'].mul(0.4)
    .add(ip_summary['avg_anomaly_score'].mul(100).mul(0.4))
    .add(ip_summary['threat_count'].mul(10).mul(0.2))
)

In [61]:
def assign_risk(score):
    """Translate a numeric risk score into a risk label.

    Returns 'High' for scores of 80 or more, 'Medium' for scores of 40 or
    more, and 'Low' for everything else (including values that compare
    False against both thresholds, such as NaN).
    """
    # Thresholds are checked from the highest band downwards; the first
    # lower bound the score reaches decides the label.
    for lower_bound, label in ((80, 'High'), (40, 'Medium')):
        if score >= lower_bound:
            return label
    return 'Low'

# Label every IP by passing its risk_score through assign_risk.
ip_summary['risk_level'] = ip_summary['risk_score'].map(assign_risk)

In [62]:
# Preview the address, score and label columns for the first five IPs.
ip_summary.loc[:, ['IP_Address', 'risk_score', 'risk_level']].head(n=5)

Unnamed: 0,IP_Address,risk_score,risk_level
0,192.168.0.100,28.0,Low
1,192.168.0.143,24.0,Low
2,192.168.0.187,24.24,Low
3,192.168.1.142,26.24,Low
4,192.168.1.151,4.0,Low


In [63]:
# Persist the per-IP summary as CSV, leaving out the DataFrame index.
ip_summary.to_csv(path_or_buf="dataset/ip_security_risk_summary.csv", index=False)