# SIEM Use Cases

- Which IP addresses (and countries) are attacking the WAN endpoint?
  - [x] Visualise them on a global map (Grafana)
  - [ ] Block non-AU countries

- Validate the firewall rules for VLAN isolation:
  - [ ] Determine whether a flow from VLAN x to VLAN y is valid or not valid
  - [ ] Alert on invalid flows

- Identify UniFi component anomalies
  - [ ] PAM activity (`{job="ubnt-authpriv"}` requires review)

- Detect loss of internet connectivity

In [2]:
import requests
import json
import pandas as pd
from datetime import datetime, timedelta

def get_logs_as_df(query, n_hours, step=100, limit=100):
    """Fetch recent Loki log entries for a LogQL query as a pandas DataFrame.

    Sends a GET request to Loki's ``query_range`` endpoint for the window
    that ends now and starts ``n_hours`` hours earlier, asking for the
    newest entries first (``direction=BACKWARD``).

    Parameters
    ----------
    query : str
        LogQL query string, e.g. ``'{job="ubnt-geo"}'``.
    n_hours : int or float
        Length of the look-back window, in hours.
    step : int, optional
        Value forwarded unchanged as the ``step`` request parameter.
    limit : int, optional
        Maximum number of entries requested from Loki. Defaults to 100,
        the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame or dict
        On HTTP 200, a DataFrame with one row per element of every
        ``data.result[*].values`` list in the response; the columns are
        labelled by position (``0``, ``1``). On any other status code the
        response body is printed and an empty dict is returned, which
        callers can still wrap with ``pd.DataFrame(...)``.
    """
    ns_per_second = 10 ** 9

    # datetime.now().timestamp() yields the real epoch time. The earlier
    # datetime.utcnow().timestamp() re-read the naive UTC wall clock as local
    # time, which shifted the window end by the machine's UTC offset.
    end_ns = int(datetime.now().timestamp()) * ns_per_second
    # timedelta's first positional argument is *days*; name the unit so that
    # n_hours really means hours. Subtracting on the epoch value also keeps
    # the window length exact across DST changes.
    window_ns = int(timedelta(hours=n_hours).total_seconds() * ns_per_second)
    start_ns = end_ns - window_ns

    url = "http://loki.loki:3100/loki/api/v1/query_range"
    headers = {'Content-type': 'application/json'}
    params = {
        'query': query,
        'direction': 'BACKWARD',
        'limit': limit,
        'start': start_ns,
        'end': end_ns,
        'step': step
    }

    resp = requests.get(url, headers=headers, params=params)
    if resp.status_code != 200:
        print("Failed to get data: " + resp.text)
        return {}

    json_obj = json.loads(resp.text)

    # Flatten data -> result[*] -> values[*] into rows. Refer to
    # https://github.com/BindiChen/machine-learning/blob/main/data-analysis/027-pandas-convert-json/pandas-convert-json.ipynb
    return pd.json_normalize(
        json_obj,
        record_path=['data', 'result', 'values']
    )

In [57]:
# Pull the "ubnt-geo" log stream: entries for external actors attempting to
# ingress via the WAN.
d = get_logs_as_df('{job="ubnt-geo"}', 3)
# Name the two positional columns in a single expression rather than
# renaming the frame in place.
df = pd.DataFrame(d).rename(columns={0: 'time', 1: 'json'})
df

Unnamed: 0,time,json
0,1651351405000000000,"{""geoplugin_request"":""192.241.213.128"",""geoplu..."
1,1651351394000000000,"{""geoplugin_request"":""167.248.133.147"",""geoplu..."
2,1651351376000000000,"{""geoplugin_request"":""185.156.73.57"",""geoplugi..."
3,1651351364000000000,"{""geoplugin_request"":""147.78.47.57"",""geoplugin..."
4,1651351335000000000,"{""geoplugin_request"":""54.173.95.250"",""geoplugi..."
...,...,...
95,1651350061000000000,"{""geoplugin_request"":""167.94.138.155"",""geoplug..."
96,1651350050000000000,"{""geoplugin_request"":""81.68.254.218"",""geoplugi..."
97,1651350040000000000,"{""geoplugin_request"":""45.145.66.212"",""geoplugi..."
98,1651350037000000000,"{""geoplugin_request"":""39.164.17.200"",""geoplugi..."


In [12]:
from glom import glom
import re

# Extract the source IPv4 address from each geo log entry. The address is read
# from the entry's 'geoplugin_request' field; the first IPv4-looking token in
# that field is taken.
IP4_REGEX = r'(?:[0-9]{1,3}\.){3}[0-9]{1,3}'


def _first_ipv4(raw_entry):
    """Return the first IPv4 address in the entry's 'geoplugin_request' field.

    `raw_entry` is the JSON text of one log entry. Returns None when the
    field holds no IPv4-looking token, instead of failing on a missing match.
    """
    match = re.search(IP4_REGEX, glom(json.loads(raw_entry), 'geoplugin_request'))
    return match.group(0) if match else None


# The raw entries live in the 'json' column: the cell that builds `df` renames
# positional column 1 to 'json', so indexing with df[1] raises KeyError when
# the notebook is run top to bottom.
df1 = df['json'].apply(_first_ipv4)
df1


0      45.93.201.144
1     89.248.174.144
2     180.76.116.235
3       94.102.61.33
4      92.63.197.110
           ...      
95      64.62.197.87
96     121.179.39.53
97      194.26.29.37
98    195.24.207.209
99     92.63.197.110
Name: 1, Length: 100, dtype: object

In [3]:
# Pull the "ubnt-kern" log stream, keeping only lines that match the
# "AN_OUT-.*-D" pattern: entries for traffic dropped across VLANs.
d = get_logs_as_df('{job="ubnt-kern"} |~"AN_OUT-.*-D"', 24, 3600)
# Name the two positional columns in a single expression rather than
# renaming the frame in place.
df = pd.DataFrame(d).rename(columns={0: 'time', 1: 'json'})
# TODO: expand the 'json' column into one column per field
# (an earlier attempt used pd.json_normalize with record_path=['json']).
df

Unnamed: 0,time,json
0,1651317976000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
1,1651317976000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
2,1651317976000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
3,1651317976000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
4,1651317975000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
...,...,...
95,1651317970000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
96,1651317970000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
97,1651317970000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
98,1651317970000000000,"{""facility"":""kern"",""facilityCode"":0,""severity""..."
