# Searching for IOCs on Splunk using Splunk API:

This notebook will (hopefully) help you search for IOCs on Splunk. It is especially useful when you receive a Threat Intel Report with an attached .csv file containing a large list of IOCs.
***


## 1) Import the following libraries:

In [None]:
import os
from time import sleep
from urllib.parse import urlparse

import numpy as np
import pandas as pd
import requests
import splunklib.client as client
import splunklib.results as results

## 2) Connect to Splunk server:

Update the following block with the information needed to connect to your Splunk instance.
Ref: https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtoconnectpython/

In [None]:
# Splunk server connection settings.
# They are read from environment variables so the notebook can be shared without
# embedding connection details; edit the fallback values if you prefer.
HOST = os.environ.get("SPLUNK_HOST", "xx.xx.xx.xx")
# NOTE(review): 8089 is assumed as the fallback because it is Splunk's usual
# management port — confirm for your deployment.
PORT = int(os.environ.get("SPLUNK_PORT", "8089"))

# Token created for the API user (per the original note, it expires after a year).
# It is deliberately not given a hard-coded fallback so it never ends up in a saved notebook.
SESSION_KEY = os.environ.get("SPLUNK_TOKEN")
if not SESSION_KEY:
    raise RuntimeError(
        "Set the SPLUNK_TOKEN environment variable to your Splunk API token before running this cell."
    )

# Create a Service instance and log in:
service = client.connect(
    host=HOST,
    port=PORT,
    token=SESSION_KEY)

## 3) Upload the .csv containing the IOCs:
Change the following path to the location of the .csv on your local machine.
***
<font color='red'>IMPORTANT: </font>  The IOCs are likely defanged (8[.]8[.]8[.]8 or hxxp: ), so you need to remove the square brackets and extra characters before importing the .csv. You can easily do this using Find/Replace in Excel or a text editor.
This code assumes that the .csv file contains the columns ['Type','Indicator', 'Attribution']; if that is not the case, you will have to edit it accordingly. 
***

In [None]:
# Location of the IOC list on the local machine; change this to point at your .csv.
IOC_CSV_PATH = "C:/Users/mr.robot/Desktop/IOC_list.csv"

# Only the three expected columns are loaded from the file.
iocs = pd.read_csv(IOC_CSV_PATH, usecols=['Type', 'Indicator', 'Attribution'])

# Discard rows that cannot be searched (no Type or no Indicator). A missing
# Attribution alone does not drop the row, since the searches only need the
# Type and Indicator columns.
iocs = iocs.dropna(subset=['Type', 'Indicator'])

## 4) Hash search:

In [None]:
# Collect every SHA256 / MD5 indicator and flatten them into one comma-separated string:
hash_rows = iocs[iocs['Type'].isin(['SHA256', 'MD5'])]
hashes = ','.join(str(value) for value in hash_rows['Indicator'])

In [None]:
# Send the search to Splunk; this might take some time depending on the number of IOCs.
# The only value that needs to be changed is the timeframe for the search (e.g. earliest=-15d).

In [None]:
# Submit the hash search. Replace the placeholder with your SPL, including the
# timeframe; the exact SPL depends on how your data is stored in Splunk.
job = service.jobs.create("SPL goes here")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        print(result)
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)

In [None]:
# If there were any matches, you will see them below:
matches

## 5) IP search:

In [None]:
# Gather the IPv4 and IPv6 indicators into a single comma-separated string:
ip_rows = iocs[iocs['Type'].isin(['IPv4', 'IPv6'])]
ips = ','.join(str(value) for value in ip_rows['Indicator'])

In [None]:
# Submit the IP search. Replace the placeholder with your SPL, including the timeframe.
job = service.jobs.create("SPL goes here")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)

In [None]:
# If there were any matches, you will see them below:
matches

## 6) Domain search:

In [None]:
# Gather the Domain indicators into a single comma-separated string:
domain_rows = iocs[iocs['Type'] == 'Domain']
domains = ','.join(str(value) for value in domain_rows['Indicator'])

In [None]:
# Submit the domain search. Replace the placeholder with your SPL, including the timeframe.
job = service.jobs.create("SPL goes here")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        print(result)
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)


In [None]:
# If there were any matches, you will see them below:
matches

## 7) URL search:

####       URL:

In [None]:
# Gather the URL indicators into a single comma-separated string:
url_rows = iocs[iocs['Type'] == 'URL']
urls = ','.join(str(value) for value in url_rows['Indicator'])

In [None]:
# Submit the URL search. Replace the placeholder with your SPL, including the timeframe.
job = service.jobs.create("SPL goes here")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        print(result)
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)

In [None]:
# Display the comma-separated URL string assembled from the IOC list:
urls

In [None]:
# If there were any matches, you will see them below:
matches

#### Path:

In [None]:
# Extract the path component of every URL indicator and join the paths with commas.
# A separate name is used for the URL rows so the `urls` string built for the
# URL search is not replaced by a Series.
url_indicators = iocs.loc[iocs['Type'] == 'URL', 'Indicator']

# The original loop iterated over an undefined name (`path`); iterate the URL indicators instead.
path_values = [urlparse(str(url)).path for url in url_indicators]

# urlparse(...).path is '' when a URL has no path component; skip those so the
# comma-separated list contains no empty entries.
paths = ','.join(value for value in path_values if value)

In [None]:
# Submit the URL-path search. Replace the placeholder with your SPL, including the timeframe.
job = service.jobs.create("SPL goes here")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        print(result)
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)

In [None]:
# Display the comma-separated path string assembled from the URL indicators:
paths

In [None]:
# If there were any matches, you will see them below:
matches

## 8) CVE search:

In [None]:
# Gather the CVE indicators into a single comma-separated string:
cve_rows = iocs[iocs['Type'] == 'CVE']
cves = ','.join(str(value) for value in cve_rows['Indicator'])

In [None]:
# Submit the CVE search: looks in index "vul" over the last 30 days for events whose
# cve field is in the list built from the .csv, and tabulates the ip and cve fields.
job = service.jobs.create("search index=vul earliest=-30d cve IN (" + cves + ") | table ip cve")

# Poll until the job has finished before reading its results.
while not job.is_done():
    sleep(.2)

# NOTE(review): job.results() is called without a count argument; Splunk's results
# endpoint appears to return a limited page by default — confirm whether count=0
# is needed to fetch every match.
rr = results.JSONResultsReader(job.results(output_mode='json'))

# Accumulate events in a plain list and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
match_rows = []
for result in rr:
    if isinstance(result, results.Message):
        # Diagnostic messages may be returned in the results
        print('%s: %s' % (result.type, result.message))
    elif isinstance(result, dict):
        # Normal events are returned as dicts
        print(result)
        match_rows.append(result)

# The reader must report final (non-preview) results for a finished job.
assert rr.is_preview == False

matches = pd.DataFrame(match_rows)

In [None]:
# If there were any matches, you will see them below:
matches