In [1]:
import uuid
import re
import json

import requests
from bs4 import BeautifulSoup

In [2]:
# получаем html код страницы, если она доступна, иначе Error
def get_html_code(url: str) -> str:
    headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        req = requests.get(url, headers=headers)
        # The HTTP 200 OK success status response code indicates that the request has succeeded.
        if req.status_code == 200:
            return req.text
        else:
            return "Error"
        
    except Exception as e:
        print(e)
        return "Error"

# переводим html код в объект soup класса BeautifulSoup
def html_to_soup(html: str) -> BeautifulSoup:
    soup = BeautifulSoup(html, "html.parser")
    return soup

# вытаскивает из текста совпадения, используя регулярные выражения
def find_by_regex(text: str, regular_expression: re.Pattern) -> list[str]:
    matches = re.findall(regular_expression, text)
    matches = list(set(matches))
    return matches

In [3]:
report_url = 'https://thedfirreport.com/2023/06/12/a-truly-graceful-wipe-out/'
html = get_html_code(report_url)
soup = html_to_soup(html)



In [4]:
domain_regex = r'\w+\[\.\][a-z]{1,6}'
ipv4_regex = r'\d{1,3}\.\d{1,3}\.\d{1,3}\[\.\]\d{1,3}'
hashMD5_regex = r'(?:^|\s)([0-9a-f]{32})(?:\s|$)'
hashSHA1_regex = r'(?:^|\s)([0-9a-f]{40})(?:\s|$)'
hashSHA256_regex = r'(?:^|\s)([0-9a-f]{64})(?:\s|$)'

domains = find_by_regex(soup.text, domain_regex)
ipv4 = find_by_regex(soup.text, ipv4_regex)
hashMD5 = find_by_regex(soup.text, hashMD5_regex)
hashSHA1 = find_by_regex(soup.text, hashSHA1_regex)
hashSHA256 = find_by_regex(soup.text, hashSHA256_regex)
ipv4, hashMD5, hashSHA1, hashSHA256

(['5.188.206[.]78',
  '45.182.189[.]71',
  '81.19.135[.]30',
  '92.118.36[.]199',
  '5.188.86[.]18',
  '139.60.160[.]166'],
 ['6164e9d297d29aa8682971259da06848',
  '2dc57a3836e4393d4d16c4eb04bf9c7e',
  '12011c44955fd6631113f68a99447515',
  'fbe295e5a1acfbd0a6271898f885fe6a',
  '72a589da586844d7f0818ce684948eea',
  'a0e9f5d64349fb13191bc781f81f42e1'],
 ['96b95edc1a917912a3181d5105fd5bfad1344de0',
  'c6a5b345cef4eb795866ba81dcac9bd933fdd86d',
  'd6d205922e61635472efb13c2bb92c9ac6cb96da',
  '4f4f8cf0f9b47d0ad95d159201fe7e72fbc8448d'],
 ['a1390a78533c47e55cc364e97af431117126d04a7faed49390210ea3e89dd0e1',
  'c92c158d7c37fea795114fa6491fe5f145ad2f8c08776b18ae79db811e8e36a3',
  '717beedcd2431785a0f59d194e47970e9544fbf398d462a305f6ad9a1b1100cb',
  '121a1f64fff22c4bfcef3f11a23956ed403cdeb9bdb803f9c42763087bd6d94e'])

In [5]:
rst_threat_apikey = "wzsGdJ9nstr74EVdlAaaCjPP9vZWv0so5GLk2wwbt1P0yGLv1jWMzg"

from functools import lru_cache

@lru_cache
def ioc_lookup(ioc: str):

  if "[.]" in ioc:
    ioc = ioc.replace("[.]", ".")

  headers = {
      'accept': 'application/json',
      'x-api-key': rst_threat_apikey,
  }

  query_params = {
      'value': ioc,
  }

  try:
    response = requests.get('https://api.rstcloud.net/v1/ioc', headers=headers, params=query_params)
    return response.json()
    
  except Exception as e:
    return e

In [6]:
response = ioc_lookup("45.66.248[.]119")

description = response.get("description")
report_source = response.get("src").get("report")
ioc_value = response.get("ioc_value")
ioc_type = response.get("ioc_type")
tag = description.split('.')[0].split(": ")[-1]
related_threat = description.split(".")[1].split(": ")[-1]


In [7]:
dict_data = {
  "ioc": ioc_value,
  "tag": tag,
  "related_threat": related_threat,
  "report_source": report_source,
  "ioc_type": ioc_type
}

In [8]:
def get_ioc_data(ioc: str) -> dict:
  #потом ставим try except
  try:
    response = ioc_lookup(ioc)
    report_source = response.get("src").get("report")
    tag = description.split(".")[0].split(": ")[-1]
    related_threat = description.split(".")[1].split(": ")[-1]
    ioc_type = response.get("ioc_type")

    dict_data = {
      "ioc": ioc,
      "tag": tag,
      "related_threat": related_threat,
      "report_source": report_source,
      "ioc_type": ioc_type
    }

    # return dict_data
  
  except Exception as e:
    dict_data = {
      "ioc": ioc,
      "tag": None,
      "related_threat": None,
      "report_source": None,
      "ioc_type": None
    }

  return dict_data

In [9]:
full_data = []

# for domain in domains:
#   data = get_ioc_data(domain)
#   # print(data)
#   full_data.append(data)

for ip in ipv4:
  data = get_ioc_data(ip)
  # print(data)
  full_data.append(data)
  
for h in hashMD5:
  data = get_ioc_data(h)
  print(data)
  full_data.append(data)
  
for h in hashSHA1:
  data = get_ioc_data(h)
  print(data)
  full_data.append(data)
  
for h in hashSHA256:
  data = get_ioc_data(h)
  print(data)
  full_data.append(data)

{'ioc': '6164e9d297d29aa8682971259da06848', 'tag': 'ransomware', 'related_threat': 'icedid, nokoyawa', 'report_source': 'https://www.rewterz.com/rewterz-news/rewterz-threat-alert-authorities-raise-concerns-over-escalating-truebot-malware-attacks-active-iocs', 'ioc_type': 'md5'}
{'ioc': '2dc57a3836e4393d4d16c4eb04bf9c7e', 'tag': 'ransomware', 'related_threat': 'icedid, nokoyawa', 'report_source': 'otx.alienvault.com', 'ioc_type': 'md5'}
{'ioc': '12011c44955fd6631113f68a99447515', 'tag': 'ransomware', 'related_threat': 'icedid, nokoyawa', 'report_source': 'https://www.rewterz.com/rewterz-news/rewterz-threat-alert-authorities-raise-concerns-over-escalating-truebot-malware-attacks-active-iocs', 'ioc_type': 'md5'}
{'ioc': 'fbe295e5a1acfbd0a6271898f885fe6a', 'tag': None, 'related_threat': None, 'report_source': None, 'ioc_type': None}
{'ioc': '72a589da586844d7f0818ce684948eea', 'tag': 'ransomware', 'related_threat': 'icedid, nokoyawa', 'report_source': 'otx.alienvault.com', 'ioc_type': 'md5'

In [10]:
full_data

[{'ioc': '5.188.206[.]78',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'ipv4'},
 {'ioc': '45.182.189[.]71',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'ipv4'},
 {'ioc': '81.19.135[.]30',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://www.rewterz.com/rewterz-news/rewterz-threat-alert-authorities-raise-concerns-over-escalating-truebot-malware-attacks-active-iocs',
  'ioc_type': 'ipv4'},
 {'ioc': '92.118.36[.]199',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://www.hybrid-analysis.com/sample/a54d0c15db9354e3b580a1aa3da17634782f8fefd6f81887294372bd1d3404e7/64c2794610c5d57df80eeb32',
  'ioc_type': 'ipv4'},
 {'ioc': '5.188.86[.]18',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://w

In [11]:
def write_to_json(filename: str, data: list[dict]):
    with open(filename, 'w') as f:
        json.dump(data,
                  f,
                  indent=4,
                  ensure_ascii=True)

In [12]:
write_to_json("ioc7.json", full_data)