In [40]:
import uuid
import re
import json

import requests
from bs4 import BeautifulSoup

In [41]:
# получаем html код страницы, если она доступна, иначе Error
def get_html_code(url: str) -> str:
    headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        req = requests.get(url, headers=headers)
        # The HTTP 200 OK success status response code indicates that the request has succeeded.
        if req.status_code == 200:
            return req.text
        else:
            return "Error"
        
    except Exception as e:
        print(e)
        return "Error"

# переводим html код в объект soup класса BeautifulSoup
def html_to_soup(html: str) -> BeautifulSoup:
    soup = BeautifulSoup(html, "html.parser")
    return soup

# вытаскивает из текста совпадения, используя регулярные выражения
def find_by_regex(text: str, regular_expression: re.Pattern) -> list[str]:
    matches = re.findall(regular_expression, text)
    matches = list(set(matches))
    return matches

In [42]:
report_url = 'https://thedfirreport.com/2023/05/22/icedid-macro-ends-in-nokoyawa-ransomware'
html = get_html_code(report_url)
soup = html_to_soup(html)

In [43]:
domain_regex = r'\w+\[\.\][a-z]{1,6}'
ipv4_regex = r'\d{1,3}\.\d{1,3}\.\d{1,3}\[\.\]\d{1,3}'

domains = find_by_regex(soup.text, domain_regex)
ipv4 = find_by_regex(soup.text, ipv4_regex)

domains, ipv4

(['simipimi[.]com',
  'curabiebarristie[.]com',
  'dropmefiles[.]com',
  'belliecow[.]wiki',
  'aicsoftware[.]com',
  'kicknocisd[.]com',
  'stayersa[.]art',
  'guaracheza[.]pics',
  'iconnectgs[.]com'],
 ['23.29.115[.]152',
  '5.8.18[.]242',
  '45.66.248[.]119',
  '50.3.132[.]232',
  '159.65.169[.]200',
  '137.74.104[.]108'])

In [49]:
rst_threat_apikey = "здесь ваш апи ключ от rst THREAT feed"

from functools import lru_cache

@lru_cache
def ioc_lookup(ioc: str):

  if "[.]" in ioc:
    ioc = ioc.replace("[.]", ".")

  headers = {
      'accept': 'application/json',
      'x-api-key': rst_threat_apikey,
  }

  query_params = {
      'value': ioc,
  }

  try:
    response = requests.get('https://api.rstcloud.net/v1/ioc', headers=headers, params=query_params)
    return response.json()
    
  except Exception as e:
    return e

In [61]:
response = ioc_lookup("45.66.248[.]119")

description = response.get("description")
report_source = response.get("src").get("report")
ioc_value = response.get("ioc_value")
ioc_type = response.get("ioc_type")
tag = description.split('.')[0].split(": ")[-1]
related_threat = description.split(".")[1].split(": ")[-1]


In [62]:
dict_data = {
  "ioc": ioc_value,
  "tag": tag,
  "related_threat": related_threat,
  "report_source": report_source,
  "ioc_type": ioc_type
}

In [63]:
def get_ioc_data(ioc: str) -> dict:
  #потом ставим try except
  try:
    response = ioc_lookup(ioc)
    report_source = response.get("src").get("report")
    tag = description.split(".")[0].split(": ")[-1]
    related_threat = description.split(".")[1].split(": ")[-1]
    ioc_type = response.get("ioc_type")

    dict_data = {
      "ioc": ioc,
      "tag": tag,
      "related_threat": related_threat,
      "report_source": report_source,
      "ioc_type": ioc_type
    }

    # return dict_data
  
  except Exception as e:
    dict_data = {
      "ioc": ioc,
      "tag": None,
      "related_threat": None,
      "report_source": None,
      "ioc_type": None
    }

  return dict_data

In [65]:
get_ioc_data("simipimi[.]com")

{'ioc': 'simipimi[.]com',
 'tag': 'ransomware',
 'related_threat': 'icedid, nokoyawa',
 'report_source': 'https://github.com/stamparm/maltrail',
 'ioc_type': 'domain'}

In [66]:
full_data = []

for domain in domains:
  data = get_ioc_data(domain)
  # print(data)
  full_data.append(data)

for ip in ipv4:
  data = get_ioc_data(ip)
  # print(data)
  full_data.append(data)

In [67]:
full_data

[{'ioc': 'simipimi[.]com',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'domain'},
 {'ioc': 'curabiebarristie[.]com',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'domain'},
 {'ioc': 'dropmefiles[.]com',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://thedfirreport.com/2023/05/22/icedid-macro-ends-in-nokoyawa-ransomware',
  'ioc_type': 'domain'},
 {'ioc': 'belliecow[.]wiki',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'domain'},
 {'ioc': 'aicsoftware[.]com',
  'tag': 'ransomware',
  'related_threat': 'icedid, nokoyawa',
  'report_source': 'https://github.com/stamparm/maltrail',
  'ioc_type': 'domain'},
 {'ioc': 'kicknocisd[.]com',
  'tag': 'ransomware',
  'related_threat': '

In [68]:
def write_to_json(filename: str, data: list[dict]):
    with open(filename, 'w') as f:
        json.dump(data,
                  f,
                  indent=4,
                  ensure_ascii=True)

In [70]:
write_to_json("ioc.json", full_data)