In [9]:
import re
import socket
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [10]:
data = pd.read_csv("путь к вашему csv файлу с доменами", encoding='latin-1', sep=";")
data.head(10)

Unnamed: 0,äàòà,äîìåí,Unnamed: 2
0,2023-09-18,0394-PLATEJ.ru,
1,2023-09-18,0581.ru,
2,2023-09-18,0X-SWAP.ru,
3,2023-09-18,1000-SOVETOV.ru,
4,2023-09-18,100GRYZCHIKOVBOT.ru,
5,2023-09-18,1248020.ru,
6,2023-09-18,13RUS-SHTAMP.ru,
7,2023-09-18,17SEP.ru,
8,2023-09-18,1C-API.ru,
9,2023-09-18,1C-APP.ru,


In [11]:
domains = data.iloc[:, 1].str.lower()
domains.head()

0         0394-platej.ru
1                0581.ru
2             0x-swap.ru
3        1000-sovetov.ru
4    100gryzchikovbot.ru
Name: äîìåí, dtype: object

In [12]:
for domain in domains:
  if re.match(r".*vtb.*", domain):
    print(domain)

vopros2023vtb.ru
vtb-24-promo.ru
askvtbank.ru
vtb-personal-app.ru
vtb24-price.ru
paradisevtb.ru
vtb24-onclick.ru
vtb24kilprice.ru
vtb24olbek.ru
vtb24podarok3000.ru
vtb24link3000.ru


In [13]:
def filter_domains(domains: list, regular_expression: str):
  return [domain for domain in domains if re.match(regular_expression, domain)]

vtb_regex = r".*vtb.*"
filtered_domains = filter_domains(domains, vtb_regex)
filtered_domains

['vopros2023vtb.ru',
 'vtb-24-promo.ru',
 'askvtbank.ru',
 'vtb-personal-app.ru',
 'vtb24-price.ru',
 'paradisevtb.ru',
 'vtb24-onclick.ru',
 'vtb24kilprice.ru',
 'vtb24olbek.ru',
 'vtb24podarok3000.ru',
 'vtb24link3000.ru']

In [16]:
def send_response(domain: str) -> requests.Response:
  try:
    response = requests.get("https://" + domain, timeout=1)
    return response
  except Exception:
    pass

  try:
    response = requests.get("http://" + domain, timeout=1)
    return response
  except Exception:
    pass

  return 'Error'

def get_status_code(response: requests.Response) -> int:
  if isinstance(response, requests.Response):
    return response.status_code
  else:
    return "Error"
  
def get_title(response: requests.Response) -> str:

  if isinstance(response, requests.Response):
      soup = BeautifulSoup(response.text, 'html.parser') 

      title_tag = soup.find('title')

      if title_tag:
        title = title_tag.text
        return title
      else:
        return "Title not found"
      
  else:
    return "Error"
  
def get_redirect(response: requests.Response) -> list[str]:
  redirects = []
  if isinstance(response, requests.Response):
      if response.history:
        for resp in response.history:
          redirects.append([resp.status_code, resp.url])
  return redirects

def get_ip(domain: str) -> str:
  try:
    ip_address = socket.gethostbyname(domain)
    return ip_address
  
  except Exception as e:
    return "Error"
  
def get_whois_info(domain: str) -> list[str]:
  rst_whois_apikey = "здесь ключ от rst WHOIS api"

  headers = {
      'accept': 'application/json',
      'x-api-key': rst_whois_apikey,
  }

  response = requests.get(f'https://api.rstcloud.net/v1/whois/{domain}', headers=headers)

  if response.status_code == 200:
    whois_data = response.json()
    return whois_data.get('created_on'), whois_data.get("registrar")
  else:
    return "Unknown", "Unknown"
  
def get_info_about_domain(domain: str):
  response = send_response(domain)
  status_code = get_status_code(response)
  title = get_title(response)
  redirects = get_redirect(response)
  ip = get_ip(domain)
  created_on, registrar = get_whois_info(domain)
  return domain, status_code, title, redirects, ip, created_on, registrar

In [17]:
get_info_about_domain("google.com")

('google.com',
 200,
 'Google',
 [[301, 'https://google.com/']],
 '74.125.205.100',
 '1997-09-15 04:00:00',
 'MarkMonitor Inc.')

In [18]:
# full_data = []

# for domain in filtered_domains:
#   data = get_info_about_domain(domain)
#   full_data.append(data)

full_data = [get_info_about_domain(domain) for domain in filtered_domains]
full_data

[('vopros2023vtb.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-17 11:43:53',
  'unknown'),
 ('vtb-24-promo.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-17 04:38:16',
  'unknown'),
 ('askvtbank.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-18 16:01:25',
  'unknown'),
 ('vtb-personal-app.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-18 06:48:28',
  'unknown'),
 ('vtb24-price.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-18 09:05:01',
  'unknown'),
 ('paradisevtb.ru',
  200,
  'Paradiseproject - Главная страница',
  [],
  '213.174.157.153',
  '2023-09-19 13:20:01',
  'unknown'),
 ('vtb24-onclick.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-19 09:55:55',
  'unknown'),
 ('vtb24kilprice.ru',
  403,
  'Ð\xa0Ð°Ð±Ð¾Ñ\x82Ð° Ñ\x81Ð°Ð¹Ñ\x82Ð° Ð²Ñ\x80ÐµÐ¼ÐµÐ½Ð½Ð¾ Ð¿Ñ\x80Ð¸Ð¾Ñ\x81Ñ\x82Ð°Ð½Ð¾Ð²Ð»ÐµÐ½Ð°',
  [],
  '37.140.192.199',
  '2023-09-20 04:57:16',
  'unknown'),
 ('vtb24olbek.ru',
  'Error',
  'Error',
  [],
  'Error',
  '2023-09-21 09:16:

In [19]:
column_names = ["domain", "status_code", "title", "redirects", "ip", "created_on", "registrar"]
df = pd.DataFrame(full_data, columns=column_names)
df.head(15)

Unnamed: 0,domain,status_code,title,redirects,ip,created_on,registrar
0,vopros2023vtb.ru,Error,Error,[],Error,2023-09-17 11:43:53,unknown
1,vtb-24-promo.ru,Error,Error,[],Error,2023-09-17 04:38:16,unknown
2,askvtbank.ru,Error,Error,[],Error,2023-09-18 16:01:25,unknown
3,vtb-personal-app.ru,Error,Error,[],Error,2023-09-18 06:48:28,unknown
4,vtb24-price.ru,Error,Error,[],Error,2023-09-18 09:05:01,unknown
5,paradisevtb.ru,200,Paradiseproject - Главная страница,[],213.174.157.153,2023-09-19 13:20:01,unknown
6,vtb24-onclick.ru,Error,Error,[],Error,2023-09-19 09:55:55,unknown
7,vtb24kilprice.ru,403,Ð Ð°Ð±Ð¾ÑÐ° ÑÐ°Ð¹ÑÐ° Ð²ÑÐµÐ¼ÐµÐ½Ð½Ð¾ Ð¿ÑÐ...,[],37.140.192.199,2023-09-20 04:57:16,unknown
8,vtb24olbek.ru,Error,Error,[],Error,2023-09-21 09:16:11,unknown
9,vtb24podarok3000.ru,Error,Error,[],Error,2023-09-22 07:17:57,unknown


In [20]:
df.to_csv("domains.csv", index=False)

In [21]:
data = pd.read_csv("domains.csv")
data.head(15)

Unnamed: 0,domain,status_code,title,redirects,ip,created_on,registrar
0,vopros2023vtb.ru,Error,Error,[],Error,2023-09-17 11:43:53,unknown
1,vtb-24-promo.ru,Error,Error,[],Error,2023-09-17 04:38:16,unknown
2,askvtbank.ru,Error,Error,[],Error,2023-09-18 16:01:25,unknown
3,vtb-personal-app.ru,Error,Error,[],Error,2023-09-18 06:48:28,unknown
4,vtb24-price.ru,Error,Error,[],Error,2023-09-18 09:05:01,unknown
5,paradisevtb.ru,200,Paradiseproject - Главная страница,[],213.174.157.153,2023-09-19 13:20:01,unknown
6,vtb24-onclick.ru,Error,Error,[],Error,2023-09-19 09:55:55,unknown
7,vtb24kilprice.ru,403,Ð Ð°Ð±Ð¾ÑÐ° ÑÐ°Ð¹ÑÐ° Ð²ÑÐµÐ¼ÐµÐ½Ð½Ð¾ Ð¿ÑÐ...,[],37.140.192.199,2023-09-20 04:57:16,unknown
8,vtb24olbek.ru,Error,Error,[],Error,2023-09-21 09:16:11,unknown
9,vtb24podarok3000.ru,Error,Error,[],Error,2023-09-22 07:17:57,unknown


In [24]:
data[data["domain"] == "vtb-personal-app.ru"]

Unnamed: 0,domain,status_code,title,redirects,ip,created_on,registrar
3,vtb-personal-app.ru,Error,Error,[],Error,2023-09-18 06:48:28,unknown


In [26]:
data[data["status_code"] == "Error"]

Unnamed: 0,domain,status_code,title,redirects,ip,created_on,registrar
0,vopros2023vtb.ru,Error,Error,[],Error,2023-09-17 11:43:53,unknown
1,vtb-24-promo.ru,Error,Error,[],Error,2023-09-17 04:38:16,unknown
2,askvtbank.ru,Error,Error,[],Error,2023-09-18 16:01:25,unknown
3,vtb-personal-app.ru,Error,Error,[],Error,2023-09-18 06:48:28,unknown
4,vtb24-price.ru,Error,Error,[],Error,2023-09-18 09:05:01,unknown
6,vtb24-onclick.ru,Error,Error,[],Error,2023-09-19 09:55:55,unknown
8,vtb24olbek.ru,Error,Error,[],Error,2023-09-21 09:16:11,unknown
9,vtb24podarok3000.ru,Error,Error,[],Error,2023-09-22 07:17:57,unknown
10,vtb24link3000.ru,Error,Error,[],Error,2023-09-23 05:05:50,unknown


In [27]:
data.nunique()

domain         11
status_code     3
title           3
redirects       1
ip              3
created_on     11
registrar       1
dtype: int64

In [28]:
data.describe()

Unnamed: 0,domain,status_code,title,redirects,ip,created_on,registrar
count,11,11,11,11,11,11,11
unique,11,3,3,1,3,11,1
top,vopros2023vtb.ru,Error,Error,[],Error,2023-09-17 11:43:53,unknown
freq,1,9,9,11,9,1,11
