# Feature Engineering

In [1]:
pip install pandas numpy tldextract python-whois aiodns aiohttp idna joblib tqdm ipywidgets nest_asyncio brotli python-dateutil

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import math
import asyncio
import ssl
import socket
from datetime import datetime, timezone
from urllib.parse import urlparse, parse_qs
from concurrent.futures import ThreadPoolExecutor
from itertools import groupby
from dateutil import parser

import numpy as np
import pandas as pd
import tldextract
import idna
import whois
import aiodns
from joblib import Memory
import brotli

from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio as tqdma

import nest_asyncio
nest_asyncio.apply()


In [3]:
def lexical_features(df: pd.DataFrame) -> pd.DataFrame:
    """Compute purely lexical (string-based) features for every URL in ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a string column named ``"url"``.

    Returns
    -------
    pd.DataFrame
        One row per input row (same index as ``df``) with length, symbol-count,
        entropy, n-gram, keyword, pattern, compression-ratio and TLD features.

    Notes
    -----
    URLs that do not match ``<scheme>://<rest>`` yield empty scheme/host/path/
    query/fragment components; features computed on the whole URL string are
    unaffected by that.
    """
    u = df["url"]
    parsed = u.str.extract(
        r"^(?P<scheme>[^:]+)://(?P<rest>.+)$", expand=True).fillna("")
    scheme = parsed["scheme"]
    rest = parsed["rest"]

    # Split components in RFC 3986 order: the fragment starts at the first "#",
    # the query at the first "?" before it, and the authority ends at the first
    # "/" before that.  Splitting in this order keeps "?..." and "#..." out of
    # the host when the URL has no path, and recognises a fragment even when
    # there is no query string.
    frag_split = rest.str.split("#", n=1)
    before_frag = frag_split.str[0]
    frag = frag_split.str[1].fillna("")
    query_split = before_frag.str.split("?", n=1)
    before_query = query_split.str[0]
    query_only = query_split.str[1].fillna("")
    path_split = before_query.str.split("/", n=1)
    host = path_split.str[0]
    path = path_split.str[1].fillna("")  # path without its leading "/"

    v = pd.DataFrame(index=df.index)
    # Basic lengths
    v["url_length"]      = u.str.len().astype("int32")
    v["hostname_length"] = host.str.len().astype("int32")
    v["path_length"]     = path.str.len().astype("int32")
    v["query_length"]    = query_only.str.len().astype("int32")
    v["fragment_length"] = frag.str.len().astype("int32")
    # Symbol counts
    v["num_dots"]        = host.str.count(r"\.").astype("int16")
    v["num_hyphens"]     = u.str.count("-").astype("int16")
    v["num_underscores"] = u.str.count("_").astype("int16")
    v["digit_ratio"]     = (
        u.str.count(r"\d").astype("float32") / v["url_length"].clip(lower=1)
    )

    # Shannon entropy (bits per character) of the character distribution
    def entropy(s):
        if not s: return 0.
        p = np.fromiter((s.count(c) / len(s) for c in set(s)), dtype=float)
        return -np.dot(p, np.log2(p))
    v["entropy_url"]  = u.map(entropy).astype("float32")
    v["entropy_host"] = host.map(entropy).astype("float32")
    # Vowel/consonant ratio (denominator clipped to avoid division by zero)
    vowels = host.str.count(r"[aeiouAEIOU]")
    conson = host.str.count(r"[A-Za-z]") - vowels
    v["vowel_cons_ratio"] = (vowels / conson.clip(lower=1)).astype("float32")
    # N-grams: number of distinct character bigrams / trigrams in the host
    v["bigrams"]  = host.map(lambda s: len({s[i:i+2] for i in range(len(s)-1)})).astype("int16")
    v["trigrams"] = host.map(lambda s: len({s[i:i+3] for i in range(len(s)-2)})).astype("int16")
    # Phishing keywords and brand tokens (top 20+ from 2023-2025 literature)
    keywords = [
        "login","secure","update","verify","account","bank","signin","confirm",
        "apple","facebook","paypal","amazon","google","microsoft","netflix",
        "yahoo","dropbox","github","linkedin","ebay","wallet","invoice","payment",
        "support","alert","service","admin","webscr","session","billing"
    ]
    # Lowercase once instead of once per keyword; clip so that a pathological
    # URL cannot wrap around when the count is narrowed to int8.
    u_lower = u.str.lower()
    for kw in keywords:
        v[f"kw_{kw}_count"] = u_lower.str.count(kw).clip(upper=127).astype("int8")
    # Suspicious patterns
    v["has_at_symbol"]   = u.str.contains("@").astype("int8")
    # Skip the first 8 characters so the "//" of the scheme itself is ignored.
    v["has_double_slash"] = u.str.slice(8).str.contains("//").astype("int8")
    # Dotted-quad host, optionally followed by an explicit ":port".
    v["has_ip_address"]  = host.str.match(r"^(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?$").astype("int8")
    v["uses_https"]      = (scheme == "https").astype("int8")
    # Number of "&"-separated parameters; an empty query counts as zero.
    v["param_count"]     = query_only.str.count("&").add(query_only.ne("").astype(int)).astype("int16")
    # Labels beyond the last two are counted as subdomains (naive heuristic).
    v["subdomain_count"] = host.fillna("").str.split(".").map(lambda x: max(len(x)-2, 0)).astype("int16")
    # Uppercase ratio
    v["uppercase_ratio"] = (
        u.str.count(r"[A-Z]").astype("float32") / v["url_length"].clip(lower=1)
    )
    # Char repetition (longest run of one identical character)
    v["max_char_run"] = u.map(lambda s: max([len(list(g)) for _, g in groupby(s)], default=0)).astype("int16")
    # Kolmogorov-complexity proxy: compressed size / raw length (optional)
    try:
        import brotli
        v["kolmogorov"] = u.map(lambda s: len(brotli.compress(s.encode(), quality=4)) / max(len(s),1)).astype("float32")
    except ImportError:
        # Keep the column dtype identical to the brotli branch.
        v["kolmogorov"] = np.float32(0.0)
    # TLD prior: hand-assigned legitimacy score, 0.5 for suffixes not listed
    tld = host.map(lambda h: tldextract.extract(h).suffix)
    tld_legit = {"com":0.8,"org":0.7,"net":0.7,"ru":0.2,"cn":0.2,"xyz":0.1,"top":0.1,"info":0.3,"site":0.2,"online":0.2}
    v["tld_legit_prob"] = tld.map(lambda s: tld_legit.get(s,0.5)).astype("float32")
    return v


In [4]:
# Module-level singletons shared by the network feature extractors below.

# joblib on-disk memoization store rooted at ".cache_feat".
# NOTE(review): not referenced by any function visible in this notebook —
# confirm whether it is still needed or was meant to wrap the WHOIS lookup.
CACHE = Memory(".cache_feat", verbose=0)
# Asynchronous DNS resolver (constructed with timeout=6) used by dns_features.
DNS_RESOLVER = aiodns.DNSResolver(timeout=6)
# Default TLS client context from the ssl module, used by _ssl_worker.
SSL_CTX = ssl.create_default_context()

def _whois_dates(domain: str):
    try:
        w = whois.whois(domain)
        cd, ed = w.creation_date, w.expiration_date
        if isinstance(cd, list): cd = cd[0]
        if isinstance(ed, list): ed = ed[0]
        # Parse to datetime if string
        if isinstance(cd, str):
            cd = parser.parse(cd)
        if isinstance(ed, str):
            ed = parser.parse(ed)
        # Make both timezone-aware in UTC
        if cd is not None:
            if cd.tzinfo is None or cd.tzinfo.utcoffset(cd) is None:
                cd = cd.replace(tzinfo=timezone.utc)
            else:
                cd = cd.astimezone(timezone.utc)
        if ed is not None:
            if ed.tzinfo is None or ed.tzinfo.utcoffset(ed) is None:
                ed = ed.replace(tzinfo=timezone.utc)
            else:
                ed = ed.astimezone(timezone.utc)
        return cd, ed
    except Exception:
        return None, None

async def dns_features(host: str) -> dict:
    """Collect DNS record counts for ``host`` using the shared resolver.

    Parameters
    ----------
    host : str
        Hostname to resolve.  An empty value returns all-zero features
        without issuing any query.

    Returns
    -------
    dict
        ``a_count``, ``mx_count``, ``ns_count``: number of records returned for
        each type (0 when the query fails).  ``reverse_dns``: 1 when a PTR
        record exists for the first resolved IPv4 address, else 0.

    Every query is best-effort: a failing lookup leaves its feature at 0.
    """
    out = {"a_count":0,"mx_count":0,"ns_count":0,"reverse_dns":0}
    if not host:
        return out

    addresses = []
    try:
        a_records = await DNS_RESOLVER.query(host, "A")
        out["a_count"] = len(a_records)
        addresses = [rec.host for rec in a_records]
    except Exception:
        # `Exception` (not a bare except) so task cancellation still propagates.
        pass

    if addresses:
        try:
            # The PTR name is built from the octets of the resolved IPv4
            # address in reverse order, not from the hostname's labels.
            ptr_name = ".".join(reversed(addresses[0].split("."))) + ".in-addr.arpa"
            await DNS_RESOLVER.query(ptr_name, "PTR")
            out["reverse_dns"] = 1
        except Exception:
            pass

    for rec, key in (("MX","mx_count"),("NS","ns_count")):
        try:
            ans = await DNS_RESOLVER.query(host, rec)
            out[key] = len(ans)
        except Exception:
            pass
    return out

def _ssl_worker(host: str):
    res = {"cert_valid_days":0,"issuer_len":0,"days_since_issue":0,"self_signed":0}
    try:
        with SSL_CTX.wrap_socket(socket.socket(), server_hostname=host) as s:
            s.settimeout(8)
            s.connect((host,443))
            cert = s.getpeercert()
            nb = datetime.strptime(cert["notBefore"],"%b %d %H:%M:%S %Y %Z")
            na = datetime.strptime(cert["notAfter"], "%b %d %H:%M:%S %Y %Z")
            res["cert_valid_days"] = (na-nb).days
            res["days_since_issue"]= (datetime.now()-nb).days
            iss = dict(x[0] for x in cert.get("issuer", []))
            res["issuer_len"] = len(iss.get("commonName",""))
            res["self_signed"] = int(iss.get("organizationalUnitName","")=="Self-signed")
    except: pass
    return res

async def gather_net_features(hosts: list[str]) -> pd.DataFrame:
    """Collect DNS, TLS and WHOIS features for every host in ``hosts``.

    Parameters
    ----------
    hosts : list[str]
        Hostnames to probe (ideally unique; each entry becomes one row).

    Returns
    -------
    pd.DataFrame
        Indexed by ``host`` with ``domain_age_days``, ``days_to_expiry`` and
        the keys produced by ``dns_features`` and ``_ssl_worker``.

    Notes
    -----
    Results are collected with ``tqdm_asyncio.gather``, which returns them in
    *input* order.  Iterating ``as_completed`` instead would yield them in
    completion order, and zipping that with ``hosts`` would attach each
    feature set to an arbitrary host.
    """
    if not hosts:
        # Nothing to probe: return an empty, correctly shaped frame instead of
        # failing on set_index("host") below.
        return pd.DataFrame(
            columns=[
                "domain_age_days", "days_to_expiry",
                "a_count", "mx_count", "ns_count", "reverse_dns",
                "cert_valid_days", "issuer_len", "days_since_issue", "self_signed",
            ],
            index=pd.Index([], name="host"),
        )

    loop = asyncio.get_running_loop()
    n_hosts = len(hosts)

    # The context manager shuts the pool down once its futures are awaited.
    with ThreadPoolExecutor(max_workers=8) as ssl_pool:
        # Submit TLS probes first so they run in threads while DNS is awaited.
        ssl_futures = [loop.run_in_executor(ssl_pool, _ssl_worker, h) for h in hosts]
        dns_out = await tqdma.gather(
            *(dns_features(h) for h in hosts), total=n_hosts, desc="Async DNS")
        ssl_out = await tqdma.gather(
            *ssl_futures, total=n_hosts, desc="TLS handshake")

    with ThreadPoolExecutor(max_workers=8) as wpool:
        whois_dates = await tqdma.gather(
            *[loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
              for h in hosts],
            total=n_hosts, desc="WHOIS")

    today = datetime.now(timezone.utc)
    rows = []
    for h, dns_d, ssl_d, (cd, ed) in zip(hosts, dns_out, ssl_out, whois_dates):
        # Unknown dates are encoded as 0 days.
        age = (today-cd).days if cd else 0
        to_exp = (ed-today).days if ed else 0
        rows.append(dict(
            host=h,
            domain_age_days=age,
            days_to_expiry=to_exp,
            **dns_d, **ssl_d
        ))
    return pd.DataFrame(rows).set_index("host")


In [5]:
def batch_extract_notebook(urls: list[str]) -> pd.DataFrame:
    """Blocking wrapper around ``async_extract`` for use from a notebook cell.

    Runs the coroutine to completion on the loop returned by
    ``asyncio.get_event_loop()`` and returns the resulting feature frame.
    NOTE(review): presumably relies on ``nest_asyncio.apply()`` having been
    called so that the already-running notebook loop can be re-entered.
    """
    event_loop = asyncio.get_event_loop()
    pending = async_extract(urls)
    return event_loop.run_until_complete(pending)

async def async_extract(urls: list[str]) -> pd.DataFrame:
    """Build the full feature table (lexical + network) for ``urls``.

    Lexical features are computed per URL; network features are gathered once
    per distinct hostname and joined back onto every URL with that hostname.
    The returned frame has ``url`` as its first column.
    """
    url_frame = pd.DataFrame({"url": urls})
    n_urls = len(url_frame)

    # The lexical pass is a single vectorised call, so the bar jumps to 100%.
    with tqdm(total=n_urls, desc="Lexical features", unit="url") as progress:
        lexical = lexical_features(url_frame)
        progress.update(n_urls)

    def _hostname(raw_url):
        # URLs without a parseable hostname map to the empty string.
        return urlparse(raw_url).hostname or ""

    host_per_url = url_frame["url"].map(_hostname)
    distinct_hosts = host_per_url.unique().tolist()
    network = await gather_net_features(distinct_hosts)

    combined = lexical.join(network, on=host_per_url)
    combined.insert(0, "url", urls)
    return combined


In [None]:
# Input / output locations (machine-specific absolute paths) and chunk size.
DATA_PATH = r"C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\labeled_url_dataset.csv"
OUTPUT_PATH = r"C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv"
BATCH_SIZE = 1000   # rows read per chunk; tune to available resources

# Stream the labelled dataset chunk by chunk so memory use stays bounded.
reader = pd.read_csv(DATA_PATH, chunksize=BATCH_SIZE)
first = True
for chunk in tqdm(reader, desc="Overall Progress"):
    chunk = chunk.dropna(subset=['url', 'label']).reset_index(drop=True)
    urls, labels = chunk['url'].tolist(), chunk['label'].tolist()
    feat_df = batch_extract_notebook(urls)
    feat_df['label'] = labels
    # First chunk creates the file and writes the header; later chunks append
    # rows only.
    feat_df.to_csv(OUTPUT_PATH, index=False, mode='w' if first else 'a', header=first)
    first = False
    print(f"Processed {len(urls)} URLs, saved to {OUTPUT_PATH}")


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 5638.81url/s]
Async DNS: 100%|██████████| 816/816 [00:27<00:00, 30.18it/s]
TLS handshake: 100%|██████████| 816/816 [01:12<00:00, 11.18it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 20:52:02,316 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 20:52:03,611 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 20:52:06,220 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-09 20:52:14,703 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 20:53:53,439 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-09 20:

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3946.16url/s]
Async DNS: 100%|██████████| 832/832 [00:19<00:00, 43.71it/s]
TLS handshake: 100%|██████████| 832/832 [01:30<00:00,  9.20it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 21:02:41,680 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 21:02:41,685 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:02:42,375 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 21:02:42,421 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:02:45,687 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:02:53,196 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3780.54url/s]
Async DNS: 100%|██████████| 803/803 [00:23<00:00, 34.09it/s]
TLS handshake: 100%|██████████| 803/803 [01:09<00:00, 11.60it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 21:11:28,056 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:11:30,365 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 21:11:32,064 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 21:11:32,806 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-09 21:11:35,489 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4636.12url/s]
Async DNS: 100%|██████████| 837/837 [00:20<00:00, 41.85it/s]
TLS handshake: 100%|██████████| 837/837 [01:18<00:00, 10.67it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 21:21:36,612 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:21:41,632 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:21:44,040 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 21:21:51,611 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-09 21:21:53,498 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 21:21:54,300 - whois.whois - ERROR - Error trying to connect 

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4342.99url/s]
Async DNS: 100%|██████████| 819/819 [00:33<00:00, 24.32it/s]
TLS handshake: 100%|██████████| 819/819 [01:15<00:00, 10.82it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 22:21:32,977 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:21:36,619 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:21:39,149 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-09 22:21:40,322 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:21:41,154 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3907.68url/s]
Async DNS: 100%|██████████| 814/814 [00:17<00:00, 46.14it/s]
TLS handshake: 100%|██████████| 814/814 [01:22<00:00,  9.91it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 22:30:15,803 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 22:30:18,091 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 22:30:23,015 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-09 22:30:25,452 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:31:30,217 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:31:30,764 - whois.whois - ERROR - Error trying to connect t

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6557.95url/s]
Async DNS: 100%|██████████| 789/789 [00:20<00:00, 37.81it/s]
TLS handshake: 100%|██████████| 789/789 [09:06<00:00,  1.44it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 22:47:59,733 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:48:02,538 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:49:52,429 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10054] An existing connection was forcibly closed by the remote host
2025-07-09 22:49:54,068 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:50:10,453 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 2714.64url/s]
Async DNS: 100%|██████████| 797/797 [00:23<00:00, 33.37it/s]
TLS handshake: 100%|██████████| 797/797 [01:17<00:00, 10.31it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 22:58:00,638 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:58:10,783 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:58:14,610 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-09 22:58:16,059 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:58:22,360 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 22:58:24,166 - whois.whois - ERROR - Error trying to connect t

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 2685.84url/s]
Async DNS: 100%|██████████| 796/796 [00:25<00:00, 30.74it/s]
TLS handshake: 100%|██████████| 796/796 [01:32<00:00,  8.64it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 23:08:13,029 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:08:13,034 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:08:14,068 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 23:08:14,807 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:08:20,465 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:08:22,320 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3988.85url/s]
Async DNS: 100%|██████████| 800/800 [00:23<00:00, 33.60it/s]
TLS handshake: 100%|██████████| 800/800 [01:15<00:00, 10.56it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 23:16:41,330 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:16:44,567 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:16:45,164 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 23:18:02,768 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:18:03,631 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:18:05,294 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4112.87url/s]
Async DNS: 100%|██████████| 782/782 [00:31<00:00, 25.06it/s]
TLS handshake: 100%|██████████| 782/782 [01:00<00:00, 13.02it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 23:24:12,516 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:24:12,519 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:24:13,296 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 23:24:18,608 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-09 23:24:19,895 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4004.53url/s]
Async DNS: 100%|██████████| 811/811 [00:18<00:00, 43.76it/s]
TLS handshake: 100%|██████████| 811/811 [01:19<00:00, 10.25it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 23:35:48,749 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:35:49,465 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:35:50,318 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:35:50,662 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 23:35:51,899 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:35:54,110 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3364.13url/s]
Async DNS: 100%|██████████| 804/804 [00:23<00:00, 34.03it/s]
TLS handshake: 100%|██████████| 804/804 [00:57<00:00, 13.97it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-09 23:53:33,674 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-09 23:53:35,162 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:53:38,131 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-09 23:53:38,521 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-09 23:53:41,969 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively re

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4250.72url/s]
Async DNS: 100%|██████████| 819/819 [00:18<00:00, 43.44it/s]
TLS handshake: 100%|██████████| 819/819 [01:16<00:00, 10.69it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:07:32,720 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:07:32,960 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   1%|          | 6/819 [00:01<03:03,  4.43it/s]2025-07-10 00:07:32,977 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 00:07:35,556 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 00:07:45,285 - whois.whois - ERROR - Error trying to connec

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3422.94url/s]
Async DNS: 100%|██████████| 799/799 [00:16<00:00, 48.93it/s]
TLS handshake: 100%|██████████| 799/799 [01:24<00:00,  9.41it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:18:25,280 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:18:25,283 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 00:18:25,283 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 00:18:29,712 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   5%|▌         | 40/799 [00:07<02:01,  6.25it/s]2025-07-10 00:18:32,739 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo f

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3551.35url/s]
Async DNS: 100%|██████████| 805/805 [00:25<00:00, 31.77it/s]
TLS handshake: 100%|██████████| 805/805 [00:54<00:00, 14.88it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:29:59,828 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 00:30:01,821 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 00:30:03,692 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 00:30:07,390 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 00:30:09,175 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrin

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4176.09url/s]
Async DNS: 100%|██████████| 818/818 [00:28<00:00, 28.60it/s]
TLS handshake: 100%|██████████| 818/818 [01:09<00:00, 11.82it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:37:38,477 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:38:50,837 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:38:59,905 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 00:39:03,329 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:39:05,579 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 00:39:08,808 - whois.whois - ERROR - Error trying to connect to socket: closing socke

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 2477.81url/s]
Async DNS: 100%|██████████| 785/785 [00:21<00:00, 36.02it/s]
TLS handshake: 100%|██████████| 785/785 [01:17<00:00, 10.17it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:46:24,922 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 00:46:25,441 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 00:46:29,673 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 00:46:33,670 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:46:34,031 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 5214.53url/s]
Async DNS: 100%|██████████| 806/806 [00:20<00:00, 39.49it/s]
TLS handshake: 100%|██████████| 806/806 [01:20<00:00, 10.02it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:51:40,260 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:51:42,349 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:51:42,523 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:51:44,203 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it

WHOIS:   2%|▏         | 14/806 [00:09<08:17,  1.59it/s]2025-07-10 00:51:44,208 - whois.whois - ERROR - Error trying to conn

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6736.49url/s]
Async DNS: 100%|██████████| 818/818 [00:19<00:00, 41.77it/s]
TLS handshake: 100%|██████████| 818/818 [01:09<00:00, 11.74it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 00:59:53,605 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 00:59:56,100 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 00:59:57,868 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 01:01:26,119 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:01:27,486 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 01:01:27,915 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6226.71url/s]
Async DNS: 100%|██████████| 837/837 [00:27<00:00, 30.37it/s]
TLS handshake: 100%|██████████| 837/837 [01:19<00:00, 10.56it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 01:12:56,767 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 01:13:01,008 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 01:13:04,129 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 01:13:07,478 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 01:13:09,571 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:13:14,990 - whois.whois - ERROR - Error trying to connect to socket: closing socke

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 5494.57url/s]
Async DNS: 100%|██████████| 832/832 [00:19<00:00, 43.50it/s]
TLS handshake: 100%|██████████| 832/832 [01:23<00:00,  9.94it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 01:20:37,308 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 01:20:37,991 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:20:38,328 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 01:20:41,506 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:20:44,867 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively re

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6662.45url/s]
Async DNS: 100%|██████████| 812/812 [00:22<00:00, 35.37it/s]
TLS handshake: 100%|██████████| 812/812 [01:04<00:00, 12.54it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 01:27:11,231 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   6%|▌         | 48/812 [00:11<04:23,  2.90it/s]2025-07-10 01:27:13,566 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 01:27:17,290 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 01:27:17,757 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:27:18,238 - whois.whois - ERROR - Error trying to conne

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6300.44url/s]
Async DNS: 100%|██████████| 807/807 [00:18<00:00, 44.61it/s]
TLS handshake: 100%|██████████| 807/807 [01:15<00:00, 10.72it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 01:35:33,164 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:35:37,938 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:35:38,091 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 01:35:38,169 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   6%|▌         | 48/807 [00:14<03:11,  3.97it/s]2025-07-10 01:35:39,251 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo f

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4466.56url/s]
Async DNS: 100%|██████████| 815/815 [00:18<00:00, 44.81it/s]
TLS handshake: 100%|██████████| 815/815 [01:16<00:00, 10.66it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 09:58:13,008 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 09:58:15,848 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it

WHOIS:   2%|▏         | 20/815 [00:06<06:23,  2.07it/s]2025-07-10 09:58:15,853 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 09:58:16,807 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 09:58:22,430 - whois.whois - ERROR - Error trying to conne

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7863.47url/s]
Async DNS: 100%|██████████| 828/828 [00:23<00:00, 34.70it/s]
TLS handshake: 100%|██████████| 828/828 [01:19<00:00, 10.35it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:04:39,777 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 10:04:43,098 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 10:04:46,517 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:04:48,232 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 10:04:50,353 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:04:54,496 - whois.whois - ERROR - Error trying to connect t

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4543.57url/s]
Async DNS: 100%|██████████| 816/816 [00:21<00:00, 38.19it/s]
TLS handshake: 100%|██████████| 816/816 [01:17<00:00, 10.49it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:11:52,060 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:11:52,063 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 10:11:53,364 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 10:12:06,158 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 10:12:06,693 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7030.53url/s]
Async DNS: 100%|██████████| 790/790 [00:20<00:00, 39.28it/s]
TLS handshake: 100%|██████████| 790/790 [01:32<00:00,  8.55it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:22:54,795 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:22:56,436 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it

2025-07-10 10:22:56,440 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:23:58,157 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10054] An existing connection was forcibly closed by the remote host2025-07-10 10:23:58,918 - whois.whois - ERROR - Error trying to connect to soc

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7821.23url/s]
Async DNS: 100%|██████████| 808/808 [00:29<00:00, 27.26it/s]
TLS handshake: 100%|██████████| 808/808 [01:05<00:00, 12.29it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:30:25,085 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:30:26,254 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:30:26,321 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:30:38,862 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 10:30:41,056 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 10:30:41,061 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7738.01url/s]
Async DNS: 100%|██████████| 819/819 [00:21<00:00, 37.45it/s]
TLS handshake: 100%|██████████| 819/819 [04:50<00:00,  2.82it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:41:13,227 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:41:18,120 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:41:20,253 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:41:21,284 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:41:21,284 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:41:33,419 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4092.04url/s]
Async DNS: 100%|██████████| 817/817 [00:22<00:00, 36.29it/s]
TLS handshake: 100%|██████████| 817/817 [01:27<00:00,  9.38it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:47:57,577 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:47:59,419 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 10:47:59,765 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 10:48:03,366 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:48:03,471 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 10:48:04,640 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7906.72url/s]
Async DNS: 100%|██████████| 835/835 [00:19<00:00, 42.60it/s]
TLS handshake: 100%|██████████| 835/835 [01:34<00:00,  8.85it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 10:57:25,494 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 10:57:26,288 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:57:26,297 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:57:29,979 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 10:57:33,362 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6403.15url/s]
Async DNS: 100%|██████████| 832/832 [00:19<00:00, 42.81it/s]
TLS handshake: 100%|██████████| 832/832 [01:21<00:00, 10.25it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:06:18,927 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 11:06:21,021 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:06:21,962 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:06:22,267 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:07:26,035 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10054] An existing connection was forcibly closed by the remote host2025

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7509.15url/s]
Async DNS: 100%|██████████| 817/817 [00:20<00:00, 39.62it/s]
TLS handshake: 100%|██████████| 817/817 [01:30<00:00,  8.99it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:15:02,580 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:15:03,897 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:15:06,906 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:15:10,107 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:15:15,624 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively re

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 6650.10url/s]
Async DNS: 100%|██████████| 815/815 [00:25<00:00, 31.66it/s]
TLS handshake: 100%|██████████| 815/815 [01:13<00:00, 11.13it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:24:04,661 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:24:04,710 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:24:10,360 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 11:24:11,726 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 11:24:15,835 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:24:17,339 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 7049.53url/s]
Async DNS: 100%|██████████| 820/820 [00:22<00:00, 36.99it/s]
TLS handshake: 100%|██████████| 820/820 [01:24<00:00,  9.73it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:31:55,145 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:31:55,644 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 11:31:56,275 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 11:31:56,422 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10054] An existing connection was forcibly closed by the remote host

2025-07-10 11:31:57,245 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
202

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 2998.64url/s]
Async DNS: 100%|██████████| 792/792 [00:22<00:00, 35.29it/s]
TLS handshake: 100%|██████████| 792/792 [01:13<00:00, 10.84it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:45:53,481 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:45:56,810 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 11:45:56,815 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:45:57,594 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 11:45:57,619 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:45:59,144 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 5269.96url/s]
Async DNS: 100%|██████████| 820/820 [00:31<00:00, 25.71it/s]
TLS handshake: 100%|██████████| 820/820 [01:12<00:00, 11.33it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 11:57:53,606 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 11:57:56,154 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:57:57,140 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 11:57:58,884 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 11:57:58,984 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10060] A connection attempt failed because the connected party did not pr

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4338.96url/s]
Async DNS: 100%|██████████| 806/806 [00:17<00:00, 44.85it/s]
TLS handshake: 100%|██████████| 806/806 [01:24<00:00,  9.49it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:03:09,018 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:03:14,940 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:03:21,257 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:03:24,782 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 12:03:25,889 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:03:26,079 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4273.31url/s]
Async DNS: 100%|██████████| 797/797 [00:20<00:00, 39.43it/s]
TLS handshake: 100%|██████████| 797/797 [01:32<00:00,  8.59it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:10:14,957 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:11:22,291 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:11:24,444 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 12:12:43,705 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 12:12:44,869 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3612.83url/s]
Async DNS: 100%|██████████| 797/797 [00:31<00:00, 25.40it/s]
TLS handshake: 100%|██████████| 797/797 [01:04<00:00, 12.41it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:20:36,342 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:20:45,695 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:20:50,728 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:20:50,734 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:20:53,161 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 12:21:05,822 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3497.02url/s]
Async DNS: 100%|██████████| 812/812 [00:34<00:00, 23.80it/s]
TLS handshake: 100%|██████████| 812/812 [01:15<00:00, 10.79it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:27:35,227 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:27:37,506 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:27:37,872 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:27:41,075 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:27:41,708 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:27:42,464 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3971.43url/s]
Async DNS: 100%|██████████| 788/788 [00:29<00:00, 26.72it/s]
TLS handshake: 100%|██████████| 788/788 [01:18<00:00,  9.98it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:37:20,140 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:37:20,142 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:37:21,286 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 12:37:21,349 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:37:21,848 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:37:21,850 - whois.whois - ERROR - Er

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4035.76url/s]
Async DNS: 100%|██████████| 804/804 [00:20<00:00, 38.32it/s]
TLS handshake: 100%|██████████| 804/804 [01:15<00:00, 10.68it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 12:47:19,708 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:47:19,711 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:47:22,214 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 12:47:24,808 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 12:47:26,408 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 12:47:29,812 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 2602.63url/s]
Async DNS: 100%|██████████| 836/836 [00:29<00:00, 28.19it/s]
TLS handshake: 100%|██████████| 836/836 [01:29<00:00,  9.38it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:02:57,207 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:03:04,145 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
2025-07-10 13:03:04,269 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:03:04,862 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:03

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4763.65url/s]
Async DNS: 100%|██████████| 804/804 [00:20<00:00, 38.39it/s]
TLS handshake: 100%|██████████| 804/804 [01:21<00:00,  9.82it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:14:08,680 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   1%|          | 5/804 [00:01<02:51,  4.67it/s]2025-07-10 13:14:09,578 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 13:14:15,167 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:14:15,561 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:14:19,388 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo fai

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3540.78url/s]
Async DNS: 100%|██████████| 803/803 [00:19<00:00, 40.70it/s]
TLS handshake: 100%|██████████| 803/803 [01:25<00:00,  9.36it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:21:46,484 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:21:52,599 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:21:52,815 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   8%|▊         | 61/803 [00:13<03:09,  3.92it/s]2025-07-10 13:21:57,198 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 13:21:58,995 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo fa

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3865.96url/s]
Async DNS: 100%|██████████| 814/814 [00:19<00:00, 41.26it/s]
TLS handshake: 100%|██████████| 814/814 [01:16<00:00, 10.65it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:33:21,141 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:33:24,438 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 13:33:26,224 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

WHOIS:   4%|▎         | 30/814 [00:06<01:55,  6.79it/s]2025-07-10 13:33:26,230 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 13:33:26,301 - whois.whois - ERROR - Error trying to conne

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3522.94url/s]
Async DNS: 100%|██████████| 831/831 [00:23<00:00, 35.88it/s]
TLS handshake: 100%|██████████| 831/831 [01:11<00:00, 11.66it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:41:29,663 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:41:38,415 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:41:39,136 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:41:44,335 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 13:41:49,452 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3328.80url/s]
Async DNS: 100%|██████████| 824/824 [00:21<00:00, 37.77it/s]
TLS handshake: 100%|██████████| 824/824 [01:09<00:00, 11.78it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:52:23,023 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 13:52:23,596 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 13:52:24,770 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:52:28,761 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 13:52:32,229 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 13:52:32,704 - whois.whois - ERROR - E

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3422.20url/s]
Async DNS: 100%|██████████| 810/810 [00:22<00:00, 36.29it/s]
TLS handshake: 100%|██████████| 810/810 [01:13<00:00, 10.98it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 13:59:59,717 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 14:00:07,976 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 14:00:08,185 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 14:00:11,308 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 14:00:20,624 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 14:

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3378.29url/s]
Async DNS: 100%|██████████| 803/803 [00:20<00:00, 38.91it/s]
TLS handshake: 100%|██████████| 803/803 [01:19<00:00, 10.16it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 14:08:03,707 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 14:08:04,356 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it

2025-07-10 14:08:04,363 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 14:08:11,170 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 14:08:17,947 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinf

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 4274.37url/s]
Async DNS: 100%|██████████| 802/802 [00:20<00:00, 38.77it/s]
TLS handshake: 100%|██████████| 802/802 [01:23<00:00,  9.55it/s]
  [loop.run_in_executor(wpool, _whois_dates, tldextract.extract(h).registered_domain)
2025-07-10 14:13:27,214 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [WinError 10061] No connection could be made because the target machine actively refused it
2025-07-10 14:13:32,735 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed

2025-07-10 14:13:34,931 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed
2025-07-10 14:13:35,887 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 11001] getaddrinfo failed2025-07-10 14:13:37,681 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out
2025-07-10 14:

Processed 1000 URLs, saved to C:\prateek\projects\linkload\backend\ml_models\phishing_detection\data\phishing_dataset_with_features.csv


Lexical features: 100%|██████████| 1000/1000 [00:00<00:00, 3106.45url/s]
