# Crawling

In [28]:
import urllib.parse
import time
import requests
from bs4 import BeautifulSoup
from collections import deque
from queue import PriorityQueue
from urllib.parse import urlparse, urljoin
import re
import math
from nltk.stem import PorterStemmer
import concurrent.futures

# URL Canonicalization

In [29]:
def canonicalize_url(url, base_url=None):
        parsed_url = urllib.parse.urlparse(url)
 
        # Rule 1: Convert scheme and host to lower case
        parsed_url = parsed_url._replace(scheme=parsed_url.scheme.lower(), netloc=parsed_url.netloc.lower())
 
        # Rule 2: Remove default ports for http and https
        if parsed_url.port in (80, 443) and parsed_url.scheme in ('http', 'https'):
            parsed_url = parsed_url._replace(netloc=parsed_url.netloc.split(':')[0])
 
        # Rule 3: Make relative URLs absolute (if a base URL is provided)
        if base_url:
            parsed_base_url = urllib.parse.urlparse(base_url)
            if not parsed_url.netloc:
                parsed_url = parsed_url._replace(netloc=parsed_base_url.netloc)
                if not parsed_url.scheme:
                    parsed_url = parsed_url._replace(scheme=parsed_base_url.scheme)
                parsed_url = parsed_url._replace(path=urllib.parse.urljoin(parsed_base_url.path, parsed_url.path))
 
        # Rule 4: Remove fragment
        parsed_url = parsed_url._replace(fragment='')
 
        # Rule 5: Remove duplicate slashes
        clean_path = parsed_url.path
        while '//' in clean_path:
            clean_path = clean_path.replace('//', '/')
        parsed_url = parsed_url._replace(path=clean_path)
        
        # Extra normalizing to avoid unnecessary tokens
        extensions_to_avoid = [".jpg", ".jpeg", ".gif", ".png", ".mp4", ".pdf", "none", ".php", ".ppt", ".doc"]
        for ext in extensions_to_avoid:
            if parsed_url.path.endswith(ext):
                return None
 
        return urllib.parse.urlunparse(parsed_url)

In [30]:
porter_stemmer = PorterStemmer()
keywords = ["cyclone", "storm", "hurricane", "weather", "meterological", "meteor", "ike", "tropical"]
stemmed_keywords = [porter_stemmer.stem(word) for word in keywords]
print(stemmed_keywords)
url_special_chars_regex = r'[-_./~%?&:]'
trusted_domains = {".gov", ".edu", ".org"}

['cyclon', 'storm', 'hurrican', 'weather', 'meterolog', 'meteor', 'ike', 'tropic']


# Fetch page content from web

In [31]:
def fetch_page_content(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            content_type = response.headers.get('content-type', '')
            if content_type.startswith('text/html') or '<html>' in response.text.lower():
                soup = BeautifulSoup(response.content, 'html.parser')
                lang_attr = soup.html.get('lang')
                if lang_attr is None or 'en' in lang_attr.lower():
                    return response.text
                else:
                    print(f"{url} Content is not in English")
                    return None
            else:
                print(f"{url} Content is not HTML")
                return None
        else:
            print("Response status code:", response.status_code)
    except Exception as e:
        print("Error fetching page:", e)
    return None

In [32]:
def parse_links(html_content, base_url):
    soup = BeautifulSoup(html_content, 'html.parser')
    links = set()
    for link in soup.find_all('a', href=True):
        absolute_url = urljoin(base_url, link['href'])
        absolute_url = canonicalize_url(absolute_url, base_url)
        if absolute_url:
            anchor_text = link.get_text().strip()
            links.add((absolute_url, anchor_text))
    return links

In [51]:
def extract_text(html_content):
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
        if soup.title and soup.title.string:
            title = soup.title.string.strip() if soup.title else ""
        else:
            title = ""
        body_text = " ".join(paragraph.get_text() for paragraph in soup.find_all('p'))
        body_text = re.sub(r'\s+', ' ', body_text.strip())
        return title, body_text
    except Exception as e:
        print("An error occurred:", e)
        return "", ""

In [52]:
def write_to_corpus_file(url, title, text, file_number):
    docno = url
    file_name = f"hurricane_data_{file_number}.txt"
    try:
        with open(file_name, 'a', encoding='utf-8') as f:
            f.write("<DOC>\n")
            f.write(f"<DOCNO>{docno}</DOCNO>\n")
            f.write(f"<HEAD>{title}</HEAD>\n")
            f.write(f"<TEXT>{text}</TEXT>\n")
            f.write("</DOC>\n")
    except Exception as e:
        print("An error occurred:", e)

In [53]:
def process_document(url, page_content, file_number):
    html_content = page_content
    if html_content:
        title, text = extract_text(html_content)
        write_to_corpus_file(url, title, text, file_number)

# Calculate outlinks score

In [36]:
def scoring_outlinks(url, anchor_text, hurricane_data, wave_number):
    score = 0
    parsed_url = urlparse(url)
    domain = parsed_url.netloc.lower()
    path = parsed_url.path.lower()
    
    stemmed_path = [porter_stemmer.stem(word) for word in re.split(url_special_chars_regex, path.lower())]
    
    is_trusted_domain = any(domain.endswith(ext) for ext in trusted_domains)

    related_words_in_url = sum(keyword in stemmed_path for keyword in stemmed_keywords)

    stemmed_anchor_words = [porter_stemmer.stem(word) for word in anchor_text.lower().split()]

    related_words_in_anchor = sum(keyword in stemmed_anchor_words for keyword in stemmed_keywords)

    total_words_in_url = len(stemmed_path)
    total_words_in_anchor = len(stemmed_anchor_words)

    score += ((related_words_in_anchor + related_words_in_url) / (total_words_in_url + total_words_in_anchor)) * 75
    if 'data' in url or 'data' in anchor_text:
        score += 10
    if 'structure' in url or 'structure' in anchor_text:
        score += 10
    
    if url in hurricane_data:
        in_links_count = sum(hurricane_data[url]['in_links'].values())
        score += in_links_count * 5
        trusted_inlinks_count = sum(1 for inlink, count in hurricane_data[url]['in_links'].items() if urlparse(inlink).netloc.lower().endswith(tuple(trusted_domains)))
        score += trusted_inlinks_count * 3

    if is_trusted_domain:
        score += 30

    score += (15 - wave_number)
    return score

In [37]:
def fetch_robots_txt(url):
    try:
        parsed_url = urlparse(url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        robots_url = urljoin(base_url, "/robots.txt")
        response = requests.get(robots_url)
        if response.status_code == 200:
            return response.text
    except Exception as e:
        print("Error fetching robots.txt:", e)
    return None

In [38]:
def parse_robots_txt(robots_txt):
    disallowed_urls = set()
    lines = robots_txt.split("\n")
    user_agent = None
    for line in lines:
        if line.lower().startswith("user-agent:"):
            user_agent = line.split(":")[1].strip()
        elif line.lower().startswith("disallow:") and user_agent == "*":
            disallowed_urls.add(line.split(":")[1].strip())
    return disallowed_urls

In [39]:
seed_urls = [
    "http://www.nhc.noaa.gov/outreach/history/",
    "https://en.wikipedia.org/wiki/List_of_United_States_hurricanes",
    "http://en.wikipedia.org/wiki/Hurricane_Ike",
    "https://www.weather.gov/hgx/projects_ike08",
    "https://geology.com/hurricanes/largest-hurricane/"
]

In [43]:
initial_score = -10000
hurricane_data = {}
visited_links = set()
doc_ready_to_write = set()
wave_number = 0
doc_count = 0
write_count = 5000
file_number = 1
num_urls_to_fetch = 30000
urls_in_frontier = set()

frontier_queue = PriorityQueue()

for url in seed_urls:
    frontier_queue.put((initial_score, url, wave_number))

robots_cache = {}

while not frontier_queue.empty() and len(visited_links) < num_urls_to_fetch:
    score, url, wave_number = frontier_queue.get()
    wave_number += 1
    canonicalized_url = canonicalize_url(url)
    canonicalized_url_http = canonicalized_url.replace("https://", "http://")
    canonicalized_url_https = canonicalized_url.replace("http://", "https://")
    if canonicalized_url_http not in visited_links and canonicalized_url_https not in visited_links and canonicalized_url is not None:
        parsed_url = urlparse(canonicalized_url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        if base_url not in robots_cache:
            robots_txt = fetch_robots_txt(base_url)
            if robots_txt:
                disallowed_urls = parse_robots_txt(robots_txt)
                robots_cache[base_url] = disallowed_urls
            else:
                robots_cache[base_url] = set()
        if parsed_url.path not in robots_cache[base_url]:
            page_content = fetch_page_content(canonicalized_url)
            if page_content is not None:
                links = parse_links(page_content, base_url)
                if canonicalized_url in hurricane_data:
                    hurricane_data[canonicalized_url]['content'] = page_content
                    hurricane_data[canonicalized_url]['out_links'] = links
                else:
                    hurricane_data[canonicalized_url] = {
                        'content': page_content,
                        'in_links': {},
                        'out_links': links
                    }
                visited_links.add(canonicalized_url)
                doc_ready_to_write.add(canonicalized_url)
                for link, anchor_text in links:
                    if link not in hurricane_data:
                        hurricane_data[link] = {
                            'content': None,
                            'in_links': {canonicalized_url: 1},
                            'out_links': set()
                        }
                    else:
                        if canonicalized_url != link:
                            if canonicalized_url in hurricane_data[link]['in_links']:
                                hurricane_data[link]['in_links'][canonicalized_url] += 1
                            else:
                                hurricane_data[link]['in_links'][canonicalized_url] = 1
                    score = scoring_outlinks(link, anchor_text, hurricane_data, wave_number) * -1
                    if link not in urls_in_frontier:
                        frontier_queue.put((score, link, wave_number))
                        urls_in_frontier.add(link)
                doc_count += 1
                if doc_count % 100 == 0:
                    print(f"{doc_count} documents")
            time.sleep(1)
        else:
            print(f"Skipping {canonicalized_url} due to robots.txt rules")
        

print("Crawling completed")

100 documents
200 documents
https://fr.wikipedia.org/wiki/Cyclone_tropical Content is not in English
https://ia.wikipedia.org/wiki/Cyclon_tropic Content is not in English
https://om.wikipedia.org/wiki/Tropical_cyclone Content is not in English
https://et.wiktionary.org/wiki/tropical_cyclone Content is not in English
https://fr.wiktionary.org/wiki/cyclone_tropical Content is not in English
https://ta.wiktionary.org/wiki/tropical_cyclone Content is not in English
https://zh-min-nan.wiktionary.org/wiki/tropical_cyclone Content is not in English
https://zh.wiktionary.org/wiki/tropical_cyclone Content is not in English
300 documents
400 documents
https://et.wiktionary.org/wiki/tropical_storm Content is not in English
https://mg.wiktionary.org/wiki/cyclone_tropical Content is not in English
https://mg.wiktionary.org/wiki/tropical_storm Content is not in English
https://sv.wiktionary.org/wiki/cyclone_tropical Content is not in English
https://ta.wiktionary.org/wiki/tropical_cyclones Content i

1800 documents
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 403
1900 documents
2000 documents
2100 documents
Response status code: 404
Response status code: 404
2200 documents
https://fr.wikipedia.org/wiki/Cyclone_tropical_Alberto Content is not in English
https://fr.wikipedia.org/wiki/Cyclone_tropical_Claudette Content is not in English
https://fr.wikipedia.org/wiki/Cyclone_tropical_Talas Content is not in English
https://it.wikipedia.org/wiki/Weather_(Tycho) Content is not in English
https://mg.wiktionary.org/wiki/ex-trop

Response status code: 404
Response status code: 404
Response status code: 404
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2008052717.ABPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2010010419.WTAUT'
3300 documents
Response status code: 503
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Error fetching page: HTTPConnectionPool(host='www.srh.noaa.gov', port=80): Max retries exceeded with url: /data/warn_archive/TBW/PSH/0906_191640.txt (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C461605D90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
https://cs.w

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6kUm9MuAy?url=http://gwydir.demon.co.uk/advisories/WTPQ20-RJTD_201609131200.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Response status code: 404
Response status code: 404
Response status code: 404
3500 documents
http://tgftp.nws.noaa.gov/data/raw/wt/wtpn21.pgtw..txt Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008071417.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008071512.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008071523.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008071618.AXNT20'
Error fetching 

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006060608.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006060815.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006060915.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006080602.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006080609.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006082115.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2007072909.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fs

  soup = BeautifulSoup(response.content, 'html.parser')


Error fetching page: 'NoneType' object has no attribute 'get'
Response status code: 404
Error fetching page: 'NoneType' object has no attribute 'get'
https://web.archive.org/web/20110203113519/http:/weather.noaa.gov/pub/data/raw/ac/acpn50.phfo.two.cp.txt Content is not HTML
https://web.archive.org/web/20160611133740/http:/weather.noaa.gov/pub/data/raw/ac/acus72.ktbw.psh.tbw.txt Content is not HTML
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2007090821.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/

Response status code: 404
https://web.archive.org/web/20100903142024/http:/weather.noaa.gov/pub/data/raw/wt/wtpq21.rjtd..txt Content is not HTML
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://archive.org/compress/hurricane-idalia-tropical-cyclone-report Content is not HTML
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68a7xIzcC (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68a81UPlf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /69OGCEkr6?url=http://gwydir.demon.co.uk/advisories/WTPQ20-RJTD_201207240000.htm (Caused by SSLE

https://web.archive.org/web/20081014120901/http:/weather.noaa.gov/pub/data/raw/wt/wtpn21.pgtw..txt Content is not HTML
https://opensky.ucar.edu/islandora/object/articles%3A22481/datastream/PDF Content is not HTML
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6EVJRUQAQ?url=http://www.nhc.noaa.gov/data/tcr/AL182012_Sandy.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/New-Delhi/2008091607.DEMS'
Response status code: 403
5600 documents
https://web.archive.org/web/20141219145509/http:/weather.noaa.gov/pub/data/raw/ax/axpq20.rjtd..txt Content is not HTML
https://web.archive.org/web/20231022052659/https:/tgftp.nws.noaa.gov/data/raw/wt/wtps01.nffn..txt Content is not HTML
Error fetching page: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded 

Error fetching page: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /wiki/List_of_storms_named_Kammuri (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C490D82A90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /wiki/List_of_storms_named_Leslie (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C490D83550>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /wiki/List_of_storms_named_Pablo (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C490D83790>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='en.wikipedia.org', po

https://web.archive.org/web/20180819083023/http:/tgftp.nws.noaa.gov/data/raw/wt/wtpn33.pgtw..txt Content is not HTML
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6cp9Luyzm?url=http://www.pacificdisaster.net/pdnadmin/data/original/JB_DM183_VUT_TC_Prema_1993_damage_assessment.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /69ynuddUA?url=http://weather.noaa.gov/pub/data/raw/ab/abpw10.pgtw..txt (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Response status code: 404
https://tgftp.nws.noaa.gov/data/raw/wt/wtpq52.rjtd..txt Content is not HTML
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6EXmV4imS?url=http://gwydir.demon.co.uk/advisor

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68YlYDX3n (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68YlYRoZq (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68YlZGPKZ (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /68YlZqalT (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries 

https://web.archive.org/web/20080723125928/http:/weather.noaa.gov/pub/data/raw/wt/wtxs21.pgtw..txt Content is not HTML
https://web.archive.org/web/20100808121248/http:/weather.noaa.gov/pub/data/raw/ab/abio10.pgtw..txt Content is not HTML
https://web.archive.org/web/20111031184348/http:/weather.noaa.gov/pub/data/raw/ab/abio10.pgtw..txt Content is not HTML
https://web.archive.org/web/20090717090142/http:/weather.noaa.gov/pub/data/raw/wt/wtpq21.rjtd..txt Content is not HTML
https://web.archive.org/web/20101203124421/http:/weather.noaa.gov/pub/data/raw/wt/wtps32.pgtw..txt Content is not HTML
https://web.archive.org/web/20101203125621/http:/weather.noaa.gov/pub/data/raw/wt/wtxs33.pgtw..txt Content is not HTML
https://web.archive.org/web/20120527165805/http:/weather.noaa.gov/pub/data/raw/ax/axpq20.rjtd..txt Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Fiji/2010031212.WTPS01'
Error fetching page: No connection adapt

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6KASbhe6U?url=http://www.nhc.noaa.gov/data/tcr/AL012013_Andrea.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6KAn6JkM7?url=http://www.nhc.noaa.gov/data/tcr/AL022012_Beryl.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6KAoLOmdf?url=http://www.nhc.noaa.gov/data/tcr/AL042012_Debby.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6KAzW6zY1?url=http://www.nhc.noaa.gov/

https://de.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://el.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://eo.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://es.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://es.wiktionary.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://et.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://fa.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://fi.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://fr.wikipedia.org/wiki/User:Tropical_Storm_Angela Content is not in English
https://ga.wikipedia.org/wiki/User:Tropical_

https://sw.wiktionary.org/wiki/hurricanes Content is not in English
https://ta.wiktionary.org/wiki/cyclone Content is not in English
https://ta.wiktionary.org/wiki/hurricane Content is not in English
https://ta.wiktionary.org/wiki/tropical Content is not in English
https://te.wiktionary.org/wiki/hurricane Content is not in English
https://te.wiktionary.org/wiki/tropical Content is not in English
https://th.wiktionary.org/wiki/tropical Content is not in English
https://ti.wiktionary.org/wiki/hurricane Content is not in English
https://tr.wiktionary.org/wiki/tropical Content is not in English
https://ur.wiktionary.org/wiki/hurricane Content is not in English
https://ur.wiktionary.org/wiki/tropical Content is not in English
7200 documents
Response status code: 404
Response status code: 403
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5PpVXuYMo?url=http://ams.allenpress.com/archive/1520-0477/52/6/pdf/i1520-0477-52-6-438.pdf

https://web.archive.org/web/20090717084700/http:/weather.noaa.gov/pub/data/raw/wt/wtxs31.pgtw..txt Content is not HTML
https://web.archive.org/web/20101203125303/http:/weather.noaa.gov/pub/data/raw/wt/wtxs32.pgtw..txt Content is not HTML
http://tgftp.nws.noaa.gov/data/raw/wt/wtpq33.rjtd..txt Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2006/Jul/2006071916.ABPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2006/Jul/2006072004.ABPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008030612-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008030812-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008030912-FMEE'
Error fetchi

Response status code: 420
Response status code: 403
Response status code: 404
http://tgftp.nws.noaa.gov/data/raw/ab/abpw10.pgtw..txt Content is not HTML
https://web.archive.org/web/20190406032312/http:/meteo.bmkg.go.id/data/tc/IDJ21030.txt Content is not HTML
https://web.archive.org/web/20160914200642/http:/www.srh.noaa.gov/data/warn_archive/JAX/PSH/0907_001540.txt Content is not HTML
https://web.archive.org/web/20210611104301/https:/www.data.jma.go.jp/multi/cyclone/cyclone_detail.html?id=60&lang=en Content is not in English
http://www.nhc.noaa.gov/data/hurdat/hurdat2-nepac-1949-2016-041317.txt Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2003/Dec/2003120305.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2003/Dec/2003120311.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropic

https://de.wiktionary.org/wiki/weather Content is not in English
https://el.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://el.wiktionary.org/wiki/cyclones Content is not in English
https://el.wiktionary.org/wiki/meteor Content is not in English
https://el.wiktionary.org/wiki/storm Content is not in English
https://el.wiktionary.org/wiki/tropic Content is not in English
https://el.wiktionary.org/wiki/weather Content is not in English
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6KzsDV10L?url=http://gwydir.demon.co.uk/advisories/RSMC_201311090400.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6L2sQPgdx?url=http://gwydir.demon.co.uk/advisories/RSMC_201311110600.pdf (Caused by SSLError(CertificateError("

https://ku.wiktionary.org/wiki/storm Content is not in English
https://ku.wiktionary.org/wiki/tropics Content is not in English
https://ku.wiktionary.org/wiki/weather Content is not in English
https://ky.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://lb.wikipedia.org/wiki/Meteor Content is not in English
https://lb.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://li.wiktionary.org/wiki/storm Content is not in English
https://li.wiktionary.org/wiki/weather Content is not in English
https://lld.wikipedia.org/wiki/Cyclone Content is not in English
https://lld.wikipedia.org/wiki/Cyclones Content is not in English
https://lmo.wiktionary.org/wiki/meteor Content is not in English
https://lo.wiktionary.org/wiki/storm Content is not in English
https://lo.wiktionary.org/wiki/weather Content is not in English
https://lt.wiktionary.org/wiki/weather Content is not in English
https://lv.wikipedia.org/wi

https://ta.wiktionary.org/wiki/tropic Content is not in English
https://ta.wiktionary.org/wiki/tropicalize Content is not in English
https://ta.wiktionary.org/wiki/tropics Content is not in English
https://ta.wiktionary.org/wiki/weather Content is not in English
https://te.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://te.wiktionary.org/wiki/meteor Content is not in English
https://te.wiktionary.org/wiki/storm Content is not in English
https://te.wiktionary.org/wiki/tropic Content is not in English
https://te.wiktionary.org/wiki/weather Content is not in English
https://tg.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://th.wiktionary.org/wiki/storm Content is not in English
https://th.wiktionary.org/wiki/weather Content is not in English
https://tl.wikipedia.org/wiki/Special:Contributions/Tropical_Storm_Angela Content is not in English
https://tr.wikipedia.org/wiki/Meteor Content is not i

Response status code: 404
9400 documents
Response status code: 403
Error fetching page: HTTPSConnectionPool(host='www.hurricanescience.org', port=443): Max retries exceeded with url: /history/storms/1960s/betsy/ (Caused by SSLError(CertificateError("hostname 'www.hurricanescience.org' doesn't match either of '*.web-hosting.com', 'web-hosting.com'")))
Response status code: 404
9500 documents
Response status code: 404
9600 documents
9700 documents
9800 documents
https://ja.wikipedia.org/wiki/Template:Tropical_Cyclone_Point_Maxima Content is not in English
https://ms.wikipedia.org/wiki/Templat:Infobox_tropical_cyclone_small Content is not in English
Error fetching robots.txt: HTTPSConnectionPool(host='rammb2.cira.colostate.edu', port=443): Max retries exceeded with url: /robots.txt (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)')))
Error fetching page: HTTPSConnectionPool(hos

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6fXt7xrfI?url=http://gwydir.demon.co.uk/advisories/WTPS31-PGTW_201602232100.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6pEGxWXsO?url=http://gwydir.demon.co.uk/advisories/WTPS31-PGTW_201703242100.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008052623.AXNT20'
Response status code: 404
10000 documents
Response status code: 400
Response status code: 403
Error fetching page: 'NoneType' object has no attribute 'get'
https://web.archive.org/web/20131109072800/http:/weather.noaa.gov/pub/data/raw/ab/abpw10.pgtw..txt Content is not HTML
h

Response status code: 404
10300 documents
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6PbLw6ex9?url=http://gcportal.guycarp.com/portal/extranet/popup/pdf/GCPub/tropcyc_02.pdf;JSESSIONIDGCPORTALWCPORTALAPP=mfYhT1lNyzPGt0tp3JVyy1nHVQJw459j6j1khZzhBfL4zYLq8Zwh!-158796142?vid=1 (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPConnectionPool(host='weather.noaa.gov', port=80): Max retries exceeded with url: /international.html (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C502792250>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Response status code: 404
Error fetching page: HTTPSConnectionPool(host='www.hurricanescience.org', port=443): Max retries exceeded with url: /history/storms/1950s/audrey/ (Caused by SSLError(CertificateError("hostname 'www.hurricanescience.org' doesn't m

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608041512/https:/weather.com/storms/hurricane-central (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C500447E10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160607232834/https:/weather.com/storms/severe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C504039350>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160607233438/https:/weather.com/storms/hurricane/news/most-bizarre-hurricane-tropical-storm-c

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2004/Sep/2004092518.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2007122708.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2007122807.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2007122901.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2007123100.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2008010200.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Australia/2008010207.WTAUT'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.f

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008010812-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008012718-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008012906-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008012918-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008013006-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008013012-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008020100-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/p

https://et.wiktionary.org/wiki/weathered Content is not in English
https://et.wiktionary.org/wiki/weathering Content is not in English
https://eu.wikipedia.org/wiki/Meteor Content is not in English
https://fi.wiktionary.org/wiki/meteors Content is not in English
https://fi.wiktionary.org/wiki/stormed Content is not in English
https://fi.wiktionary.org/wiki/storming Content is not in English
https://fi.wiktionary.org/wiki/storms Content is not in English
https://fi.wiktionary.org/wiki/weathered Content is not in English
https://fi.wiktionary.org/wiki/weathering Content is not in English
https://fi.wiktionary.org/wiki/weathers Content is not in English
https://fr.wiktionary.org/wiki/Meteor Content is not in English
https://fr.wiktionary.org/wiki/Storm Content is not in English
https://fr.wiktionary.org/wiki/Tropic Content is not in English
https://fr.wiktionary.org/wiki/Tropics Content is not in English
https://fr.wiktionary.org/wiki/meteore Content is not in English
https://fr.wiktionar

https://tr.wiktionary.org/wiki/storms Content is not in English
https://ur.wiktionary.org/wiki/weathered Content is not in English
11100 documents
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 503
Error fetching page: HTTPSConnectionPool(host='www.noaanews.noaa.gov', port=443): Max retries exceeded with url: /stories/s662.htm (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5051A83D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5OqWZXlRX?url=http://twister.sbs.ohio-state.edu/text/station/VIDP/FKIN20.VIDP (Caused by SSLError(CertificateError("hostname '

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5sLaEZ1Ys?url=https://listserv.illinois.edu/wa.cgi?A2=ind1001c&L=wx-tropl&T=0&X=558D791777B97BA4EA&P=33495 (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6Dq7OwXVR?url=http://gwydir.demon.co.uk/advisories/AXAU21-ABRF_201301210630.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6Dq7Pvizl?url=http://gwydir.demon.co.uk/advisories/AXAU21-ABRF_201301201911.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Ma

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20240321011707/https:/weather.com/storms/hurricane?pg=4 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C4F5618590>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20240321011707/https:/weather.com/storms/hurricane?pg=6 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C4FC647150>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /621ySgolJ?url=http://gwydir.demon.co.uk/advisories/WTPQ22-RJTD_201109271800.htm (Caused by

https://web.archive.org/web/20101203130554/http:/weather.noaa.gov/pub/data/raw/wt/wtio22.fmee..txt Content is not HTML
12100 documents
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6cYFskbGO?url=http://gwydir.demon.co.uk/advisories/ABIO10-PGTW_201510251100.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6cZaqqGbG?url=http://gwydir.demon.co.uk/advisories/ABIO10-PGTW_201510261100.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6ceIAMDTs?url=http://gwydir.demon.co.uk/advisories/WTIO31-PGTW_201510291500.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't ma

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008022118-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008022218-FMEE'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Seychelles/2008022412-FMEE'
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6OYLWNKPx?url=http://gwydir.demon.co.uk/advisories/WTPS21-PGTW_201404030330.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
12200 documents
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6DUMyMOmM?url=http://gwydir.demon.co.uk/advisories/AXAU01-APRF_201301070647.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221204163455/https:/weather.com/storms/hurricane/news/tropical-wave-explainer-tropics-hurricanes (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C472C3F290>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221115095915/https:/weather.com/storms/hurricane/news/most-bizarre-hurricane-tropical-storm-cyclone-locations-20130829 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C472C3DA90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Response status code: 502
Response status code: 404
Response status code: 404
Response status 

https://kmlexport.toolforge.org/?project=commons&article=Category%3AStack+data+structures Content is not HTML
https://ml.wikipedia.org/wiki/%E0%B4%AB%E0%B4%B2%E0%B4%95%E0%B4%82:Infobox_data_structure Content is not in English
https://nl.wikipedia.org/wiki/Storm_(wind) Content is not in English
https://nn.wikipedia.org/wiki/Storm Content is not in English
https://no.wikipedia.org/wiki/Storm Content is not in English
https://oc.wikipedia.org/wiki/Clima_tropical Content is not in English
Response status code: 403
Skipping https://petscan.wmflabs.org/?language=commons&categories=Heap_data_structures&project=wikimedia&ns%5B6%5D=1 due to robots.txt rules
Skipping https://petscan.wmflabs.org/?language=commons&categories=Queue_data_structure&project=wikimedia&ns%5B6%5D=1 due to robots.txt rules
Skipping https://petscan.wmflabs.org/?language=commons&categories=Stack_data_structures&project=wikimedia&ns%5B6%5D=1 due to robots.txt rules
Skipping https://petscan.wmflabs.org/?language=commons&proje

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5iwzmgVuq?url=http://thegreatzo.dyndns.org/hurimg/hurricanes.html (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6YhNFgRdM?url=http://www1.ncdc.noaa.gov/pub/orders/IPS/IPS-7A3F9805-30CD-4CCF-9E02-9DF2B051688C.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://zh-yue.wikipedia.org/wiki/Template:Infobox_data_structure Content is not in English
https://zh.wikipedia.org/wiki/Template:Infobox_data_structure Content is not in English
https://zh.wiktionary.org/wiki/data_structure Content is not in English
Error fetching pag

https://ru.wiktionary.org/wiki/Weathers Content is not in English
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006071709.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2006071621.ABNT20'
https://sv.wiktionary.org/wiki/Meteore Content is not in English
https://sv.wiktionary.org/wiki/Meteors Content is not in English
https://sv.wiktionary.org/wiki/weatherized Content is not in English
https://sv.wiktionary.org/wiki/weatherizes Content is not in English
https://sv.wiktionary.org/wiki/weatherizing Content is not in English
https://ta.wiktionary.org/wiki/Meteors Content is not in English
Response status code: 403
Response status code: 503
Response status code: 503
Response status code: 503
Response status code: 404
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6CSTQ7E6p?url=http://gwyd

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6grIg3c2y?url=http://gwydir.demon.co.uk/advisories/WTIO30-FMEE_201604161309.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6gtUkkgSz?url=http://gwydir.demon.co.uk/advisories/WTIO30-FMEE_201604181834.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6gvxnGJog?url=http://gwydir.demon.co.uk/advisories/WTIO30-FMEE_201604210643.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Response status code: 404
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008062317.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2008062323.AXNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Pac-Dis/2008070203.AXPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Pac-Dis/2008070216.AXPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Pac-Dis/2008070221.AXPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Pac-Dis/2008070303.AXPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Pac-Dis/2008070316.AXPZ20'
Error fetching page: No connection adapters were found for 'ftp:

https://web.archive.org/web/20130516073440/http:/weather.noaa.gov/pub/data/raw/wt/wtio21.pgtw..txt Content is not HTML
https://web.archive.org/web/20211010080823/https:/tgftp.nws.noaa.gov/data/raw/wt/wtpq50.rjtd..txt Content is not HTML
Response status code: 403
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2009071005.ABPZ20'
https://tgftp.nws.noaa.gov/data/raw/wt/wtpq32.rjtd..txt Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007111221-WTPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2008051521-WTPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2008081306-ABPW.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/New-Delhi/2004/Dec/20

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20210904132407/https:/climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?searchType=stnProv&timeframe=1&lstProvince=NL&optLimit=yearRange&StartYear=1840&EndYear=2021&Year=2021&Month=9&Day=3&selRowPerPage=100&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongMin=0&txtCentralLongSec=0&startRow=101 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C55746C410>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
https://web.archive.org/web/20080708212144/http:/weather.noaa.gov/pub/data/raw/wt/wtio30.fmee..txt Content is not HTML
https://web.archive.org/web/20081011233239/http:/weather.noaa.gov/pub/data/raw/wd/wdpn31.pgtw..txt Content is not HTML
https://web.archive.org/web/20081014122930/http:/weather.noaa.gov/pub/data/raw/wt/wtin20.dems..t

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6DuEdjrrj?url=http://gwydir.demon.co.uk/advisories/IDW24100_201301230956.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2009090412-ABIO.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2009090500-WTIO.PGTW'
15200 documents
15300 documents
15400 documents
Response status code: 404
https://fr.vikidia.org/wiki/Cyclone Content is not in English
https://fr.vikidia.org/wiki/Cyclone_tropical Content is not in English
Skipping https://wikidata-externalid-url.toolforge.org/?p=4342&url_prefix=https://snl.no/&id=storm due to robots.txt rules
Skipping https://wikidata-externalid-url.toolforge.org/?p=8313&url_prefix=https://denstoredanske.lex.dk/&i

https://es.wikipedia.org/wiki/Selva_tropical Content is not in English
https://es.wikipedia.org/wiki/The_Hurricane Content is not in English
https://et.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://et.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://eu.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://eu.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://fi.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://fi.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://fi.wikipedia.org/wiki/Hurricane_(elokuva) Content is not in English
https://fi.wikipedia.org/wiki/Hurricane_(yhtye) Content is not in English
https://fi.wikipedia.org/wiki/Lethbridge_Hurricanes Content is not in English
https://fi.wikipedia.org/wiki/Meteor_(satelliitti) Content is not in English
https://fr.wikipedia.org/wiki/Cat%C3%A9gorie:Cyclone_tropical_par_pays Content is not in English
https://fr

Error fetching page: HTTPConnectionPool(host='www.hpc.ncep.noaa.gov', port=80): Max retries exceeded with url: /tropical/rain/2003.html (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C555FBA350>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://sk.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://sk.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://sl.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://sl.wikipedia.org/wiki/Waterloo_Hurricanes Content is not in English
https://sm.wiktionary.org/wiki/hurricane Content is not in English
https://sq.wikipedia.org/wiki/Hurricane_(grup) Content is not in English
https://sv.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://sv.wikipedia.org/wiki/Dragon_Storm Content is not in English
https:

16400 documents
Response status code: 420
Response status code: 420
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://www.nhc.noaa.g

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6ElT7nfSw?url=http://gwydir.demon.co.uk/advisories/AXAU01-APRF_201302271314.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6ElT7t56Z?url=http://gwydir.demon.co.uk/advisories/AXAU01-APRF_201302270702.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6ElT7zJiG?url=http://gwydir.demon.co.uk/advisories/AXAU01-APRF_201302270113.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20180710102518/http:/tgftp.nws.noaa.gov/data/raw/ab/abpw10.pgtw..txt (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C566242690>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160609070330/https:/weather.com/storms/hurricane/news/most-bizarre-hurricane-tropical-storm-cyclone-locations-20130829 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C563FCB590>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/200805131846

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /60ZNvP291?url=http://gwydir.demon.co.uk/advisories/WTPQ20-RJTD_201107301200.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /60f2c55BV?url=http://gwydir.demon.co.uk/advisories/WTPQ20-RJTD_201108030600.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /60o3ULASC?url=http://gwydir.demon.co.uk/advisories/WTPQ20-RJTD_201108081800.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007081603-WDPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007081615-WDPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007081803-WDPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007083003-WDPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007090703-WTPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007091422-WTPN.PGTW'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2007091509-WTPN.PGTW'
Error fetching page: No connection adapters were found 

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5bom00daT?url=http://www.met.gov.fj/documents/Media_release_for_2008-09_TC_Season1224732656.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5rcbRZVTX?url=http://www4.ncdc.noaa.gov/cgi-win/wwcgi.dll?wwevent~ShowEvent~563688 (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5reET4HoN?url=http://www.met.gov.fj/documents/TC_Seasonal_Summary_99-001188357240.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max 

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608120953/https:/weather.com/safety/hurricane/news/tropical-storm-colin-impacts-news (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C548593250>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608090859/http:/coast.noaa.gov/hurricanes/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5564E4650>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608044417/https:/weather.com/storms/hurricane/news/hurrican

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230818205629/https:/abc7.com/tag/storm/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C57FDD5790>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230818205629/https:/abc7.com/topic/weather/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C578FF4110>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819023638/https:/www.foxweather.com/category/hurricanes (Caused by NewConnectionError('<urllib3.connection.HT

http://www.nhc.noaa.gov/archive/text/TWDAT/2005/TWDAT.200507040006 Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWDAT/2005/TWDAT.200507041731 Content is not HTML
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Atl-Dis/2004/Sep/2004090200.AXNT20'
Response status code: 404
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160209111822/https:/weather.com/storms/winter-central (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C55719EE90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160316070553/https:/weather.com/storms/winter-central (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C55719CD5

https://uk.wiktionary.org/wiki/ike Content is not in English
https://zh.wiktionary.org/wiki/Ike Content is not in English
https://zh.wiktionary.org/wiki/ike Content is not in English
https://nl.wikipedia.org/wiki/Storm_Ciara Content is not in English
18200 documents
Response status code: 404
Error fetching robots.txt: HTTPConnectionPool(host='www.ccsr.columbia.edu', port=80): Max retries exceeded with url: /robots.txt (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C56855D010>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='www.ccsr.columbia.edu', port=80): Max retries exceeded with url: /information/hurricanes/ (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C56855F950>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='www.erh.noaa.gov', port=80): Max retries exceeded with

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160605175240/https:/weather.com/storms/severe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5788604D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160605231954/https:/weather.com/storms/severe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C578862210>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608120300/https:/weather.com/safety/hurricane (Caused by NewConnectionError('<urllib3.connection.HTTP

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/GuamStuff/2008041713-ABPW.PGTW'
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160930163244/https:/weather.com/storms/hurricane (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C580756650>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160930163244/https:/weather.com/storms/hurricane/news/most-bizarre-hurricane-tropical-storm-cyclone-locations-20130829 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C580755650>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error f

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20060819011833/http:/weather.unisys.com/hurricane/atlantic/1933/index.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C58D207C90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20090214000339/http:/www.bom.gov.au/weather/cyclone/about/about-tropical-cyclones.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C58D17FA10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20140716085913/http:/www.nhc.noaa.gov/2

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20211104143701/https:/climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?searchType=stnProv&timeframe=1&lstProvince=NL&optLimit=yearRange&StartYear=1840&EndYear=2021&Year=2021&Month=9&Day=3&selRowPerPage=100&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongMin=0&txtCentralLongSec=0&startRow=201 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C58CEE48D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20211104143701/https:/climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?searchType=stnProv&timeframe=1&lstProvince=NL&optLimit=yearRange&StartYear=1840&EndYear=2021&Year=2021&Month=9&Day=3&selRowPe

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20210212020240/https:/www.orlandosentinel.com/weather/hurricane/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C588838D10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221001123738/https:/apnews.com/article/storms-mexico-california-hurricanes-tropical-b9a4c5378bbbacf7ffd4fc32ceb542bc (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5888B6B10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Response status code: 403
https://web.archive.org/web/20060616151919/http:/australiasevereweather.com/cyclones/2004/summ0401.txt C

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230526141941/https:/www.metoffice.gov.uk/research/weather/tropical-cyclones/seasonal/northatlantic2021may (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5773C8150>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230526141941/https:/www.metoffice.gov.uk/research/weather/tropical-cyclones/seasonal/northatlantic2022may (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5774A6E10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded w

https://ca.wiktionary.org/wiki/-ike Content is not in English
https://ca.wiktionary.org/wiki/tropical_sprue Content is not in English
19500 documents
https://ceb.wikipedia.org/wiki/Hurricane_Fault Content is not in English
https://ceb.wikipedia.org/wiki/Ike_Barinholtz Content is not in English
https://ceb.wikipedia.org/wiki/Storm_Mountain Content is not in English
https://co.wikipedia.org/wiki/Ike_Barinholtz Content is not in English
19600 documents
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404


https://es.wikipedia.org/wiki/Jacksonville_Cyclones Content is not in English
https://es.wikipedia.org/wiki/Lister_Storm Content is not in English
https://es.wikipedia.org/wiki/Melbourne_Storm Content is not in English
https://es.wikipedia.org/wiki/Miami_Tropics Content is not in English
https://es.wikipedia.org/wiki/Seattle_Storm Content is not in English
https://es.wikipedia.org/wiki/Weather_Systems Content is not in English
https://es.wikipedia.org/wiki/Weathers_(banda) Content is not in English
https://et.wikipedia.org/wiki/BlackBerry_Storm Content is not in English
https://et.wikipedia.org/wiki/Ike_Barinholtz Content is not in English
https://et.wiktionary.org/wiki/cyclone_cellar Content is not in English
https://et.wiktionary.org/wiki/hurricane_lamp Content is not in English
https://et.wiktionary.org/wiki/hurricane_lantern Content is not in English
https://et.wiktionary.org/wiki/tropical_year Content is not in English
https://eu.wikipedia.org/wiki/Ike_Turner Content is not in Eng

https://ia.wikipedia.org/wiki/Ike_Turner Content is not in English
https://id.wikipedia.org/wiki/Meteor Content is not in English
https://id.wiktionary.org/wiki/meteor Content is not in English
https://id.wiktionary.org/wiki/storm Content is not in English
https://id.wiktionary.org/wiki/weather Content is not in English
https://ig.wikipedia.org/wiki/Chika_Ike Content is not in English
https://ig.wikipedia.org/wiki/Ike_Ekweremadu Content is not in English
https://ig.wikipedia.org/wiki/Tropical_agriculture Content is not in English
https://ig.wikipedia.org/wiki/Tropical_night Content is not in English
https://ig.wikipedia.org/wiki/Weather_drone Content is not in English
Skipping https://intuition.toolforge.org/?returnto=%2Findex.php&returntoquery=returnto%3D%252Findex.php%26returntoquery%3Dlang%253Dcommons%2526name%253DArray%252Bdata%252Bstructure%2526namespace%253D14 due to robots.txt rules
https://io.wikipedia.org/wiki/Ike_Turner Content is not in English
https://it.wikipedia.org/wiki/

https://pl.wikipedia.org/wiki/AIL_Storm Content is not in English
https://pl.wikipedia.org/wiki/Anaheim_Storm Content is not in English
https://pl.wikipedia.org/wiki/Cincinnati_Cyclones Content is not in English
https://pl.wikipedia.org/wiki/Geo_Storm Content is not in English
https://pl.wikipedia.org/wiki/Gloster_Meteor Content is not in English
https://pl.wikipedia.org/wiki/Guelph_Storm Content is not in English
https://pl.wikipedia.org/wiki/Ike_Anigbogu Content is not in English
https://pl.wikipedia.org/wiki/Ike_Diogu Content is not in English
https://pl.wikipedia.org/wiki/Ike_Opara Content is not in English
https://pl.wikipedia.org/wiki/Ike_Quebec Content is not in English
https://pl.wikipedia.org/wiki/Ike_Shorunmu Content is not in English
https://pl.wikipedia.org/wiki/Ike_Skelton Content is not in English
https://pl.wikipedia.org/wiki/Ike_Turner Content is not in English
https://pl.wikipedia.org/wiki/Ike_Williams Content is not in English
https://pl.wikipedia.org/wiki/Mercury_Cyc

Response status code: 404
Error fetching robots.txt: Invalid URL '/robots.txt': No scheme supplied. Perhaps you meant https:///robots.txt?
Error fetching page: No connection adapters were found for "javascript://web.archive.org/videoLoader('999571')"
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20081011043611/http:/www.newschannel5.tv/Weather/Hurricane/992736 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C59774E5D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20081011043611/http:/www.newschannel5.tv/Weather/Hurricane/992251 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C58D607F10>: Failed to establish a new connection: [WinError 10061] No connec

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160316012111/https:/weather.com/storms/alerts (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C590E66ED0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160316012111/https:/weather.com/tv/shows/responding-by-storm (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C59E498DD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160316070553/https:/weather.com/storms/alerts (Caused by NewConnectionError('<urllib3.con

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220204205529/https:/www.ifrc.org/cyclones (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A1B59F50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221003082352/https:/apnews.com/article/hurricanes-north-carolina-storms-police-weather-61b791f8c9ba907335c528d6bf1c2a7f (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A1B58110>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221004135422/https:/apnews.com/hub

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5wPhKiduA?url=http://gwydir.demon.co.uk/advisories/AXAU01_201102110342.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5wao0Cst6?url=https://listserv.illinois.edu/wa.cgi?A2=ind1102c&L=wx-tropl&T=0&X=558D791777B97BA4EA&P=63179 (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6DFFpWIEp?url=http://gwydir.demon.co.uk/advisories/ABIO10-PGTW_201212280130.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max ret

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20190322214331/https:/www.wunderground.com/cat6/Africas-Hurricane-Katrina-Tropical-Cyclone-Idai-Causes-Extreme-Catastrophe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AE95C7D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20190804083147/https:/www.wunderground.com/cat6/Africas-Hurricane-Katrina-Tropical-Cyclone-Idai-Causes-Extreme-Catastrophe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AE863450>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', por

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608090908/https:/weather.com/storms/hurricane-central (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AB714190>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608090908/https:/weather.com/safety/hurricane (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AB6B9490>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160608090908/https:/weather.com/storms/severe (Caused by NewConnectionError('<urllib3.conn

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6LL6XwBtw?url=http://gwydir.demon.co.uk/advisories/AXAU01-APRF_201311221318.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6LMiZPpYR?url=http://gwydir.demon.co.uk/advisories/WTXS31-PGTW_201311240900.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6LMiZthUO?url=http://gwydir.demon.co.uk/advisories/WTXS31-PGTW_201311230900.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220603182257/https:/weather.com/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B1F33350>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230220171643/https:/weather.com/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B1EBE910>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230818212307/https:/abc7.com/tag/hurricane/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6WThGRRcS?url=http://gwydir.demon.co.uk/advisories/AXAU01-ADRM_201502191231.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6pZgb5hAs?url=http://gwydir.demon.co.uk/advisories/WTPS31-PGTW_201704080300.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6pcbpiqFy?url=http://gwydir.demon.co.uk/advisories/WTPS31-PGTW_201704100300.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2001/Aug/2001083102.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2001/Aug/2001083109.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2001/Sep/2001090602.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2001/Sep/2001090609.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2005/Sep/2005091322.ABPZ20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2008093005.ABPZ20'
22500 documents
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-P/2009101505.ABPZ20'
Error fetching 

https://sco.wikipedia.org/wiki/Template:Storm_colour Content is not in English
https://sh.wikipedia.org/wiki/Hurricane Content is not in English
Error fetching page: HTTPConnectionPool(host='www.hpc.ncep.noaa.gov', port=80): Max retries exceeded with url: /tropical/rain/elena1985.html (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C5D5985B90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
https://tum.wikipedia.org/wiki/Template:Storm_colour Content is not in English
https://uk.wikipedia.org/wiki/Monthly_Weather_Review Content is not in English
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080917145118/http:/search.cnn.com/search.jsp?query=hurricane%20ike&type=news&sortBy=date&intl=false&iref=mpvideossearch (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B1CF3690>: Failed to establish a new connection: [WinError 10061

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819023638/https:/www.foxweather.com/local-weather (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A7C500D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819023638/https:/www.foxweather.com/weather-news (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A9C09150>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819145610/https:/www.foxweather.com/extreme-weather (Caused by NewConnectionError('<urll

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20110929131058/http:/www.weather.bm/data/2002-10.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AC189510>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20120205194157/http:/www.weather.bm/data/2003-09.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5ABB45550>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20120205194205/http:/www.weather.bm/data/2001-08.html (Caused by NewConnectionError('<urllib3.

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200709175904/https:/weather.com/data-rights (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B1D6B990>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201105071821/https:/www.foxnews.com/us/hurricane-season-names-2020-atlantic-list-tropical-weather-arthur-wilfred-storms (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5AC6F1710>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20221226050506/http:/www.bom.gov.a

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20190621232701/https:/www.metoffice.gov.uk/research/weather/tropical-cyclones/seasonal/northatlantic2017/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5DC61BFD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20190925052722/https:/weather.com/news/news/2019-01-03-thailand-tropical-storm-pabuk (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5DC653850>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200101004

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20161002060524/https:/weather.com/storms/hurricane/news/deadliest-hurricanes-atlantic-20130827 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B7F721D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20161002060524/https:/weather.com/storms/hurricane/news/haiti-hurricane-history-2016 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B9A6EDD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200829195418/https:/

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20090220144457/http:/www.bom.gov.au/weather/cyclone/tc-outlooks.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B93DDA50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20090220144457/http:/www.bom.gov.au/weather/nsw/cyclone/tcnsw.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B93DC550>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20090301011102/http:/www.bom.gov.au/weather/cyclone/tc-climatolo

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201102231421/https:/www.foxnews.com/category/world/disasters/hurricanes-typhoons (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B904D850>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201102232231/https:/www.foxnews.com/category/world/disasters/hurricanes-typhoons (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5B95AA1D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201223015225/https:/www.orlandosen

Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Tokyo/2008091300.RJTD'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Tokyo/2009062118.RJTD'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Tokyo/2009080300.RJTD'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Tokyo/2009081306.RJTD'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Tokyo/2009081400.RJTD'
Response status code: 404
Response status code: 404
Response status code: 404
Error fetching page: HTTPSConnectionPool(host='archive.usgs.gov', port=443): Max retries exceeded with url: /archive/sites/coastal.er.usgs.gov/hurricanes/ike/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5E1B89D50>: Failed to establish a new connectio

https://de.wikipedia.org/wiki/Frederik_Storm Content is not in English
https://de.wikipedia.org/wiki/Gale_Storm Content is not in English
https://de.wikipedia.org/wiki/Graeme_Storm Content is not in English
https://de.wikipedia.org/wiki/Gustav_Storm Content is not in English
https://de.wikipedia.org/wiki/HMS_Meteor Content is not in English
https://de.wikipedia.org/wiki/James_Storm Content is not in English
https://de.wikipedia.org/wiki/Johan_Storm Content is not in English
https://de.wikipedia.org/wiki/John_Weathers Content is not in English
https://de.wikipedia.org/wiki/Lance_Storm Content is not in English
https://de.wikipedia.org/wiki/Lucien_Storme Content is not in English
https://de.wikipedia.org/wiki/MBDA_Meteor Content is not in English
https://de.wikipedia.org/wiki/Mandyleigh_Storm Content is not in English
https://de.wikipedia.org/wiki/Matthias_Storme Content is not in English
https://de.wikipedia.org/wiki/Mercury_Meteor Content is not in English
https://de.wikipedia.org/wiki

https://es.wikipedia.org/wiki/Tormenta_tropical_Barry_(1995) Content is not in English
https://es.wikipedia.org/wiki/Tropical_Airplay Content is not in English
https://es.wikipedia.org/wiki/Tropical_Albums Content is not in English
https://et.wikipedia.org/wiki/Carl_Weathers Content is not in English
https://et.wikipedia.org/wiki/Frederik_Storm Content is not in English
https://et.wikipedia.org/wiki/Storm_Thorgerson Content is not in English
https://et.wiktionary.org/wiki/bad_weather Content is not in English
https://et.wiktionary.org/wiki/dirty_weather Content is not in English
https://et.wiktionary.org/wiki/dust_storm Content is not in English
https://et.wiktionary.org/wiki/fair-weather Content is not in English
https://et.wiktionary.org/wiki/good_weather Content is not in English
https://et.wiktionary.org/wiki/ice_storm Content is not in English
https://et.wiktionary.org/wiki/meteor_shower Content is not in English
https://et.wiktionary.org/wiki/storm_cellar Content is not in Englis

https://fr.wikipedia.org/wiki/Tempest_Storm Content is not in English
https://fr.wikipedia.org/wiki/Texas_Cyclone Content is not in English
https://fr.wikipedia.org/wiki/The_Meteors Content is not in English
https://fr.wikipedia.org/wiki/Theodor_Storm Content is not in English
https://fr.wikipedia.org/wiki/Tim_Storm Content is not in English
https://fr.wikipedia.org/wiki/Tim_Storms Content is not in English
https://fr.wikipedia.org/wiki/Toni_Storm Content is not in English
https://fr.wikipedia.org/wiki/Tropical_house Content is not in English
https://fr.wikipedia.org/wiki/Weather_Girl Content is not in English
https://fr.wiktionary.org/wiki/broadcast_storm Content is not in English
https://fr.wiktionary.org/wiki/hail_storm Content is not in English
https://fr.wiktionary.org/wiki/meteor_shower Content is not in English
https://fr.wiktionary.org/wiki/perfect_storm Content is not in English
https://fr.wiktionary.org/wiki/space_weather Content is not in English
https://fr.wiktionary.org/wi

https://ksh.wikipedia.org/wiki/Theodor_Storm Content is not in English
https://ku.wikipedia.org/wiki/Gale_Storm Content is not in English
https://ku.wiktionary.org/wiki/-tropic Content is not in English
https://ku.wiktionary.org/wiki/dust_storm Content is not in English
https://ku.wiktionary.org/wiki/weather_forecast Content is not in English
https://ku.wiktionary.org/wiki/weather_station Content is not in English
https://la.wikipedia.org/wiki/Theodorus_Storm Content is not in English
https://lb.wikipedia.org/wiki/Theodor_Storm Content is not in English
24700 documents
https://lmo.wikipedia.org/wiki/Storm_Worm Content is not in English
https://lo.wiktionary.org/wiki/weather_forecasting Content is not in English
https://lt.wiktionary.org/wiki/storm-petrel Content is not in English
https://lt.wiktionary.org/wiki/storm_petrel Content is not in English
https://lt.wiktionary.org/wiki/weather_forecast Content is not in English
https://lv.wikipedia.org/wiki/Frederiks_Storms Content is not in 

https://pl.wikipedia.org/wiki/Storm_Reid Content is not in English
https://pl.wikipedia.org/wiki/Storm_Thorgerson Content is not in English
https://pl.wikipedia.org/wiki/Storm_Uru Content is not in English
https://pl.wikipedia.org/wiki/Storm_Worm Content is not in English
https://pl.wikipedia.org/wiki/The_Meteors Content is not in English
https://pl.wikipedia.org/wiki/Theodor_Storm Content is not in English
https://pl.wikipedia.org/wiki/Toni_Storm Content is not in English
https://pl.wikipedia.org/wiki/USS_Meteor Content is not in English
https://pl.wiktionary.org/wiki/Alibi_Ike Content is not in English
https://pl.wiktionary.org/wiki/dust_storm Content is not in English
https://pl.wiktionary.org/wiki/electrical_storm Content is not in English
https://pl.wiktionary.org/wiki/ice_storm Content is not in English
https://pl.wiktionary.org/wiki/meteor_shower Content is not in English
https://pl.wiktionary.org/wiki/perfect_storm Content is not in English
https://pl.wiktionary.org/wiki/sand_s

https://ta.wiktionary.org/wiki/brain_storm Content is not in English
https://ta.wiktionary.org/wiki/dust_storm Content is not in English
https://ta.wiktionary.org/wiki/electrical_storm Content is not in English
https://ta.wiktionary.org/wiki/hail_storm Content is not in English
https://ta.wiktionary.org/wiki/ice_storm Content is not in English
https://ta.wiktionary.org/wiki/meteor_bumper Content is not in English
https://ta.wiktionary.org/wiki/meteor_shower Content is not in English
https://ta.wiktionary.org/wiki/sand_storm Content is not in English
https://ta.wiktionary.org/wiki/silver_storm Content is not in English
https://ta.wiktionary.org/wiki/solar_storm Content is not in English
https://ta.wiktionary.org/wiki/storm-bird Content is not in English
https://ta.wiktionary.org/wiki/storm-petrel Content is not in English
https://ta.wiktionary.org/wiki/storm-trooper Content is not in English
https://ta.wiktionary.org/wiki/storm_beach Content is not in English
https://ta.wiktionary.org/w

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20140714222617/http:/www.pagasa.dost.gov.ph/tropical-cyclone/weather-bulletin-update/139-tropical-cyclone/glenda-2014-bulletin/1044-1 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A9BF0BD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20150909155810/http:/www.weather.com/storms/hurricane/video/why-has-hurricane-season-been-so-quiet (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C597863D50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max r

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200815053406/https:/www.orlandosentinel.com/weather/hurricane/hurricane-preparedness/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C597862D10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200815053406/https:/www.orlandosentinel.com/weather/hurricane/os-ne-tropical-storm-thursday-update-20200806-dhgapxvk5rho7bdarnct7rp6x4-story.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5A9BF18D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Ma

https://www.ready.gov/zh-hans/hurricanes Content is not in English
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5foKYoqA1?url=http://www.radioaustralianews.net.au/stories/200904/2535347.htm?desktop (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5uTgs9Nz8?url=http://gwydir.demon.co.uk/advisories/TC_OUTLOOK_201011241100.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5w8ySiBjv?url=http://gwydir.demon.co.uk/advisories/MEDIA_201101302200.pdf (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(ho

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6MGxm3hYj?url=http://gwydir.demon.co.uk/advisories/WTIO22-FMEE_201312301839.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6MGxq46Gj?url=http://gwydir.demon.co.uk/advisories/WTIO30-FMEE_201312301857.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6N2vq816h?url=http://gwydir.demon.co.uk/advisories/TCUPDATE_201401311100.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200608170706/https:/www.severe-weather.eu/tropical-weather/india-cyclone-amphan-bengal-mk/amp/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5FA5CDBD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201106154659/https:/www.weathernationtv.com/news/hurricane-eta/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5FA52CF90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230117161616/https:/abcnews.go.com/In

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201222164839/https:/weather.com/storms/hurricane?pg=1 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C603087FD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201223070436/https:/weather.com/storms/hurricane?pg=3 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C61A30B550>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220922041951/https:/bagong.pagasa.dost.gov.ph/tropical-cyclone/severe-weather-bulletin (

Error fetching page: HTTPConnectionPool(host='www.srh.noaa.gov', port=80): Max retries exceeded with url: /abq/?n=climonhigh2008sepwxsum (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C60222E550>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='www.srh.noaa.gov', port=80): Max retries exceeded with url: /abq/?n=climonhigh2012sepwxsum (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C60222E290>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='www.srh.noaa.gov', port=80): Max retries exceeded with url: /bmx/?n=event_cindy2005 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C60222EF90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='www.srh.noaa.gov', port=80): Max retr

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080723222148/http:/www.bom.gov.au/weather/cyclone/about/cyclones-eastern.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C601EB8A10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080723222148/http:/www.bom.gov.au/weather/cyclone/about/cyclones-northern.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5F829BCD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080723222148/http:/www.bom.gov.au/wea

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080723211023/http:/www.bom.gov.au/weather/wa/cyclone/about/climatology.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C601EC3E90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080723211023/http:/www.bom.gov.au/weather/wa/cyclone/about/towns.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5F827DE10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080725092232/http:/www.bom.gov.au/weather/cyclon

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20170908100401/http:/www.miamiherald.com/news/weather/hurricane/article171822327.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C60149D490>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20170911081203/http:/www.miamiherald.com/news/weather/hurricane/article172486886.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C601497850>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20170912072220/http:/www.miami

Response status code: 404
Error fetching page: HTTPSConnectionPool(host='www.srh.weather.gov', port=443): Max retries exceeded with url: /hgx/hurricanes/1980s.htm (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5EC95F6D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
https://www.weather.gov/images/hgx/hurricanes/Cindy17/besttrack.PNG Content is not HTML
Response status code: 404
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6FC0CcvS7?url=http://www.ncdc.noaa.gov/stormevents/eventdetails.jsp?id=5534695 (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6FGqD2Qb9?url=http://www1.ncdc.noaa.gov/pub/orders/IPS-FEDAE4E6-B974-4B3E-A6DF-6A3D4EAD8765.pdf (Caused by SSLError(CertificateError("hostname 

26500 documents
26600 documents
Response status code: 403
Response status code: 404
Response status code: 404
Error fetching page: HTTPConnectionPool(host='www.hpc.ncep.noaa.gov', port=80): Max retries exceeded with url: /tropical/rain/tcmidwest.html (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C5E1C0F890>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
http://www.nhc.noaa.gov/archive/dis/NAL0898.009 Content is not HTML
http://www.nhc.noaa.gov/archive/dis/NAL0898.010 Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWDAT/2010/TWDAT.201009261748.txt Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWDAT/2010/TWDAT.201009270000.txt Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWDAT/2010/TWDAT.201009271829.txt Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWDAT/2012/TWDAT.201205260004.txt Content is not HTML
http://www.nhc.noaa.gov/archive/text/TWOAT/2013/TWOAT.201312041747.txt Content

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20150910061930/http:/www.weather.com/tropicalupdate (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C60F424D90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160209111822/https:/weather.com/storms/tornado (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C60EEEE910>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160316012303/https:/weather.com/storms/tornado (Caused by NewConnectionError('<urllib3.connection.H

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220603230638/https:/weather.com/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5FFFBE390>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220604083025/https:/weather.com/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5FFFBDA50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220929195115/https:/www.apnews.com/hub/hurricanes (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0

Error fetching robots.txt: HTTPConnectionPool(host='helios.gsfc.nasa.gov', port=80): Max retries exceeded with url: /robots.txt (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C5D88223D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPConnectionPool(host='helios.gsfc.nasa.gov', port=80): Max retries exceeded with url: /weather.html (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001C5D8820850>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Response status code: 404
https://www.nhc.noaa.gov/archive/text/TWDAT/2022/TWDAT.202209220452.txt Content is not HTML
https://www.nhc.noaa.gov/archive/text/TWDAT/2022/TWDAT.202209230338.txt Content is not HTML
Response status code: 403
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5pnTYdRVo?url=http://gwydir.demon.co.uk/advisories/WTIN20_

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20000817210217/https:/www.usatoday.com/weather/wlinda.htm (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5F0DB4410>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080917145118/http:/www.cnn.com/2008/US/weather/09/09/hurricane.ike/index.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5DDA3A550>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
https://web.archive.org/web/20151122180221/http:/www.jamaicaobserver.com/latestnews/Clean-up-begins-after-Tropical-Storm-Fay-batters-Bermuda Content is not HTML
Error fetching 

https://www.nhc.noaa.gov/archive/text/TWOEP/2006/TWOEP.200607161624.txt Content is not HTML
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /5PKajUuuL?url=http://sonicfighters.com/rattleman/042534-abioweb.txt (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6fBilWDlQ?url=http://gwydir.demon.co.uk/advisories/ABPW10-PGTW_201602100230.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6pD0r4uB5?url=http://gwydir.demon.co.uk/advisories/ABPW10-PGTW_201703231300.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSCon

Response status code: 404
Response status code: 404
Response status code: 404
28500 documents
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
28600 documents
28700 documents
28800 documents
Response status code: 404
28900 documents
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2005/Oct/2005102115.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2005/Oct/2005102215.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2005/Oct/2005102515.ABNT20'
Error fetching page: No connection adapters were found for 'ftp://ftp.met.fsu.edu/pub/weather/tropical/Outlook-A/2005/Oct/2005102615.ABNT20'
29000 documents
Response status code: 404
http://medicanes.altervista.

https://id.wikipedia.org/wiki/Carolina_Hurricanes Content is not in English
https://id.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://id.wikipedia.org/wiki/Hujan_meteor Content is not in English
https://id.wikipedia.org/wiki/Meteor_(satelit) Content is not in English
https://it.wikipedia.org/wiki/Hurricane_(Grace_Jones) Content is not in English
https://it.wikipedia.org/wiki/Hurricane_(Natalie_Grant) Content is not in English
https://lb.wikipedia.org/wiki/Hurricane_(Serbesch_Band) Content is not in English
https://ms.wikipedia.org/wiki/Hawker_Hurricane Content is not in English
https://ms.wikipedia.org/wiki/Pancuran_meteor Content is not in English
https://ms.wikipedia.org/wiki/Templat:Category_5_South_Pacific_severe_tropical_cyclones Content is not in English
https://ms.wikipedia.org/wiki/Templat:Weather Content is not in English
https://ms.wikipedia.org/wiki/The_Hurricane Content is not in English
https://nl.wikipedia.org/wiki/The_Hurricane_(1999) Content is no

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20071016182619/http:/www.weather.com/ready/tropical/index.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63912CAD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20070713225219/http:/www.weather.com/encyclopedia/charts/tropical/saffirscale.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63CDA62D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20070713225219/http:/www.weather.com/encyclopedia/tropic

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20110606001327/http:/australiasevereweather.com/cyclones/2000/summ9912.htm (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63CFFA050>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20110819202304/http:/www.webcitation.org/60XXGwIfN (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63912D650>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20151023052306/http:/www.weather.com/news/weather-hurricanes/hurricane-typho

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200727104928/https:/weather.com/storms/hurricane/video/tropical-storm-fay-batters-jersey-shore (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C62B53E3D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20200727104928/https:/weather.com/storms/hurricane/video/tropical-storm-fays-danger-to-swimmers (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63D2FB410>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/202007271

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819000706/https:/www.foxweather.com/local-weather (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C512100C10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819000706/https:/www.foxweather.com/weather-news (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5F910B490>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20230819012849/https:/abc7.com/hurricane-hilary-category-la-tropical-storm/13667263/ (Caused 

Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6HmtgFqHA?url=http://gwydir.demon.co.uk/advisories/WDPN31-PGTW_201306300300.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6IxjxmB6V?url=http://gwydir.demon.co.uk/advisories/WDPN31-PGTW_201308180900.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url: /6WNeKlyfU?url=http://gwydir.demon.co.uk/advisories/WTPS21-PGTW_201502160600.htm (Caused by SSLError(CertificateError("hostname 'www.webcitation.org' doesn't match 'webcitation.org'")))
Error fetching page: HTTPSConnectionPool(host='www.webcitation.org', port=443): Max retries exceeded with url

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080203052447/http:/weather.noaa.gov/pub/data/raw/ww/wwjp25.rjtd..txt (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C6495D45D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20081013022622/http:/weather.noaa.gov/pub/data/raw/ax/axau01.aprf..txt (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C64AACB750>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20100614100207/http:/weather.noaa.gov/pub/data/raw/ww/wwfj40

Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Error fetching robots.txt: HTTPSConnectionPool(host='www.srh.noaa.gov', port=443): Max retries exceeded with url: /robots.txt (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63159ED50>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error fetching page: HTTPSConnectionPool(host='www.srh.noaa.gov', port=443): Max retries exceeded with url: /ike/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63159CA10>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
https://www.weather.gov/source/lch/ike/LSRLCH.txt Content is not HTML
Error fetching robots.txt: HTTPSCo

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160101085859/http:/www.weather.com/storms/hurricane-central (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C64BF66110>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160101085859/http:/www.weather.com/safety/hurricane (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C64F23A490>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160101085859/http:/www.weather.com/storms/severe (Caused by NewConnectionError('<url

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20201105071146/https:/www.foxnews.com/world/tropical-storm-rene-paulette-atlantic-hurricane-season-2020-tropical-weather.print (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63CECE650>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20220604145036/https:/www.orlandosentinel.com/weather/hurricane/os-ne-tropical-storm-alex-saturday-20220604-ch7vmnkzs5ebnjospgiqosd5lm-story.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C649511150>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080725095238/http:/www.bom.gov.au/weather/wa/cyclone/about/towns.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C62B679B10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080720112147/http:/www.bom.gov.au/weather/cyclone/tc-history.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C63B30DC90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080720112147/http:/www.bom.gov.au/weather/nsw/cyclone/tcns

29800 documents
Response status code: 404
29900 documents
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
Response status code: 404
https://web.archive.org/web/20060822223914/http:/www.australiasevereweather.com/cyclones/2004/summ0310.txt Content is not HTML
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080720112147/http:/www.bom.gov.au/weather/cyclone/about/cyclones-eastern.shtml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C512AE8290>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
E

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20080916074051/http:/www.nhc.noaa.gov/2001iris.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C64698B510>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20081011043611/http:/www.newschannel5.tv/Weather/Hurricane/992260 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C64BCEFFD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20081011043611/http:/www.newschannel5.tv/Weather/Hurricane/992737 (Caused by NewConn

Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20131109010754/http:/www.nhc.noaa.gov/2001allison.html (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C5F9303C50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20181225085959/https:/www.wunderground.com/hurricane/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C652506050>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20181225083524/https:/www.wunderground.com/tropical/ABNT20.html (Caused by NewConnectionError

Response status code: 403
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20110611115408/http:/www.atl.ec.gc.ca/weather/hurricane/bulletins/20100905175427.Earl.txt.en (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C6524FD890>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20110611123520/http:/www.atl.ec.gc.ca/weather/hurricane/bulletins/20080722180403.Cristobal.txt.en (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001C6524FC710>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded 

Response status code: 404
30000 documents
An error occurred: 'NoneType' object has no attribute 'strip'
Error fetching page: HTTPSConnectionPool(host='www.essl.org', port=443): Max retries exceeded with url: /cms/european-severe-weather-database/ewob/ (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001C60252D1D0>, 'Connection to www.essl.org timed out. (connect timeout=None)'))
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
Error fetching page: HTTPSConnectionPool(host='www.essl.org', port=443): Max retries exce

An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
Response status code: 404
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
Error fetching page: HTTPSConnectionPool(host='www.essl.org', port=443): Max retries exceeded with url: /cms/european-severe-weather-database/ewob/ewob-ads/ (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001C64BE23850>, 'Connection to www.essl.org timed out. (connect timeout=None)'))
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
Error fetching page: HTTPSConnectionPool(host='web.archive.org', port=443): Max retries exceeded with url: /web/20160310124443/https:/weather.com/storms/hurri

An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error occurred: 'NoneType' object has no attribute 'strip'
An error

PermissionError: [Errno 13] Permission denied: 'hurricane_data_1.txt'

In [44]:
frontier_length = frontier_queue.qsize()
print("Length of frontier_queue:", frontier_length)

Length of frontier_queue: 675153


In [45]:
visited_url_temp = visited_links

In [46]:
hurricane_data_temp = hurricane_data

In [47]:
frontier_temp = frontier_queue

# Write the crawled data into files

In [None]:
import json

def dump_inlinks_to_json(hurricane_data, output_file, doc_ready_to_write):
    inlinks_data = {}
    
    for link, data in hurricane_data.items():
        inlinks = list(data.get('in_links', {}).keys())
        if link in doc_ready_to_write:
            inlinks_data[link] = inlinks
    
    with open(output_file, 'w') as json_file:
        json.dump(inlinks_data, json_file, indent=4)

def dump_outlinks_to_json(hurricane_data, output_file, doc_ready_to_write):
    outlinks_data = {}
    
    for link, data in hurricane_data.items():
        outlinks = [sublist[0] for sublist in data.get('out_links', [])]
        if link in doc_ready_to_write: 
            outlinks_data[link] = outlinks
    
    with open(output_file, 'w') as json_file:
        json.dump(outlinks_data, json_file, indent=4)
        
        
inlinks_output_file = 'hurricane_inlinks_final.json'
outlinks_output_file = 'hurricane_outlinks_final.json'


dump_inlinks_to_json(hurricane_data, inlinks_output_file, doc_ready_to_write)
dump_outlinks_to_json(hurricane_data, outlinks_output_file, doc_ready_to_write)
print("File write completed")

In [54]:
file_number = "final"

count_processed = 0
for i, url in enumerate(doc_ready_to_write, start=1):
    if hurricane_data[url]['content'] is not None:
        process_document(url, hurricane_data[url]['content'], file_number)
        count_processed += 1
    if i % 500 == 0:
        print("Processed ", i, " URLs")

print("Total number of documents processed:", count_processed)

Processed  500  URLs
Processed  1000  URLs
Processed  1500  URLs
Processed  2000  URLs
Processed  2500  URLs
Processed  3000  URLs
Processed  3500  URLs
Processed  4000  URLs
Processed  4500  URLs
Processed  5000  URLs
Processed  5500  URLs
Processed  6000  URLs
Processed  6500  URLs
Processed  7000  URLs
Processed  7500  URLs
Processed  8000  URLs
Processed  8500  URLs
Processed  9000  URLs
Processed  9500  URLs
Processed  10000  URLs
Processed  10500  URLs
Processed  11000  URLs
Processed  11500  URLs
Processed  12000  URLs
Processed  12500  URLs
Processed  13000  URLs
Processed  13500  URLs
Processed  14000  URLs
Processed  14500  URLs
Processed  15000  URLs
Processed  15500  URLs
Processed  16000  URLs
Processed  16500  URLs
Processed  17000  URLs
Processed  17500  URLs
Processed  18000  URLs
Processed  18500  URLs
Processed  19000  URLs
Processed  19500  URLs
Processed  20000  URLs
Processed  20500  URLs
Processed  21000  URLs
Processed  21500  URLs
Processed  22000  URLs
Processe

In [55]:
print(wave_number)

8


In [56]:
print(len(visited_links))
print(len(doc_ready_to_write))

30000
30000
