In [None]:
# Mount Google Drive at /content/drive (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import requests
import ssl
import os
from bs4 import BeautifulSoup
import csv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import re
from urllib.parse import urlparse

# DEFINE FUNCTIONS

In [None]:
# Read the URL list from a CSV file
def load_urls(file_path):
    """Return the URLs stored in the first column of a CSV file.

    Args:
        file_path: Path to a UTF-8 encoded CSV file whose first column holds
            one URL per row. Any further columns are ignored.

    Returns:
        list[str]: The first-column values in file order, with surrounding
        whitespace removed. Blank lines and rows whose first cell is empty
        are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    urls = []
    # 'utf-8-sig' drops a leading byte-order mark (common in spreadsheet
    # exports) that would otherwise be glued to the first URL; newline='' is
    # what the csv module expects so quoted line breaks are parsed correctly.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:  # blank line
                continue
            url = row[0].strip()
            if url:  # ignore rows whose first cell is empty
                urls.append(url)
    return urls

In [None]:
# Build an SSLContext meant to avoid the DH_KEY_TOO_SMALL handshake error
def create_ssl_context():
    """Create a default SSL context and widen its cipher list to 'ALL'.

    Returns:
        ssl.SSLContext: A context from ssl.create_default_context() whose
        cipher selection has been replaced with the OpenSSL 'ALL' cipher string.
    """
    permissive_context = ssl.create_default_context()
    permissive_context.set_ciphers('ALL')
    return permissive_context

In [None]:
# Download a page and return its HTML
def download_page(url, timeout=10):
    """Fetch a URL with requests and return the decoded response body.

    Args:
        url: Absolute URL to download.
        timeout: Seconds passed to requests.get as the timeout (default 10,
            the value this notebook has always used).

    Returns:
        str | None: The response text, or None when the request fails or the
        server answers with an HTTP error status (the error is printed).
    """
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # The previous version also built an SSLContext here but never handed
        # it to requests (requests.get has no such parameter), so that dead
        # local was removed; wiring a custom context in would need a
        # transport adapter mounted on a Session.
        response = requests.get(url, verify=False, timeout=timeout)
        response.raise_for_status()

        # When the server declares no charset, requests assumes ISO-8859-1 for
        # text/* content, which garbles UTF-8 (e.g. Vietnamese) pages. Fall
        # back to the encoding detected from the body in that case.
        content_type = response.headers.get('Content-Type', '')
        if 'charset' not in content_type.lower():
            response.encoding = response.apparent_encoding

        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error when dowwloading page {url}: {e}")
        return None

In [None]:
# Make sure the URL carries a scheme
def add_scheme_to_url(url):
    """Prefix 'http://' to a URL that urlparse reports as having no scheme.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        str: The URL unchanged when a scheme is present, otherwise the URL
        with 'http://' prepended.
    """
    has_scheme = bool(urlparse(url).scheme)
    return url if has_scheme else 'http://' + url

In [None]:
# Check whether a URL has both a scheme and a network location
def is_valid_url(url):
    """Tell whether urlparse finds both a scheme and a netloc in the URL.

    Args:
        url: URL string to check.

    Returns:
        bool: True only when both the scheme and the netloc are non-empty.
    """
    components = urlparse(url)
    has_scheme = bool(components.scheme)
    has_host = bool(components.netloc)
    return has_scheme and has_host


def sanitize_url(url, default_suffix=".gob.pe"):
    """Normalise a raw URL and complete a bare host name with a domain suffix.

    Trailing dots and surrounding whitespace are removed. The suffix is added
    only when the host part contains no dot at all (e.g. 'minsa'), and it is
    inserted right after the host name. The previous implementation counted
    dots across the whole URL and appended the suffix at the very end, which
    turned 'https://zalo.me/pc' into 'https://zalo.me/pc.gob.pe' and
    'http://chinhphu.vn/' into 'http://chinhphu.vn/.gob.pe'.

    Args:
        url: Raw URL, with or without a scheme.
        default_suffix: Domain suffix appended to a dot-less host name.
            Defaults to '.gob.pe' for backward compatibility.

    Returns:
        str: The cleaned URL. An empty input yields an empty string.
    """
    sanitized_url = url.strip().rstrip('.')

    # Split into optional 'scheme://', authority (up to the first / ? #) and
    # everything after it. The pattern matches any string, including ''.
    parts = re.match(
        r'^([A-Za-z][A-Za-z0-9+.\-]*://)?([^/?#]*)(.*)$',
        sanitized_url,
        re.DOTALL,
    )
    scheme_prefix = parts.group(1) or ''
    authority = parts.group(2)
    tail = parts.group(3)

    # Keep optional 'user@' and ':port' around the host name untouched.
    userinfo, at_sign, host_port = authority.rpartition('@')
    hostname, colon, port = host_port.partition(':')

    # Only a non-empty, dot-less, non-IPv6-literal host is considered incomplete.
    if hostname and '.' not in hostname and not hostname.startswith('['):
        hostname = hostname + default_suffix

    return f"{scheme_prefix}{userinfo}{at_sign}{hostname}{colon}{port}{tail}"

In [None]:
# Extract the visible text of an HTML document
def extract_text_from_html(html_content):
    """Pull text out of an HTML string, trying several sources in order.

    The sources are tried one after another and the first non-empty result
    wins: <body>, <title>, the joined text of article/section/p/h1/h2/h3
    elements, the joined text of header/footer elements, the content of
    <meta name="description">, <main>, and finally the whole document.

    Args:
        html_content: HTML markup to parse with BeautifulSoup's 'html.parser'.

    Returns:
        The selected text after it has been passed through clean_text().
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    def node_text(node):
        # Text of a single element, or "" when the element is missing.
        return node.get_text(separator=' ', strip=True) if node else ""

    def joined_text(tag_names):
        # Space-joined text of every element whose tag is in tag_names.
        return ' '.join(
            tag.get_text(separator=' ', strip=True)
            for tag in soup.find_all(tag_names)
        )

    def meta_description():
        # Content attribute of <meta name="description">, or "" if absent/empty.
        meta_tag = soup.find('meta', {'name': 'description'})
        if meta_tag and meta_tag.get('content'):
            return meta_tag['content']
        return ""

    # Ordered fallbacks; each one is evaluated only if all earlier ones were empty.
    fallbacks = (
        lambda: node_text(soup.body),
        lambda: node_text(soup.title),
        lambda: joined_text(['article', 'section', 'p', 'h1', 'h2', 'h3']),
        lambda: joined_text(['header', 'footer']),
        meta_description,
        lambda: node_text(soup.find('main')),
        lambda: soup.get_text(separator=' ', strip=True),
    )

    extracted = ""
    for candidate in fallbacks:
        extracted = candidate()
        if extracted:
            break

    return clean_text(extracted)

In [None]:
# Text preprocessing
def clean_text(text):
    """Lower-case text, drop punctuation and collapse whitespace runs.

    Args:
        text: Input string.

    Returns:
        str: The lower-cased text with every character that is neither a word
        character nor whitespace removed, and each run of whitespace replaced
        by a single space. Leading/trailing whitespace is collapsed, not
        stripped.
    """
    # Applied in order: punctuation removal first, then whitespace collapsing.
    substitutions = (
        (r'[^\w\s]', ''),
        (r'\s+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

In [None]:
# Save extracted text into a folder, one file per URL
def save_text_to_file(url, text, folder):
    """Write text to '<folder>/<domain><path>.txt' and return that path.

    The file name is the URL without its scheme, with every '/' replaced by
    '_' and cut to 200 characters. Compared with the earlier version this
    no longer raises IndexError for URLs without '//' and no longer truncates
    the path when the domain string appears a second time inside it.

    Args:
        url: Page URL the text came from.
        text: Text to store (written as UTF-8).
        folder: Destination directory; created if it does not exist.

    Returns:
        str: Path of the file that was written.
    """
    # Remove a leading 'scheme://' (or a bare leading '//') only; a '//'
    # occurring later in the URL belongs to the path or query.
    without_scheme = re.sub(r'^(?:[A-Za-z][A-Za-z0-9+.\-]*:)?//', '', url)

    # Everything up to the first '/' is the domain; the rest (including the
    # slash itself, query string and fragment) is treated as the path.
    domain, slash, remainder = without_scheme.partition('/')
    path = (slash + remainder).replace('/', '_')  # '/' is not allowed in file names

    # Combine domain and path into a single file name
    filename = f"{domain}{path}"

    # Keep the name well below the common 255-character file-system limit
    max_filename_length = 200
    if len(filename) > max_filename_length:
        filename = filename[:max_filename_length]

    # Make sure the destination exists, then write the text as a .txt file
    if folder:
        os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, f"{filename}.txt")
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)

    return filepath


In [None]:
# Save the URLs that could not be loaded into fail.csv
def save_failed_urls(urls=None, file_path='fail.csv'):
    """Write the failed URLs to a one-column CSV file.

    Args:
        urls: Iterable of URL strings. When omitted, the module-level
            `failed_urls` list is used, which is how the notebook calls it.
        file_path: Destination CSV path (default 'fail.csv').

    Nothing is written when there are no URLs to report.
    """
    if urls is None:
        urls = failed_urls  # notebook-level list filled by the download cell
    if not urls:
        return

    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Failed URLs'])  # column header
        writer.writerows([url] for url in urls)

# Download Pages and Save Content

In [None]:
# Create folder to save output, including safe and defaced folders
output_folder = 'output'
safe_folder = os.path.join(output_folder, 'safe')
deface_folder = os.path.join(output_folder, 'deface')

# Create subfolders if they do not exist
os.makedirs(safe_folder, exist_ok=True)
os.makedirs(deface_folder, exist_ok=True)

# URLs that were invalid, failed to download, or yielded no text;
# written to fail.csv at the end of this cell
failed_urls = []

# Read URLs from CSV file
safe_urls = load_urls('/content/safe_urls.csv')
deface_urls = load_urls('/content/deface_urls.csv')

# Paths of the .txt files written for each class
safe_documents = []
deface_documents = []


def _download_and_save_pages(urls, folder, documents, failed):
    """Download every URL, save its extracted text in `folder`, record results.

    Args:
        urls: Raw URLs to process (sanitised and given a scheme first).
        folder: Directory that receives one .txt file per downloaded page.
        documents: List extended in place with the path of each saved file.
        failed: List extended in place with each URL that was invalid, could
            not be downloaded, or produced no text.
    """
    for url in urls:
        url = sanitize_url(url)
        url = add_scheme_to_url(url)
        if not is_valid_url(url):
            failed.append(url)
            print(f"Invalid URL: {url}")
            continue

        print(f"Downloading page {url}...")
        html_content = download_page(url)
        if not html_content:
            failed.append(url)
            print(f"Failed to download page from {url}")
            continue

        body_text = extract_text_from_html(html_content)
        if not body_text:
            failed.append(url)
            print(f"No valid text from {url}")
            continue

        filename = save_text_to_file(url, body_text, folder)
        documents.append(filename)  # keep the path of the saved .txt
        print(f"Text saved to {filename}")


# Load pages and save to safe and deface folders
_download_and_save_pages(safe_urls, safe_folder, safe_documents, failed_urls)
_download_and_save_pages(deface_urls, deface_folder, deface_documents, failed_urls)

# Save failed URLs to fail.csv
save_failed_urls()


Downloading page https://moj.gov.vn/...




Text saved to output/safe/moj.gov.vn_.txt
Downloading page https://ncov.moh.gov.vn/...




Error when dowwloading page https://ncov.moh.gov.vn/: HTTPSConnectionPool(host='covid19.gov.vn', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e1484878250>: Failed to resolve 'covid19.gov.vn' ([Errno -2] Name or service not known)"))
Failed to download page from https://ncov.moh.gov.vn/
Downloading page http://chinhphu.vn/.gob.pe...




Error when dowwloading page http://chinhphu.vn/.gob.pe: 404 Client Error: Not Found for url: https://chinhphu.vn/.gob.pe
Failed to download page from http://chinhphu.vn/.gob.pe
Downloading page http://vanban.chinhphu.vn/portal/page/portal/chinhphu/hethongvanban...




Error when dowwloading page http://vanban.chinhphu.vn/portal/page/portal/chinhphu/hethongvanban: 404 Client Error: Not Found for url: https://vanban.chinhphu.vn//portal/page/portal/chinhphu/hethongvanban
Failed to download page from http://vanban.chinhphu.vn/portal/page/portal/chinhphu/hethongvanban
Downloading page http://chinhphu.vn/portal/page/portal/chinhphu/GioiThieu.gob.pe...




Error when dowwloading page http://chinhphu.vn/portal/page/portal/chinhphu/GioiThieu.gob.pe: 404 Client Error: Not Found for url: https://chinhphu.vn/portal/page/portal/chinhphu/GioiThieu.gob.pe
Failed to download page from http://chinhphu.vn/portal/page/portal/chinhphu/GioiThieu.gob.pe
Downloading page http://baochinhphu.vn/.gob.pe...




Error when dowwloading page http://baochinhphu.vn/.gob.pe: 404 Client Error: Not Found for url: https://baochinhphu.vn/.gob.pe
Failed to download page from http://baochinhphu.vn/.gob.pe
Downloading page http://baochinhphu.vn/Chinh-sach-moi/De-xuat-ho-tro-nan-nhan-bi-mua-ban-nguoi/402202.vgp...




Text saved to output/safe/baochinhphu.vn_Chinh-sach-moi_De-xuat-ho-tro-nan-nhan-bi-mua-ban-nguoi_402202.vgp.txt
Downloading page http://baochinhphu.vn/Hoi-nhap/453.vgp...




Text saved to output/safe/baochinhphu.vn_Hoi-nhap_453.vgp.txt
Downloading page http://chinhsachonline.chinhphu.vn/Danh-sach-cau-hoi/pagetype2/Chinh-sach-voi-nguoi-co-cong/section2.vgp...




Text saved to output/safe/chinhsachonline.chinhphu.vn_Danh-sach-cau-hoi_pagetype2_Chinh-sach-voi-nguoi-co-cong_section2.vgp.txt
Downloading page http://chinhsachonline.chinhphu.vn/Chi-tiet-cau-hoi/Ban-quan-ly-du-an-co-can-chung-chi-nang-luc-hoat-dong-xay-dung/26053.vgp...




Text saved to output/safe/chinhsachonline.chinhphu.vn_Chi-tiet-cau-hoi_Ban-quan-ly-du-an-co-can-chung-chi-nang-luc-hoat-dong-xay-dung_26053.vgp.txt
Downloading page https://dichvucong.gov.vn/...




No valid text from https://dichvucong.gov.vn/
Downloading page https://dangkykinhdoanh.gov.vn/...




Text saved to output/safe/dangkykinhdoanh.gov.vn_.txt
Downloading page https://www.gso.gov.vn/...




Text saved to output/safe/www.gso.gov.vn_.txt
Downloading page https://vnsw.gov.vn/...




Text saved to output/safe/vnsw.gov.vn_.txt
Downloading page https://www.moit.gov.vn/...




Text saved to output/safe/www.moit.gov.vn_.txt
Downloading page https://dichvuthongtin.dkkd.gov.vn/...




Text saved to output/safe/dichvuthongtin.dkkd.gov.vn_.txt
Downloading page https://moet.gov.vn/...
Error when dowwloading page https://moet.gov.vn/: HTTPSConnectionPool(host='moet.gov.vn', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e14839a6ce0>, 'Connection to moet.gov.vn timed out. (connect timeout=10)'))
Failed to download page from https://moet.gov.vn/
Downloading page https://mic.gov.vn/...




Text saved to output/safe/mic.gov.vn_.txt
Downloading page http://baocaobtn.vncdc.gov.vn/...




Text saved to output/safe/baocaobtn.vncdc.gov.vn_.txt
Downloading page http://vncdc.gov.vn/...




Error when dowwloading page http://vncdc.gov.vn/: HTTPSConnectionPool(host='vncdc.gov.vn', port=443): Read timed out.
Failed to download page from http://vncdc.gov.vn/
Downloading page https://egov.danang.gov.vn/...




Text saved to output/safe/egov.danang.gov.vn_.txt
Downloading page https://dichvucong.danang.gov.vn/...
Error when dowwloading page https://dichvucong.danang.gov.vn/: HTTPSConnectionPool(host='dichvucong.danang.gov.vn', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e14841d0c10>, 'Connection to dichvucong.danang.gov.vn timed out. (connect timeout=10)'))
Failed to download page from https://dichvucong.danang.gov.vn/
Downloading page https://baohiemxahoi.gov.vn/...




Text saved to output/safe/baohiemxahoi.gov.vn_.txt
Downloading page https://baohiemxahoi.gov.vn/gioithieu/Pages/gioi-thieu-chung.aspx...




Text saved to output/safe/baohiemxahoi.gov.vn_gioithieu_Pages_gioi-thieu-chung.aspx.txt
Downloading page https://mt.gov.vn/...




Error when dowwloading page https://mt.gov.vn/: 403 Client Error: Forbidden for url: https://mt.gov.vn/
Failed to download page from https://mt.gov.vn/
Downloading page https://egov.danang.gov.vn/gioithieu...




Text saved to output/safe/egov.danang.gov.vn_gioithieu.txt
Downloading page http://www.nchmf.gov.vn/...




Text saved to output/safe/www.nchmf.gov.vn_.txt
Downloading page https://www.quangtri.gov.vn/...




Text saved to output/safe/www.quangtri.gov.vn_.txt
Downloading page https://gplx.gov.vn/...




No valid text from https://gplx.gov.vn/
Downloading page https://dichvucong.gplx.gov.vn/...




Error when dowwloading page https://dichvucong.gplx.gov.vn/: 403 Client Error: Forbidden for url: https://dichvucong.gplx.gov.vn/
Failed to download page from https://dichvucong.gplx.gov.vn/
Downloading page https://dichvucong.gplx.gov.vn/faces/registration/guide.xhtml...
Error when dowwloading page https://dichvucong.gplx.gov.vn/faces/registration/guide.xhtml: 403 Client Error: Forbidden for url: https://dichvucong.gplx.gov.vn/faces/registration/guide.xhtml
Failed to download page from https://dichvucong.gplx.gov.vn/faces/registration/guide.xhtml
Downloading page https://www.mof.gov.vn/...




Text saved to output/safe/www.mof.gov.vn_.txt
Downloading page https://haiphong.gov.vn/...




Text saved to output/safe/haiphong.gov.vn_.txt
Downloading page https://www.moha.gov.vn/...




Text saved to output/safe/www.moha.gov.vn_.txt
Downloading page https://haiphong.gov.vn/tin-tuc-su-kien.html...




Text saved to output/safe/haiphong.gov.vn_tin-tuc-su-kien.html.txt
Downloading page https://dichvucong.moit.gov.vn/...




Text saved to output/safe/dichvucong.moit.gov.vn_.txt
Downloading page http://dangcongsan.vn/.gob.pe...




Error when dowwloading page http://dangcongsan.vn/.gob.pe: 404 Client Error: Not Found for url: https://dangcongsan.vn/404.aspx
Failed to download page from http://dangcongsan.vn/.gob.pe
Downloading page http://bacninh.gov.vn/...




Text saved to output/safe/bacninh.gov.vn_.txt
Downloading page http://cangvuhaiphong.gov.vn/...
Error when dowwloading page http://cangvuhaiphong.gov.vn/: HTTPSConnectionPool(host='cangvuhaiphong.gov.vn', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e1486a3b490>, 'Connection to cangvuhaiphong.gov.vn timed out. (connect timeout=10)'))
Failed to download page from http://cangvuhaiphong.gov.vn/
Downloading page https://www.sav.gov.vn/...




Text saved to output/safe/www.sav.gov.vn_.txt
Downloading page http://vietnamtourism.gov.vn/...




No valid text from http://vietnamtourism.gov.vn/
Downloading page http://moc.gov.vn/...




Text saved to output/safe/moc.gov.vn_.txt
Downloading page http://dichvucong.xaydung.gov.vn/web/cong-dich-vu-cong...




Text saved to output/safe/dichvucong.xaydung.gov.vn_web_cong-dich-vu-cong.txt
Downloading page https://daklak.gov.vn/...




Text saved to output/safe/daklak.gov.vn_.txt
Downloading page https://daklak.gov.vn/kinh-te...




Text saved to output/safe/daklak.gov.vn_kinh-te.txt
Downloading page https://daklak.gov.vn/van-hoa-xa-hoi...




Text saved to output/safe/daklak.gov.vn_van-hoa-xa-hoi.txt
Downloading page https://daklak.gov.vn/cai-cach-thu-tuc-hanh-chinh...




Text saved to output/safe/daklak.gov.vn_cai-cach-thu-tuc-hanh-chinh.txt
Downloading page http://vbpl.vn/.gob.pe...




Error when dowwloading page http://vbpl.vn/.gob.pe: 500 Server Error: INTERNAL SERVER ERROR for url: https://vbpl.vn/.gob.pe
Failed to download page from http://vbpl.vn/.gob.pe
Downloading page https://thanhhoa.gov.vn/portal/Pages/2020-7-17/Bo-Chinh-tri-thong-qua-De-an-Xay-dung-va-phat-trieeuift2.aspx...




Text saved to output/safe/thanhhoa.gov.vn_portal_Pages_2020-7-17_Bo-Chinh-tri-thong-qua-De-an-Xay-dung-va-phat-trieeuift2.aspx.txt
Downloading page https://thanhhoa.gov.vn/portal/Pages/default.aspx...




Text saved to output/safe/thanhhoa.gov.vn_portal_Pages_default.aspx.txt
Downloading page https://thanhhoa.gov.vn/portal/Pages/Chinh-tri.aspx...




Text saved to output/safe/thanhhoa.gov.vn_portal_Pages_Chinh-tri.aspx.txt
Downloading page http://baochinhphu.vn/Xay-dung-Chinh-phu-dien-tu/Day-manh-ung-dung-Cong-dich-vu-cong-quoc-gia/400194.vgp...




Text saved to output/safe/baochinhphu.vn_Xay-dung-Chinh-phu-dien-tu_Day-manh-ung-dung-Cong-dich-vu-cong-quoc-gia_400194.vgp.txt
Downloading page https://dichvucong.quangtri.gov.vn/quangtri/bothutuc...
Error when dowwloading page https://dichvucong.quangtri.gov.vn/quangtri/bothutuc: HTTPSConnectionPool(host='dichvucong.quangtri.gov.vn', port=443): Max retries exceeded with url: /quangtri/bothutuc (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e1485538b50>, 'Connection to dichvucong.quangtri.gov.vn timed out. (connect timeout=10)'))
Failed to download page from https://dichvucong.quangtri.gov.vn/quangtri/bothutuc
Downloading page https://nopthue.gdt.gov.vn/epay_nnt/home.jsp...
Error when dowwloading page https://nopthue.gdt.gov.vn/epay_nnt/home.jsp: HTTPSConnectionPool(host='nopthue.gdt.gov.vn', port=443): Max retries exceeded with url: /epay_nnt/home.jsp (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e1485538be0>, 'Connec



Text saved to output/safe/soyte.namdinh.gov.vn_.txt
Downloading page http://soyte.namdinh.gov.vn/home/tin-tuc/cap-nhat-tinh-hinh-dich-benh-viem-duong-ho-hap-do-covid-19-tinh-den-6h-ngay-2972020-2134...




Text saved to output/safe/soyte.namdinh.gov.vn_home_tin-tuc_cap-nhat-tinh-hinh-dich-benh-viem-duong-ho-hap-do-covid-19-tinh-den-6h-ngay-2972020-2134.txt
Downloading page http://soyte.namdinh.gov.vn/home/hoat-dong-nganh...




Text saved to output/safe/soyte.namdinh.gov.vn_home_hoat-dong-nganh.txt
Downloading page http://soyte.namdinh.gov.vn/home/van-ban...




Text saved to output/safe/soyte.namdinh.gov.vn_home_van-ban.txt
Downloading page http://soyte.namdinh.gov.vn/home/hoi-dap...




Text saved to output/safe/soyte.namdinh.gov.vn_home_hoi-dap.txt
Downloading page http://soyte.namdinh.gov.vn/home/video...




Text saved to output/safe/soyte.namdinh.gov.vn_home_video.txt
Downloading page https://www.cantho.gov.vn/wps/portal/home/Trang-chu...




Text saved to output/safe/www.cantho.gov.vn_wps_portal_home_Trang-chu.txt
Downloading page http://laichau.gov.vn/...




Text saved to output/safe/laichau.gov.vn_.txt
Downloading page http://laichau.gov.vn/danh-muc/thong-bao...




Text saved to output/safe/laichau.gov.vn_danh-muc_thong-bao.txt
Downloading page http://laichau.gov.vn/tin-tuc-su-kien/chuyen-de/tin-trong-nuoc/van-to-chuc-thi-tot-nghiep-thuc-hien-nghiem-cac-quy-dinh-cho.html...




Text saved to output/safe/laichau.gov.vn_tin-tuc-su-kien_chuyen-de_tin-trong-nuoc_van-to-chuc-thi-tot-nghiep-thuc-hien-nghiem-cac-quy-dinh-cho.html.txt
Downloading page http://laichau.gov.vn/tin-tuc-su-kien/hoat-dong-cua-lanh-dao-tinh/khai-mac-trong-the-dai-hoi-thi-dua-yeu-nuoc-tinh-lai-chau-la.html...




Text saved to output/safe/laichau.gov.vn_tin-tuc-su-kien_hoat-dong-cua-lanh-dao-tinh_khai-mac-trong-the-dai-hoi-thi-dua-yeu-nuoc-tinh-lai-chau-la.html.txt
Downloading page https://mail.laichau.gov.vn/...




Text saved to output/safe/mail.laichau.gov.vn_.txt
Downloading page https://dichvucong.laichau.gov.vn/...




Text saved to output/safe/dichvucong.laichau.gov.vn_.txt
Downloading page https://dichvucong.laichau.gov.vn/dichvucong/tiepnhanonline...




Text saved to output/safe/dichvucong.laichau.gov.vn_dichvucong_tiepnhanonline.txt
Downloading page https://dichvucong.laichau.gov.vn/dichvucong/tracuu...




Text saved to output/safe/dichvucong.laichau.gov.vn_dichvucong_tracuu.txt
Downloading page https://dichvucong.laichau.gov.vn/dichvucong/thongke...




Error when dowwloading page https://dichvucong.laichau.gov.vn/dichvucong/thongke: HTTPSConnectionPool(host='dichvucong.laichau.gov.vn', port=443): Read timed out. (read timeout=10)
Failed to download page from https://dichvucong.laichau.gov.vn/dichvucong/thongke
Downloading page https://dichvucong.laichau.gov.vn/dichvucong/vanbanphapluat...




Text saved to output/safe/dichvucong.laichau.gov.vn_dichvucong_vanbanphapluat.txt
Downloading page https://nhatrang.khanhhoa.gov.vn/...




Text saved to output/safe/nhatrang.khanhhoa.gov.vn_.txt
Downloading page https://nhatrang.khanhhoa.gov.vn/vi/tin-noi-bat/nha-trang-so-ket-cong-tac-dan-van-6-thang-dau-nam-2020...




Text saved to output/safe/nhatrang.khanhhoa.gov.vn_vi_tin-noi-bat_nha-trang-so-ket-cong-tac-dan-van-6-thang-dau-nam-2020.txt
Downloading page https://nhatrang.khanhhoa.gov.vn/vi/van-hoa-xa-hoi...




Text saved to output/safe/nhatrang.khanhhoa.gov.vn_vi_van-hoa-xa-hoi.txt
Downloading page https://nhatrang.khanhhoa.gov.vn/vi/thong-bao/thong-bao-ve-viec-khan-truong-thuc-hien-chi-dao-cua-ubnd-tinh-khanh-hoa-ve-kiem-soat-chat-che-dich-benh-covid-19-ngan-ngua-khong-de-lay-lan-trong-cong-dong...




Text saved to output/safe/nhatrang.khanhhoa.gov.vn_vi_thong-bao_thong-bao-ve-viec-khan-truong-thuc-hien-chi-dao-cua-ubnd-tinh-khanh-hoa-ve-kiem-soat-chat-che-dich-benh-covid-19-ngan-ngua-khong-de-lay-lan-trong-cong-dong.txt
Downloading page http://thainguyen.gov.vn/vi_VN/trang-chu...




Text saved to output/safe/thainguyen.gov.vn_vi_VN_trang-chu.txt
Downloading page http://thainguyen.gov.vn/vi_VN/chinh-quyen...




Text saved to output/safe/thainguyen.gov.vn_vi_VN_chinh-quyen.txt
Downloading page http://thainguyen.gov.vn/thu-tuc-hanh-chinh...




Text saved to output/safe/thainguyen.gov.vn_thu-tuc-hanh-chinh.txt
Downloading page https://dichvucong.thainguyen.gov.vn/...
Error when dowwloading page https://dichvucong.thainguyen.gov.vn/: HTTPSConnectionPool(host='dichvucong.thainguyen.gov.vn', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e1486612a70>, 'Connection to dichvucong.thainguyen.gov.vn timed out. (connect timeout=10)'))
Failed to download page from https://dichvucong.thainguyen.gov.vn/
Downloading page https://dichvucong.thainguyen.gov.vn/dich-vu-cong...
Error when dowwloading page https://dichvucong.thainguyen.gov.vn/dich-vu-cong: HTTPSConnectionPool(host='dichvucong.thainguyen.gov.vn', port=443): Max retries exceeded with url: /dich-vu-cong (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e1482ddf0a0>, 'Connection to dichvucong.thainguyen.gov.vn timed out. (connect timeout=10)'))
Failed to download page from htt



Text saved to output/safe/www.dienbien.gov.vn_portal.txt
Downloading page http://www.dienbien.gov.vn/portal/Pages/home-new/cong-dan.aspx...




Text saved to output/safe/www.dienbien.gov.vn_portal_Pages_home-new_cong-dan.aspx.txt
Downloading page https://dichvucong.dienbien.gov.vn/...




Text saved to output/safe/dichvucong.dienbien.gov.vn_.txt
Downloading page https://dichvucong.dienbien.gov.vn/thu-tuc-hanh-chinh...




Text saved to output/safe/dichvucong.dienbien.gov.vn_thu-tuc-hanh-chinh.txt
Downloading page https://dichvucong.dienbien.gov.vn/thu-tuc-hanh-chinh/-/tra-cuu-thu-tuc/chi-tiet-tthc/detail_tthc/thutuc/5101...




Text saved to output/safe/dichvucong.dienbien.gov.vn_thu-tuc-hanh-chinh_-_tra-cuu-thu-tuc_chi-tiet-tthc_detail_tthc_thutuc_5101.txt
Downloading page https://dichvucong.dienbien.gov.vn/thu-tuc-hanh-chinh/-/tra-cuu-thu-tuc/chi-tiet-tthc/detail_tthc/thutuc/5364...




Text saved to output/safe/dichvucong.dienbien.gov.vn_thu-tuc-hanh-chinh_-_tra-cuu-thu-tuc_chi-tiet-tthc_detail_tthc_thutuc_5364.txt
Downloading page https://dav.gov.vn/...
Error when dowwloading page https://dav.gov.vn/: HTTPSConnectionPool(host='dav.gov.vn', port=443): Read timed out. (read timeout=10)
Failed to download page from https://dav.gov.vn/
Downloading page https://dav.gov.vn/dich-vu-cong-c5.html...
Error when dowwloading page https://dav.gov.vn/dich-vu-cong-c5.html: HTTPSConnectionPool(host='dav.gov.vn', port=443): Read timed out. (read timeout=10)
Failed to download page from https://dav.gov.vn/dich-vu-cong-c5.html
Downloading page https://dav.gov.vn/dang-ki-thuoc-cn6.html...
Error when dowwloading page https://dav.gov.vn/dang-ki-thuoc-cn6.html: HTTPSConnectionPool(host='dav.gov.vn', port=443): Read timed out. (read timeout=10)
Failed to download page from https://dav.gov.vn/dang-ki-thuoc-cn6.html
Downloading page https://dav.gov.vn/cong-bo-bao-cao-tinh-hinh-vi-pham-cua-nh



Error when dowwloading page http://tiengchuong.vn/.gob.pe: 404 Client Error: Not Found for url: https://tiengchuong.chinhphu.vn/.gob.pe
Failed to download page from http://tiengchuong.vn/.gob.pe
Downloading page http://tiengchuong.vn/O-dau-the-nao/Binh-Phuoc-Khong-con-xa-phuong-thi-tran-trong-diem-ve-te-nan-mai-dam/37747.vgp...




Text saved to output/safe/tiengchuong.vn_O-dau-the-nao_Binh-Phuoc-Khong-con-xa-phuong-thi-tran-trong-diem-ve-te-nan-mai-dam_37747.vgp.txt
Downloading page http://tiengchuong.vn/documents/z82.vgp...




Text saved to output/safe/tiengchuong.vn_documents_z82.vgp.txt
Downloading page https://botrach.quangbinh.gov.vn/3cms/...




Text saved to output/safe/botrach.quangbinh.gov.vn_3cms_.txt
Downloading page https://www.quangbinh.gov.vn/3cms/...




Text saved to output/safe/www.quangbinh.gov.vn_3cms_.txt
Downloading page https://www.quangbinh.gov.vn/3cms/day-manh-trien-khai-cac-giai-phap-dieu-hanh-thuc-hien-nhiem-vu-tai-chinh---ngan-sach-nhung-than.htm...




Error when dowwloading page https://www.quangbinh.gov.vn/3cms/day-manh-trien-khai-cac-giai-phap-dieu-hanh-thuc-hien-nhiem-vu-tai-chinh---ngan-sach-nhung-than.htm: 404 Client Error:  for url: https://quangbinh.gov.vn/3cms/day-manh-trien-khai-cac-giai-phap-dieu-hanh-thuc-hien-nhiem-vu-tai-chinh---ngan-sach-nhung-than.htm
Failed to download page from https://www.quangbinh.gov.vn/3cms/day-manh-trien-khai-cac-giai-phap-dieu-hanh-thuc-hien-nhiem-vu-tai-chinh---ngan-sach-nhung-than.htm
Downloading page https://dichvucong.quangbinh.gov.vn/...




Text saved to output/safe/dichvucong.quangbinh.gov.vn_.txt
Downloading page https://dichvucong.quangbinh.gov.vn/tra-cuu-thong-tin...
Text saved to output/safe/dichvucong.quangbinh.gov.vn_tra-cuu-thong-tin.txt
Downloading page https://dichvucong.quangbinh.gov.vn/thu-tuc-hanh-chinh...




Text saved to output/safe/dichvucong.quangbinh.gov.vn_thu-tuc-hanh-chinh.txt
Downloading page http://quangbinh.gdt.gov.vn/wps/portal...




Text saved to output/safe/quangbinh.gdt.gov.vn_wps_portal.txt
Downloading page http://www.gdt.gov.vn/wps/portal...




Text saved to output/safe/www.gdt.gov.vn_wps_portal.txt
Downloading page http://www.mod.gov.vn/wps/portal...




Text saved to output/safe/www.mod.gov.vn_wps_portal.txt
Downloading page https://bvhttdl.gov.vn/...




Text saved to output/safe/bvhttdl.gov.vn_.txt
Downloading page https://bvhttdl.gov.vn/van-ban-quan-ly.htm...




Text saved to output/safe/bvhttdl.gov.vn_van-ban-quan-ly.htm.txt
Downloading page http://vanban.bvhttdl.gov.vn/Pages/login.zul...
Error when dowwloading page http://vanban.bvhttdl.gov.vn/Pages/login.zul: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Failed to download page from http://vanban.bvhttdl.gov.vn/Pages/login.zul
Downloading page https://bvhttdl.gov.vn/bien-gioi-bien-dao-viet-nam-t191644.htm...




Text saved to output/safe/bvhttdl.gov.vn_bien-gioi-bien-dao-viet-nam-t191644.htm.txt
Downloading page https://bvhttdl.gov.vn/trien-lam-anh-dat-va-nguoi-tren-que-huong-hai-doi-hoang-sa-qua-goc-nhin-cac-nha-nhiep-anh-7498.htm...




Text saved to output/safe/bvhttdl.gov.vn_trien-lam-anh-dat-va-nguoi-tren-que-huong-hai-doi-hoang-sa-qua-goc-nhin-cac-nha-nhiep-anh-7498.htm.txt
Downloading page https://bvhttdl.gov.vn/co-so-du-lieu-nganh/danh-muc-cac-di-san-van-hoa.htm...




Text saved to output/safe/bvhttdl.gov.vn_co-so-du-lieu-nganh_danh-muc-cac-di-san-van-hoa.htm.txt
Downloading page http://tnmtphutho.gov.vn/...
Error when dowwloading page http://tnmtphutho.gov.vn/: HTTPConnectionPool(host='tnmtphutho.gov.vn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1483b55510>, 'Connection to tnmtphutho.gov.vn timed out. (connect timeout=10)'))
Failed to download page from http://tnmtphutho.gov.vn/
Downloading page http://tnmtphutho.gov.vn/index.php?language=vi&nv=tthc...
Error when dowwloading page http://tnmtphutho.gov.vn/index.php?language=vi&nv=tthc: HTTPConnectionPool(host='tnmtphutho.gov.vn', port=80): Max retries exceeded with url: /index.php?language=vi&nv=tthc (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1483b55cc0>, 'Connection to tnmtphutho.gov.vn timed out. (connect timeout=10)'))
Failed to download page from http://tnmtphutho.gov.vn/index.p



Error when dowwloading page https://baomoi.com/.gob.pe: 403 Client Error: Forbidden for url: https://baomoi.com/.gob.pe
Failed to download page from https://baomoi.com/.gob.pe
Downloading page https://vietnamnet.vn/.gob.pe...




Error when dowwloading page https://vietnamnet.vn/.gob.pe: 404 Client Error: Not Found for url: https://vietnamnet.vn/.gob.pe
Failed to download page from https://vietnamnet.vn/.gob.pe
Downloading page https://tuoitre.vn/.gob.pe...




Error when dowwloading page https://tuoitre.vn/.gob.pe: 404 Client Error: Not Found for url: https://tuoitre.vn/.gob.pe
Failed to download page from https://tuoitre.vn/.gob.pe
Downloading page https://tiki.vn/.gob.pe...
Error when dowwloading page https://tiki.vn/.gob.pe: 403 Client Error: Forbidden for url: https://tiki.vn/.gob.pe
Failed to download page from https://tiki.vn/.gob.pe
Downloading page https://dantri.com.vn/...




Text saved to output/safe/dantri.com.vn_.txt
Downloading page https://zalo.me/pc.gob.pe...




Text saved to output/safe/zalo.me_pc.gob.pe.txt
Downloading page https://thethao247.vn/.gob.pe...




Error when dowwloading page https://thethao247.vn/.gob.pe: 404 Client Error: Not Found for url: https://thethao247.vn/404.html
Failed to download page from https://thethao247.vn/.gob.pe
Downloading page https://www.24h.com.vn/...




Text saved to output/safe/www.24h.com.vn_.txt
Downloading page https://vtv.vn/.gob.pe...




Error when dowwloading page https://vtv.vn/.gob.pe: HTTPSConnectionPool(host='vtv.vn', port=443): Read timed out. (read timeout=10)
Failed to download page from https://vtv.vn/.gob.pe
Downloading page https://shopee.vn/.gob.pe...
Text saved to output/safe/shopee.vn_.gob.pe.txt
Downloading page https://zingnews.vn/.gob.pe...




Error when dowwloading page https://zingnews.vn/.gob.pe: 403 Client Error: Forbidden for url: https://znews.vn/.gob.pe
Failed to download page from https://zingnews.vn/.gob.pe
Downloading page https://zingnews.vn/sach-hay.html...




Text saved to output/safe/zingnews.vn_sach-hay.html.txt
Downloading page https://vnexpress.net/.gob.pe...
Error when dowwloading page https://vnexpress.net/.gob.pe: 406 Client Error: Not Acceptable for url: https://vnexpress.net/.gob.pe
Failed to download page from https://vnexpress.net/.gob.pe
Downloading page https://vnexpress.net/ha-noi-dung-cac-hoat-dong-dong-nguoi-4138237.html...




Text saved to output/safe/vnexpress.net_ha-noi-dung-cac-hoat-dong-dong-nguoi-4138237.html.txt
Downloading page https://thanhnien.vn/.gob.pe...
Error when dowwloading page https://thanhnien.vn/.gob.pe: 403 Client Error: Forbidden for url: https://thanhnien.vn/.gob.pe
Failed to download page from https://thanhnien.vn/.gob.pe
Downloading page https://bongdanet.vn/.gob.pe...




Error when dowwloading page https://bongdanet.vn/.gob.pe: HTTPSConnectionPool(host='bongdanet.vn', port=443): Max retries exceeded with url: /.gob.pe (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e14839d73a0>: Failed to resolve 'bongdanet.vn' ([Errno -2] Name or service not known)"))
Failed to download page from https://bongdanet.vn/.gob.pe
Downloading page https://www.lazada.vn/...
Text saved to output/safe/www.lazada.vn_.txt
Downloading page https://www.nguyenkim.com/...




Text saved to output/safe/www.nguyenkim.com_.txt
Downloading page https://thanhnien.vn/suc-khoe/chang-trai-roi-tu-tang-2-dam-vao-hang-rao-sat-duoc-cuu-song-1257855.html...




Text saved to output/safe/thanhnien.vn_suc-khoe_chang-trai-roi-tu-tang-2-dam-vao-hang-rao-sat-duoc-cuu-song-1257855.html.txt
Downloading page https://www.2banh.vn/...




Text saved to output/safe/www.2banh.vn_.txt
Downloading page https://www.thuocbietduoc.com.vn/home/...




Text saved to output/safe/www.thuocbietduoc.com.vn_home_.txt
Downloading page http://xemphimplus.net/.gob.pe...
No valid text from http://xemphimplus.net/.gob.pe
Downloading page https://soha.vn/.gob.pe...




Error when dowwloading page https://soha.vn/.gob.pe: 403 Client Error: Forbidden for url: https://soha.vn/.gob.pe
Failed to download page from https://soha.vn/.gob.pe
Downloading page https://cafef.vn/.gob.pe...
Error when dowwloading page https://cafef.vn/.gob.pe: 403 Client Error: Forbidden for url: https://cafef.vn/.gob.pe
Failed to download page from https://cafef.vn/.gob.pe
Downloading page https://taimienphi.vn/.gob.pe...




Error when dowwloading page https://taimienphi.vn/.gob.pe: 404 Client Error: Not Found for url: https://taimienphi.vn/.gob.pe
Failed to download page from https://taimienphi.vn/.gob.pe
Downloading page http://www.nettruyen.com/...
Error when dowwloading page http://www.nettruyen.com/: HTTPConnectionPool(host='www.nettruyen.com', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14830bcb20>: Failed to resolve 'www.nettruyen.com' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.nettruyen.com/
Downloading page https://tinhte.vn/.gob.pe...




Error when dowwloading page https://tinhte.vn/.gob.pe: 404 Client Error: Not Found for url: https://tinhte.vn/.gob.pe
Failed to download page from https://tinhte.vn/.gob.pe
Downloading page https://91.com.vn/...
Error when dowwloading page https://91.com.vn/: HTTPSConnectionPool(host='91.com.vn', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e14830be0b0>, 'Connection to 91.com.vn timed out. (connect timeout=10)'))
Failed to download page from https://91.com.vn/
Downloading page https://ngoisao.net/.gob.pe...




Error when dowwloading page https://ngoisao.net/.gob.pe: 406 Client Error: Not Acceptable for url: https://ngoisao.vnexpress.net/.gob.pe
Failed to download page from https://ngoisao.net/.gob.pe
Downloading page https://zingmp3.vn/.gob.pe...




Text saved to output/safe/zingmp3.vn_.gob.pe.txt
Downloading page https://petrotimes.vn/.gob.pe...
Error when dowwloading page https://petrotimes.vn/.gob.pe: 404 Client Error: Not Found for url: https://petrotimes.vn/.gob.pe
Failed to download page from https://petrotimes.vn/.gob.pe
Downloading page https://quantrimang.com/.gob.pe...




Error when dowwloading page https://quantrimang.com/.gob.pe: 400 Client Error: Bad Request for url: https://quantrimang.com/s?q=gob
Failed to download page from https://quantrimang.com/.gob.pe
Downloading page https://portal.vietcombank.com.vn/Pages/Home.aspx?devicechannel=default...




Text saved to output/safe/portal.vietcombank.com.vn_Pages_Home.aspx?devicechannel=default.txt
Downloading page https://www.sendo.vn/...




Text saved to output/safe/www.sendo.vn_.txt
Downloading page https://bomboxtv.com/.gob.pe...
Error when dowwloading page https://bomboxtv.com/.gob.pe: HTTPSConnectionPool(host='bomboxtv.com', port=443): Max retries exceeded with url: /.gob.pe (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e148408d390>: Failed to resolve 'bomboxtv.com' ([Errno -2] Name or service not known)"))
Failed to download page from https://bomboxtv.com/.gob.pe
Downloading page https://coccoc.com/.gob.pe...




Error when dowwloading page https://coccoc.com/.gob.pe: 404 Client Error: Not Found for url: https://coccoc.com/.gob.pe
Failed to download page from https://coccoc.com/.gob.pe
Downloading page https://www.nhaccuatui.com/...




Text saved to output/safe/www.nhaccuatui.com_.txt
Downloading page https://www.yes24.vn/...
Text saved to output/safe/www.yes24.vn_.txt
Downloading page https://nld.com.vn/...




Text saved to output/safe/nld.com.vn_.txt
Downloading page http://accesstrade.vn/.gob.pe...




Text saved to output/safe/accesstrade.vn_.gob.pe.txt
Downloading page https://afamily.vn/.gob.pe...




Error when dowwloading page https://afamily.vn/.gob.pe: 403 Client Error: Forbidden for url: https://afamily.vn/.gob.pe
Failed to download page from https://afamily.vn/.gob.pe
Downloading page https://voz.vn/.gob.pe...
Error when dowwloading page https://voz.vn/.gob.pe: 403 Client Error: Forbidden for url: https://voz.vn/.gob.pe
Failed to download page from https://voz.vn/.gob.pe
Downloading page https://plo.vn/.gob.pe...




Error when dowwloading page https://plo.vn/.gob.pe: 404 Client Error: Not Found for url: https://plo.vn/404
Failed to download page from https://plo.vn/.gob.pe
Downloading page https://kienthuc.net.vn/...




Text saved to output/safe/kienthuc.net.vn_.txt
Downloading page http://www.bongda.com.vn/...




Text saved to output/safe/www.bongda.com.vn_.txt
Downloading page https://www.chotot.com/...
Text saved to output/safe/www.chotot.com_.txt
Downloading page https://thuvienphapluat.vn/.gob.pe...
Error when dowwloading page https://thuvienphapluat.vn/.gob.pe: 403 Client Error: Forbidden for url: https://thuvienphapluat.vn/.gob.pe
Failed to download page from https://thuvienphapluat.vn/.gob.pe
Downloading page https://download.com.vn/...




Text saved to output/safe/download.com.vn_.txt
Downloading page https://www.tienphong.vn/...




Text saved to output/safe/www.tienphong.vn_.txt
Downloading page https://vietjack.com/.gob.pe...




Error when dowwloading page https://vietjack.com/.gob.pe: 404 Client Error:  for url: https://vietjack.com/.gob.pe
Failed to download page from https://vietjack.com/.gob.pe
Downloading page https://vndoc.com/.gob.pe...
Error when dowwloading page https://vndoc.com/.gob.pe: 400 Client Error: Bad Request for url: https://vndoc.com/s?q=gob
Failed to download page from https://vndoc.com/.gob.pe
Downloading page https://gamek.vn/.gob.pe...




Error when dowwloading page https://gamek.vn/.gob.pe: 404 Client Error: Not Found for url: https://gamek.vn/.gob.pe
Failed to download page from https://gamek.vn/.gob.pe
Downloading page https://baodautu.vn/.gob.pe...
Error when dowwloading page https://baodautu.vn/.gob.pe: 404 Client Error: Not Found for url: https://baodautu.vn/.gob.pe
Failed to download page from https://baodautu.vn/.gob.pe
Downloading page https://saostar.vn/.gob.pe...




Error when dowwloading page https://saostar.vn/.gob.pe: 403 Client Error: Forbidden for url: https://www.saostar.vn/.gob.pe
Failed to download page from https://saostar.vn/.gob.pe
Downloading page https://fptshop.com.vn/...
Error when dowwloading page https://fptshop.com.vn/: 403 Client Error: Forbidden for url: https://fptshop.com.vn/
Failed to download page from https://fptshop.com.vn/
Downloading page http://bongdaso.com/main.aspx...
Text saved to output/safe/bongdaso.com_main.aspx.txt
Downloading page https://docbao.vn/.gob.pe...




Error when dowwloading page https://docbao.vn/.gob.pe: 404 Client Error: Not Found for url: https://docnhanh.vn/.gob.pe
Failed to download page from https://docbao.vn/.gob.pe
Downloading page https://vov.vn/.gob.pe...




Error when dowwloading page https://vov.vn/.gob.pe: 403 Client Error: Forbidden for url: https://vov.vn/.gob.pe
Failed to download page from https://vov.vn/.gob.pe
Downloading page https://batdongsan.com.vn/...
Error when dowwloading page https://batdongsan.com.vn/: 403 Client Error: Forbidden for url: https://batdongsan.com.vn/
Failed to download page from https://batdongsan.com.vn/
Downloading page http://xemvtvnet.net/.gob.pe...
Error when dowwloading page http://xemvtvnet.net/.gob.pe: HTTPConnectionPool(host='xemvtvnet.net', port=80): Max retries exceeded with url: /.gob.pe (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1486ad6ec0>: Failed to resolve 'xemvtvnet.net' ([Errno -2] Name or service not known)"))
Failed to download page from http://xemvtvnet.net/.gob.pe
Downloading page https://www.yan.vn/...




Error when dowwloading page https://www.yan.vn/: 403 Client Error: Forbidden for url: https://www.yan.vn/
Failed to download page from https://www.yan.vn/
Downloading page https://truyenfull.vn/.gob.pe...




Error when dowwloading page https://truyenfull.vn/.gob.pe: 404 Client Error: Not Found for url: https://truyenfull.io/.gob.pe/
Failed to download page from https://truyenfull.vn/.gob.pe
Downloading page https://fptplay.vn/.gob.pe...




Error when dowwloading page https://fptplay.vn/.gob.pe: 404 Client Error: Not Found for url: https://fptplay.vn/.gob.pe
Failed to download page from https://fptplay.vn/.gob.pe
Downloading page https://bongdaplus.vn/.gob.pe...




Text saved to output/safe/bongdaplus.vn_.gob.pe.txt
Downloading page https://phongvu.vn/.gob.pe...




Error when dowwloading page https://phongvu.vn/.gob.pe: 404 Client Error: Not Found for url: https://phongvu.vn/.gob.pe
Failed to download page from https://phongvu.vn/.gob.pe
Downloading page https://cafebiz.vn/.gob.pe...




Error when dowwloading page https://cafebiz.vn/.gob.pe: 403 Client Error: Forbidden for url: https://cafebiz.vn/.gob.pe
Failed to download page from https://cafebiz.vn/.gob.pe
Downloading page https://www.vietnamworks.com/...
Text saved to output/safe/www.vietnamworks.com_.txt
Downloading page https://luatvietnam.vn/.gob.pe...
Error when dowwloading page https://luatvietnam.vn/.gob.pe: 403 Client Error: Forbidden for url: https://luatvietnam.vn/.gob.pe
Failed to download page from https://luatvietnam.vn/.gob.pe
Downloading page https://cellphones.com.vn/...




Text saved to output/safe/cellphones.com.vn_.txt
Downloading page https://genk.vn/.gob.pe...




Error when dowwloading page https://genk.vn/.gob.pe: 403 Client Error: Forbidden for url: https://genk.vn/.gob.pe
Failed to download page from https://genk.vn/.gob.pe
Downloading page https://tinnhanhchungkhoan.vn/.gob.pe...




Error when dowwloading page https://tinnhanhchungkhoan.vn/.gob.pe: 404 Client Error: Not Found for url: https://www.tinnhanhchungkhoan.vn/.gob.pe
Failed to download page from https://tinnhanhchungkhoan.vn/.gob.pe
Downloading page https://www.vtvgiaitri.vn/...
Error when dowwloading page https://www.vtvgiaitri.vn/: HTTPSConnectionPool(host='www.vtvgiaitri.vn', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7e14842f5c00>: Failed to establish a new connection: [Errno 111] Connection refused'))
Failed to download page from https://www.vtvgiaitri.vn/
Downloading page https://www.otofun.net/forums/...




Text saved to output/safe/www.otofun.net_forums_.txt
Downloading page https://www.garena.vn/...




Text saved to output/safe/www.garena.vn_.txt
Downloading page https://baoxaydung.com.vn/...
Text saved to output/safe/baoxaydung.com.vn_.txt
Downloading page https://www.vietnamplus.vn/...




Text saved to output/safe/www.vietnamplus.vn_.txt
Downloading page http://truyenqq.com/.gob.pe...
Error when dowwloading page http://truyenqq.com/.gob.pe: 403 Client Error: Forbidden for url: http://truyenqq.com/.gob.pe
Failed to download page from http://truyenqq.com/.gob.pe
Downloading page https://www.vietjetair.com/Sites/Web/vi-VN/Home...
Text saved to output/safe/www.vietjetair.com_Sites_Web_vi-VN_Home.txt
Downloading page https://vietstock.vn/.gob.pe...




Error when dowwloading page https://vietstock.vn/.gob.pe: 403 Client Error: Forbidden for url: https://vietstock.vn/.gob.pe
Failed to download page from https://vietstock.vn/.gob.pe
Downloading page https://www.topcv.vn/...
Error when dowwloading page https://www.topcv.vn/: 403 Client Error: Forbidden for url: https://www.topcv.vn/
Failed to download page from https://www.topcv.vn/
Downloading page https://vtc.vn/.gob.pe...




Error when dowwloading page https://vtc.vn/.gob.pe: 404 Client Error: Not Found for url: https://vtcnews.vn/error.html
Failed to download page from https://vtc.vn/.gob.pe
Downloading page https://baodatviet.vn/.gob.pe...
Error when dowwloading page https://baodatviet.vn/.gob.pe: 404 Client Error: Not Found for url: https://baodatviet.vn/.gob.pe
Failed to download page from https://baodatviet.vn/.gob.pe
Downloading page http://vinmec.com/vi/.gob.pe...




Error when dowwloading page http://vinmec.com/vi/.gob.pe: 404 Client Error: Not Found for url: https://www.vinmec.com/vie/khong-tim-thay/
Failed to download page from http://vinmec.com/vi/.gob.pe
Downloading page https://careerbuilder.vn/.gob.pe...




Error when dowwloading page https://careerbuilder.vn/.gob.pe: 410 Client Error: Gone for url: https://careerviet.vn/.gob.pe
Failed to download page from https://careerbuilder.vn/.gob.pe
Downloading page https://viettel.vn/.gob.pe...




Error when dowwloading page https://viettel.vn/.gob.pe: 404 Client Error: Not Found for url: https://viettel.vn/.gob.pe
Failed to download page from https://viettel.vn/.gob.pe
Downloading page http://fanpage.gr/family/gineka/%ce%b1%cf%85%cf%84%ce%bf%ce%af-%ce%b5%ce%af%ce%bd%ce%b1%ce%b9-%ce%bf%ce%b9-13-%cf%89%cf%81%ce%b1%ce%b9%cf%8c%cf%84%ce%b5%cf%81%ce%bf%ce%b9-%ce%ad%ce%bb%ce%bb%ce%b7%ce%bd%ce%b5%cf%82-%ce%b1%ce%bd-2/.gob.pe...




Text saved to output/safe/fanpage.gr_family_gineka_%ce%b1%cf%85%cf%84%ce%bf%ce%af-%ce%b5%ce%af%ce%bd%ce%b1%ce%b9-%ce%bf%ce%b9-13-%cf%89%cf%81%ce%b1%ce%b9%cf%8c%cf%84%ce%b5%cf%81%ce%bf%ce%b9-%ce%ad%ce%bb%ce%bb%ce%b7%ce%bd%ce%b5.txt
Downloading page http://worldoftanks.ru/ru/content/guide/payments_instruction/mobile-payments-rostelekom-ural-utel/.gob.pe...




Text saved to output/safe/worldoftanks.ru_ru_content_guide_payments_instruction_mobile-payments-rostelekom-ural-utel_.gob.pe.txt
Downloading page https://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%A2%E3%83%8B%E3%83%A1%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FwjfL4fhPp1lPxTZb+%E3%82%A2%E3%83%AA%E3%82%B5%E5%86%8D%E3%81%B3%EF%BC%81%E3%82%A8%E3%83%AD%E3%82%A2%E3%83%8B%E3%83%A1+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%A2%E3%83%8B%E3%83%A1%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FwjfL4fhPp1lPxTZb+%E3%82%A2%E3%83%AA%E3%82%B5%E5%86%8D%E3%81%B.txt
Downloading page http://correios.com.br/para-sua-empresa/comunicacao/certificados-digitais/ajuda-interativa/informacoes-sobre-a-baixa-dos-certificados-a1...




Text saved to output/safe/correios.com.br_para-sua-empresa_comunicacao_certificados-digitais_ajuda-interativa_informacoes-sobre-a-baixa-dos-certificados-a1.txt
Downloading page http://qz.com/403774/quartz-daily-brief-japanese-military-verizon-aol-sporty-branson-saucy-swedes/.gob.pe...




Error when dowwloading page http://qz.com/403774/quartz-daily-brief-japanese-military-verizon-aol-sporty-branson-saucy-swedes/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/403774/quartz-daily-brief-japanese-military-verizon-aol-sporty-branson-saucy-swedes/.gob.pe
Failed to download page from http://qz.com/403774/quartz-daily-brief-japanese-military-verizon-aol-sporty-branson-saucy-swedes/.gob.pe
Downloading page http://mylust.com/videos/232790/hentai-slut-with-big-juicy-tits-gets-fucked-doggy-style/.gob.pe...




Error when dowwloading page http://mylust.com/videos/232790/hentai-slut-with-big-juicy-tits-gets-fucked-doggy-style/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/232790/hentai-slut-with-big-juicy-tits-gets-fucked-doggy-style/.gob.pe
Failed to download page from http://mylust.com/videos/232790/hentai-slut-with-big-juicy-tits-gets-fucked-doggy-style/.gob.pe
Downloading page http://mic.com/articles/117758/mtv-just-gave-these-artists-a-huge-platform-to-bring-attention-to-indigenous-rights.gob.pe...




Error when dowwloading page http://mic.com/articles/117758/mtv-just-gave-these-artists-a-huge-platform-to-bring-attention-to-indigenous-rights.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/117758/mtv-just-gave-these-artists-a-huge-platform-to-bring-attention-to-indigenous-rights.gob.pe
Failed to download page from http://mic.com/articles/117758/mtv-just-gave-these-artists-a-huge-platform-to-bring-attention-to-indigenous-rights.gob.pe
Downloading page http://grantland.com/the-triangle/2015-mlb-over-under-bets-chicago-cubs-houston-astros-oakland-as-tampa-bay-rays/.gob.pe...




Error when dowwloading page http://grantland.com/the-triangle/2015-mlb-over-under-bets-chicago-cubs-houston-astros-oakland-as-tampa-bay-rays/.gob.pe: 404 Client Error: Not Found for url: https://grantland.com/the-triangle/2015-mlb-over-under-bets-chicago-cubs-houston-astros-oakland-as-tampa-bay-rays/.gob.pe
Failed to download page from http://grantland.com/the-triangle/2015-mlb-over-under-bets-chicago-cubs-houston-astros-oakland-as-tampa-bay-rays/.gob.pe
Downloading page http://noticias.uol.com.br/album/bbc/2015/04/01/obras-de-anel-rodoviario-de-r-68-bilhoes-afetam-rios-e-nascentes-em-sp.htm...
Error when dowwloading page http://noticias.uol.com.br/album/bbc/2015/04/01/obras-de-anel-rodoviario-de-r-68-bilhoes-afetam-rios-e-nascentes-em-sp.htm: HTTPConnectionPool(host='noticias.uol.com.br', port=80): Max retries exceeded with url: /album/bbc/2015/04/01/obras-de-anel-rodoviario-de-r-68-bilhoes-afetam-rios-e-nascentes-em-sp.htm (Caused by NameResolutionError("<urllib3.connection.HTTPConne



Text saved to output/safe/kienthuc.net.vn_diem-thi_diem-chuan-dai-hoc-bach-khoa-ha-noi-nam-2014-487293.html.txt
Downloading page http://pikabu.ru/tag/%D0%BC%D0%B8%D0%BD%D0%B8%D0%BC%D0%B0%D0%BB%D0%B8%D0%B7%D0%BC/hot.gob.pe...




Error when dowwloading page http://pikabu.ru/tag/%D0%BC%D0%B8%D0%BD%D0%B8%D0%BC%D0%B0%D0%BB%D0%B8%D0%B7%D0%BC/hot.gob.pe: 502 Server Error: Bad Gateway for url: https://pikabu.ru/tag/%D0%BC%D0%B8%D0%BD%D0%B8%D0%BC%D0%B0%D0%BB%D0%B8%D0%B7%D0%BC/hot.gob.pe
Failed to download page from http://pikabu.ru/tag/%D0%BC%D0%B8%D0%BD%D0%B8%D0%BC%D0%B0%D0%BB%D0%B8%D0%B7%D0%BC/hot.gob.pe
Downloading page http://gizmodo.com/one-year-after-the-first-snowden-leak-whats-really-cha-1586213046/all.gob.pe...




Error when dowwloading page http://gizmodo.com/one-year-after-the-first-snowden-leak-whats-really-cha-1586213046/all.gob.pe: 404 Client Error: Not Found for url: https://gizmodo.com/one-year-after-the-first-snowden-leak-whats-really-cha-1586213046/all.gob.pe
Failed to download page from http://gizmodo.com/one-year-after-the-first-snowden-leak-whats-really-cha-1586213046/all.gob.pe
Downloading page http://uproxx.com/dimemag/2015/05/lebron-instagram-like-wants-to-leave-school-to-be-model-she-shouldnt/.gob.pe...




Text saved to output/safe/uproxx.com_dimemag_2015_05_lebron-instagram-like-wants-to-leave-school-to-be-model-she-shouldnt_.gob.pe.txt
Downloading page http://elitedaily.com/entertainment/film/uncle-wears-princess-dress-movie-niece/976238/.gob.pe...
Error when dowwloading page http://elitedaily.com/entertainment/film/uncle-wears-princess-dress-movie-niece/976238/.gob.pe: 404 Client Error: Not Found for url: https://www.elitedaily.com/entertainment/film/uncle-wears-princess-dress-movie-niece/976238/.gob.pe
Failed to download page from http://elitedaily.com/entertainment/film/uncle-wears-princess-dress-movie-niece/976238/.gob.pe
Downloading page https://medium.com/human-parts/the-laziest-coming-out-story-you-ve-ever-heard-a9f877acdd14?source=has-recommended.gob.pe...




Text saved to output/safe/medium.com_human-parts_the-laziest-coming-out-story-you-ve-ever-heard-a9f877acdd14?source=has-recommended.gob.pe.txt
Downloading page http://qz.com/371908/the-greeks-money-already-stuffed-under-mattresses-may-learn-their-fate-this-weekend/.gob.pe...




Error when dowwloading page http://qz.com/371908/the-greeks-money-already-stuffed-under-mattresses-may-learn-their-fate-this-weekend/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/371908/the-greeks-money-already-stuffed-under-mattresses-may-learn-their-fate-this-weekend/.gob.pe
Failed to download page from http://qz.com/371908/the-greeks-money-already-stuffed-under-mattresses-may-learn-their-fate-this-weekend/.gob.pe
Downloading page http://mixi.jp/share.pl?u=http://alfalfalfa.com/articles/117917.html&k=e09afc106e473491952cfe324aa83aabe5b07446...




Text saved to output/safe/mixi.jp_share.pl?u=http:__alfalfalfa.com_articles_117917.html&k=e09afc106e473491952cfe324aa83aabe5b07446.txt
Downloading page http://kakaku.com/kaden/food-processor/ranking_2129/pricedown/div-gpt-ad-k/header_text.gob.pe...




Error when dowwloading page http://kakaku.com/kaden/food-processor/ranking_2129/pricedown/div-gpt-ad-k/header_text.gob.pe: 404 Client Error: Not Found. for url: https://kakaku.com/kaden/food-processor/ranking_2129/pricedown/div-gpt-ad-k/header_text.gob.pe
Failed to download page from http://kakaku.com/kaden/food-processor/ranking_2129/pricedown/div-gpt-ad-k/header_text.gob.pe
Downloading page http://mylust.com/videos/69734/a-small-cock-of-that-guy-is-still-good-for-this-vintage-whore/.gob.pe...
Error when dowwloading page http://mylust.com/videos/69734/a-small-cock-of-that-guy-is-still-good-for-this-vintage-whore/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/69734/a-small-cock-of-that-guy-is-still-good-for-this-vintage-whore/.gob.pe
Failed to download page from http://mylust.com/videos/69734/a-small-cock-of-that-guy-is-still-good-for-this-vintage-whore/.gob.pe
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_util&wr_id=15244&k=%ED%8F%AC%ED%86%A



Error when dowwloading page https://hollywoodlife.com/2015/05/14/new-york-rangers-beat-washington-capitals-overtime-game-7-fan-reaction/.gob.pe: 404 Client Error: Not Found for url: https://hollywoodlife.com/2015/05/14/new-york-rangers-beat-washington-capitals-overtime-game-7-fan-reaction/.gob.pe
Failed to download page from https://hollywoodlife.com/2015/05/14/new-york-rangers-beat-washington-capitals-overtime-game-7-fan-reaction/.gob.pe
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FmAsWlaomtJkLLbgk+%E6%95%99%E3%81%88%E5%AD%90%E3%81%AF%E7%BE%8E%E3%83%9E%E3%83%B3%E5%A5%B3%E5%AD%90%E6%A0%A1%E7%94%9F+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FmAsWlaomtJkLLbgk+%E6%95%99%E3%81%88%E5%AD%90%E3%81%AF%E7%BE%8.txt
Downloading page http://haberler.com/galatasaray-mersin-idman-yurdu-nu-1-0-yendi-7303426-haberi-yorumlari/.gob.pe...




Error when dowwloading page http://haberler.com/galatasaray-mersin-idman-yurdu-nu-1-0-yendi-7303426-haberi-yorumlari/.gob.pe: 404 Client Error: Not Found for url: https://www.haberler.com/galatasaray-mersin-idman-yurdu-nu-1-0-yendi-7303426-haberi/.gob.pe/
Failed to download page from http://haberler.com/galatasaray-mersin-idman-yurdu-nu-1-0-yendi-7303426-haberi-yorumlari/.gob.pe
Downloading page http://cheezburger.com/8440974848/funny-fail-photo-portrait-bobby-jindal-politics?ref=rightarrow&siteId=1264.gob.pe...




Text saved to output/safe/cheezburger.com_8440974848_funny-fail-photo-portrait-bobby-jindal-politics?ref=rightarrow&siteId=1264.gob.pe.txt
Downloading page http://dribbble.com/shots/1269523-Sperant-mobile-sidebar-concepts/attachments/174429.gob.pe...




Text saved to output/safe/dribbble.com_shots_1269523-Sperant-mobile-sidebar-concepts_attachments_174429.gob.pe.txt
Downloading page http://nguyentandung.org/141-ha-noi-phat-hien-nhanh-2-xe-di-muon-la-xe-trom-cap.html...




Text saved to output/safe/nguyentandung.org_141-ha-noi-phat-hien-nhanh-2-xe-di-muon-la-xe-trom-cap.html.txt
Downloading page http://superuser.com/questions/811630/how-can-i-limit-the-speed-of-the-chasis-fan-on-an-asus-m5a78l-m-lx-plus.gob.pe...




Text saved to output/safe/superuser.com_questions_811630_how-can-i-limit-the-speed-of-the-chasis-fan-on-an-asus-m5a78l-m-lx-plus.gob.pe.txt
Downloading page http://elitedaily.com/humor/guys-donkey-prank-women-thinking-being-catcalled-video/922767/.gob.pe...




Error when dowwloading page http://elitedaily.com/humor/guys-donkey-prank-women-thinking-being-catcalled-video/922767/.gob.pe: 404 Client Error: Not Found for url: https://www.elitedaily.com/humor/guys-donkey-prank-women-thinking-being-catcalled-video/922767/.gob.pe
Failed to download page from http://elitedaily.com/humor/guys-donkey-prank-women-thinking-being-catcalled-video/922767/.gob.pe
Downloading page http://techcrunch.com/2013/06/09/first-cut-pro-just-made-post-production-collaborative-video-editing-much-less-painful/.gob.pe...




Error when dowwloading page http://techcrunch.com/2013/06/09/first-cut-pro-just-made-post-production-collaborative-video-editing-much-less-painful/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2013/06/09/first-cut-pro-just-made-post-production-collaborative-video-editing-much-less-painful/.gob.pe
Failed to download page from http://techcrunch.com/2013/06/09/first-cut-pro-just-made-post-production-collaborative-video-editing-much-less-painful/.gob.pe
Downloading page http://mic.com/articles/97512/here-s-the-one-simple-reason-why-we-need-more-openly-bisexual-characters-on-television.gob.pe...




Error when dowwloading page http://mic.com/articles/97512/here-s-the-one-simple-reason-why-we-need-more-openly-bisexual-characters-on-television.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/97512/here-s-the-one-simple-reason-why-we-need-more-openly-bisexual-characters-on-television.gob.pe
Failed to download page from http://mic.com/articles/97512/here-s-the-one-simple-reason-why-we-need-more-openly-bisexual-characters-on-television.gob.pe
Downloading page http://ecnavi.jp/redirect/?url=http://ad-4091.affit.jp/c.ts/35n.2/-/1g.html?mu=%user_id%...




Text saved to output/safe/ecnavi.jp_redirect_?url=http:__ad-4091.affit.jp_c.ts_35n.2_-_1g.html?mu=%user_id%.txt
Downloading page http://mashable.com/category/spacex/2014/04/25/elon-musk-spacex-booster-stage-atlantic.gob.pe...




Text saved to output/safe/mashable.com_category_spacex_2014_04_25_elon-musk-spacex-booster-stage-atlantic.gob.pe.txt
Downloading page http://serverfault.com/questions/674326/remote-location-management-copy-and-install-large-software-updates-to-50-lan.gob.pe...




Text saved to output/safe/serverfault.com_questions_674326_remote-location-management-copy-and-install-large-software-updates-to-50-lan.gob.pe.txt
Downloading page http://nguyentandung.org/vu-viec-tranh-go-sua-o-ha-noi-xu-ly-cham-do-doi-interpol-tra-loi.html...




Text saved to output/safe/nguyentandung.org_vu-viec-tranh-go-sua-o-ha-noi-xu-ly-cham-do-doi-interpol-tra-loi.html.txt
Downloading page http://udn.com/news/story/7321/900837-%E9%AB%98%E9%9B%842%E8%AD%B0%E5%93%A1%E7%95%B6%E9%81%B8%E7%84%A1%E6%95%88%E4%B9%8B%E8%A8%B4-1%E5%AF%A9%E9%A7%81%E5%9B%9E.gob.pe...
No valid text from http://udn.com/news/story/7321/900837-%E9%AB%98%E9%9B%842%E8%AD%B0%E5%93%A1%E7%95%B6%E9%81%B8%E7%84%A1%E6%95%88%E4%B9%8B%E8%A8%B4-1%E5%AF%A9%E9%A7%81%E5%9B%9E.gob.pe
Downloading page https://twitter.com/share?text=%D0%A4%D0%BE%D1%82%D0%BE%D0%BF%D0%BE%D0%B4%D0%B1%D0%BE%D1%80%D0%BA%D0%B0+%D0%B7%D0%B0+29.04.2015&url=http%3A%2F%2Ffishki.net%2Fphoto%2F1517170-fotopodborka-za-29042015.html%2Fgallery-1952358%2F...




Text saved to output/safe/twitter.com_share?text=%D0%A4%D0%BE%D1%82%D0%BE%D0%BF%D0%BE%D0%B4%D0%B1%D0%BE%D1%80%D0%BA%D0%B0+%D0%B7%D0%B0+29.04.2015&url=http%3A%2F%2Ffishki.net%2Fphoto%2F1517170-fotopodborka-za-29042015.html%2Fga.txt
Downloading page http://kotaku.com/the-classic-pc-games-you-must-play-1246823468/1440923931/@kirkhamilton.gob.pe...




Text saved to output/safe/kotaku.com_the-classic-pc-games-you-must-play-1246823468_1440923931_@kirkhamilton.gob.pe.txt
Downloading page http://atwiki.jp/wiki/%E3%83%89%E3%83%A9%E3%82%B4%E3%83%B3%E3%82%BA%E3%83%89%E3%82%B0%E3%83%9E%20%E3%81%8F%E3%81%88%E3%81%99%E3%81%A8.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E3%83%89%E3%83%A9%E3%82%B4%E3%83%B3%E3%82%BA%E3%83%89%E3%82%B0%E3%83%9E%20%E3%81%8F%E3%81%88%E3%81%99%E3%81%A8.gob.pe.txt
Downloading page http://superuser.com/questions/812639/troubleshooting-hardware-with-limited-resources.gob.pe...




Text saved to output/safe/superuser.com_questions_812639_troubleshooting-hardware-with-limited-resources.gob.pe.txt
Downloading page http://nesn.com/2014/08/tom-brady-patriots-involved-in-competitive-situations-vs-redskins-video/.gob.pe...




Error when dowwloading page http://nesn.com/2014/08/tom-brady-patriots-involved-in-competitive-situations-vs-redskins-video/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2014/08/tom-brady-patriots-involved-in-competitive-situations-vs-redskins-video/.gob.pe
Failed to download page from http://nesn.com/2014/08/tom-brady-patriots-involved-in-competitive-situations-vs-redskins-video/.gob.pe
Downloading page http://slashdot.org/submission/2011137/the-optimum-attack-rate-for-ssh-bruteforce-1-per-10-seconds.gob.pe...




Text saved to output/safe/slashdot.org_submission_2011137_the-optimum-attack-rate-for-ssh-bruteforce-1-per-10-seconds.gob.pe.txt
Downloading page http://elitedaily.com/news/world/teens-dying-wish-to-donate-organs-has-affected-more-than-50-people-video/1025896/.gob.pe...




Error when dowwloading page http://elitedaily.com/news/world/teens-dying-wish-to-donate-organs-has-affected-more-than-50-people-video/1025896/.gob.pe: 404 Client Error: Not Found for url: https://www.elitedaily.com/news/world/teens-dying-wish-to-donate-organs-has-affected-more-than-50-people-video/1025896/.gob.pe
Failed to download page from http://elitedaily.com/news/world/teens-dying-wish-to-donate-organs-has-affected-more-than-50-people-video/1025896/.gob.pe
Downloading page http://mic.com/articles/89365/columbia-students-are-taking-a-radical-step-to-combat-rape.gob.pe...




Error when dowwloading page http://mic.com/articles/89365/columbia-students-are-taking-a-radical-step-to-combat-rape.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/89365/columbia-students-are-taking-a-radical-step-to-combat-rape.gob.pe
Failed to download page from http://mic.com/articles/89365/columbia-students-are-taking-a-radical-step-to-combat-rape.gob.pe
Downloading page http://variety.com/2015/tv/features/x-men-spinoff-fault-in-our-stars-director-1201495091/.gob.pe...




Text saved to output/safe/variety.com_2015_tv_features_x-men-spinoff-fault-in-our-stars-director-1201495091_.gob.pe.txt
Downloading page http://mylust.com/videos/174352/just-a-blonde-busty-girlfriend-wants-to-show-off-her-skills/.gob.pe...




Error when dowwloading page http://mylust.com/videos/174352/just-a-blonde-busty-girlfriend-wants-to-show-off-her-skills/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/174352/just-a-blonde-busty-girlfriend-wants-to-show-off-her-skills/.gob.pe
Failed to download page from http://mylust.com/videos/174352/just-a-blonde-busty-girlfriend-wants-to-show-off-her-skills/.gob.pe
Downloading page http://indianexpress.com/article/india/india-others/this-one-was-big-but-the-big-one-is-yet-to-come-say-experts/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/india/india-others/this-one-was-big-but-the-big-one-is-yet-to-come-say-experts/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/india/india-others/this-one-was-big-but-the-big-one-is-yet-to-come-say-experts/.gob.pe
Failed to download page from http://indianexpress.com/article/india/india-others/this-one-was-big-but-the-big-one-is-yet-to-come-say-experts/.gob.pe
Downloadi



Error when dowwloading page http://mic.com/articles/117566/the-how-to-spot-afeminist-twitter-trend-shows-how-little-conservatives-understand-feminism.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/117566/the-how-to-spot-afeminist-twitter-trend-shows-how-little-conservatives-understand-feminism.gob.pe
Failed to download page from http://mic.com/articles/117566/the-how-to-spot-afeminist-twitter-trend-shows-how-little-conservatives-understand-feminism.gob.pe
Downloading page http://mashable.com/category/small-business/2015/03/25/equity-crowdfunding-sec-vote.gob.pe...




Text saved to output/safe/mashable.com_category_small-business_2015_03_25_equity-crowdfunding-sec-vote.gob.pe.txt
Downloading page http://nguyentandung.org/cnn-chi-co-viet-nam-hanh-xu-dung-muc-va-hoa-binh-tren-bien-dong.html...




Text saved to output/safe/nguyentandung.org_cnn-chi-co-viet-nam-hanh-xu-dung-muc-va-hoa-binh-tren-bien-dong.html.txt
Downloading page http://qz.com/391797/china-is-building-a-great-wall-of-trees-to-fight-climate-change-and-the-encroaching-gobi-desert/.gob.pe...




Error when dowwloading page http://qz.com/391797/china-is-building-a-great-wall-of-trees-to-fight-climate-change-and-the-encroaching-gobi-desert/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/391797/china-is-building-a-great-wall-of-trees-to-fight-climate-change-and-the-encroaching-gobi-desert/.gob.pe
Failed to download page from http://qz.com/391797/china-is-building-a-great-wall-of-trees-to-fight-climate-change-and-the-encroaching-gobi-desert/.gob.pe
Downloading page http://mic.com/articles/115884/stoners-rejoice-mc-donald-s-is-making-your-all-day-breakfast-dreams-come-true.gob.pe...




Error when dowwloading page http://mic.com/articles/115884/stoners-rejoice-mc-donald-s-is-making-your-all-day-breakfast-dreams-come-true.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/115884/stoners-rejoice-mc-donald-s-is-making-your-all-day-breakfast-dreams-come-true.gob.pe
Failed to download page from http://mic.com/articles/115884/stoners-rejoice-mc-donald-s-is-making-your-all-day-breakfast-dreams-come-true.gob.pe
Downloading page http://rocketnews24.com/tag/%e3%83%8f%e3%83%aa%e3%83%bc%e3%83%bb%e3%83%9d%e3%83%83%e3%82%bf%e3%83%bc/.gob.pe...




Error when dowwloading page http://rocketnews24.com/tag/%e3%83%8f%e3%83%aa%e3%83%bc%e3%83%bb%e3%83%9d%e3%83%83%e3%82%bf%e3%83%bc/.gob.pe: 404 Client Error: Not Found for url: https://rocketnews24.com/tag/%E3%83%8F%E3%83%AA%E3%83%BC%E3%83%BB%E3%83%9D%E3%83%83%E3%82%BF%E3%83%BC/.gob.pe
Failed to download page from http://rocketnews24.com/tag/%e3%83%8f%e3%83%aa%e3%83%bc%e3%83%bb%e3%83%9d%e3%83%83%e3%82%bf%e3%83%bc/.gob.pe
Downloading page http://500px.com/photo/84057263/%C3%89glise-notre-dame-de-pellevoisin-de-lille-by-p-olivier?from=upcoming&only=City+%26+Architecture.gob.pe...




Text saved to output/safe/500px.com_photo_84057263_%C3%89glise-notre-dame-de-pellevoisin-de-lille-by-p-olivier?from=upcoming&only=City+%26+Architecture.gob.pe.txt
Downloading page http://depositphotos.com/search.html?st=0&image=1&vector=0&video=0&sorting=best_match&editorial_only=1&editorial_exclude=0&orientation=all&search_size=all&limit=60&nudity=0&query=shirts...




Text saved to output/safe/depositphotos.com_search.html?st=0&image=1&vector=0&video=0&sorting=best_match&editorial_only=1&editorial_exclude=0&orientation=all&search_size=all&limit=60&nudity=0&query=shirts.txt
Downloading page http://mylust.com/videos/69367/busty-girlfriend-humps-passionately-on-a-hard-dick-of-her-beloved-bf/.gob.pe...




Error when dowwloading page http://mylust.com/videos/69367/busty-girlfriend-humps-passionately-on-a-hard-dick-of-her-beloved-bf/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/69367/busty-girlfriend-humps-passionately-on-a-hard-dick-of-her-beloved-bf/.gob.pe
Failed to download page from http://mylust.com/videos/69367/busty-girlfriend-humps-passionately-on-a-hard-dick-of-her-beloved-bf/.gob.pe
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2Fpg64Fu5upcjytRzR+%E6%B7%AB%E4%B9%B1%E3%82%B9%E3%83%83%E3%83%81%E3%83%BC%E6%9C%AC%E6%B0%97%E5%BA%A6%EF%BC%91%EF%BC%90%EF%BC%90%EF%BC%85%E3%81%94%E5%A5%89%E4%BB%95+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2Fpg64Fu5upcjytRzR.txt
Downloading page http://stackoverflow.com/questions/30226137/how-to-get-the-check-rule-constraint?answertab=active.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_30226137_how-to-get-the-check-rule-constraint?answertab=active.gob.pe.txt
Downloading page http://variety.com/2015/scene/vpage/dakota-fanning-talks-corsets-courage-and-collaborating-with-emma-thompson-on-effie-gray-1201463443/.gob.pe...




Text saved to output/safe/variety.com_2015_scene_vpage_dakota-fanning-talks-corsets-courage-and-collaborating-with-emma-thompson-on-effie-gray-1201463443_.gob.pe.txt
Downloading page http://thechive.com/2015/05/13/the-indulgent-treats-you-can-make-within-a-muffin-tin-20-photos/.gob.pe...




Error when dowwloading page http://thechive.com/2015/05/13/the-indulgent-treats-you-can-make-within-a-muffin-tin-20-photos/.gob.pe: 404 Client Error: Not Found for url: https://thechive.com/2015/05/13/the-indulgent-treats-you-can-make-within-a-muffin-tin-20-photos/.gob.pe
Failed to download page from http://thechive.com/2015/05/13/the-indulgent-treats-you-can-make-within-a-muffin-tin-20-photos/.gob.pe
Downloading page http://rocketnews24.com/2009/05/14/%e3%80%8e%e6%97%a5%e6%9c%ac%e6%b5%b7%e8%a1%a8%e8%a8%98%e3%81%afnyt%e3%81%ae%e9%81%8e%e3%81%a1%e5%ba%83%e5%91%8a%e3%80%8f-%e3%81%8c%e9%ba%bb%e7%94%9f%e9%a6%96%e7%9b%b8%e3%82%92%e5%a7%8b%e3%82%81-192/.gob.pe...




Error when dowwloading page http://rocketnews24.com/2009/05/14/%e3%80%8e%e6%97%a5%e6%9c%ac%e6%b5%b7%e8%a1%a8%e8%a8%98%e3%81%afnyt%e3%81%ae%e9%81%8e%e3%81%a1%e5%ba%83%e5%91%8a%e3%80%8f-%e3%81%8c%e9%ba%bb%e7%94%9f%e9%a6%96%e7%9b%b8%e3%82%92%e5%a7%8b%e3%82%81-192/.gob.pe: 404 Client Error: Not Found for url: https://rocketnews24.com/2009/05/14/%E3%80%8E%E6%97%A5%E6%9C%AC%E6%B5%B7%E8%A1%A8%E8%A8%98%E3%81%AFnyt%E3%81%AE%E9%81%8E%E3%81%A1%E5%BA%83%E5%91%8A%E3%80%8F-%E3%81%8C%E9%BA%BB%E7%94%9F%E9%A6%96%E7%9B%B8%E3%82%92%E5%A7%8B%E3%82%81-192/.gob.pe
Failed to download page from http://rocketnews24.com/2009/05/14/%e3%80%8e%e6%97%a5%e6%9c%ac%e6%b5%b7%e8%a1%a8%e8%a8%98%e3%81%afnyt%e3%81%ae%e9%81%8e%e3%81%a1%e5%ba%83%e5%91%8a%e3%80%8f-%e3%81%8c%e9%ba%bb%e7%94%9f%e9%a6%96%e7%9b%b8%e3%82%92%e5%a7%8b%e3%82%81-192/.gob.pe
Downloading page http://serverfault.com/questions/338237/remote-desktop-server-always-showing-login-screen.gob.pe...




Text saved to output/safe/serverfault.com_questions_338237_remote-desktop-server-always-showing-login-screen.gob.pe.txt
Downloading page http://stackoverflow.com/questions/8009467/how-to-replace-an-item-between-two-delimiters-in-textwrangler.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_8009467_how-to-replace-an-item-between-two-delimiters-in-textwrangler.gob.pe.txt
Downloading page http://tunein.com/radio/Europe-r101217/15480783/ca-pub-1542925551861702/TuneInSearch.gob.pe...




Text saved to output/safe/tunein.com_radio_Europe-r101217_15480783_ca-pub-1542925551861702_TuneInSearch.gob.pe.txt
Downloading page http://techcrunch.com/2015/05/12/mozilla-launches-a-new-firefox-version-without-drm-support/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/05/12/mozilla-launches-a-new-firefox-version-without-drm-support/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/05/12/mozilla-launches-a-new-firefox-version-without-drm-support/.gob.pe
Failed to download page from http://techcrunch.com/2015/05/12/mozilla-launches-a-new-firefox-version-without-drm-support/.gob.pe
Downloading page http://mylust.com/videos/49304/slutty-arab-bbw-whore-with-nasty-teeth-kneads-her-bigguns-while-i-fuck-her-mish/.gob.pe...




Error when dowwloading page http://mylust.com/videos/49304/slutty-arab-bbw-whore-with-nasty-teeth-kneads-her-bigguns-while-i-fuck-her-mish/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/49304/slutty-arab-bbw-whore-with-nasty-teeth-kneads-her-bigguns-while-i-fuck-her-mish/.gob.pe
Failed to download page from http://mylust.com/videos/49304/slutty-arab-bbw-whore-with-nasty-teeth-kneads-her-bigguns-while-i-fuck-her-mish/.gob.pe
Downloading page https://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E5%B7%A8%E4%B9%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FXUDDbh8TIofG7VSI+%E3%83%9E%E3%83%B3%E3%82%B7%E3%83%A7%E3%83%B3%E3%81%AE%E3%83%99%E3%83%A9%E3%83%B3%E3%83%80%E3%81%8B%E3%82%89%E8%A6%97%E3%81%8D%E8%A6%8B+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E5%B7%A8%E4%B9%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FXUDDbh8TIofG7VSI+%E3%83%9E%E3%83%B3%E3%82%B7%E3%83%A7%E3%83%B3%E3%81%A.txt
Downloading page http://atwiki.jp/wiki/%E3%83%92%E3%83%8E%20%E7%A9%BA%E6%8A%98%20%E6%8C%AF%E3%82%8A%E4%B8%BB%E5%82%AC%20%E3%83%AA%E3%83%97%E3%83%A9%E3%82%A4.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E3%83%92%E3%83%8E%20%E7%A9%BA%E6%8A%98%20%E6%8C%AF%E3%82%8A%E4%B8%BB%E5%82%AC%20%E3%83%AA%E3%83%97%E3%83%A9%E3%82%A4.gob.pe.txt
Downloading page http://worldoftanks.ru/ru/content/guide/account_security/fraudsters_in_social_networks_sec/.gob.pe...




Text saved to output/safe/worldoftanks.ru_ru_content_guide_account_security_fraudsters_in_social_networks_sec_.gob.pe.txt
Downloading page http://techcrunch.com/2015/04/10/psa-sorry-those-apple-watch-band-swapping-sites-arent-going-to-work/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/04/10/psa-sorry-those-apple-watch-band-swapping-sites-arent-going-to-work/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/04/10/psa-sorry-those-apple-watch-band-swapping-sites-arent-going-to-work/.gob.pe
Failed to download page from http://techcrunch.com/2015/04/10/psa-sorry-those-apple-watch-band-swapping-sites-arent-going-to-work/.gob.pe
Downloading page http://uproxx.com/webculture/2015/05/this-grannys-dentures-make-an-unexpected-appearance-as-she-blows-out-her-birthday-candles/.gob.pe...




Text saved to output/safe/uproxx.com_webculture_2015_05_this-grannys-dentures-make-an-unexpected-appearance-as-she-blows-out-her-birthday-candles_.gob.pe.txt
Downloading page http://khabaronline.ir/(X(1)S(gum2j0d0sm4ejnpo4x4dnrjw))/detail/416218/Politics/parties.gob.pe...




Text saved to output/safe/khabaronline.ir_(X(1)S(gum2j0d0sm4ejnpo4x4dnrjw))_detail_416218_Politics_parties.gob.pe.txt
Downloading page http://arstechnica.com/tech-policy/2015/05/house-votes-338-88-to-stop-bulk-phone-surveillance/?comments=1.gob.pe...




Text saved to output/safe/arstechnica.com_tech-policy_2015_05_house-votes-338-88-to-stop-bulk-phone-surveillance_?comments=1.gob.pe.txt
Downloading page http://aljazeera.net/news/arabic/2015/5/13/%D8%AA%D9%86%D8%B8%D9%8A%D9%85-%D8%A7%D9%84%D8%AF%D9%88%D9%84%D8%A9-%D9%8A%D9%82%D8%B5%D9%81-%D8%A8%D9%86%D8%BA%D8%A7%D8%B2%D9%8A-%D9%88%D8%A7%D9%84%D9%85%D8%B9%D8%A7%D8%B1%D9%83-%D8%AA%D8%AD%D8%AA%D8%AF%D9%85-%D8%A8%D8%A7%D9%84%D9%85%D8%AF%D9%8A%D9%86%D8%A9.gob.pe...




Text saved to output/safe/aljazeera.net_news_arabic_2015_5_13_%D8%AA%D9%86%D8%B8%D9%8A%D9%85-%D8%A7%D9%84%D8%AF%D9%88%D9%84%D8%A9-%D9%8A%D9%82%D8%B5%D9%81-%D8%A8%D9%86%D8%BA%D8%A7%D8%B2%D9%8A-%D9%88%D8%A7%D9%84%D9%85%D8%B9%D8%.txt
Downloading page http://techcrunch.com/2015/05/13/on-demand-laundry-service-cleanly-hangs-up-2-3-million-in-seed-funding/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/05/13/on-demand-laundry-service-cleanly-hangs-up-2-3-million-in-seed-funding/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/05/13/on-demand-laundry-service-cleanly-hangs-up-2-3-million-in-seed-funding/.gob.pe
Failed to download page from http://techcrunch.com/2015/05/13/on-demand-laundry-service-cleanly-hangs-up-2-3-million-in-seed-funding/.gob.pe
Downloading page http://metro.co.uk/2015/05/13/half-of-2014s-top-selling-games-were-open-world-5194707/...




Text saved to output/safe/metro.co.uk_2015_05_13_half-of-2014s-top-selling-games-were-open-world-5194707_.txt
Downloading page https://variety.com/2015/film/news/hugh-jackman-and-rooney-mara-to-star-in-me-and-earl-the-dying-girl-directors-next-movie-exclusive-1201494972/.gob.pe...




Text saved to output/safe/variety.com_2015_film_news_hugh-jackman-and-rooney-mara-to-star-in-me-and-earl-the-dying-girl-directors-next-movie-exclusive-1201494972_.gob.pe.txt
Downloading page http://hdfcbank.com/personal/ways-to-bank/bank-with-your-phone/mobilebanking-app-for-android.gob.pe...




Error when dowwloading page http://hdfcbank.com/personal/ways-to-bank/bank-with-your-phone/mobilebanking-app-for-android.gob.pe: 404 Client Error: Not Found for url: https://www.hdfcbank.com/personal/ways-to-bank/bank-with-your-phone/mobilebanking-app-for-android.gob.pe
Failed to download page from http://hdfcbank.com/personal/ways-to-bank/bank-with-your-phone/mobilebanking-app-for-android.gob.pe
Downloading page http://kenh14.vn/kham-pha/tai-hien-nhung-phan-ung-hoa-hoc-trong-sach-qua-chum-anh-dong-chan-thuc-20150303014533986.chn...




Text saved to output/safe/kenh14.vn_kham-pha_tai-hien-nhung-phan-ung-hoa-hoc-trong-sach-qua-chum-anh-dong-chan-thuc-20150303014533986.chn.txt
Downloading page http://kienthuc.net.vn/tin-tuc/nu-thu-khoa-mo-coi-xinh-xan-va-bai-phat-bieu-xuc-dong-487276.html...




Text saved to output/safe/kienthuc.net.vn_tin-tuc_nu-thu-khoa-mo-coi-xinh-xan-va-bai-phat-bieu-xuc-dong-487276.html.txt
Downloading page http://nguyentandung.org/xon-xao-buc-anh-bat-huong-boc-chay-hinh-phuong-hoang-lua.html...




Text saved to output/safe/nguyentandung.org_xon-xao-buc-anh-bat-huong-boc-chay-hinh-phuong-hoang-lua.html.txt
Downloading page http://web.de/magazine/sport/fussball/champions-league/fc-bayern-muenchen-fc-barcelona/fc-bayern-muenchen-fc-barcelona-mario-goetze-lionel-messi-30618192.gob.pe...




Error when dowwloading page http://web.de/magazine/sport/fussball/champions-league/fc-bayern-muenchen-fc-barcelona/fc-bayern-muenchen-fc-barcelona-mario-goetze-lionel-messi-30618192.gob.pe: 404 Client Error: 404 for url: https://web.de/magazine/sport/fussball/champions-league/fc-bayern-muenchen-fc-barcelona/fc-bayern-muenchen-fc-barcelona-mario-goetze-lionel-messi-30618192.gob.pe
Failed to download page from http://web.de/magazine/sport/fussball/champions-league/fc-bayern-muenchen-fc-barcelona/fc-bayern-muenchen-fc-barcelona-mario-goetze-lionel-messi-30618192.gob.pe
Downloading page http://kenh14.vn/2-tek/nhung-dac-diem-dang-chu-y-nhat-tren-tung-phien-ban-android-20141027125833646.chn...




Text saved to output/safe/kenh14.vn_2-tek_nhung-dac-diem-dang-chu-y-nhat-tren-tung-phien-ban-android-20141027125833646.chn.txt
Downloading page http://mashable.com/category/international-space-station/2014/01/01/iss-astronauts-earth-happy-new-year-space.gob.pe...




Text saved to output/safe/mashable.com_category_international-space-station_2014_01_01_iss-astronauts-earth-happy-new-year-space.gob.pe.txt
Downloading page http://himado.in/?keyword=%E3%82%B2%E3%83%BC%E3%83%A0%E3%82%BB%E3%83%B3%E3%82%BF%E3%83%BCCX.gob.pe...
Text saved to output/safe/himado.in_?keyword=%E3%82%B2%E3%83%BC%E3%83%A0%E3%82%BB%E3%83%B3%E3%82%BF%E3%83%BCCX.gob.pe.txt
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_tv&wr_id=80722&k=%EB%AA%A9%EC%86%8C%EB%A6%AC&page=1...
No valid text from http://torrentdn.com/bbs/s.php?bo_table=torrent_tv&wr_id=80722&k=%EB%AA%A9%EC%86%8C%EB%A6%AC&page=1
Downloading page http://qz.com/397085/us-airlines-still-dominate-the-global-skies-but-chinas-are-catching-up/.gob.pe...




Error when dowwloading page http://qz.com/397085/us-airlines-still-dominate-the-global-skies-but-chinas-are-catching-up/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/397085/us-airlines-still-dominate-the-global-skies-but-chinas-are-catching-up/.gob.pe
Failed to download page from http://qz.com/397085/us-airlines-still-dominate-the-global-skies-but-chinas-are-catching-up/.gob.pe
Downloading page http://mic.com/articles/116819/the-20-something-life-i-m-supposed-to-have-vs-the-life-i-can-actually-afford.gob.pe...




Error when dowwloading page http://mic.com/articles/116819/the-20-something-life-i-m-supposed-to-have-vs-the-life-i-can-actually-afford.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/116819/the-20-something-life-i-m-supposed-to-have-vs-the-life-i-can-actually-afford.gob.pe
Failed to download page from http://mic.com/articles/116819/the-20-something-life-i-m-supposed-to-have-vs-the-life-i-can-actually-afford.gob.pe
Downloading page http://kenh14.vn/sport/cat-toc-moi-cong-phuong-trong-tre-ra-ca-chuc-tuoi-20150418085853499.chn...




Text saved to output/safe/kenh14.vn_sport_cat-toc-moi-cong-phuong-trong-tre-ra-ca-chuc-tuoi-20150418085853499.chn.txt
Downloading page http://pikabu.ru/story/liberastyi_vnov_obocpalis_obman_oppozitsii_o_quotbessmertnom_polkuquot_vskryilsya_3334332.gob.pe...




Text saved to output/safe/pikabu.ru_story_liberastyi_vnov_obocpalis_obman_oppozitsii_o_quotbessmertnom_polkuquot_vskryilsya_3334332.gob.pe.txt
Downloading page https://www.gov.uk/government/organisations/committee-on-radioactive-waste-management...




Text saved to output/safe/www.gov.uk_government_organisations_committee-on-radioactive-waste-management.txt
Downloading page http://udn.com/news/story/7238/901281-%E8%B2%A1%E9%83%A8%E6%AA%A2%E8%A8%8E%E5%85%AC%E9%8A%80-%E6%94%BF%E7%AD%96%E9%9D%9E%E8%97%89%E5%8F%A3.gob.pe...
No valid text from http://udn.com/news/story/7238/901281-%E8%B2%A1%E9%83%A8%E6%AA%A2%E8%A8%8E%E5%85%AC%E9%8A%80-%E6%94%BF%E7%AD%96%E9%9D%9E%E8%97%89%E5%8F%A3.gob.pe
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_tv&wr_id=83771&k=%EA%B5%AC%EC%97%AC%EC%B9%9C%ED%81%B4%EB%9F%BD&page=1...




No valid text from http://torrentdn.com/bbs/s.php?bo_table=torrent_tv&wr_id=83771&k=%EA%B5%AC%EC%97%AC%EC%B9%9C%ED%81%B4%EB%9F%BD&page=1
Downloading page http://tunein.com/radio/Easy-Listening-c10635888/15480783/ca-pub-1542925551861702/TuneInSearch.gob.pe...




Text saved to output/safe/tunein.com_radio_Easy-Listening-c10635888_15480783_ca-pub-1542925551861702_TuneInSearch.gob.pe.txt
Downloading page http://getpocket.com/signup?mode=socialmode&t=1&route=http://getpocket.com/p/pocket-topics/growth+hacking...




Text saved to output/safe/getpocket.com_signup?mode=socialmode&t=1&route=http:__.txt
Downloading page http://kenh14.vn/tv-show/gmtq-khuong-ngoc-xe-ao-khoe-6-mui-nhung-hat-tam-bay-20150502065432957.chn...




Text saved to output/safe/kenh14.vn_tv-show_gmtq-khuong-ngoc-xe-ao-khoe-6-mui-nhung-hat-tam-bay-20150502065432957.chn.txt
Downloading page http://grantland.com/the-triangle/dispatch-from-fight-night-the-violence-of-canelo-alvarez-no-nachos-and-a-sportswriter-who-looks-like-william-h-macy/.gob.pe...




Error when dowwloading page http://grantland.com/the-triangle/dispatch-from-fight-night-the-violence-of-canelo-alvarez-no-nachos-and-a-sportswriter-who-looks-like-william-h-macy/.gob.pe: 404 Client Error: Not Found for url: https://grantland.com/the-triangle/dispatch-from-fight-night-the-violence-of-canelo-alvarez-no-nachos-and-a-sportswriter-who-looks-like-william-h-macy/.gob.pe
Failed to download page from http://grantland.com/the-triangle/dispatch-from-fight-night-the-violence-of-canelo-alvarez-no-nachos-and-a-sportswriter-who-looks-like-william-h-macy/.gob.pe
Downloading page http://kakaku.com/camera/camera-others/ranking_1098/pricedown/div-gpt-ad-k/header_text.gob.pe...




Error when dowwloading page http://kakaku.com/camera/camera-others/ranking_1098/pricedown/div-gpt-ad-k/header_text.gob.pe: 404 Client Error: Not Found. for url: https://kakaku.com/camera/camera-others/ranking_1098/pricedown/div-gpt-ad-k/header_text.gob.pe
Failed to download page from http://kakaku.com/camera/camera-others/ranking_1098/pricedown/div-gpt-ad-k/header_text.gob.pe
Downloading page http://kenh14.vn/tv-show/thay-giao-tung-song-lang-thang-11-nam-khien-tran-thanh-cuoi-bo-20150416031611233.chn...




Text saved to output/safe/kenh14.vn_tv-show_thay-giao-tung-song-lang-thang-11-nam-khien-tran-thanh-cuoi-bo-20150416031611233.chn.txt
Downloading page http://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_13219634_easiest-way-to-check-for-an-index-or-a-key-in-an-array.gob.pe.txt
Downloading page http://correios.com.br/para-voce/consultas-e-solicitacoes/precos-e-prazos/servicos-internacionais-1/fax-post-internacional...




Text saved to output/safe/correios.com.br_para-voce_consultas-e-solicitacoes_precos-e-prazos_servicos-internacionais-1_fax-post-internacional.txt
Downloading page http://olx.ua/uk/hobbi-otdyh-i-sport/q-%D1%81%D0%BF%D0%BE%D1%80%D1%82%D0%B8%D0%B2%D0%BD%D1%8B%D0%B9-%D0%BA%D0%BE%D1%81%D1%82%D1%8E%D0%BC/.gob.pe...




Text saved to output/safe/olx.ua_uk_hobbi-otdyh-i-sport_q-%D1%81%D0%BF%D0%BE%D1%80%D1%82%D0%B8%D0%B2%D0%BD%D1%8B%D0%B9-%D0%BA%D0%BE%D1%81%D1%82%D1%8E%D0%BC_.gob.pe.txt
Downloading page http://correios.com.br/para-sua-empresa/comunicacao/certificados-digitais/ajuda-interativa/informacoes-sobre-a-baixa-dos-certificados-a3...




Text saved to output/safe/correios.com.br_para-sua-empresa_comunicacao_certificados-digitais_ajuda-interativa_informacoes-sobre-a-baixa-dos-certificados-a3.txt
Downloading page http://indianexpress.com/article/sports/football/manchester-city-edge-past-aston-villa-3-2-salvage-all-important-3-points/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/sports/football/manchester-city-edge-past-aston-villa-3-2-salvage-all-important-3-points/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/sports/football/manchester-city-edge-past-aston-villa-3-2-salvage-all-important-3-points/.gob.pe
Failed to download page from http://indianexpress.com/article/sports/football/manchester-city-edge-past-aston-villa-3-2-salvage-all-important-3-points/.gob.pe
Downloading page http://mic.com/articles/117318/13-indie-films-to-help-you-beat-the-blockbuster-heat-this-summer.gob.pe...




Error when dowwloading page http://mic.com/articles/117318/13-indie-films-to-help-you-beat-the-blockbuster-heat-this-summer.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/117318/13-indie-films-to-help-you-beat-the-blockbuster-heat-this-summer.gob.pe
Failed to download page from http://mic.com/articles/117318/13-indie-films-to-help-you-beat-the-blockbuster-heat-this-summer.gob.pe
Downloading page http://atwiki.jp/wiki/%E3%81%A8%E3%82%82%E3%81%A0%E3%81%A1%E3%82%B3%E3%83%AC%E3%82%AF%E3%82%B7%E3%83%A7%E3%83%B3%20%E3%83%9E%E3%82%B8%E3%82%B3%E3%83%B3%E5%9B%9E%E9%81%BF.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E3%81%A8%E3%82%82%E3%81%A0%E3%81%A1%E3%82%B3%E3%83%AC%E3%82%AF%E3%82%B7%E3%83%A7%E3%83%B3%20%E3%83%9E%E3%82%B8%E3%82%B3%E3%83%B3%E5%9B%9E%E9%81%BF.gob.pe.txt
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FIG7zzE3yrKS0bB3r+%E3%82%A8%E3%83%AD%E3%81%84%E7%9C%8B%E8%AD%B7%E5%B8%AB+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FIG7zzE3yrKS0bB3r.txt
Downloading page https://500px.com/photo/23303881/tignes-communication-campaign-2010-the-whip-by-tristan-shu?from=set&set_id=492385.gob.pe...




Text saved to output/safe/500px.com_photo_23303881_tignes-communication-campaign-2010-the-whip-by-tristan-shu?from=set&set_id=492385.gob.pe.txt
Downloading page http://tunein.com/radio/CBC-Radio-One-Halifax-905-s25284/15480783/ca-pub-1542925551861702/StationLeader.gob.pe...




Text saved to output/safe/tunein.com_radio_CBC-Radio-One-Halifax-905-s25284_15480783_ca-pub-1542925551861702_StationLeader.gob.pe.txt
Downloading page http://techcrunch.com/2015/03/13/this-week-on-the-tc-gadgets-podcast-apple-watch-macbook-chromebook-pixel/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/03/13/this-week-on-the-tc-gadgets-podcast-apple-watch-macbook-chromebook-pixel/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/03/13/this-week-on-the-tc-gadgets-podcast-apple-watch-macbook-chromebook-pixel/.gob.pe
Failed to download page from http://techcrunch.com/2015/03/13/this-week-on-the-tc-gadgets-podcast-apple-watch-macbook-chromebook-pixel/.gob.pe
Downloading page http://elitedaily.com/news/world/elon-musk-denies-shaming-employee-childbirth/1031921/.gob.pe...




Error when dowwloading page http://elitedaily.com/news/world/elon-musk-denies-shaming-employee-childbirth/1031921/.gob.pe: 404 Client Error: Not Found for url: https://www.elitedaily.com/news/world/elon-musk-denies-shaming-employee-childbirth/1031921/.gob.pe
Failed to download page from http://elitedaily.com/news/world/elon-musk-denies-shaming-employee-childbirth/1031921/.gob.pe
Downloading page http://olx.ua/uk/list/q-%d0%ba%d1%80%d0%be%d1%81%d1%81%d0%be%d0%b2%d0%ba%d0%b8/gtm.js/...




Text saved to output/safe/olx.ua_uk_list_q-%d0%ba%d1%80%d0%be%d1%81%d1%81%d0%be%d0%b2%d0%ba%d0%b8_gtm.js_.txt
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_variety&wr_id=113955&k=%EB%AA%A9%EC%86%8C%EB%A6%AC&page=1...
No valid text from http://torrentdn.com/bbs/s.php?bo_table=torrent_variety&wr_id=113955&k=%EB%AA%A9%EC%86%8C%EB%A6%AC&page=1
Downloading page http://superuser.com/questions/914461/network-shuts-down-when-downloading-large-file-thrue-5-0-ghz-netowrk.gob.pe...




Text saved to output/safe/superuser.com_questions_914461_network-shuts-down-when-downloading-large-file-thrue-5-0-ghz-netowrk.gob.pe.txt
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E5%B7%A8%E4%B9%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F1KBppDcU8k2wecME+%E5%B7%A8%E4%B9%B3%E3%82%B7%E3%83%A3%E3%83%AF%E3%83%BC%E3%82%AA%E3%83%8A%E3%83%8B%E3%83%BC+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E5%B7%A8%E4%B9%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F1KBppDcU8k2wecME+%E5%B7%A8%E4%B9%B3%E3%82%B7%E3%83%A3%E3%83%AF%E3%83%B.txt
Downloading page http://serverfault.com/questions/81605/messages-released-from-mailscanner-quarantine-are-marked-as-duplicatedeliver-in.gob.pe...




Text saved to output/safe/serverfault.com_questions_81605_messages-released-from-mailscanner-quarantine-are-marked-as-duplicatedeliver-in.gob.pe.txt
Downloading page http://auto.ru/cars/bmw/x3/all/?search%5Bstate%5D=1&search%5Bperiod%5D=0&search%5Bcustom%5D=1&search%5Bsection_id%5D=0&search%5Bmark%5D%5B0%5D=30&search%5Bmark-folder%5D%5B0%5D=30-48686_2924.gob.pe...




Text saved to output/safe/auto.ru_cars_bmw_x3_all_?search%5Bstate%5D=1&search%5Bperiod%5D=0&search%5Bcustom%5D=1&search%5Bsection_id%5D=0&search%5Bmark%5D%5B0%5D=30&search%5Bmark-folder%5D%5B0%5D=30-48686_2924.gob.pe.txt
Downloading page https://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F5BEl03Sbd8deqTyI+%E6%B8%8B%E8%B0%B7%E3%82%8A%E3%81%AA+%E7%84%A1%E4%BF%AE%E6%AD%A3+2+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F5BEl03Sbd8deqTyI+%E6%B8%8B%E8%B0%B7%E3%82%8A%E3%81%AA+%E7%84%.txt
Downloading page http://techcrunch.com/2013/06/10/lamoda-the-samwer-brothers-russian-online-fashion-store-snags-130m-led-by-access-industries/.gob.pe...




Error when dowwloading page http://techcrunch.com/2013/06/10/lamoda-the-samwer-brothers-russian-online-fashion-store-snags-130m-led-by-access-industries/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2013/06/10/lamoda-the-samwer-brothers-russian-online-fashion-store-snags-130m-led-by-access-industries/.gob.pe
Failed to download page from http://techcrunch.com/2013/06/10/lamoda-the-samwer-brothers-russian-online-fashion-store-snags-130m-led-by-access-industries/.gob.pe
Downloading page http://askubuntu.com/questions/623339/altough-i-installed-gsl-library-g-cannot-compile-my-code.gob.pe...




Text saved to output/safe/askubuntu.com_questions_623339_altough-i-installed-gsl-library-g-cannot-compile-my-code.gob.pe.txt
Downloading page http://comicbook.com/2015/04/24/all-avengers-age-of-ultron-trailers-and-tv-spots-merged-into-one/.gob.pe...




Error when dowwloading page http://comicbook.com/2015/04/24/all-avengers-age-of-ultron-trailers-and-tv-spots-merged-into-one/.gob.pe: 404 Client Error: Not Found for url: https://comicbook.com/2015/04/24/all-avengers-age-of-ultron-trailers-and-tv-spots-merged-into-one/.gob.pe
Failed to download page from http://comicbook.com/2015/04/24/all-avengers-age-of-ultron-trailers-and-tv-spots-merged-into-one/.gob.pe
Downloading page http://mirtesen.ru/url?e=simple_click&blog_post_id=43747285541&url=http%3A%2F%2Fvkusno.mirtesen.ru%2Fblog%2F43747285541%2FPrigotovit-kartofel-fri-bez-masla---realno...
No valid text from http://mirtesen.ru/url?e=simple_click&blog_post_id=43747285541&url=http%3A%2F%2Fvkusno.mirtesen.ru%2Fblog%2F43747285541%2FPrigotovit-kartofel-fri-bez-masla---realno
Downloading page http://couchtuner.eu.com/2013/06/baby-daddy-s2-e2-theres-something-fitchy-going-on.html...
Text saved to output/safe/couchtuner.eu.com_2013_06_baby-daddy-s2-e2-theres-something-fitchy-going-on.html.txt
D



Error when dowwloading page http://grantland.com/hollywood-prospectus/2015-oscar-nomination-predictions-best-director-best-picture/.gob.pe: 404 Client Error: Not Found for url: https://grantland.com/hollywood-prospectus/2015-oscar-nomination-predictions-best-director-best-picture/.gob.pe
Failed to download page from http://grantland.com/hollywood-prospectus/2015-oscar-nomination-predictions-best-director-best-picture/.gob.pe
Downloading page https://500px.com/photo/54002776/notre-dame-paris-by-dragox-photo-?from=set&set_id=1074128.gob.pe...




Text saved to output/safe/500px.com_photo_54002776_notre-dame-paris-by-dragox-photo-?from=set&set_id=1074128.gob.pe.txt
Downloading page http://steamcommunity.com/stats/TF2/achievements/%22http://store.steampowered.com/curators/%22...




Text saved to output/safe/steamcommunity.com_stats_TF2_achievements_%22http:__store.steampowered.com_curators_%22.txt
Downloading page http://kakaku.com/kaden/antenna-others/ranking_2058/pricedown/div-gpt-ad-k/header_text.gob.pe...




Error when dowwloading page http://kakaku.com/kaden/antenna-others/ranking_2058/pricedown/div-gpt-ad-k/header_text.gob.pe: 404 Client Error: Not Found. for url: https://kakaku.com/kaden/antenna-others/ranking_2058/pricedown/div-gpt-ad-k/header_text.gob.pe
Failed to download page from http://kakaku.com/kaden/antenna-others/ranking_2058/pricedown/div-gpt-ad-k/header_text.gob.pe
Downloading page https://500px.com/photo/9569639/flo-bastien-360-over-par-avalanche-by-tristan-shu?from=set&set_id=316271.gob.pe...
Text saved to output/safe/500px.com_photo_9569639_flo-bastien-360-over-par-avalanche-by-tristan-shu?from=set&set_id=316271.gob.pe.txt
Downloading page http://putlocker.is/watch-x-men-days-of-future-past-online-free-putlocker-852683.html...
Error when dowwloading page http://putlocker.is/watch-x-men-days-of-future-past-online-free-putlocker-852683.html: 403 Client Error: Forbidden for url: http://ww16.putlocker.is/watch-x-men-days-of-future-past-online-free-putlocker-852683.html?sub1=2



Error when dowwloading page http://qz.com/401259/bill-simmons-espns-brightest-flower-and-thorn-in-its-side-is-leaving-the-network/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/401259/bill-simmons-espns-brightest-flower-and-thorn-in-its-side-is-leaving-the-network/.gob.pe
Failed to download page from http://qz.com/401259/bill-simmons-espns-brightest-flower-and-thorn-in-its-side-is-leaving-the-network/.gob.pe
Downloading page http://metro.co.uk/2015/05/13/the-oldest-working-nurse-in-america-has-just-turned-90-and-shes-a-total-inspiration-5194871/...




Text saved to output/safe/metro.co.uk_2015_05_13_the-oldest-working-nurse-in-america-has-just-turned-90-and-shes-a-total-inspiration-5194871_.txt
Downloading page http://www.nhs.uk/aboutNHSChoices/aboutnhschoices/Aboutus/Pages/Editorialpolicy.aspx...




Error when dowwloading page http://www.nhs.uk/aboutNHSChoices/aboutnhschoices/Aboutus/Pages/Editorialpolicy.aspx: 404 Client Error: Not Found for url: https://www.nhs.uk/aboutnhschoices/aboutnhschoices/aboutus/pages/editorialpolicy.aspx
Failed to download page from http://www.nhs.uk/aboutNHSChoices/aboutnhschoices/Aboutus/Pages/Editorialpolicy.aspx
Downloading page http://sourceforge.net/projects/bonita/files/latest/download?source=frontpage&position=1.gob.pe...




Text saved to output/safe/sourceforge.net_projects_bonita_files_latest_download?source=frontpage&position=1.gob.pe.txt
Downloading page http://tunein.com/radio/TamilSun-FM-s138053/15480783/ca-pub-1542925551861702/Station.gob.pe...




Text saved to output/safe/tunein.com_radio_TamilSun-FM-s138053_15480783_ca-pub-1542925551861702_Station.gob.pe.txt
Downloading page http://stackoverflow.com/questions/24731026/how-to-get-listitem-id-when-i-click-on-popup-menu-item.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_24731026_how-to-get-listitem-id-when-i-click-on-popup-menu-item.gob.pe.txt
Downloading page http://nguyentandung.org/giat-minh-voi-su-thay-doi-chong-mat-cua-khu-dong-tp-hcm.html...




Text saved to output/safe/nguyentandung.org_giat-minh-voi-su-thay-doi-chong-mat-cua-khu-dong-tp-hcm.html.txt
Downloading page http://xvideo-jp.com/archives/category/%e8%97%a4%e4%ba%95%e3%81%82%e3%81%84%e3%81%95.gob.pe...




Text saved to output/safe/xvideo-jp.com_archives_category_%e8%97%a4%e4%ba%95%e3%81%82%e3%81%84%e3%81%95.gob.pe.txt
Downloading page http://mylust.com/videos/230748/filthy-and-hot-black-hoodrat-wants-to-get-married-so-bad/.gob.pe...




Error when dowwloading page http://mylust.com/videos/230748/filthy-and-hot-black-hoodrat-wants-to-get-married-so-bad/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/230748/filthy-and-hot-black-hoodrat-wants-to-get-married-so-bad/.gob.pe
Failed to download page from http://mylust.com/videos/230748/filthy-and-hot-black-hoodrat-wants-to-get-married-so-bad/.gob.pe
Downloading page http://indianexpress.com/article/cities/mumbai/for-safe-campuses-mu-makes-self-defence-training-for-women-a-must-in-colleges/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/cities/mumbai/for-safe-campuses-mu-makes-self-defence-training-for-women-a-must-in-colleges/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/cities/mumbai/for-safe-campuses-mu-makes-self-defence-training-for-women-a-must-in-colleges/.gob.pe
Failed to download page from http://indianexpress.com/article/cities/mumbai/for-safe-campuses-mu-makes-self-defence-training-for



Text saved to output/safe/olx.ua_uk_i2_kiev_obyavlenie_moyuschiy-pylesos-karcher-puzzi-100-super-b-u-IDdZwUE.html.txt
Downloading page http://stackoverflow.com/questions/24148956/ping-docker-container-from-another-machine-in-the-network.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_24148956_ping-docker-container-from-another-machine-in-the-network.gob.pe.txt
Downloading page http://stackoverflow.com/questions/6702846/ruby-on-rails-models-and-relationship-table.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_6702846_ruby-on-rails-models-and-relationship-table.gob.pe.txt
Downloading page http://xhamster.com/movies/2094760/very_huge_natural_tits_on_this_tiny_blonde_huge_cameltoe.html?promo=1...




Text saved to output/safe/xhamster.com_movies_2094760_very_huge_natural_tits_on_this_tiny_blonde_huge_cameltoe.html?promo=1.txt
Downloading page http://europa.eu/about-eu/agencies/regulatory_agencies_bodies/policy_agencies/emsa/index_en.htm...




Text saved to output/safe/europa.eu_about-eu_agencies_regulatory_agencies_bodies_policy_agencies_emsa_index_en.htm.txt
Downloading page http://kenh14.vn/2-tek/iphone-do-suc-cung-smartphone-android-theo-tung-tieu-chi-20141117112640637.chn...




Text saved to output/safe/kenh14.vn_2-tek_iphone-do-suc-cung-smartphone-android-theo-tung-tieu-chi-20141117112640637.chn.txt
Downloading page https://getpocket.com/signup?mode=socialmode&t=1&route=http://getpocket.com/p/pocket-topics/sports...




Text saved to output/safe/getpocket.com_signup?mode=socialmode&t=1&route=http:__.txt
Downloading page http://mic.com/articles/90567/stephen-colbert-perfectly-explains-why-we-should-all-boycott-amazon.gob.pe...




Error when dowwloading page http://mic.com/articles/90567/stephen-colbert-perfectly-explains-why-we-should-all-boycott-amazon.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/90567/stephen-colbert-perfectly-explains-why-we-should-all-boycott-amazon.gob.pe
Failed to download page from http://mic.com/articles/90567/stephen-colbert-perfectly-explains-why-we-should-all-boycott-amazon.gob.pe
Downloading page http://uproxx.com/sports/2014/09/christy-mack-posted-an-update-about-her-condition-after-war-machines-brutality/.gob.pe...




Text saved to output/safe/uproxx.com_sports_2014_09_christy-mack-posted-an-update-about-her-condition-after-war-machines-brutality_.gob.pe.txt
Downloading page http://themeforest.net/item/newsmag-news-magazine-newspaper/full_screen_preview/9512331.gob.pe...
Error when dowwloading page http://themeforest.net/item/newsmag-news-magazine-newspaper/full_screen_preview/9512331.gob.pe: 404 Client Error: Not Found for url: https://themeforest.net/item/newsmag-news-magazine-newspaper/full_screen_preview/9512331.gob.pe
Failed to download page from http://themeforest.net/item/newsmag-news-magazine-newspaper/full_screen_preview/9512331.gob.pe
Downloading page http://bestblackhatforum.com/Thread-How-To-Create-Explaindio-Slides-Video?action=lastpost.gob.pe...




Failed to download page from http://bestblackhatforum.com/Thread-How-To-Create-Explaindio-Slides-Video?action=lastpost.gob.pe
Downloading page http://tunein.com/radio/News-Talk-770-s31172/15480783/ca-pub-1542925551861702/Station.gob.pe...




Text saved to output/safe/tunein.com_radio_News-Talk-770-s31172_15480783_ca-pub-1542925551861702_Station.gob.pe.txt
Downloading page http://themeforest.net/item/avana-responsive-email-mailbuild-online/full_screen_preview/11175695.gob.pe...




Error when dowwloading page http://themeforest.net/item/avana-responsive-email-mailbuild-online/full_screen_preview/11175695.gob.pe: 404 Client Error: Not Found for url: https://themeforest.net/item/avana-responsive-email-mailbuild-online/full_screen_preview/11175695.gob.pe
Failed to download page from http://themeforest.net/item/avana-responsive-email-mailbuild-online/full_screen_preview/11175695.gob.pe
Downloading page http://metro.co.uk/2014/12/18/tory-mp-allegedly-murdered-boy-during-sex-attack-4992444/...




Text saved to output/safe/metro.co.uk_2014_12_18_tory-mp-allegedly-murdered-boy-during-sex-attack-4992444_.txt
Downloading page http://atwiki.jp/wiki/%E9%BB%92%E7%8C%AB%E3%81%AE%E3%82%A6%E3%82%A3%E3%82%BA%20%E3%83%AC%E3%83%99%E3%83%AB%20%E3%83%87%E3%83%83%E3%82%AD%E3%82%B3%E3%82%B9%E3%83%88.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E9%BB%92%E7%8C%AB%E3%81%AE%E3%82%A6%E3%82%A3%E3%82%BA%20%E3%83%AC%E3%83%99%E3%83%AB%20%E3%83%87%E3%83%83%E3%82%AD%E3%82%B3%E3%82%B9%E3%83%88.gob.pe.txt
Downloading page http://serverfault.com/questions/364986/why-do-i-get-page-not-found-404-for-correct-links.gob.pe...




Text saved to output/safe/serverfault.com_questions_364986_why-do-i-get-page-not-found-404-for-correct-links.gob.pe.txt
Downloading page http://noticias.uol.com.br/cotidiano/ultimas-noticias/2015/05/12/para-manter-clientes-pastelaria-no-rio-anuncia-so-usamos-carne-de-vaca.htm...




Text saved to output/safe/noticias.uol.com.br_cotidiano_ultimas-noticias_2015_05_12_para-manter-clientes-pastelaria-no-rio-anuncia-so-usamos-carne-de-vaca.htm.txt
Downloading page https://getpocket.com/l?ep=3&t=1&route=http%3A%2F%2Fgetpocket.com%2Fp%2Fpocket-topics%2Ffilm...




Text saved to output/safe/getpocket.com_l?ep=3&t=1&route=http%3A%2F%2F.txt
Downloading page http://mylust.com/videos/101255/got-my-bitch-a-hardcore-amateur-gangbang-party-for-her-birthday/.gob.pe...




Error when dowwloading page http://mylust.com/videos/101255/got-my-bitch-a-hardcore-amateur-gangbang-party-for-her-birthday/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/101255/got-my-bitch-a-hardcore-amateur-gangbang-party-for-her-birthday/.gob.pe
Failed to download page from http://mylust.com/videos/101255/got-my-bitch-a-hardcore-amateur-gangbang-party-for-her-birthday/.gob.pe
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_mid&wr_id=59941&k=%EC%99%95%EC%A2%8C&page=1...
No valid text from http://torrentdn.com/bbs/s.php?bo_table=torrent_mid&wr_id=59941&k=%EC%99%95%EC%A2%8C&page=1
Downloading page http://noticias.uol.com.br/album/2013/07/15/naufragio-do-navio-costa-concordia.htm?fotoNavId=pr438feeac03d37c4428ca91188cf159f20150512...




Text saved to output/safe/noticias.uol.com.br_album_2013_07_15_naufragio-do-navio-costa-concordia.htm?fotoNavId=pr438feeac03d37c4428ca91188cf159f20150512.txt
Downloading page http://nesn.com/2015/05/patriots-sign-kevin-hughes-announce-kyle-arrington-fred-davis-moves/.gob.pe...




Error when dowwloading page http://nesn.com/2015/05/patriots-sign-kevin-hughes-announce-kyle-arrington-fred-davis-moves/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/05/patriots-sign-kevin-hughes-announce-kyle-arrington-fred-davis-moves/.gob.pe
Failed to download page from http://nesn.com/2015/05/patriots-sign-kevin-hughes-announce-kyle-arrington-fred-davis-moves/.gob.pe
Downloading page http://olx.ro/i2/electronice-si-electrocasnice/laptop-calculator/mouse-tastaturi/gtm.start...




Text saved to output/safe/olx.ro_i2_electronice-si-electrocasnice_laptop-calculator_mouse-tastaturi_gtm.start.txt
Downloading page http://stackoverflow.com/questions/1628563/move-the-most-recent-commits-to-a-new-branch-with-git.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_1628563_move-the-most-recent-commits-to-a-new-branch-with-git.gob.pe.txt
Downloading page http://nesn.com/2015/01/bruins-hurricanes-live-boston-ends-weekend-back-to-back-in-carolina/.gob.pe...




Error when dowwloading page http://nesn.com/2015/01/bruins-hurricanes-live-boston-ends-weekend-back-to-back-in-carolina/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/01/bruins-hurricanes-live-boston-ends-weekend-back-to-back-in-carolina/.gob.pe
Failed to download page from http://nesn.com/2015/01/bruins-hurricanes-live-boston-ends-weekend-back-to-back-in-carolina/.gob.pe
Downloading page http://uproxx.com/tv/2015/05/amy-schumer-makes-fun-of-all-those-male-geared-beer-commercials-with-a-new-parody-ad/.gob.pe...




Text saved to output/safe/uproxx.com_tv_2015_05_amy-schumer-makes-fun-of-all-those-male-geared-beer-commercials-with-a-new-parody-ad_.gob.pe.txt
Downloading page http://techcrunch.com/video/anthony-and-alex-chat-before-startup-alley-2/518805849/.gob.pe...




Error when dowwloading page http://techcrunch.com/video/anthony-and-alex-chat-before-startup-alley-2/518805849/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/video/anthony-and-alex-chat-before-startup-alley-2/518805849/.gob.pe
Failed to download page from http://techcrunch.com/video/anthony-and-alex-chat-before-startup-alley-2/518805849/.gob.pe
Downloading page http://codecanyon.net/search?date=this-month&length_max=&length_min=&price_max=&price_min=&rating_min=&sales=&sort=sales&term=&utf8=%E2%9C%93&view=list.gob.pe...




Text saved to output/safe/codecanyon.net_search?date=this-month&length_max=&length_min=&price_max=&price_min=&rating_min=&sales=&sort=sales&term=&utf8=%E2%9C%93&view=list.gob.pe.txt
Downloading page http://digg.com/video/short-claymation-reminds-us-we-all-have-a-cool-looking-skeleton-living-inside-us.gob.pe...




Error when dowwloading page http://digg.com/video/short-claymation-reminds-us-we-all-have-a-cool-looking-skeleton-living-inside-us.gob.pe: 404 Client Error: Not Found for url: https://digg.com/video/short-claymation-reminds-us-we-all-have-a-cool-looking-skeleton-living-inside-us.gob.pe
Failed to download page from http://digg.com/video/short-claymation-reminds-us-we-all-have-a-cool-looking-skeleton-living-inside-us.gob.pe
Downloading page http://aljazeera.net/news/cultureandart/2015/5/13/-%D9%83%D8%B1%D8%A7%D9%85%D8%A9-%D8%BA%D8%B2%D8%A9-%D9%85%D9%87%D8%B1%D8%AC%D8%A7%D9%86-%D8%B3%D9%8A%D9%86%D9%85%D8%A7%D8%A6%D9%8A-%D8%A8%D9%8A%D9%86-%D8%B1%D9%83%D8%A7%D9%85-%D8%A7%D9%84%D8%B4%D8%AC%D8%A7%D8%B9%D9%8A%D8%A9.gob.pe...




Error when dowwloading page http://aljazeera.net/news/cultureandart/2015/5/13/-%D9%83%D8%B1%D8%A7%D9%85%D8%A9-%D8%BA%D8%B2%D8%A9-%D9%85%D9%87%D8%B1%D8%AC%D8%A7%D9%86-%D8%B3%D9%8A%D9%86%D9%85%D8%A7%D8%A6%D9%8A-%D8%A8%D9%8A%D9%86-%D8%B1%D9%83%D8%A7%D9%85-%D8%A7%D9%84%D8%B4%D8%AC%D8%A7%D8%B9%D9%8A%D8%A9.gob.pe: 404 Client Error: Not Found for url: https://www.aljazeera.net/news/cultureandart/2015/5/13/-%D9%83%D8%B1%D8%A7%D9%85%D8%A9-%D8%BA%D8%B2%D8%A9-%D9%85%D9%87%D8%B1%D8%AC%D8%A7%D9%86-%D8%B3%D9%8A%D9%86%D9%85%D8%A7%D8%A6%D9%8A-%D8%A8%D9%8A%D9%86-%D8%B1%D9%83%D8%A7%D9%85
Failed to download page from http://aljazeera.net/news/cultureandart/2015/5/13/-%D9%83%D8%B1%D8%A7%D9%85%D8%A9-%D8%BA%D8%B2%D8%A9-%D9%85%D9%87%D8%B1%D8%AC%D8%A7%D9%86-%D8%B3%D9%8A%D9%86%D9%85%D8%A7%D8%A6%D9%8A-%D8%A8%D9%8A%D9%86-%D8%B1%D9%83%D8%A7%D9%85-%D8%A7%D9%84%D8%B4%D8%AC%D8%A7%D8%B9%D9%8A%D8%A9.gob.pe
Downloading page http://udn.com/news/story/7338/899035-%E7%B6%93%E6%BF%9F%EF%BC%8F%E7%82%BA%E6%88%BF%E5%9C%B0%E5%



Text saved to output/safe/www.gov.uk_government_organisations_department-for-communities-and-local-government.txt
Downloading page http://correios.com.br/para-voce/consultas-e-solicitacoes/precos-e-prazos/servicos-internacionais...




Text saved to output/safe/correios.com.br_para-voce_consultas-e-solicitacoes_precos-e-prazos_servicos-internacionais.txt
Downloading page http://allrecipes.com/Recipe/Slow-Cooker-Chicken-Taco-Soup/Detail.aspx?evt19=1&referringHubId=1...




Error when dowwloading page http://allrecipes.com/Recipe/Slow-Cooker-Chicken-Taco-Soup/Detail.aspx?evt19=1&referringHubId=1: HTTPSConnectionPool(host='allrecipes.com', port=443): Read timed out. (read timeout=10)
Failed to download page from http://allrecipes.com/Recipe/Slow-Cooker-Chicken-Taco-Soup/Detail.aspx?evt19=1&referringHubId=1
Downloading page http://udn.com/news/story/7314/899821-%E8%A1%8C%E5%8B%95%E7%99%BC%E8%B5%B7%E4%BA%BA%E6%B4%AA%E5%81%A5%E7%9B%9B%EF%BC%9A%E5%A4%9A%E7%82%BA%E8%80%81%E8%BE%B2%E6%83%B3%E4%B8%80%E6%83%B3%E5%90%A7%EF%BC%81.gob.pe...
No valid text from http://udn.com/news/story/7314/899821-%E8%A1%8C%E5%8B%95%E7%99%BC%E8%B5%B7%E4%BA%BA%E6%B4%AA%E5%81%A5%E7%9B%9B%EF%BC%9A%E5%A4%9A%E7%82%BA%E8%80%81%E8%BE%B2%E6%83%B3%E4%B8%80%E6%83%B3%E5%90%A7%EF%BC%81.gob.pe
Downloading page http://indianexpress.com/article/india/india-others/with-third-maoist-arrest-pune-comes-under-police-scanner/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/india/ind



No valid text from http://udn.com/news/story/6655/901378-%E6%96%87%E5%89%B5%E6%A1%88%EF%BC%8F%E8%AD%B0%E5%93%A1%EF%BC%9A%E6%9F%AF%E8%88%87%E8%AA%A0%E5%93%81%E8%AB%87%E7%9A%84%E6%A2%9D%E4%BB%B6-%E9%81%A0%E6%AF%94%E9%83%9D%E5%B7%AE.gob.pe
Downloading page http://www.nhs.uk/Conditions/stress-anxiety-depression/Pages/ways-relieve-stress.aspx...




Error when dowwloading page http://www.nhs.uk/Conditions/stress-anxiety-depression/Pages/ways-relieve-stress.aspx: 404 Client Error: Not Found for url: https://www.nhs.uk/Conditions/stress-anxiety-depression/Pages/ways-relieve-stress.aspx
Failed to download page from http://www.nhs.uk/Conditions/stress-anxiety-depression/Pages/ways-relieve-stress.aspx
Downloading page http://udn.com/news/story/7243/899793-%E6%88%BF%E5%9C%B0%E5%90%88%E4%B8%80%E7%A8%85%E6%8B%8D%E6%9D%BF-%E7%9F%AD%E6%9C%9F%E7%82%92%E6%88%BF%E5%BE%9E%E9%87%8D%E8%AA%B2%E7%A8%8545%EF%BC%85.gob.pe...
No valid text from http://udn.com/news/story/7243/899793-%E6%88%BF%E5%9C%B0%E5%90%88%E4%B8%80%E7%A8%85%E6%8B%8D%E6%9D%BF-%E7%9F%AD%E6%9C%9F%E7%82%92%E6%88%BF%E5%BE%9E%E9%87%8D%E8%AA%B2%E7%A8%8545%EF%BC%85.gob.pe
Downloading page https://medium.com/@thepva/this-actually-gives-me-insight-into-myself-i-would-never-consider-myself-an-artist-or-architect-721f9d2ca042?source=has-recommended.gob.pe...




Text saved to output/safe/medium.com_@thepva_this-actually-gives-me-insight-into-myself-i-would-never-consider-myself-an-artist-or-architect-721f9d2ca042?source=has-recommended.gob.pe.txt
Downloading page http://mylust.com/videos/159468/my-self-confident-wife-really-loves-missionary-position/.gob.pe...




Error when dowwloading page http://mylust.com/videos/159468/my-self-confident-wife-really-loves-missionary-position/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/159468/my-self-confident-wife-really-loves-missionary-position/.gob.pe
Failed to download page from http://mylust.com/videos/159468/my-self-confident-wife-really-loves-missionary-position/.gob.pe
Downloading page https://serverfault.com/questions/512828/virtual-interface-gets-static-ip-actual-interface-doesnt.gob.pe...




Text saved to output/safe/serverfault.com_questions_512828_virtual-interface-gets-static-ip-actual-interface-doesnt.gob.pe.txt
Downloading page http://tunein.com/radio/CBC-Radio-One-Calgary-1010-s31103/15480783/ca-pub-1542925551861702/Station.gob.pe...




Text saved to output/safe/tunein.com_radio_CBC-Radio-One-Calgary-1010-s31103_15480783_ca-pub-1542925551861702_Station.gob.pe.txt
Downloading page http://nguyentandung.org/chi-dao-dieu-hanh-cua-chinh-phu-thu-tuong-chinh-phu-noi-bat-tuan-304-75.html...




Text saved to output/safe/nguyentandung.org_chi-dao-dieu-hanh-cua-chinh-phu-thu-tuong-chinh-phu-noi-bat-tuan-304-75.html.txt
Downloading page http://cheezburger.com/70978305/pokemon-memes-minecraft-pokemon?ref=leftarrow&siteId=92.gob.pe...




Text saved to output/safe/cheezburger.com_70978305_pokemon-memes-minecraft-pokemon?ref=leftarrow&siteId=92.gob.pe.txt
Downloading page http://superuser.com/questions/479844/trust-bluetooth-4-0-dongle-and-bluetooth-headset.gob.pe...




Text saved to output/safe/superuser.com_questions_479844_trust-bluetooth-4-0-dongle-and-bluetooth-headset.gob.pe.txt
Downloading page http://atwiki.jp/wiki/%E5%A4%A7%E7%8E%8B%E3%81%A8%E6%A1%83%E7%90%83%E3%81%A8%E5%B9%BB%E6%83%B3%E5%85%A5%E3%82%8A.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E5%A4%A7%E7%8E%8B%E3%81%A8%E6%A1%83%E7%90%83%E3%81%A8%E5%B9%BB%E6%83%B3%E5%85%A5%E3%82%8A.gob.pe.txt
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%B4%A0%E4%BA%BA%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FWQbThyKniOi0Zfv5+%E3%82%BB%E3%83%83%E3%82%AF%E3%82%B9%E4%B8%AD%E6%AF%92%E5%A5%B3%E3%81%A8%EF%BC%93%EF%BC%B0+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%B4%A0%E4%BA%BA%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FWQbThyKniOi0Zfv5+%E3%82%BB%E3%83%83%E3%82%AF%E3%82%B9%E4%B8%AD%E6%AF%9.txt
Downloading page http://plarium.com/fr/jeux-de-strategie/stormfall-age-of-war/actualites/quetes-globales/.gob.pe...




Error when dowwloading page http://plarium.com/fr/jeux-de-strategie/stormfall-age-of-war/actualites/quetes-globales/.gob.pe: 404 Client Error: Not Found for url: https://plarium.com/fr/jeux-de-strategie/stormfall-age-of-war/actualites/quetes-globales/.gob.pe/
Failed to download page from http://plarium.com/fr/jeux-de-strategie/stormfall-age-of-war/actualites/quetes-globales/.gob.pe
Downloading page http://grantland.com/the-triangle/the-dead-ball-century-mlb-baseball-playoffs-john-thorn-mlb-historian-baseball-decline-articles/.gob.pe...




Error when dowwloading page http://grantland.com/the-triangle/the-dead-ball-century-mlb-baseball-playoffs-john-thorn-mlb-historian-baseball-decline-articles/.gob.pe: 404 Client Error: Not Found for url: https://grantland.com/the-triangle/the-dead-ball-century-mlb-baseball-playoffs-john-thorn-mlb-historian-baseball-decline-articles/.gob.pe
Failed to download page from http://grantland.com/the-triangle/the-dead-ball-century-mlb-baseball-playoffs-john-thorn-mlb-historian-baseball-decline-articles/.gob.pe
Downloading page http://nguyentandung.org/viet-nam-nuoc-dong-nam-a-dau-tien-co-tau-ngam-trang-bi-ten-lua-khung.html...




Text saved to output/safe/nguyentandung.org_viet-nam-nuoc-dong-nam-a-dau-tien-co-tau-ngam-trang-bi-ten-lua-khung.html.txt
Downloading page http://fanpage.gr/family/pedi/%cf%80%ce%b1%cf%84%ce%ad%cf%81%ce%b1%cf%82-%ce%ba%ce%b1%ce%b9-%ce%ba%cf%8c%cf%81%ce%b7-%ce%b7-%ce%b2%cf%81%ce%b1%ce%b2%ce%b5%cf%85%ce%bc%ce%ad%ce%bd%ce%b7-%ce%bc%ce%b5-%cf%8c%cf%83%ce%ba/.gob.pe...




Text saved to output/safe/fanpage.gr_family_pedi_%cf%80%ce%b1%cf%84%ce%ad%cf%81%ce%b1%cf%82-%ce%ba%ce%b1%ce%b9-%ce%ba%cf%8c%cf%81%ce%b7-%ce%b7-%ce%b2%cf%81%ce%b1%ce%b2%ce%b5%cf%85%ce%bc%ce%ad%ce%bd%ce%b7-%ce%bc%ce%b5-%cf%8c%cf.txt
Downloading page http://aljazeera.net/news/international/2015/5/13/%D8%B7%D9%87%D8%B1%D8%A7%D9%86-%D8%AA%D8%AD%D8%B0%D8%B1-%D9%85%D9%86-%D8%A7%D8%B9%D8%AA%D8%B1%D8%A7%D8%B6-%D8%B3%D9%81%D9%8A%D9%86%D8%A9-%D9%85%D8%AA%D8%AC%D9%87%D8%A9-%D9%84%D9%84%D9%8A%D9%85%D9%86.gob.pe...




Error when dowwloading page http://aljazeera.net/news/international/2015/5/13/%D8%B7%D9%87%D8%B1%D8%A7%D9%86-%D8%AA%D8%AD%D8%B0%D8%B1-%D9%85%D9%86-%D8%A7%D8%B9%D8%AA%D8%B1%D8%A7%D8%B6-%D8%B3%D9%81%D9%8A%D9%86%D8%A9-%D9%85%D8%AA%D8%AC%D9%87%D8%A9-%D9%84%D9%84%D9%8A%D9%85%D9%86.gob.pe: 404 Client Error: Not Found for url: https://www.aljazeera.net/news/international/2015/5/13/%D8%B7%D9%87%D8%B1%D8%A7%D9%86-%D8%AA%D8%AD%D8%B0%D8%B1-%D9%85%D9%86-%D8%A7%D8%B9%D8%AA%D8%B1%D8%A7%D8%B6-%D8%B3%D9%81%D9%8A%D9%86%D8%A9-%D9%85%D8%AA%D8%AC%D9%87%D8%A9
Failed to download page from http://aljazeera.net/news/international/2015/5/13/%D8%B7%D9%87%D8%B1%D8%A7%D9%86-%D8%AA%D8%AD%D8%B0%D8%B1-%D9%85%D9%86-%D8%A7%D8%B9%D8%AA%D8%B1%D8%A7%D8%B6-%D8%B3%D9%81%D9%8A%D9%86%D8%A9-%D9%85%D8%AA%D8%AC%D9%87%D8%A9-%D9%84%D9%84%D9%8A%D9%85%D9%86.gob.pe
Downloading page http://mic.com/articles/107468/what-google-image-search-reveals-about-our-cultural-stereotypes.gob.pe...




Error when dowwloading page http://mic.com/articles/107468/what-google-image-search-reveals-about-our-cultural-stereotypes.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/107468/what-google-image-search-reveals-about-our-cultural-stereotypes.gob.pe
Failed to download page from http://mic.com/articles/107468/what-google-image-search-reveals-about-our-cultural-stereotypes.gob.pe
Downloading page http://grantland.com/hollywood-prospectus/dont-shoot-hip-hop-posse-cut-ferguson-michael-brown/.gob.pe...




Error when dowwloading page http://grantland.com/hollywood-prospectus/dont-shoot-hip-hop-posse-cut-ferguson-michael-brown/.gob.pe: 404 Client Error: Not Found for url: https://grantland.com/hollywood-prospectus/dont-shoot-hip-hop-posse-cut-ferguson-michael-brown/.gob.pe
Failed to download page from http://grantland.com/hollywood-prospectus/dont-shoot-hip-hop-posse-cut-ferguson-michael-brown/.gob.pe
Downloading page http://correios.com.br/para-sua-empresa/marketing-direto/cases/recuperar-clientes/imagens/CaseExame_315.jpg...




Text saved to output/safe/correios.com.br_para-sua-empresa_marketing-direto_cases_recuperar-clientes_imagens_CaseExame_315.jpg.txt
Downloading page http://serverfault.com/questions/682947/squid-delay-pools-delay-parameters-bandwidth.gob.pe...




Text saved to output/safe/serverfault.com_questions_682947_squid-delay-pools-delay-parameters-bandwidth.gob.pe.txt
Downloading page http://nguyentandung.org/dien-van-cua-thu-tuong-nguyen-tan-dung-tai-le-ky-niem-ngay-304.html...




Text saved to output/safe/nguyentandung.org_dien-van-cua-thu-tuong-nguyen-tan-dung-tai-le-ky-niem-ngay-304.html.txt
Downloading page http://codepen.io/api/oembed?url=http%3A%2F%2Fcodepen.io%2Fnszp%2Fpen%2FMwYybM&format=json...




Text saved to output/safe/codepen.io_api_oembed?url=http%3A%2F%2F.txt
Downloading page http://mixi.jp/share.pl?u=http://alfalfalfa.com/articles/117914.html&k=e09afc106e473491952cfe324aa83aabe5b07446...




Text saved to output/safe/mixi.jp_share.pl?u=http:__alfalfalfa.com_articles_117914.html&k=e09afc106e473491952cfe324aa83aabe5b07446.txt
Downloading page http://indianexpress.com/article/india/india-others/another-key-witness-in-narayan-sai-case-attacked-in-haryana/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/india/india-others/another-key-witness-in-narayan-sai-case-attacked-in-haryana/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/india/india-others/another-key-witness-in-narayan-sai-case-attacked-in-haryana/.gob.pe
Failed to download page from http://indianexpress.com/article/india/india-others/another-key-witness-in-narayan-sai-case-attacked-in-haryana/.gob.pe
Downloading page http://1337x.to/torrent/1160956/Avengers-age-of-Ultron-2015-HQ-CAM-REMUX-XVID-AC3-MURD3R/.gob.pe...




Error when dowwloading page http://1337x.to/torrent/1160956/Avengers-age-of-Ultron-2015-HQ-CAM-REMUX-XVID-AC3-MURD3R/.gob.pe: 404 Client Error: Not Found for url: https://1337x.to/torrent/1160956/Avengers-age-of-Ultron-2015-HQ-CAM-REMUX-XVID-AC3-MURD3R/.gob.pe
Failed to download page from http://1337x.to/torrent/1160956/Avengers-age-of-Ultron-2015-HQ-CAM-REMUX-XVID-AC3-MURD3R/.gob.pe
Downloading page http://mirtesen.ru/url?e=simple_click&blog_post_id=43864238676&url=http%3A%2F%2Ftainyvselennoi.ru%2Fblog%2F43864238676%2FMarihuana-kak-lekarstvo...
No valid text from http://mirtesen.ru/url?e=simple_click&blog_post_id=43864238676&url=http%3A%2F%2Ftainyvselennoi.ru%2Fblog%2F43864238676%2FMarihuana-kak-lekarstvo
Downloading page http://nesn.com/2015/05/john-farrell-allen-craigs-demotion-to-triple-a-not-an-easy-decision-video/.gob.pe...




Error when dowwloading page http://nesn.com/2015/05/john-farrell-allen-craigs-demotion-to-triple-a-not-an-easy-decision-video/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/05/john-farrell-allen-craigs-demotion-to-triple-a-not-an-easy-decision-video/.gob.pe
Failed to download page from http://nesn.com/2015/05/john-farrell-allen-craigs-demotion-to-triple-a-not-an-easy-decision-video/.gob.pe
Downloading page http://variety.com/2015/tv/news/agents-of-shield-marvel-spinoff-abc-1201493296/?replytocom=1295780.gob.pe...




Text saved to output/safe/variety.com_2015_tv_news_agents-of-shield-marvel-spinoff-abc-1201493296_?replytocom=1295780.gob.pe.txt
Downloading page http://mediaset.it/la5/ditelavostra/risultati/33/citazioni-da-the-carrie-diaries.shtml?page=...




Text saved to output/safe/mediaset.it_la5_ditelavostra_risultati_33_citazioni-da-the-carrie-diaries.shtml?page=.txt
Downloading page http://noticias.uol.com.br/saude/ultimas-noticias/redacao/2015/02/25/moradores-de-marilia-sp-adotam-planta-para-combater-epidemia-de-dengue.htm...




Text saved to output/safe/noticias.uol.com.br_saude_ultimas-noticias_redacao_2015_02_25_moradores-de-marilia-sp-adotam-planta-para-combater-epidemia-de-dengue.htm.txt
Downloading page http://nguyentandung.org/ten-lua-diet-ham-viet-nam-danh-sach-dang-duoc-noi-dai.html...




Text saved to output/safe/nguyentandung.org_ten-lua-diet-ham-viet-nam-danh-sach-dang-duoc-noi-dai.html.txt
Downloading page http://kakaku.com/kaden/medical-equipment/ranking_2182/pricedown/div-gpt-ad-k/header_text.gob.pe...




Error when dowwloading page http://kakaku.com/kaden/medical-equipment/ranking_2182/pricedown/div-gpt-ad-k/header_text.gob.pe: 404 Client Error: Not Found. for url: https://kakaku.com/kaden/medical-equipment/ranking_2182/pricedown/div-gpt-ad-k/header_text.gob.pe
Failed to download page from http://kakaku.com/kaden/medical-equipment/ranking_2182/pricedown/div-gpt-ad-k/header_text.gob.pe
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90OL%E3%83%BB%E3%81%8A%E5%A7%89%E3%81%95%E3%82%93%E3%83%BB%E7%97%B4%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FmsDIXG5OTvoMP8ss+%E5%BD%BC%E6%B0%8F%E3%81%A8%E3%81%AEH%E3%81%AF%E3%83%8E%E3%83%BC%E3%83%9E%E3%83%AB%E3%81%A8%E5%BC%B7%E8%AA%BF%E3%81%99%E3%82%8B+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90OL%E3%83%BB%E3%81%8A%E5%A7%89%E3%81%95%E3%82%93%E3%83%BB%E7%97%B4%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FmsDIXG5OTvoMP8.txt
Downloading page http://alfalfalfa.com/tag/%E3%83%9D%E3%83%86%E3%83%88%E3%83%81%E3%83%83%E3%83%97%E3%82%B9.gob.pe...




Error when dowwloading page http://alfalfalfa.com/tag/%E3%83%9D%E3%83%86%E3%83%88%E3%83%81%E3%83%83%E3%83%97%E3%82%B9.gob.pe: 404 Client Error: Not Found for url: https://alfalfalfa.com/tag/%E3%83%9D%E3%83%86%E3%83%88%E3%83%81%E3%83%83%E3%83%97%E3%82%B9.gob.pe
Failed to download page from http://alfalfalfa.com/tag/%E3%83%9D%E3%83%86%E3%83%88%E3%83%81%E3%83%83%E3%83%97%E3%82%B9.gob.pe
Downloading page http://kickass.to/horriblesubs-shinmai-maou-no-testament-06-720p-mkv-t10208504.html...
Error when dowwloading page http://kickass.to/horriblesubs-shinmai-maou-no-testament-06-720p-mkv-t10208504.html: 403 Client Error: Forbidden for url: http://kickass.to/horriblesubs-shinmai-maou-no-testament-06-720p-mkv-t10208504.html
Failed to download page from http://kickass.to/horriblesubs-shinmai-maou-no-testament-06-720p-mkv-t10208504.html
Downloading page http://genius.com/2988035/Us-general-services-administration-gsa-mentor-protege-program-subpart-51970/Small-businesses-concerns-small-disadvantag



Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%A2%E3%83%8B%E3%83%A1%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FOy25qDsstQnUl3x0+devil+vegetation+teacher+4+%23ero+%23douga+%.txt
Downloading page http://torrentdn.com/bbs/s.php?bo_table=torrent_search&wr_id=4411&k=%EC%99%95%EC%A2%8C&page=1...
No valid text from http://torrentdn.com/bbs/s.php?bo_table=torrent_search&wr_id=4411&k=%EC%99%95%EC%A2%8C&page=1
Downloading page http://kickass.to/desktop-wallpapers-erotic-wallpaper-1050x1680-3744x5616-306-pcs-2015-jpg-t10628536.html...
Error when dowwloading page http://kickass.to/desktop-wallpapers-erotic-wallpaper-1050x1680-3744x5616-306-pcs-2015-jpg-t10628536.html: 403 Client Error: Forbidden for url: http://kickass.to/desktop-wallpapers-erotic-wallpaper-1050x1680-3744x5616-306-pcs-2015-jpg-t10628536.html
Failed to download page from http://kickass.to/desktop-wallpapers-erotic-wallpaper-1050x1680-3744x5616-306-pcs-2015-jpg-t10628536.html
Dow



Error when dowwloading page http://mic.com/articles/112488/10-celebrities-who-had-the-perfect-response-to-fat-shaming.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/112488/10-celebrities-who-had-the-perfect-response-to-fat-shaming.gob.pe
Failed to download page from http://mic.com/articles/112488/10-celebrities-who-had-the-perfect-response-to-fat-shaming.gob.pe
Downloading page http://themeforest.net/item/industrial-architects-engineers-html5-template/full_screen_preview/11063029.gob.pe...




Error when dowwloading page http://themeforest.net/item/industrial-architects-engineers-html5-template/full_screen_preview/11063029.gob.pe: 404 Client Error: Not Found for url: https://themeforest.net/item/industrial-architects-engineers-html5-template/full_screen_preview/11063029.gob.pe
Failed to download page from http://themeforest.net/item/industrial-architects-engineers-html5-template/full_screen_preview/11063029.gob.pe
Downloading page http://hdfcbank.com/personal/making-payments/fund-transfer/emonies-national-electronic-funds-transfer.gob.pe...




Error when dowwloading page http://hdfcbank.com/personal/making-payments/fund-transfer/emonies-national-electronic-funds-transfer.gob.pe: 404 Client Error: Not Found for url: https://www.hdfcbank.com/personal/making-payments/fund-transfer/emonies-national-electronic-funds-transfer.gob.pe
Failed to download page from http://hdfcbank.com/personal/making-payments/fund-transfer/emonies-national-electronic-funds-transfer.gob.pe
Downloading page http://cheezburger.com/8491583232/funny-sign-pic-kids-smoking?ref=leftarrow&siteId=1264.gob.pe...




Text saved to output/safe/cheezburger.com_8491583232_funny-sign-pic-kids-smoking?ref=leftarrow&siteId=1264.gob.pe.txt
Downloading page http://nguyentandung.org/vi-sao-trung-quoc-huy-truyen-hinh-truc-tiep-duyet-binh-nga-vao-phut-chot.html...




Text saved to output/safe/nguyentandung.org_vi-sao-trung-quoc-huy-truyen-hinh-truc-tiep-duyet-binh-nga-vao-phut-chot.html.txt
Downloading page http://superuser.com/questions/105933/windows-7-license-move-from-32bit-to-64bit-with-oem-key-with-lenovo.gob.pe...




Text saved to output/safe/superuser.com_questions_105933_windows-7-license-move-from-32bit-to-64bit-with-oem-key-with-lenovo.gob.pe.txt
Downloading page http://kenh14.vn/2-tek/mau-thiet-ke-iwatch-ket-hop-hai-hoa-thoi-trang-va-cong-nghe-201427224056694.chn...




Text saved to output/safe/kenh14.vn_2-tek_mau-thiet-ke-iwatch-ket-hop-hai-hoa-thoi-trang-va-cong-nghe-201427224056694.chn.txt
Downloading page http://depositphotos.com/login.html?url=%2F70116413%2Fstock-photo-art-concept-double-exposure-girl.html...




Text saved to output/safe/depositphotos.com_login.html?url=%2F70116413%2Fstock-photo-art-concept-double-exposure-girl.html.txt
Downloading page http://techcrunch.com/2015/05/13/cisco-drops-a-fraction-after-reporting-better-than-expected-fq3-revenue-of-12-14b/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/05/13/cisco-drops-a-fraction-after-reporting-better-than-expected-fq3-revenue-of-12-14b/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/05/13/cisco-drops-a-fraction-after-reporting-better-than-expected-fq3-revenue-of-12-14b/.gob.pe
Failed to download page from http://techcrunch.com/2015/05/13/cisco-drops-a-fraction-after-reporting-better-than-expected-fq3-revenue-of-12-14b/.gob.pe
Downloading page http://serverfault.com/questions/445390/some-nodes-in-a-cluster-has-higher-frame-value-in-ifconfig.gob.pe...




Text saved to output/safe/serverfault.com_questions_445390_some-nodes-in-a-cluster-has-higher-frame-value-in-ifconfig.gob.pe.txt
Downloading page http://variety.com/2015/tv/news/constantine-arrow-season-4-crossover-possibility-1201492462/?replytocom=1297969.gob.pe...




Text saved to output/safe/variety.com_2015_tv_news_constantine-arrow-season-4-crossover-possibility-1201492462_?replytocom=1297969.gob.pe.txt
Downloading page http://elitedaily.com/entertainment/celebrity/harrison-ford-reportedly-injured-in-single-plane-crash/957793/.gob.pe...




Error when dowwloading page http://elitedaily.com/entertainment/celebrity/harrison-ford-reportedly-injured-in-single-plane-crash/957793/.gob.pe: 404 Client Error: Not Found for url: https://www.elitedaily.com/entertainment/celebrity/harrison-ford-reportedly-injured-in-single-plane-crash/957793/.gob.pe
Failed to download page from http://elitedaily.com/entertainment/celebrity/harrison-ford-reportedly-injured-in-single-plane-crash/957793/.gob.pe
Downloading page http://indianexpress.com/article/good-news/watch-video-choose-beautiful-video-gives-new-doorway-to-beauty/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/good-news/watch-video-choose-beautiful-video-gives-new-doorway-to-beauty/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/good-news/watch-video-choose-beautiful-video-gives-new-doorway-to-beauty/.gob.pe
Failed to download page from http://indianexpress.com/article/good-news/watch-video-choose-beautiful-video-gives-new-doorway



Text saved to output/safe/variety.com_2015_film_news_broken-hollywood-the-bizs-top-players-call-out-ways-industry-needs-to-change-1201416866_2015_tv_news_gary-newman-network-tv-advertising-model-needs-to-evolve-on-digital-plat.txt
Downloading page http://nesn.com/2015/05/adam-schefter-source-predicts-tom-bradys-suspension-will-be-overturned/.gob.pe...




Error when dowwloading page http://nesn.com/2015/05/adam-schefter-source-predicts-tom-bradys-suspension-will-be-overturned/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/05/adam-schefter-source-predicts-tom-bradys-suspension-will-be-overturned/.gob.pe
Failed to download page from http://nesn.com/2015/05/adam-schefter-source-predicts-tom-bradys-suspension-will-be-overturned/.gob.pe
Downloading page https://prezi.com/cpwyp44vgckw/prezi-is-growing/?auth_key=3f6ec2e9cfd554f1d23b718b1462307266561a05.gob.pe...




Text saved to output/safe/prezi.com_cpwyp44vgckw_prezi-is-growing_?auth_key=3f6ec2e9cfd554f1d23b718b1462307266561a05.gob.pe.txt
Downloading page http://kenh14.vn/musik/khan-gia-ha-noi-xuc-dong-nghen-ngao-khi-tai-ngo-cung-khanh-ly-20140803090241278.chn...




Text saved to output/safe/kenh14.vn_musik_khan-gia-ha-noi-xuc-dong-nghen-ngao-khi-tai-ngo-cung-khanh-ly-20140803090241278.chn.txt
Downloading page http://couchtuner.eu.com/2014/01/the-big-bang-theory-s7-e13-the-occupation-recalibration.html...
Text saved to output/safe/couchtuner.eu.com_2014_01_the-big-bang-theory-s7-e13-the-occupation-recalibration.html.txt
Downloading page http://uproxx.com/music/2015/02/daft-punk-has-ditched-their-masks-for-a-new-sculpture-on-display-in-new-york/.gob.pe...




Text saved to output/safe/uproxx.com_music_2015_02_daft-punk-has-ditched-their-masks-for-a-new-sculpture-on-display-in-new-york_.gob.pe.txt
Downloading page http://olx.ua/uk/account/?origin=observepopup&ref%5B0%5D%5Baction%5D=ads&ref%5B0%5D%5Bmethod%5D=index&ref%5B0%5D%5Bparams%5D%5Bq%5D=%D0%B1%D0%B0%D0%BB%D0%B5%D1%82%D0%BA%D0%B8.gob.pe...




Text saved to output/safe/olx.ua_uk_account_?origin=observepopup&ref%5B0%5D%5Baction%5D=ads&ref%5B0%5D%5Bmethod%5D=index&ref%5B0%5D%5Bparams%5D%5Bq%5D=%D0%B1%D0%B0%D0%BB%D0%B5%D1%82%D0%BA%D0%B8.gob.pe.txt
Downloading page http://kickass.to/ludacris-ludaversal-deluxe-explicit-2015-mp3-320-kbps-vbuc-t10424711.html...
Error when dowwloading page http://kickass.to/ludacris-ludaversal-deluxe-explicit-2015-mp3-320-kbps-vbuc-t10424711.html: 403 Client Error: Forbidden for url: http://kickass.to/ludacris-ludaversal-deluxe-explicit-2015-mp3-320-kbps-vbuc-t10424711.html
Failed to download page from http://kickass.to/ludacris-ludaversal-deluxe-explicit-2015-mp3-320-kbps-vbuc-t10424711.html
Downloading page http://pikabu.ru/tag/%D1%80%D0%B0%D0%B7%D0%BE%D0%B1%D0%BB%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5/hot.gob.pe...




Error when dowwloading page http://pikabu.ru/tag/%D1%80%D0%B0%D0%B7%D0%BE%D0%B1%D0%BB%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5/hot.gob.pe: 502 Server Error: Bad Gateway for url: https://pikabu.ru/tag/%D1%80%D0%B0%D0%B7%D0%BE%D0%B1%D0%BB%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5/hot.gob.pe
Failed to download page from http://pikabu.ru/tag/%D1%80%D0%B0%D0%B7%D0%BE%D0%B1%D0%BB%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5/hot.gob.pe
Downloading page http://codecanyon.net/item/superfluous-word-game-with-admob-and-leaderboard/11407859.gob.pe...




Error when dowwloading page http://codecanyon.net/item/superfluous-word-game-with-admob-and-leaderboard/11407859.gob.pe: 404 Client Error: Not Found for url: https://codecanyon.net/item/superfluous-word-game-with-admob-and-leaderboard/11407859.gob.pe
Failed to download page from http://codecanyon.net/item/superfluous-word-game-with-admob-and-leaderboard/11407859.gob.pe
Downloading page https://soundcloud.com/jamaicandancehall/gyptian-feat-nicki-minaj-hold-yuh-remix-fme-humbless-21st.gob.pe...




Error when dowwloading page https://soundcloud.com/jamaicandancehall/gyptian-feat-nicki-minaj-hold-yuh-remix-fme-humbless-21st.gob.pe: 404 Client Error: Not Found for url: https://soundcloud.com/jamaicandancehall/gyptian-feat-nicki-minaj-hold-yuh-remix-fme-humbless-21st.gob.pe
Failed to download page from https://soundcloud.com/jamaicandancehall/gyptian-feat-nicki-minaj-hold-yuh-remix-fme-humbless-21st.gob.pe
Downloading page http://uproxx.com/webculture/2015/03/meet-the-octopus-who-stole-a-filmmakers-camera-and-took-his-picture/.gob.pe...




Text saved to output/safe/uproxx.com_webculture_2015_03_meet-the-octopus-who-stole-a-filmmakers-camera-and-took-his-picture_.gob.pe.txt
Downloading page http://techcrunch.com/2014/12/03/prenetics-raises-2-65m-to-bring-safe-accurate-dna-based-prenatal-testing-to-asia/.gob.pe...




Error when dowwloading page http://techcrunch.com/2014/12/03/prenetics-raises-2-65m-to-bring-safe-accurate-dna-based-prenatal-testing-to-asia/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2014/12/03/prenetics-raises-2-65m-to-bring-safe-accurate-dna-based-prenatal-testing-to-asia/.gob.pe
Failed to download page from http://techcrunch.com/2014/12/03/prenetics-raises-2-65m-to-bring-safe-accurate-dna-based-prenatal-testing-to-asia/.gob.pe
Downloading page http://nesn.com/2015/05/confused-fan-angry-at-celtics-isaiah-thomas-for-owning-new-york-liberty/.gob.pe...




Error when dowwloading page http://nesn.com/2015/05/confused-fan-angry-at-celtics-isaiah-thomas-for-owning-new-york-liberty/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/05/confused-fan-angry-at-celtics-isaiah-thomas-for-owning-new-york-liberty/.gob.pe
Failed to download page from http://nesn.com/2015/05/confused-fan-angry-at-celtics-isaiah-thomas-for-owning-new-york-liberty/.gob.pe
Downloading page http://kotaku.com/previously-the-ceo-of-japanese-game-company-imageepoch-1704135995.gob.pe...




Error when dowwloading page http://kotaku.com/previously-the-ceo-of-japanese-game-company-imageepoch-1704135995.gob.pe: 404 Client Error: Not Found for url: https://kotaku.com/previously-the-ceo-of-japanese-game-company-imageepoch-1704135995.gob.pe
Failed to download page from http://kotaku.com/previously-the-ceo-of-japanese-game-company-imageepoch-1704135995.gob.pe
Downloading page http://serverfault.com/questions/691568/how-often-do-you-restart-a-heavily-utilized-windows-server-2008r2-remote-desktop.gob.pe...




Text saved to output/safe/serverfault.com_questions_691568_how-often-do-you-restart-a-heavily-utilized-windows-server-2008r2-remote-desktop.gob.pe.txt
Downloading page http://pikabu.ru/tag/%D0%A0%D0%B5%D0%B0%D0%BB%20%D0%9C%D0%B0%D0%B4%D1%80%D0%B8%D0%B4/hot.gob.pe...




Error when dowwloading page http://pikabu.ru/tag/%D0%A0%D0%B5%D0%B0%D0%BB%20%D0%9C%D0%B0%D0%B4%D1%80%D0%B8%D0%B4/hot.gob.pe: 502 Server Error: Bad Gateway for url: https://pikabu.ru/tag/%D0%A0%D0%B5%D0%B0%D0%BB%20%D0%9C%D0%B0%D0%B4%D1%80%D0%B8%D0%B4/hot.gob.pe
Failed to download page from http://pikabu.ru/tag/%D0%A0%D0%B5%D0%B0%D0%BB%20%D0%9C%D0%B0%D0%B4%D1%80%D0%B8%D0%B4/hot.gob.pe
Downloading page http://pikabu.ru/tag/%D0%91%D0%B0%D1%85%D1%8B%D1%82%20%D0%A1%D1%83%D0%BB%D1%82%D0%B0%D0%BD%D0%BE%D0%B2/hot.gob.pe...




Error when dowwloading page http://pikabu.ru/tag/%D0%91%D0%B0%D1%85%D1%8B%D1%82%20%D0%A1%D1%83%D0%BB%D1%82%D0%B0%D0%BD%D0%BE%D0%B2/hot.gob.pe: 502 Server Error: Bad Gateway for url: https://pikabu.ru/tag/%D0%91%D0%B0%D1%85%D1%8B%D1%82%20%D0%A1%D1%83%D0%BB%D1%82%D0%B0%D0%BD%D0%BE%D0%B2/hot.gob.pe
Failed to download page from http://pikabu.ru/tag/%D0%91%D0%B0%D1%85%D1%8B%D1%82%20%D0%A1%D1%83%D0%BB%D1%82%D0%B0%D0%BD%D0%BE%D0%B2/hot.gob.pe
Downloading page http://slashdot.org/submission/4259789/court-overturns-dutch-data-retention-law-privacy-more-important.gob.pe...




Text saved to output/safe/slashdot.org_submission_4259789_court-overturns-dutch-data-retention-law-privacy-more-important.gob.pe.txt
Downloading page http://mic.com/articles/117554/the-big-problem-with-comparing-america-s-racist-police-to-police-in-europe.gob.pe...




Error when dowwloading page http://mic.com/articles/117554/the-big-problem-with-comparing-america-s-racist-police-to-police-in-europe.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/117554/the-big-problem-with-comparing-america-s-racist-police-to-police-in-europe.gob.pe
Failed to download page from http://mic.com/articles/117554/the-big-problem-with-comparing-america-s-racist-police-to-police-in-europe.gob.pe
Downloading page http://askubuntu.com/questions/591832/how-to-play-music-when-app-is-not-the-active-app.gob.pe...




Text saved to output/safe/askubuntu.com_questions_591832_how-to-play-music-when-app-is-not-the-active-app.gob.pe.txt
Downloading page http://comicbook.com/2015/05/14/video-tyler-perrys-baxter-stockman-films-tmnt-2-scenes-with-mega/.gob.pe...




Error when dowwloading page http://comicbook.com/2015/05/14/video-tyler-perrys-baxter-stockman-films-tmnt-2-scenes-with-mega/.gob.pe: 404 Client Error: Not Found for url: https://comicbook.com/2015/05/14/video-tyler-perrys-baxter-stockman-films-tmnt-2-scenes-with-mega/.gob.pe
Failed to download page from http://comicbook.com/2015/05/14/video-tyler-perrys-baxter-stockman-films-tmnt-2-scenes-with-mega/.gob.pe
Downloading page http://stackexchange.com/work-here/82/developer-marketing-lead-developer-evangelist.gob.pe...




Text saved to output/safe/stackexchange.com_work-here_82_developer-marketing-lead-developer-evangelist.gob.pe.txt
Downloading page http://mirtesen.ru/url?e=simple_click&blog_post_id=43747767183&url=http%3A%2F%2Fdooralei.ru%2Fblog%2F43747767183%2FSotsiologi%3A-Bolshinstvo-kryimchan-dovolnyi-zhiznyu-v-Rossii...
No valid text from http://mirtesen.ru/url?e=simple_click&blog_post_id=43747767183&url=http%3A%2F%2Fdooralei.ru%2Fblog%2F43747767183%2FSotsiologi%3A-Bolshinstvo-kryimchan-dovolnyi-zhiznyu-v-Rossii
Downloading page https://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%A2%E3%83%8B%E3%83%A1%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FVl93yd4t27PAvNaW+%E6%80%A7%E3%81%AE%E8%99%9C%E3%81%AB%E3%81%AA%E3%82%8B%E5%A5%B3%E3%81%9F%E3%81%A16+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%A2%E3%83%8B%E3%83%A1%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FVl93yd4t27PAvNaW+%E6%80%A7%E3%81%AE%E8%99%9C%E3%81%AB%E3%81%A.txt
Downloading page http://variety.com/2015/film/news/broken-hollywood-the-bizs-top-players-call-out-ways-industry-needs-to-change-1201416866/2015/biz/news/peter-chernin-new-businesses-must-be-created-to-keep-the-industry-vibrant-1201416775/.gob.pe...




Text saved to output/safe/variety.com_2015_film_news_broken-hollywood-the-bizs-top-players-call-out-ways-industry-needs-to-change-1201416866_2015_biz_news_peter-chernin-new-businesses-must-be-created-to-keep-the-industry-vibra.txt
Downloading page http://kienthuc.net.vn/tham-my-spa/ba-hillary-duoc-khuyen-mac-vay-neu-muon-tro-thanh-tong-thong-477797.html...




Text saved to output/safe/kienthuc.net.vn_tham-my-spa_ba-hillary-duoc-khuyen-mac-vay-neu-muon-tro-thanh-tong-thong-477797.html.txt
Downloading page http://worldoftanks.ru/ru/content/guide/payments_instruction/terminals-osmp-qiwi-belarus/.gob.pe...




Text saved to output/safe/worldoftanks.ru_ru_content_guide_payments_instruction_terminals-osmp-qiwi-belarus_.gob.pe.txt
Downloading page http://indianexpress.com/article/technology/technology-others/windows-10-is-the-last-version-of-windows-os-microsoft/.gob.pe...
Error when dowwloading page http://indianexpress.com/article/technology/technology-others/windows-10-is-the-last-version-of-windows-os-microsoft/.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/technology/technology-others/windows-10-is-the-last-version-of-windows-os-microsoft/.gob.pe
Failed to download page from http://indianexpress.com/article/technology/technology-others/windows-10-is-the-last-version-of-windows-os-microsoft/.gob.pe
Downloading page http://mylust.com/videos/231559/suspended-hotties-get-face-fucked-by-a-guy-in-hot-bdsm-clip/.gob.pe...




Error when dowwloading page http://mylust.com/videos/231559/suspended-hotties-get-face-fucked-by-a-guy-in-hot-bdsm-clip/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/231559/suspended-hotties-get-face-fucked-by-a-guy-in-hot-bdsm-clip/.gob.pe
Failed to download page from http://mylust.com/videos/231559/suspended-hotties-get-face-fucked-by-a-guy-in-hot-bdsm-clip/.gob.pe
Downloading page http://mylust.com/videos/236038/captivating-brunette-just-loves-masturbating-on-cam/.gob.pe...




Error when dowwloading page http://mylust.com/videos/236038/captivating-brunette-just-loves-masturbating-on-cam/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/236038/captivating-brunette-just-loves-masturbating-on-cam/.gob.pe
Failed to download page from http://mylust.com/videos/236038/captivating-brunette-just-loves-masturbating-on-cam/.gob.pe
Downloading page http://udn.com/news/story/7091/747881-Alcatel-idol-3-%E6%AD%A3%E6%8B%BF%E3%80%81%E5%8F%8D%E6%8B%BF%E9%83%BD%E8%83%BD%E8%AC%9B%E9%9B%BB%E8%A9%B1.gob.pe...
No valid text from http://udn.com/news/story/7091/747881-Alcatel-idol-3-%E6%AD%A3%E6%8B%BF%E3%80%81%E5%8F%8D%E6%8B%BF%E9%83%BD%E8%83%BD%E8%AC%9B%E9%9B%BB%E8%A9%B1.gob.pe
Downloading page http://mic.com/articles/111430/a-powerful-new-banksy-video-shows-a-side-of-gaza-the-press-can-t.gob.pe...




Error when dowwloading page http://mic.com/articles/111430/a-powerful-new-banksy-video-shows-a-side-of-gaza-the-press-can-t.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/111430/a-powerful-new-banksy-video-shows-a-side-of-gaza-the-press-can-t.gob.pe
Failed to download page from http://mic.com/articles/111430/a-powerful-new-banksy-video-shows-a-side-of-gaza-the-press-can-t.gob.pe
Downloading page https://twitter.com/share?url=http%3A%2F%2Fhubpages.com%2Fhub%2Fbetter-looking-characters&text=How+to+Create+Beautiful+Characters+in+Skyrim...




Text saved to output/safe/twitter.com_share?url=http%3A%2F%2Fhubpages.com%2Fhub%2Fbetter-looking-characters&text=How+to+Create+Beautiful+Characters+in+Skyrim.txt
Downloading page http://udn.com/news/story/7253/900690-Q1%E6%AF%8F%E8%82%A1%E7%B4%94%E7%9B%8A%EF%BC%8F%E8%87%B4%E6%96%B01.31%E5%85%83-17%E5%AD%A3%E4%BD%8E%E9%BB%9E...
No valid text from http://udn.com/news/story/7253/900690-Q1%E6%AF%8F%E8%82%A1%E7%B4%94%E7%9B%8A%EF%BC%8F%E8%87%B4%E6%96%B01.31%E5%85%83-17%E5%AD%A3%E4%BD%8E%E9%BB%9E
Downloading page http://qz.com/391283/if-opec-dead-how-is-saudi-arabia-still-calling-the-shots-in-the-oil-market/.gob.pe...




Error when dowwloading page http://qz.com/391283/if-opec-dead-how-is-saudi-arabia-still-calling-the-shots-in-the-oil-market/.gob.pe: 404 Client Error: Not Found for url: https://qz.com/391283/if-opec-dead-how-is-saudi-arabia-still-calling-the-shots-in-the-oil-market/.gob.pe
Failed to download page from http://qz.com/391283/if-opec-dead-how-is-saudi-arabia-still-calling-the-shots-in-the-oil-market/.gob.pe
Downloading page http://ink361.com/app/users/ig-1749497/dleeezy/photos/ig-984242467048409364_1749497.gob.pe...




Error when dowwloading page http://ink361.com/app/users/ig-1749497/dleeezy/photos/ig-984242467048409364_1749497.gob.pe: HTTPSConnectionPool(host='ink361.com', port=443): Read timed out. (read timeout=10)
Failed to download page from http://ink361.com/app/users/ig-1749497/dleeezy/photos/ig-984242467048409364_1749497.gob.pe
Downloading page http://atwiki.jp/wiki/%E3%82%AC%E3%83%B3%E3%83%80%E3%83%A0%E3%83%90%E3%83%88%E3%83%AB%E3%82%AA%E3%83%9A%E3%83%AC%E3%83%BC%E3%82%B7%E3%83%A7%E3%83%B3%E6%8C%87%E5%88%87%E3%82%8A%E3%81%A8%E3%81%AF.gob.pe...




Text saved to output/safe/atwiki.jp_wiki_%E3%82%AC%E3%83%B3%E3%83%80%E3%83%A0%E3%83%90%E3%83%88%E3%83%AB%E3%82%AA%E3%83%9A%E3%83%AC%E3%83%BC%E3%82%B7%E3%83%A7%E3%83%B3%E6%8C%87%E5%88%87%E3%82%8A%E3%81%A8%E3%81%AF.gob.pe.txt
Downloading page http://abcnews.go.com/International/swedish-groups-approach-stop-russian-submarines-entering-swedish/story?id=31002277...




Text saved to output/safe/abcnews.go.com_International_swedish-groups-approach-stop-russian-submarines-entering-swedish_story?id=31002277.txt
Downloading page http://variety.com/2015/film/news/cate-blanchett-lesbian-carol-cannes-todd-haynes-women-in-hollywood-1201492632/?replytocom=1297491.gob.pe...




Text saved to output/safe/variety.com_2015_film_news_cate-blanchett-lesbian-carol-cannes-todd-haynes-women-in-hollywood-1201492632_?replytocom=1297491.gob.pe.txt
Downloading page http://tunein.com/radio/Vanakkam-FM-1027-s162374/15480783/ca-pub-1542925551861702/Station.gob.pe...




Text saved to output/safe/tunein.com_radio_Vanakkam-FM-1027-s162374_15480783_ca-pub-1542925551861702_Station.gob.pe.txt
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FC18SM5u5Gxb3vPXB+%E3%82%A2%E3%83%8B%E3%82%B3%E3%82%B9%E3%81%A7%E3%83%8F%E3%83%A1%E6%92%AE%E3%82%8A+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%B3%E3%82%B9%E3%83%97%E3%83%AC%E3%83%BB%E5%88%B6%E6%9C%8D%E7%B3%BB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2FC18SM5u5Gxb3vPXB.txt
Downloading page http://1337x.to/torrent/1160220/Marvels-Agents-of-S-H-I-E-L-D-S02E21-E22-HDTV-XviD-FUM-ettv/.gob.pe...




Error when dowwloading page http://1337x.to/torrent/1160220/Marvels-Agents-of-S-H-I-E-L-D-S02E21-E22-HDTV-XviD-FUM-ettv/.gob.pe: 404 Client Error: Not Found for url: https://1337x.to/torrent/1160220/Marvels-Agents-of-S-H-I-E-L-D-S02E21-E22-HDTV-XviD-FUM-ettv/.gob.pe
Failed to download page from http://1337x.to/torrent/1160220/Marvels-Agents-of-S-H-I-E-L-D-S02E21-E22-HDTV-XviD-FUM-ettv/.gob.pe
Downloading page http://patch.com/new-jersey/brick/njsiaa-wrestling-seven-brick-reach-medal-round-0/.gob.pe...




Error when dowwloading page http://patch.com/new-jersey/brick/njsiaa-wrestling-seven-brick-reach-medal-round-0/.gob.pe: 404 Client Error: Not Found for url: https://patch.com/new-jersey/brick/njsiaa-wrestling-seven-brick-reach-medal-round-0/.gob.pe
Failed to download page from http://patch.com/new-jersey/brick/njsiaa-wrestling-seven-brick-reach-medal-round-0/.gob.pe
Downloading page http://auto.ru/cars/bmw/5er/all/?search%5Bstate%5D=1&search%5Bperiod%5D=0&search%5Bcustom%5D=1&search%5Bsection_id%5D=0&search%5Bmark%5D%5B0%5D=30&search%5Bmark-folder%5D%5B0%5D=30-384_4445.gob.pe...




Text saved to output/safe/auto.ru_cars_bmw_5er_all_?search%5Bstate%5D=1&search%5Bperiod%5D=0&search%5Bcustom%5D=1&search%5Bsection_id%5D=0&search%5Bmark%5D%5B0%5D=30&search%5Bmark-folder%5D%5B0%5D=30-384_4445.gob.pe.txt
Downloading page http://stackoverflow.com/questions/8643354/cannot-load-image-referenced-from-a-nib-in-the-bundle.gob.pe...




Text saved to output/safe/stackoverflow.com_questions_8643354_cannot-load-image-referenced-from-a-nib-in-the-bundle.gob.pe.txt
Downloading page http://correios.com.br/para-sua-empresa/servicos-para-o-seu-contrato/precos-e-prazos/prazo-de-guarda-de-objetos-nacionais...




Text saved to output/safe/correios.com.br_para-sua-empresa_servicos-para-o-seu-contrato_precos-e-prazos_prazo-de-guarda-de-objetos-nacionais.txt
Downloading page http://techcrunch.com/2015/05/12/godaddy-slips-2-9-after-reporting-q1-revenue-of-376-3m-strong-q2-top-line-projection/.gob.pe...




Error when dowwloading page http://techcrunch.com/2015/05/12/godaddy-slips-2-9-after-reporting-q1-revenue-of-376-3m-strong-q2-top-line-projection/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/2015/05/12/godaddy-slips-2-9-after-reporting-q1-revenue-of-376-3m-strong-q2-top-line-projection/.gob.pe
Failed to download page from http://techcrunch.com/2015/05/12/godaddy-slips-2-9-after-reporting-q1-revenue-of-376-3m-strong-q2-top-line-projection/.gob.pe
Downloading page http://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F7ZQ02VlAguXiQH7v+%E5%A5%B3%E5%AD%90%E6%A0%A1%E7%94%9F+%E9%9B%BB%E3%83%9E%E3%81%A7%E7%84%A6%E3%82%89%E3%81%97%E3%81%A6+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E7%BE%8E%E5%B0%91%E5%A5%B3%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2F7ZQ02VlAguXiQH7v+%E5%A5%B3%E5%AD%90%E6%A0%A1%E7%94%9F+%E9%9B%.txt
Downloading page http://superuser.com/questions/862530/3-pin-cpu-cooler-on-socket-1150-has-allways-only-constant-fixed-fan-speed.gob.pe...




Text saved to output/safe/superuser.com_questions_862530_3-pin-cpu-cooler-on-socket-1150-has-allways-only-constant-fixed-fan-speed.gob.pe.txt
Downloading page http://mylust.com/videos/110517/my-mind-blowing-french-milf-with-awesome-body-masturbates/.gob.pe...




Error when dowwloading page http://mylust.com/videos/110517/my-mind-blowing-french-milf-with-awesome-body-masturbates/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/110517/my-mind-blowing-french-milf-with-awesome-body-masturbates/.gob.pe
Failed to download page from http://mylust.com/videos/110517/my-mind-blowing-french-milf-with-awesome-body-masturbates/.gob.pe
Downloading page http://askubuntu.com/questions/607267/capturing-metadata-off-music-cd-during-file-transfer.gob.pe...




Text saved to output/safe/askubuntu.com_questions_607267_capturing-metadata-off-music-cd-during-file-transfer.gob.pe.txt
Downloading page http://sprint.com/business/solutions/fleet_and_asset_management/local_fleet/index.html...




Error when dowwloading page http://sprint.com/business/solutions/fleet_and_asset_management/local_fleet/index.html: 403 Client Error: Forbidden for url: https://www.sprint.com
Failed to download page from http://sprint.com/business/solutions/fleet_and_asset_management/local_fleet/index.html
Downloading page http://mylust.com/videos/231550/nerdy-brunette-kristine-gets-beaten-and-face-fucked-in-bdsm-clip/.gob.pe...
Error when dowwloading page http://mylust.com/videos/231550/nerdy-brunette-kristine-gets-beaten-and-face-fucked-in-bdsm-clip/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/231550/nerdy-brunette-kristine-gets-beaten-and-face-fucked-in-bdsm-clip/.gob.pe
Failed to download page from http://mylust.com/videos/231550/nerdy-brunette-kristine-gets-beaten-and-face-fucked-in-bdsm-clip/.gob.pe
Downloading page http://udn.com/news/story/7238/901282-%E9%87%91%E7%AE%A1%E6%9C%83%E5%87%BA%E6%8B%9B-%E6%94%B9%E9%9D%A9%E4%B8%8D%E5%81%9C%E6%AD%87.gob.pe...




No valid text from http://udn.com/news/story/7238/901282-%E9%87%91%E7%AE%A1%E6%9C%83%E5%87%BA%E6%8B%9B-%E6%94%B9%E9%9D%A9%E4%B8%8D%E5%81%9C%E6%AD%87.gob.pe
Downloading page http://himado.in/?keyword=%E3%83%AF%E3%83%BC%E3%83%AB%E3%83%89%E3%83%88%E3%83%AA%E3%82%AC%E3%83%BC.gob.pe...
Text saved to output/safe/himado.in_?keyword=%E3%83%AF%E3%83%BC%E3%83%AB%E3%83%89%E3%83%88%E3%83%AA%E3%82%AC%E3%83%BC.gob.pe.txt
Downloading page http://nesn.com/2015/05/red-sox-wrap-pablo-sandovals-11th-inning-homer-seals-5-4-win-over-as/.gob.pe...




Error when dowwloading page http://nesn.com/2015/05/red-sox-wrap-pablo-sandovals-11th-inning-homer-seals-5-4-win-over-as/.gob.pe: 404 Client Error: Not Found for url: https://nesn.com/2015/05/red-sox-wrap-pablo-sandovals-11th-inning-homer-seals-5-4-win-over-as/.gob.pe
Failed to download page from http://nesn.com/2015/05/red-sox-wrap-pablo-sandovals-11th-inning-homer-seals-5-4-win-over-as/.gob.pe
Downloading page http://1337x.to/torrent/1160171/X-The-Man-with-the-X-Ray-Eyes-1963-1080p-BrRip-x264-YIFY/.gob.pe...




Error when dowwloading page http://1337x.to/torrent/1160171/X-The-Man-with-the-X-Ray-Eyes-1963-1080p-BrRip-x264-YIFY/.gob.pe: 404 Client Error: Not Found for url: https://1337x.to/torrent/1160171/X-The-Man-with-the-X-Ray-Eyes-1963-1080p-BrRip-x264-YIFY/.gob.pe
Failed to download page from http://1337x.to/torrent/1160171/X-The-Man-with-the-X-Ray-Eyes-1963-1080p-BrRip-x264-YIFY/.gob.pe
Downloading page http://nguyentandung.org/tu-tuong-dao-duc-ho-chi-minh-mang-tam-voc-vi-nhan-thoi-dai.html...




Text saved to output/safe/nguyentandung.org_tu-tuong-dao-duc-ho-chi-minh-mang-tam-voc-vi-nhan-thoi-dai.html.txt
Downloading page http://thechive.com/2015/05/12/snuggle-up-with-some-sideboob-this-morning-40-photos/.gob.pe...




Error when dowwloading page http://thechive.com/2015/05/12/snuggle-up-with-some-sideboob-this-morning-40-photos/.gob.pe: 404 Client Error: Not Found for url: https://thechive.com/2015/05/12/snuggle-up-with-some-sideboob-this-morning-40-photos/.gob.pe
Failed to download page from http://thechive.com/2015/05/12/snuggle-up-with-some-sideboob-this-morning-40-photos/.gob.pe
Downloading page http://askubuntu.com/questions/596767/how-to-get-rhythmbox-playlist-to-play-correctly-via-rockbox-on-sansa-clip-plus.gob.pe...




Text saved to output/safe/askubuntu.com_questions_596767_how-to-get-rhythmbox-playlist-to-play-correctly-via-rockbox-on-sansa-clip-plus.gob.pe.txt
Downloading page https://twitter.com/home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%AE%E3%83%A3%E3%83%AB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2Fma7ecQEkBDRLP8ds+%E3%82%AE%E3%83%A3%E3%83%AB%E3%83%BB%E9%BB%92%E3%82%AE%E3%83%A3%E3%83%AB36+%23ero+%23douga+%23agesage...




Text saved to output/safe/twitter.com_home?status=%E3%83%8C%E3%81%91%E3%82%8B%EF%BC%81%E3%80%90%E3%82%AE%E3%83%A3%E3%83%AB%E3%80%91+http%3A%2F%2Fero-video.net%2Ft%2Fma7ecQEkBDRLP8ds+%E3%82%AE%E3%83%A3%E3%83%AB%E3%83%BB%E9%BB%9.txt
Downloading page http://mic.com/articles/117386/something-truly-unfunny-happened-when-you-googled-the-word-joke.gob.pe...




Error when dowwloading page http://mic.com/articles/117386/something-truly-unfunny-happened-when-you-googled-the-word-joke.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/117386/something-truly-unfunny-happened-when-you-googled-the-word-joke.gob.pe
Failed to download page from http://mic.com/articles/117386/something-truly-unfunny-happened-when-you-googled-the-word-joke.gob.pe
Downloading page http://mic.com/articles/106784/6-important-truths-that-reveal-what-asexuality-is-really-about.gob.pe...




Error when dowwloading page http://mic.com/articles/106784/6-important-truths-that-reveal-what-asexuality-is-really-about.gob.pe: 404 Client Error: Not Found for url: https://www.mic.com/articles/106784/6-important-truths-that-reveal-what-asexuality-is-really-about.gob.pe
Failed to download page from http://mic.com/articles/106784/6-important-truths-that-reveal-what-asexuality-is-really-about.gob.pe
Downloading page http://mylust.com/videos/37716/indian-neighbor-girl-washes-outdoors-as-i-film-her-on-spy-cam/.gob.pe...




Error when dowwloading page http://mylust.com/videos/37716/indian-neighbor-girl-washes-outdoors-as-i-film-her-on-spy-cam/.gob.pe: 404 Client Error: Not Found for url: https://mylust.com/videos/37716/indian-neighbor-girl-washes-outdoors-as-i-film-her-on-spy-cam/.gob.pe
Failed to download page from http://mylust.com/videos/37716/indian-neighbor-girl-washes-outdoors-as-i-film-her-on-spy-cam/.gob.pe
Downloading page http://qz.com/393160/at-least-six-have-died-as-burundi-is-rocked-by-violent-protests/.gob.pe...




Text saved to output/safe/qz.com_393160_at-least-six-have-died-as-burundi-is-rocked-by-violent-protests_.gob.pe.txt
Downloading page http://indianexpress.com/article/technology/tech-news-technology/as-pm-narendra-modi-leaves-for-china-he-leaves-behind-something-in-your-inbox/99.gob.pe...
Error when dowwloading page http://indianexpress.com/article/technology/tech-news-technology/as-pm-narendra-modi-leaves-for-china-he-leaves-behind-something-in-your-inbox/99.gob.pe: 403 Client Error: Forbidden for url: http://indianexpress.com/article/technology/tech-news-technology/as-pm-narendra-modi-leaves-for-china-he-leaves-behind-something-in-your-inbox/99.gob.pe
Failed to download page from http://indianexpress.com/article/technology/tech-news-technology/as-pm-narendra-modi-leaves-for-china-he-leaves-behind-something-in-your-inbox/99.gob.pe
Downloading page http://askubuntu.com/questions/623068/how-to-replace-all-strings-in-a-file-that-begin-with-some-prefix?answertab=active.gob.pe...




Text saved to output/safe/askubuntu.com_questions_623068_how-to-replace-all-strings-in-a-file-that-begin-with-some-prefix?answertab=active.gob.pe.txt
Downloading page http://techcrunch.com/gallery/looks-like-samsungs-galaxy-s6-mightve-just-leaked-out-here-are-the-photos/.gob.pe...




Error when dowwloading page http://techcrunch.com/gallery/looks-like-samsungs-galaxy-s6-mightve-just-leaked-out-here-are-the-photos/.gob.pe: 404 Client Error: Not Found for url: https://techcrunch.com/gallery/looks-like-samsungs-galaxy-s6-mightve-just-leaked-out-here-are-the-photos/.gob.pe
Failed to download page from http://techcrunch.com/gallery/looks-like-samsungs-galaxy-s6-mightve-just-leaked-out-here-are-the-photos/.gob.pe
Downloading page http://superuser.com/questions/374183/how-to-only-show-new-processes-in-activity-monitor.gob.pe...




Text saved to output/safe/superuser.com_questions_374183_how-to-only-show-new-processes-in-activity-monitor.gob.pe.txt
Downloading page http://ecnavi.jp/redirect/?url=http://www.af-mark.jp/hokengate-pc/?id=5663&uid=UI%user_id%IT48943FR36...




Text saved to output/safe/ecnavi.jp_redirect_?url=http:__www.af-mark.jp_hokengate-pc_?id=5663&uid=UI%user_id%IT48943FR36.txt
Downloading page http://olx.ua/uk/i2/list/?q={q}&utm_source=google&utm_medium=search&utm_campaign=search_organic.gob.pe...




Text saved to output/safe/olx.ua_uk_i2_list_?q={q}&utm_source=google&utm_medium=search&utm_campaign=search_organic.gob.pe.txt
Downloading page http://zonehmirrors.org/defaced/2013/10/20/disnakertransos.kebumenkab.go.id...
Text saved to output/deface/zonehmirrors.org_defaced_2013_10_20_disnakertransos.kebumenkab.go.id.txt
Downloading page http://zonehmirrors.org/defaced/2014/11/01/www.ila.aer.mil.br/www.ila.aer.mil.br/zxcvbnm.php...
Text saved to output/deface/zonehmirrors.org_defaced_2014_11_01_www.ila.aer.mil.br_www.ila.aer.mil.br_zxcvbnm.php.txt
Downloading page http://zonehmirrors.org/defaced/2014/09/02/www.drmariana.co.il/www.drmariana.co.il...
Text saved to output/deface/zonehmirrors.org_defaced_2014_09_02_www.drmariana.co.il_www.drmariana.co.il.txt
Downloading page http://zonehmirrors.org/defaced/2014/09/11/srangming.go.th/srangming.go.th...
Text saved to output/deface/zonehmirrors.org_defaced_2014_09_11_srangming.go.th_srangming.go.th.txt
Downloading page http://zonehmirrors.org

  soup = BeautifulSoup(html_content, 'html.parser')


Text saved to output/deface/zonehmirrors.org_defaced_2014_08_31_www.disperindag.kalbarprov.go.id_www.disperindag.kalbarprov.go.id_indonesia.txt.txt
Downloading page http://zonehmirrors.org/defaced/2013/10/20/websiteondemand.nl...
Text saved to output/deface/zonehmirrors.org_defaced_2013_10_20_websiteondemand.nl.txt
Downloading page http://zonehmirrors.org/defaced/2014/07/13/galilee.gov.il/galilee.gov.il/temp/index.asp...
Text saved to output/deface/zonehmirrors.org_defaced_2014_07_13_galilee.gov.il_galilee.gov.il_temp_index.asp.txt
Downloading page http://zonehmirrors.org/defaced/2014/11/06/gobernacionmanabi.gob.ec/gobernacionmanabi.gob.ec...
Text saved to output/deface/zonehmirrors.org_defaced_2014_11_06_gobernacionmanabi.gob.ec_gobernacionmanabi.gob.ec.txt
Downloading page http://zonehmirrors.org/defaced/2015/09/01/kabarkalteng.info/kabarkalteng.info/wp-login.php...
Text saved to output/deface/zonehmirrors.org_defaced_2015_09_01_kabarkalteng.info_kabarkalteng.info_wp-login.php.txt
Do



Text saved to output/deface/www.cgivladi.gov.in.txt
Downloading page http://www.thungluknok.go.th...
Error when dowwloading page http://www.thungluknok.go.th: HTTPConnectionPool(host='www.thungluknok.go.th', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1482e67f70>, 'Connection to www.thungluknok.go.th timed out. (connect timeout=10)'))
Failed to download page from http://www.thungluknok.go.th
Downloading page http://kampinoski-pn.gov.pl/pappurmar...
Error when dowwloading page http://kampinoski-pn.gov.pl/pappurmar: HTTPConnectionPool(host='kampinoski-pn.gov.pl', port=80): Max retries exceeded with url: /pappurmar (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1482e98760>: Failed to resolve 'kampinoski-pn.gov.pl' ([Errno -2] Name or service not known)"))
Failed to download page from http://kampinoski-pn.gov.pl/pappurmar
Downloading page http://www.brm3.go.th/0x.txt...




Error when dowwloading page http://www.brm3.go.th/0x.txt: 404 Client Error: Not Found for url: https://www.brm3.go.th/0x.txt
Failed to download page from http://www.brm3.go.th/0x.txt
Downloading page http://scrp.mof.gov.so...
Text saved to output/deface/scrp.mof.gov.so.txt
Downloading page http://cfmg.mgee.gov.zm...




Text saved to output/deface/cfmg.mgee.gov.zm.txt
Downloading page http://gohome.gov.so/1.txt...




Text saved to output/deface/gohome.gov.so_1.txt.txt
Downloading page http://perpustakaan.bmkg.go.id/storag...
Error when dowwloading page http://perpustakaan.bmkg.go.id/storag: 404 Client Error: Not Found for url: https://perpustakaan.bmkg.go.id/storag
Failed to download page from http://perpustakaan.bmkg.go.id/storag
Downloading page http://goacustoms.gov.in...




Error when dowwloading page http://goacustoms.gov.in: HTTPConnectionPool(host='goacustoms.gov.in', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1482e9b8b0>, 'Connection to goacustoms.gov.in timed out. (connect timeout=10)'))
Failed to download page from http://goacustoms.gov.in
Downloading page http://www.indianembassyzagreb.gov.in...




Text saved to output/deface/www.indianembassyzagreb.gov.in.txt
Downloading page http://inic.gov.iq/1784.html...
Error when dowwloading page http://inic.gov.iq/1784.html: HTTPConnectionPool(host='inic.gov.iq', port=80): Max retries exceeded with url: /1784.html (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e148365e8f0>: Failed to resolve 'inic.gov.iq' ([Errno -2] Name or service not known)"))
Failed to download page from http://inic.gov.iq/1784.html
Downloading page http://saenzpena.gob.ar/0x.txt...
Error when dowwloading page http://saenzpena.gob.ar/0x.txt: HTTPConnectionPool(host='saenzpena.gob.ar', port=80): Max retries exceeded with url: /0x.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e148365e020>, 'Connection to saenzpena.gob.ar timed out. (connect timeout=10)'))
Failed to download page from http://saenzpena.gob.ar/0x.txt
Downloading page http://comiteprevencionsde.gob.ar/0x.txt...
Error when dowwloading page 



Text saved to output/deface/www.comune.airasca.to.it_cgi-b.txt
Downloading page http://www.comune.villafrancapiemonte...
Error when dowwloading page http://www.comune.villafrancapiemonte: HTTPConnectionPool(host='www.comune.villafrancapiemonte', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14839c10c0>: Failed to resolve 'www.comune.villafrancapiemonte' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.villafrancapiemonte
Downloading page http://www.comune.villarperosa.to.it/...
Text saved to output/deface/www.comune.villarperosa.to.it_.txt
Downloading page http://www.comune.vigone.to.it/cgi-bi...




Text saved to output/deface/www.comune.vigone.to.it_cgi-bi.txt
Downloading page http://www.comune.verzuolo.cn.it/cgi-...




Text saved to output/deface/www.comune.verzuolo.cn.it_cgi-.txt
Downloading page http://www.comune.venasca.cn.it/cgi-b...




Text saved to output/deface/www.comune.venasca.cn.it_cgi-b.txt
Downloading page http://www.comune.trinita.cn.it/cgi-b...




Text saved to output/deface/www.comune.trinita.cn.it_cgi-b.txt
Downloading page http://www.comune.torrepellice.to.it/...




Text saved to output/deface/www.comune.torrepellice.to.it_.txt
Downloading page http://www.comune.sommarivaperno.cn.i...
Error when dowwloading page http://www.comune.sommarivaperno.cn.i: HTTPConnectionPool(host='www.comune.sommarivaperno.cn.i', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1486a713f0>: Failed to resolve 'www.comune.sommarivaperno.cn.i' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.sommarivaperno.cn.i
Downloading page http://www.comune.sommarivadelbosco.c...
Error when dowwloading page http://www.comune.sommarivadelbosco.c: HTTPConnectionPool(host='www.comune.sommarivadelbosco.c', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1486a72e30>: Failed to resolve 'www.comune.sommarivadelbosco.c' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comu



Text saved to output/deface/www.comune.sennariolo.or.it_cg.txt
Downloading page http://www.comune.scarnafigi.cn.it/cg...




Text saved to output/deface/www.comune.scarnafigi.cn.it_cg.txt
Downloading page http://www.comune.santavittoriadalba...
Error when dowwloading page http://www.comune.santavittoriadalba: HTTPConnectionPool(host='www.comune.santavittoriadalba', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14838c64d0>: Failed to resolve 'www.comune.santavittoriadalba' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.santavittoriadalba
Downloading page http://www.comune.sanfront.cn.it/cgi-...




Text saved to output/deface/www.comune.sanfront.cn.it_cgi-.txt
Downloading page http://www.comune.sampeyre.cn.it/cgi-...




Text saved to output/deface/www.comune.sampeyre.cn.it_cgi-.txt
Downloading page http://www.comune.salzadipinerolo.to...
Error when dowwloading page http://www.comune.salzadipinerolo.to: HTTPConnectionPool(host='www.comune.salzadipinerolo.to', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e148468ef50>: Failed to resolve 'www.comune.salzadipinerolo.to' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.salzadipinerolo.to
Downloading page http://www.comune.rossana.cn.it/cgi-b...
Text saved to output/deface/www.comune.rossana.cn.it_cgi-b.txt
Downloading page http://www.comune.roccafortemondovi.c...
Error when dowwloading page http://www.comune.roccafortemondovi.c: HTTPConnectionPool(host='www.comune.roccafortemondovi.c', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e148384a560>: Failed to resolve 'ww



Text saved to output/deface/www.comune.roccabruna.cn.it_cg.txt
Downloading page http://www.comune.rifreddo.cn.it/cgi-...




Text saved to output/deface/www.comune.rifreddo.cn.it_cgi-.txt
Downloading page http://www.comune.racconigi.cn.it/cgi...




Text saved to output/deface/www.comune.racconigi.cn.it_cgi.txt
Downloading page http://www.comune.porte.to.it/cgi-bin...




Text saved to output/deface/www.comune.porte.to.it_cgi-bin.txt
Downloading page http://www.comune.pramollo.to.it/cgi-...




Text saved to output/deface/www.comune.pramollo.to.it_cgi-.txt
Downloading page http://www.comune.pomaretto.to.it/cgi...




Text saved to output/deface/www.comune.pomaretto.to.it_cgi.txt
Downloading page http://www.comune.perrero.to.it/cgi-b...




Text saved to output/deface/www.comune.perrero.to.it_cgi-b.txt
Downloading page http://www.comune.perosaargentina.to...
Error when dowwloading page http://www.comune.perosaargentina.to: HTTPConnectionPool(host='www.comune.perosaargentina.to', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483ae8a90>: Failed to resolve 'www.comune.perosaargentina.to' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.perosaargentina.to
Downloading page http://www.comune.perlo.cn.it/cgi-bin...
Text saved to output/deface/www.comune.perlo.cn.it_cgi-bin.txt
Downloading page http://www.comune.nucetto.cn.it/cgi-b...




Text saved to output/deface/www.comune.nucetto.cn.it_cgi-b.txt
Downloading page http://www.comune.moretta.cn.it/cgi-b...




Text saved to output/deface/www.comune.moretta.cn.it_cgi-b.txt
Downloading page http://www.comune.montanera.cn.it/cgi...




Text saved to output/deface/www.comune.montanera.cn.it_cgi.txt
Downloading page http://www.comune.melle.cn.it/cgi-bin...




Text saved to output/deface/www.comune.melle.cn.it_cgi-bin.txt
Downloading page http://www.comune.massello.to.it/cgi-...




Text saved to output/deface/www.comune.massello.to.it_cgi-.txt
Downloading page http://www.comune.martinianapo.cn.it/...




Text saved to output/deface/www.comune.martinianapo.cn.it_.txt
Downloading page http://www.comune.marene.cn.it/cgi-bi...
Text saved to output/deface/www.comune.marene.cn.it_cgi-bi.txt
Downloading page http://www.comune.manta.cn.it/cgi-bin...




Error when dowwloading page http://www.comune.manta.cn.it/cgi-bin: 403 Client Error: Forbidden for url: https://www.comune.manta.cn.it//cgi-bin
Failed to download page from http://www.comune.manta.cn.it/cgi-bin
Downloading page http://www.comune.lusernetta.to.it/cg...




Text saved to output/deface/www.comune.lusernetta.to.it_cg.txt
Downloading page http://www.comune.luserna.to.it/cgi-b...




Text saved to output/deface/www.comune.luserna.to.it_cgi-b.txt
Downloading page http://www.comune.lagnasco.cn.it/cgi-...




Text saved to output/deface/www.comune.lagnasco.cn.it_cgi-.txt
Downloading page http://www.comune.laloggia.to.it/cgi-...




Text saved to output/deface/www.comune.laloggia.to.it_cgi-.txt
Downloading page http://www.comune.isasca.cn.it/cgi-bi...




Text saved to output/deface/www.comune.isasca.cn.it_cgi-bi.txt
Downloading page http://www.comune.garzigliana.to.it/c...




Text saved to output/deface/www.comune.garzigliana.to.it_c.txt
Downloading page http://www.comune.gambasca.cn.it/cgi-...
Text saved to output/deface/www.comune.gambasca.cn.it_cgi-.txt
Downloading page http://www.comune.frossasco.to.it/cgi...




Text saved to output/deface/www.comune.frossasco.to.it_cgi.txt
Downloading page http://www.comune.frassino.cn.it/cgi-...




Text saved to output/deface/www.comune.frassino.cn.it_cgi-.txt
Downloading page http://www.comune.fontanettopo.vc.it/...




Text saved to output/deface/www.comune.fontanettopo.vc.it_.txt
Downloading page http://www.comune.fenestrelle.to.it/c...
Text saved to output/deface/www.comune.fenestrelle.to.it_c.txt
Downloading page http://www.comune.dianodalba.cn.it/cg...




Text saved to output/deface/www.comune.dianodalba.cn.it_cg.txt
Downloading page http://www.comune.crissolo.cn.it/cgi-...




Text saved to output/deface/www.comune.crissolo.cn.it_cgi-.txt
Downloading page http://www.comune.prali.to.it/cgi-bin...




Text saved to output/deface/www.comune.prali.to.it_cgi-bin.txt
Downloading page http://www.comune.buriasco.to.it/cgi-...




Text saved to output/deface/www.comune.buriasco.to.it_cgi-.txt
Downloading page http://www.comune.cantalupa.to.it/cgi...




Text saved to output/deface/www.comune.cantalupa.to.it_cgi.txt
Downloading page http://www.comune.castellettostura.cn...
Error when dowwloading page http://www.comune.castellettostura.cn: HTTPConnectionPool(host='www.comune.castellettostura.cn', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14837aab30>: Failed to resolve 'www.comune.castellettostura.cn' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.castellettostura.cn
Downloading page http://www.comune.cercenasco.to.it/cg...




Text saved to output/deface/www.comune.cercenasco.to.it_cg.txt
Downloading page http://www.comune.cherasco.cn.it/cgi-...




Text saved to output/deface/www.comune.cherasco.cn.it_cgi-.txt
Downloading page http://www.comune.cocconato.at.it/cgi...




Text saved to output/deface/www.comune.cocconato.at.it_cgi.txt
Downloading page http://www.comune.costigliolesaluzzo...
Error when dowwloading page http://www.comune.costigliolesaluzzo: HTTPConnectionPool(host='www.comune.costigliolesaluzzo', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483024af0>: Failed to resolve 'www.comune.costigliolesaluzzo' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.costigliolesaluzzo
Downloading page http://www.comune.baldisserodalba.cn...
Error when dowwloading page http://www.comune.baldisserodalba.cn: HTTPConnectionPool(host='www.comune.baldisserodalba.cn', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14830252d0>: Failed to resolve 'www.comune.baldisserodalba.cn' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.baldis



Text saved to output/deface/www.comune.brossasco.cn.it_cgi.txt
Downloading page http://www.comune.bellino.cn.it/cgi-b...




Text saved to output/deface/www.comune.bellino.cn.it_cgi-b.txt
Downloading page http://www.comune.borgosandalmazzo.cn...
Error when dowwloading page http://www.comune.borgosandalmazzo.cn: HTTPConnectionPool(host='www.comune.borgosandalmazzo.cn', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e148426f070>: Failed to resolve 'www.comune.borgosandalmazzo.cn' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.borgosandalmazzo.cn
Downloading page http://clr.kerala.gov.in/Err0r.html...
Error when dowwloading page http://clr.kerala.gov.in/Err0r.html: HTTPConnectionPool(host='clr.kerala.gov.in', port=80): Max retries exceeded with url: /Err0r.html (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e148426f850>, 'Connection to clr.kerala.gov.in timed out. (connect timeout=10)'))
Failed to download page from http://clr.kerala.gov.in/Err0r.html
Downloading 



Text saved to output/deface/www.juventude.com.br.txt
Downloading page http://www.chamrae.go.th/evil.txt...
Text saved to output/deface/www.chamrae.go.th_evil.txt.txt
Downloading page http://www.namphu.go.th/evil.txt...
Text saved to output/deface/www.namphu.go.th_evil.txt.txt
Downloading page http://www.bankokubon.go.th/evil.txt...
Text saved to output/deface/www.bankokubon.go.th_evil.txt.txt
Downloading page http://nangaritza.gob.ec/0x.txt...




Error when dowwloading page http://nangaritza.gob.ec/0x.txt: 404 Client Error: Not Found for url: https://nangaritza.gob.ec/0x.txt
Failed to download page from http://nangaritza.gob.ec/0x.txt
Downloading page http://ulipurmunicipality.gov.bd/back...




Error when dowwloading page http://ulipurmunicipality.gov.bd/back: 404 Client Error: Not Found for url: https://ulipurmunicipality.gov.bd/back
Failed to download page from http://ulipurmunicipality.gov.bd/back
Downloading page http://nabiganjpaurashava.gov.bd/back...




Error when dowwloading page http://nabiganjpaurashava.gov.bd/back: 404 Client Error: Not Found for url: https://nabiganjpaurashava.gov.bd/back
Failed to download page from http://nabiganjpaurashava.gov.bd/back
Downloading page http://neta.gov.et...




Text saved to output/deface/neta.gov.et.txt
Downloading page http://traditions.go.ug/87.txt...
Error when dowwloading page http://traditions.go.ug/87.txt: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Failed to download page from http://traditions.go.ug/87.txt
Downloading page http://cms.kntc.go.ke/2.txt...




Text saved to output/deface/cms.kntc.go.ke_2.txt.txt
Downloading page http://ifmis.kntc.go.ke/2.txt...
Text saved to output/deface/ifmis.kntc.go.ke_2.txt.txt
Downloading page http://muhoronisugar.kntc.go.ke/2.txt...
Text saved to output/deface/muhoronisugar.kntc.go.ke_2.txt.txt
Downloading page http://kntc.go.ke/2.txt...




Text saved to output/deface/kntc.go.ke_2.txt.txt
Downloading page http://malukubaratdayakab.go.id/zs.txt...
Error when dowwloading page http://malukubaratdayakab.go.id/zs.txt: 404 Client Error: Not Found for url: https://malukubaratdayakab.go.id/zs.txt
Failed to download page from http://malukubaratdayakab.go.id/zs.txt
Downloading page http://antimonopolio.gob.ve/87.txt...




Error when dowwloading page http://antimonopolio.gob.ve/87.txt: 404 Client Error: Not Found for url: http://antimonopolio.gob.ve/87.txt
Failed to download page from http://antimonopolio.gob.ve/87.txt
Downloading page http://www.biblioteca.comune.carignan...
Error when dowwloading page http://www.biblioteca.comune.carignan: HTTPConnectionPool(host='www.biblioteca.comune.carignan', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14869d41f0>: Failed to resolve 'www.biblioteca.comune.carignan' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.biblioteca.comune.carignan
Downloading page http://totem.comune.sangermanochisone...
Error when dowwloading page http://totem.comune.sangermanochisone: HTTPConnectionPool(host='totem.comune.sangermanochisone', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14869d4580>: 



Error when dowwloading page http://totem.comune.pinasca.to.it/alb: 404 Client Error: Not Found for url: https://totem.comune.pinasca.to.it/alb
Failed to download page from http://totem.comune.pinasca.to.it/alb
Downloading page http://totem.comune.cumiana.to.it/alb...




Error when dowwloading page http://totem.comune.cumiana.to.it/alb: 404 Client Error: Not Found for url: https://totem.comune.cumiana.to.it/alb
Failed to download page from http://totem.comune.cumiana.to.it/alb
Downloading page http://totem.comune.inversopinasca.to...
Error when dowwloading page http://totem.comune.inversopinasca.to: HTTPConnectionPool(host='totem.comune.inversopinasca.to', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14869d65f0>: Failed to resolve 'totem.comune.inversopinasca.to' ([Errno -2] Name or service not known)"))
Failed to download page from http://totem.comune.inversopinasca.to
Downloading page http://totem.comune.carignano.to.it/a...
Error when dowwloading page http://totem.comune.carignano.to.it/a: 404 Client Error: Not Found for url: http://totem.comune.carignano.to.it/a
Failed to download page from http://totem.comune.carignano.to.it/a
Downloading page http://totem.comune.barbar



Error when dowwloading page http://comune.peveragno.cn.it/alba.txt: 404 Client Error: Not Found for url: https://comune.peveragno.cn.it/alba.txt
Failed to download page from http://comune.peveragno.cn.it/alba.txt
Downloading page http://www.comune.morozzo.cn.it/alba.txt...




Error when dowwloading page http://www.comune.morozzo.cn.it/alba.txt: 404 Client Error: Not Found for url: https://www.comune.morozzo.cn.it/alba.txt
Failed to download page from http://www.comune.morozzo.cn.it/alba.txt
Downloading page http://turismo.comune.benevagienna.cn...
Error when dowwloading page http://turismo.comune.benevagienna.cn: HTTPConnectionPool(host='turismo.comune.benevagienna.cn', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1484f7c100>: Failed to resolve 'turismo.comune.benevagienna.cn' ([Errno -2] Name or service not known)"))
Failed to download page from http://turismo.comune.benevagienna.cn
Downloading page http://www.munilayo.gob.pe/87.txt...
Error when dowwloading page http://www.munilayo.gob.pe/87.txt: HTTPConnectionPool(host='www.munilayo.gob.pe', port=80): Max retries exceeded with url: /87.txt (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1484f7d



Error when dowwloading page http://comune.vottignasco.cn.it/alba.txt: 404 Client Error: Not Found for url: https://www.comune.vottignasco.cn.it/alba.txt
Failed to download page from http://comune.vottignasco.cn.it/alba.txt
Downloading page http://comune.villafalletto.cn.it/alb...




Text saved to output/deface/comune.villafalletto.cn.it_alb.txt
Downloading page http://www.comune.scalenghe.to.it/alb...




Error when dowwloading page http://www.comune.scalenghe.to.it/alb: 404 Client Error: Not Found for url: https://www.comune.scalenghe.to.it/alb
Failed to download page from http://www.comune.scalenghe.to.it/alb
Downloading page http://www.comune.vicoforte.cn.it/alb...




Text saved to output/deface/www.comune.vicoforte.cn.it_alb.txt
Downloading page http://comune.torresangiorgio.cn.it/a...




Error when dowwloading page http://comune.torresangiorgio.cn.it/a: 404 Client Error: Not Found for url: https://comune.torresangiorgio.cn.it/a
Failed to download page from http://comune.torresangiorgio.cn.it/a
Downloading page http://comune.villanova-mondovi.cn.it...




Text saved to output/deface/comune.villanova-mondovi.cn.it.txt
Downloading page http://comune.piasco.cn.it/alba.txt...




Text saved to output/deface/comune.piasco.cn.it_alba.txt.txt
Downloading page http://www.comune.roccavione.cn.it/al...




Error when dowwloading page http://www.comune.roccavione.cn.it/al: 404 Client Error: Not Found for url: https://www.comune.roccavione.cn.it/al
Failed to download page from http://www.comune.roccavione.cn.it/al
Downloading page http://comune.barbaresco.cn.it/alba.txt...




Error when dowwloading page http://comune.barbaresco.cn.it/alba.txt: 404 Client Error: Not Found for url: https://comune.barbaresco.cn.it/alba.txt
Failed to download page from http://comune.barbaresco.cn.it/alba.txt
Downloading page http://comune.inversopinasca.to.it/al...




Error when dowwloading page http://comune.inversopinasca.to.it/al: 404 Client Error: Not Found for url: https://comune.inversopinasca.to.it/al
Failed to download page from http://comune.inversopinasca.to.it/al
Downloading page http://www.comune.paesana.cn.it/alba.txt...




Error when dowwloading page http://www.comune.paesana.cn.it/alba.txt: 404 Client Error: Not Found for url: https://www.comune.paesana.cn.it/alba.txt
Failed to download page from http://www.comune.paesana.cn.it/alba.txt
Downloading page http://comune.cavallermaggiore.cn.it/...




Text saved to output/deface/comune.cavallermaggiore.cn.it_.txt
Downloading page http://www.comune.occhieppo-inferiore...
Error when dowwloading page http://www.comune.occhieppo-inferiore: HTTPConnectionPool(host='www.comune.occhieppo-inferiore', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1484255900>: Failed to resolve 'www.comune.occhieppo-inferiore' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.comune.occhieppo-inferiore
Downloading page http://www.comune.castellar.cn.it/alb...
Text saved to output/deface/www.comune.castellar.cn.it_alb.txt
Downloading page http://www.comune.pagno.cn.it/alba.txt...




Text saved to output/deface/www.comune.pagno.cn.it_alba.txt.txt
Downloading page http://municipalidad.gualeguay.gob.ar...




Text saved to output/deface/municipalidad.gualeguay.gob.ar.txt
Downloading page http://fmckeffi.gov.ng...
Error when dowwloading page http://fmckeffi.gov.ng: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Failed to download page from http://fmckeffi.gov.ng
Downloading page http://elcomunicador.scspr.gob.gt...
Error when dowwloading page http://elcomunicador.scspr.gob.gt: 403 Client Error: Forbidden for url: http://elcomunicador.scspr.gob.gt/
Failed to download page from http://elcomunicador.scspr.gob.gt
Downloading page http://qr.scspr.gob.gt...
Error when dowwloading page http://qr.scspr.gob.gt: 403 Client Error: Forbidden for url: http://qr.scspr.gob.gt/
Failed to download page from http://qr.scspr.gob.gt
Downloading page http://serviciosgenerales.scspr.gob.gt...
Text saved to output/deface/serviciosgenerales.scspr.gob.gt.txt
Downloading page http://tlacojalpan.gob.mx/back.txt...




Error when dowwloading page http://tlacojalpan.gob.mx/back.txt: 404 Client Error: Not Found for url: https://tlacojalpan.gob.mx/back.txt
Failed to download page from http://tlacojalpan.gob.mx/back.txt
Downloading page http://confluencias.go.cr/tigan.html...
Error when dowwloading page http://confluencias.go.cr/tigan.html: 403 Client Error: Forbidden for url: http://confluencias.go.cr/tigan.html
Failed to download page from http://confluencias.go.cr/tigan.html
Downloading page http://cgb.gov.co/0x.txt...
Error when dowwloading page http://cgb.gov.co/0x.txt: 404 Client Error: Not Found for url: http://cgb.gov.co/0x.txt
Failed to download page from http://cgb.gov.co/0x.txt
Downloading page http://nsrmea.gov.ng/0x.txt...




Text saved to output/deface/nsrmea.gov.ng_0x.txt.txt
Downloading page http://feriacervecera.gualeguay.gob.a...
Error when dowwloading page http://feriacervecera.gualeguay.gob.a: HTTPConnectionPool(host='feriacervecera.gualeguay.gob.a', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14838d7e20>: Failed to resolve 'feriacervecera.gualeguay.gob.a' ([Errno -2] Name or service not known)"))
Failed to download page from http://feriacervecera.gualeguay.gob.a
Downloading page http://corso.gualeguay.gob.ar/index.html...
Error when dowwloading page http://corso.gualeguay.gob.ar/index.html: 404 Client Error: Not Found for url: http://corso.gualeguay.gob.ar/index.html
Failed to download page from http://corso.gualeguay.gob.ar/index.html
Downloading page http://concejo.gualeguay.gob.ar/index...
Error when dowwloading page http://concejo.gualeguay.gob.ar/index: 404 Client Error: Not Found for url: http://concejo.gualeguay.g



No valid text from http://gualeguay.gob.ar/index.html
Downloading page http://defensorsantiago.gob.ar/0x.txt...
Error when dowwloading page http://defensorsantiago.gob.ar/0x.txt: 404 Client Error: Not Found for url: http://defensorsantiago.gob.ar/0x.txt
Failed to download page from http://defensorsantiago.gob.ar/0x.txt
Downloading page http://www.banmaeka.go.th...
Error when dowwloading page http://www.banmaeka.go.th: 403 Client Error: Forbidden for url: http://www.banmaeka.go.th/
Failed to download page from http://www.banmaeka.go.th
Downloading page http://thepnimit-kpp.go.th/index.php...
Text saved to output/deface/thepnimit-kpp.go.th_index.php.txt
Downloading page http://psweps.gov.gh/0x.txt...
Text saved to output/deface/psweps.gov.gh_0x.txt.txt
Downloading page http://phomakkhaeng.go.th...




Text saved to output/deface/phomakkhaeng.go.th.txt
Downloading page http://archivonacional.gob.pa...
Error when dowwloading page http://archivonacional.gob.pa: HTTPConnectionPool(host='archivonacional.gob.pa', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1484606440>, 'Connection to archivonacional.gob.pa timed out. (connect timeout=10)'))
Failed to download page from http://archivonacional.gob.pa
Downloading page http://loterianacional.gob.do...




Error when dowwloading page http://loterianacional.gob.do: 406 Client Error: Not Acceptable for url: https://loterianacional.gob.do/
Failed to download page from http://loterianacional.gob.do
Downloading page http://ict.foreign.gov.ly...
Error when dowwloading page http://ict.foreign.gov.ly: HTTPConnectionPool(host='ict.foreign.gov.ly', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1484605c30>: Failed to resolve 'ict.foreign.gov.ly' ([Errno -2] Name or service not known)"))
Failed to download page from http://ict.foreign.gov.ly
Downloading page http://www.email.lmac.gov.ly...
Error when dowwloading page http://www.email.lmac.gov.ly: HTTPConnectionPool(host='www.email.lmac.gov.ly', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1484605750>: Failed to resolve 'www.email.lmac.gov.ly' ([Errno -2] Name or service not known)"))
Failed to d



Error when dowwloading page http://www.acessibilidade.gov.pt/acce: 404 Client Error: Not Found for url: https://www.acessibilidade.gov.pt:443/acce
Failed to download page from http://www.acessibilidade.gov.pt/acce
Downloading page http://www.sabha.gov.ly...
Error when dowwloading page http://www.sabha.gov.ly: HTTPConnectionPool(host='www.sabha.gov.ly', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14846048e0>: Failed to resolve 'www.sabha.gov.ly' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.sabha.gov.ly
Downloading page http://audit.gov.ly/home/...




Error when dowwloading page http://audit.gov.ly/home/: 403 Client Error: Forbidden for url: https://audit.gov.ly/home/
Failed to download page from http://audit.gov.ly/home/
Downloading page http://tourism.gov.ly/site/...




Error when dowwloading page http://tourism.gov.ly/site/: 404 Client Error: Not Found for url: https://tourism.gov.ly/site/
Failed to download page from http://tourism.gov.ly/site/
Downloading page http://libembse.foreign.gov.ly...
Error when dowwloading page http://libembse.foreign.gov.ly: HTTPConnectionPool(host='libembse.foreign.gov.ly', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14846066e0>: Failed to resolve 'libembse.foreign.gov.ly' ([Errno -2] Name or service not known)"))
Failed to download page from http://libembse.foreign.gov.ly
Downloading page http://agajk.gov.pk...




Error when dowwloading page http://agajk.gov.pk: 403 Client Error: Forbidden for url: https://agajk.gov.pk/
Failed to download page from http://agajk.gov.pk
Downloading page http://mirzapurpourashava.gov.bd...




Text saved to output/deface/mirzapurpourashava.gov.bd.txt
Downloading page http://concejocuruzucuatia.gob.ar/pro...
Error when dowwloading page http://concejocuruzucuatia.gob.ar/pro: 500 Server Error: Internal Server Error for url: http://concejocuruzucuatia.gob.ar/pro
Failed to download page from http://concejocuruzucuatia.gob.ar/pro
Downloading page http://pusdikkowad.kodiklat-tniad.mil...
Error when dowwloading page http://pusdikkowad.kodiklat-tniad.mil: HTTPConnectionPool(host='pusdikkowad.kodiklat-tniad.mil', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e148406d000>: Failed to resolve 'pusdikkowad.kodiklat-tniad.mil' ([Errno -2] Name or service not known)"))
Failed to download page from http://pusdikkowad.kodiklat-tniad.mil
Downloading page http://pn-polewali.go.id/proof.txt...
Error when dowwloading page http://pn-polewali.go.id/proof.txt: 404 Client Error: Not Found for url: http://pn-polewali.go.id/pr



Error when dowwloading page http://ms-blangpidie.go.id/proof.txt: 404 Client Error: Artikel tidak ditemukan for url: https://ms-blangpidie.go.id/proof.txt
Failed to download page from http://ms-blangpidie.go.id/proof.txt
Downloading page http://bpkad.sultengprov.go.id/proof.txt...
Error when dowwloading page http://bpkad.sultengprov.go.id/proof.txt: 404 Client Error: Not Found for url: http://bpkad.sultengprov.go.id/proof.txt
Failed to download page from http://bpkad.sultengprov.go.id/proof.txt
Downloading page http://meteo.gov.lk/proof.txt...




Text saved to output/deface/meteo.gov.lk_proof.txt.txt
Downloading page http://bcbontang.beacukai.go.id/evil...
Error when dowwloading page http://bcbontang.beacukai.go.id/evil: HTTPConnectionPool(host='bcbontang.beacukai.go.id', port=80): Max retries exceeded with url: /evil (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7e1483eadb10>: Failed to establish a new connection: [Errno 111] Connection refused'))
Failed to download page from http://bcbontang.beacukai.go.id/evil
Downloading page http://asistencia.gobernacionnapo.gob...
Error when dowwloading page http://asistencia.gobernacionnapo.gob: HTTPConnectionPool(host='asistencia.gobernacionnapo.gob', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483eae2f0>: Failed to resolve 'asistencia.gobernacionnapo.gob' ([Errno -2] Name or service not known)"))
Failed to download page from http://asistencia.gobernacionnapo.gob
Downloading 



Text saved to output/deface/prueba.gobernacionnapo.gob.ec_.txt
Downloading page http://registros.gobernacionnapo.gob...
Error when dowwloading page http://registros.gobernacionnapo.gob: HTTPConnectionPool(host='registros.gobernacionnapo.gob', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483eafaf0>: Failed to resolve 'registros.gobernacionnapo.gob' ([Errno -2] Name or service not known)"))
Failed to download page from http://registros.gobernacionnapo.gob
Downloading page http://soporte.gobernacionnapo.gob.ec...




Text saved to output/deface/soporte.gobernacionnapo.gob.ec.txt
Downloading page http://teletrabajo.gobernacionnapo.go...
Error when dowwloading page http://teletrabajo.gobernacionnapo.go: HTTPConnectionPool(host='teletrabajo.gobernacionnapo.go', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483eae170>: Failed to resolve 'teletrabajo.gobernacionnapo.go' ([Errno -2] Name or service not known)"))
Failed to download page from http://teletrabajo.gobernacionnapo.go
Downloading page http://spp.swdajk.gov.pk/kurd.html...
Error when dowwloading page http://spp.swdajk.gov.pk/kurd.html: 404 Client Error: Not Found for url: http://spp.swdajk.gov.pk/kurd.html
Failed to download page from http://spp.swdajk.gov.pk/kurd.html
Downloading page http://drr12.drr.go.th/87.txt...
Error when dowwloading page http://drr12.drr.go.th/87.txt: HTTPConnectionPool(host='drr12.drr.go.th', port=80): Max retries exceeded with url: /87.txt (



Error when dowwloading page http://gobernacionnapo.gob.ec/kurd.html: 404 Client Error: Not Found for url: https://gobernacionnapo.gob.ec/kurd.html
Failed to download page from http://gobernacionnapo.gob.ec/kurd.html
Downloading page http://wonosari-tekung.lumajangkab.go.id...




Failed to download page from http://wonosari-tekung.lumajangkab.go.id
Downloading page http://www.indembassysweden.gov.in...




Text saved to output/deface/www.indembassysweden.gov.in.txt
Downloading page http://www.indianembassybaku.gov.in...




Text saved to output/deface/www.indianembassybaku.gov.in.txt
Downloading page http://ssc.mof.gov.vn/acms/ckfinder/a...




Error when dowwloading page http://ssc.mof.gov.vn/acms/ckfinder/a: 404 Client Error: Not Found for url: https://ssc.gov.vn/acms/ckfinder/a
Failed to download page from http://ssc.mof.gov.vn/acms/ckfinder/a
Downloading page http://ssc.gov.vn/acms/ckfinder/anony...




Error when dowwloading page http://ssc.gov.vn/acms/ckfinder/anony: 404 Client Error: Not Found for url: https://ssc.gov.vn/acms/ckfinder/anony
Failed to download page from http://ssc.gov.vn/acms/ckfinder/anony
Downloading page http://abuja.mirco.gov.na/saudi.html...
Error when dowwloading page http://abuja.mirco.gov.na/saudi.html: 503 Server Error: Service Temporarily Unavailable for url: http://abuja.mirco.gov.na/saudi.html
Failed to download page from http://abuja.mirco.gov.na/saudi.html
Downloading page http://luanda.mirco.gov.na...




Error when dowwloading page http://luanda.mirco.gov.na: 503 Server Error: Service Temporarily Unavailable for url: https://luanda.mirco.gov.na/
Failed to download page from http://luanda.mirco.gov.na
Downloading page http://dakar.mirco.gov.na/518.html...
Error when dowwloading page http://dakar.mirco.gov.na/518.html: 503 Server Error: Service Temporarily Unavailable for url: http://dakar.mirco.gov.na/518.html
Failed to download page from http://dakar.mirco.gov.na/518.html
Downloading page http://brazzaville.mirco.gov.na/518.html...
Error when dowwloading page http://brazzaville.mirco.gov.na/518.html: 503 Server Error: Service Temporarily Unavailable for url: http://brazzaville.mirco.gov.na/518.html
Failed to download page from http://brazzaville.mirco.gov.na/518.html
Downloading page http://www.accra.mirco.gov.na/518.html...
Error when dowwloading page http://www.accra.mirco.gov.na/518.html: 404 Client Error: Not Found for url: http://accra.mirco.gov.na/518.html
Failed to download page



Error when dowwloading page http://encuestas.senda.gob.cl/0x.txt: 404 Client Error: Not Found for url: https://encuestas.senda.gob.cl/0x.txt
Failed to download page from http://encuestas.senda.gob.cl/0x.txt
Downloading page http://sistemas.senda.gob.cl/0x.txt...




Error when dowwloading page http://sistemas.senda.gob.cl/0x.txt: 404 Client Error: Not Found for url: https://sistemas.senda.gob.cl/0x.txt
Failed to download page from http://sistemas.senda.gob.cl/0x.txt
Downloading page http://finance.sec40.go.th/will.html...
Error when dowwloading page http://finance.sec40.go.th/will.html: 404 Client Error: Not Found for url: http://finance.sec40.go.th/will.html
Failed to download page from http://finance.sec40.go.th/will.html
Downloading page http://www.baankhong.go.th...
Text saved to output/deface/www.baankhong.go.th.txt
Downloading page http://www.mcil.gov.ws/hook.htm...




Error when dowwloading page http://www.mcil.gov.ws/hook.htm: 404 Client Error: Not Found for url: https://www.mcil.gov.ws/hook.htm
Failed to download page from http://www.mcil.gov.ws/hook.htm
Downloading page http://www.camarasjriopardo.sp.gov.br...




Text saved to output/deface/www.camarasjriopardo.sp.gov.br.txt
Downloading page http://www.muqui.es.gov.br...
Error when dowwloading page http://www.muqui.es.gov.br: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Failed to download page from http://www.muqui.es.gov.br
Downloading page http://www.camaracerrogrande.rs.gov.br...




Error when dowwloading page http://www.camaracerrogrande.rs.gov.br: 520 Server Error:  for url: https://cerrogrande.rs.leg.br/
Failed to download page from http://www.camaracerrogrande.rs.gov.br
Downloading page http://www.camarabrasilnovo.com.br...
Error when dowwloading page http://www.camarabrasilnovo.com.br: HTTPConnectionPool(host='www.camarabrasilnovo.com.br', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1482654850>: Failed to resolve 'www.camarabrasilnovo.com.br' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.camarabrasilnovo.com.br
Downloading page http://www.cmxambre.pr.gov.br...
Error when dowwloading page http://www.cmxambre.pr.gov.br: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Failed to download page from http://www.cmxambre.pr.gov.br
Downloading page http://www.camaratrabiju.sp.gov.br...




Error when dowwloading page http://www.camaratrabiju.sp.gov.br: 520 Server Error:  for url: https://trabiju.sp.leg.br/
Failed to download page from http://www.camaratrabiju.sp.gov.br
Downloading page http://www.moju.pa.gov.br...




Text saved to output/deface/www.moju.pa.gov.br.txt
Downloading page http://crepn.gov.mz/Elz.html...
Error when dowwloading page http://crepn.gov.mz/Elz.html: HTTPConnectionPool(host='crepn.gov.mz', port=80): Max retries exceeded with url: /Elz.html (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e14826212a0>, 'Connection to crepn.gov.mz timed out. (connect timeout=10)'))
Failed to download page from http://crepn.gov.mz/Elz.html
Downloading page http://crepm.gov.mz/Elz.html...
Error when dowwloading page http://crepm.gov.mz/Elz.html: HTTPConnectionPool(host='crepm.gov.mz', port=80): Max retries exceeded with url: /Elz.html (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e1482620ac0>, 'Connection to crepm.gov.mz timed out. (connect timeout=10)'))
Failed to download page from http://crepm.gov.mz/Elz.html
Downloading page http://crepg.gov.mz/Elz.html...
Error when dowwloading page http://crepg.gov.mz/Elz.html: HTTPConnectionPool



Text saved to output/deface/pertanian.lomboktengahkab.go.id.txt
Downloading page http://karimunkab.go.id...
Text saved to output/deface/karimunkab.go.id.txt
Downloading page http://testdomain23.gov.zw/ZED.htm...




Error when dowwloading page http://testdomain23.gov.zw/ZED.htm: 404 Client Error: Not Found for url: https://testdomain23.gov.zw/ZED.htm
Failed to download page from http://testdomain23.gov.zw/ZED.htm
Downloading page http://video.nema.gov.mn/ZED.htm...
Error when dowwloading page http://video.nema.gov.mn/ZED.htm: 404 Client Error: Not Found for url: http://video.nema.gov.mn/ZED.htm
Failed to download page from http://video.nema.gov.mn/ZED.htm
Downloading page http://doh.go.th...




Text saved to output/deface/doh.go.th.txt
Downloading page http://cvl.gov.np...
Error when dowwloading page http://cvl.gov.np: 404 Client Error: Not Found for url: http://cvl.gov.np/
Failed to download page from http://cvl.gov.np
Downloading page http://satudata.pagaralamkota.go.id/d...
Error when dowwloading page http://satudata.pagaralamkota.go.id/d: 404 Client Error: Not Found for url: http://satudata.pagaralamkota.go.id/d
Failed to download page from http://satudata.pagaralamkota.go.id/d
Downloading page http://satuadmin.pagaralamkota.go.id/...
Text saved to output/deface/satuadmin.pagaralamkota.go.id_.txt
Downloading page http://webdev.pagaralamkota.go.id/dua...
Text saved to output/deface/webdev.pagaralamkota.go.id_dua.txt
Downloading page http://www.pagaralamkota.go.id/duar.txt...
Error when dowwloading page http://www.pagaralamkota.go.id/duar.txt: 404 Client Error: Not Found for url: http://www.pagaralamkota.go.id/duar.txt
Failed to download page from http://www.pagaralamkota.g



Text saved to output/deface/www.potipisan.go.th.txt
Downloading page http://www.naphokusuman.go.th...




Text saved to output/deface/www.naphokusuman.go.th.txt
Downloading page http://bappeda.belitung.go.id/SMok.txt...
Error when dowwloading page http://bappeda.belitung.go.id/SMok.txt: HTTPConnectionPool(host='lamanlabuh.aduankonten.id', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e14835b0e80>, 'Connection to lamanlabuh.aduankonten.id timed out. (connect timeout=10)'))
Failed to download page from http://bappeda.belitung.go.id/SMok.txt
Downloading page http://dikbud.belitung.go.id/SMok.txt...




Error when dowwloading page http://dikbud.belitung.go.id/SMok.txt: 404 Client Error: Not Found for url: https://dikbud.belitung.go.id/SMok.txt
Failed to download page from http://dikbud.belitung.go.id/SMok.txt
Downloading page http://pn-jakartatimur.go.id/images/d...
Error when dowwloading page http://pn-jakartatimur.go.id/images/d: 403 Client Error: Forbidden for url: https://pn-jakartatimur.go.id/images/d
Failed to download page from http://pn-jakartatimur.go.id/images/d
Downloading page http://zpnetrokona.gov.bd/and.txt...




Error when dowwloading page http://zpnetrokona.gov.bd/and.txt: 500 Server Error: Internal Server Error for url: https://zpnetrokona.gov.bd/and.txt
Failed to download page from http://zpnetrokona.gov.bd/and.txt
Downloading page http://www.jdih.pa-pasirpengaraian.go...
Error when dowwloading page http://www.jdih.pa-pasirpengaraian.go: HTTPConnectionPool(host='www.jdih.pa-pasirpengaraian.go', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14835b0e50>: Failed to resolve 'www.jdih.pa-pasirpengaraian.go' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.jdih.pa-pasirpengaraian.go
Downloading page http://sipp.pa-pasirpengaraian.go.id/...
Error when dowwloading page http://sipp.pa-pasirpengaraian.go.id/: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Failed to download page from http://sipp.pa-pasirpengaraian.go.id/
Downloading page http://d



Error when dowwloading page http://sidapedes.cilacapkab.go.id: HTTPSConnectionPool(host='sidapedes.cilacapkab.go.id', port=443): Read timed out. (read timeout=10)
Failed to download page from http://sidapedes.cilacapkab.go.id
Downloading page http://mycc_staging.jkm.gov.my/duar.txt...
Error when dowwloading page http://mycc_staging.jkm.gov.my/duar.txt: HTTPConnectionPool(host='mycc_staging.jkm.gov.my', port=80): Max retries exceeded with url: /duar.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e14835b1180>, 'Connection to mycc_staging.jkm.gov.my timed out. (connect timeout=10)'))
Failed to download page from http://mycc_staging.jkm.gov.my/duar.txt
Downloading page http://private-rpjmn.bappenas.go.id/r...
Error when dowwloading page http://private-rpjmn.bappenas.go.id/r: 404 Client Error: Not Found for url: https://private-rpjmn.bappenas.go.id/r
Failed to download page from http://private-rpjmn.bappenas.go.id/r
Downloading page http://nghiadan.kimdon



Error when dowwloading page http://www.jdih.pa-sukoharjo.go.id/ka: HTTPConnectionPool(host='www.jdih.pa-sukoharjo.go.id', port=80): Max retries exceeded with url: /ka (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483ddffa0>: Failed to resolve 'www.jdih.pa-sukoharjo.go.id' ([Errno -2] Name or service not known)"))
Failed to download page from http://www.jdih.pa-sukoharjo.go.id/ka
Downloading page http://serasi.pa-sukoharjo.go.id/kapa...
Error when dowwloading page http://serasi.pa-sukoharjo.go.id/kapa: HTTPConnectionPool(host='serasi.pa-sukoharjo.go.id', port=80): Max retries exceeded with url: /kapa (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14835b2410>: Failed to resolve 'serasi.pa-sukoharjo.go.id' ([Errno -2] Name or service not known)"))
Failed to download page from http://serasi.pa-sukoharjo.go.id/kapa
Downloading page http://simasjid.pa-sukoharjo.go.id/ka...
Error when dowwloading page http://simasjid.pa-suk



Text saved to output/deface/sakarat.go.th_doc_1729433555.txt
Downloading page http://www.changtong.go.th/doc/172943...




Error when dowwloading page http://www.changtong.go.th/doc/172943: 404 Client Error: Not Found for url: https://www.changtong.go.th/doc/172943
Failed to download page from http://www.changtong.go.th/doc/172943
Downloading page http://nongnguluam.go.th/doc/1729459641...




Text saved to output/deface/nongnguluam.go.th_doc_1729459641.txt
Downloading page http://pnst4.go.th...
Error when dowwloading page http://pnst4.go.th: HTTPConnectionPool(host='pnst4.go.th', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483ddfbe0>: Failed to resolve 'pnst4.go.th' ([Errno -2] Name or service not known)"))
Failed to download page from http://pnst4.go.th
Downloading page http://ouvidoria.apiai.sp.gov.br/avar...




Error when dowwloading page http://ouvidoria.apiai.sp.gov.br/avar: 404 Client Error: Not Found for url: https://ouvidoria.apiai.sp.gov.br/avar
Failed to download page from http://ouvidoria.apiai.sp.gov.br/avar
Downloading page http://memorando.ribeiraogrande.sp.go...
Error when dowwloading page http://memorando.ribeiraogrande.sp.go: HTTPConnectionPool(host='memorando.ribeiraogrande.sp.go', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14835b04c0>: Failed to resolve 'memorando.ribeiraogrande.sp.go' ([Errno -2] Name or service not known)"))
Failed to download page from http://memorando.ribeiraogrande.sp.go
Downloading page http://wmsc.rid.go.th/images/kapanlag...




Error when dowwloading page http://wmsc.rid.go.th/images/kapanlag: 404 Client Error: Not Found for url: https://wmsc.rid.go.th/images/kapanlag
Failed to download page from http://wmsc.rid.go.th/images/kapanlag
Downloading page http://pa-kabmadiun.go.id/images/kapa...
Error when dowwloading page http://pa-kabmadiun.go.id/images/kapa: HTTPConnectionPool(host='www.yourjoomlawebsite.com', port=80): Max retries exceeded with url: /404.html (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14835b1180>: Failed to resolve 'www.yourjoomlawebsite.com' ([Errno -2] Name or service not known)"))
Failed to download page from http://pa-kabmadiun.go.id/images/kapa
Downloading page http://selumakab.go.id/duar.txt...
Error when dowwloading page http://selumakab.go.id/duar.txt: 404 Client Error: Not Found for url: https://selumakab.go.id/duar.txt
Failed to download page from http://selumakab.go.id/duar.txt
Downloading page http://sis.gov.eg/0x.txt...




Failed to download page from http://sis.gov.eg/0x.txt
Downloading page http://camarariqueza.sc.gov.br...
Text saved to output/deface/camarariqueza.sc.gov.br.txt
Downloading page http://sco.gov.et/back.txt...




Error when dowwloading page http://sco.gov.et/back.txt: 403 Client Error: Forbidden for url: https://sco.gov.et/back.txt
Failed to download page from http://sco.gov.et/back.txt
Downloading page http://www.vialidadsalta.gob.ar/hook.htm...
Error when dowwloading page http://www.vialidadsalta.gob.ar/hook.htm: HTTPConnectionPool(host='www.vialidadsalta.gob.ar', port=80): Max retries exceeded with url: /hook.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e14838292d0>, 'Connection to www.vialidadsalta.gob.ar timed out. (connect timeout=10)'))
Failed to download page from http://www.vialidadsalta.gob.ar/hook.htm
Downloading page http://thakasuem.go.th/captainsmok3r.jpg...
Error when dowwloading page http://thakasuem.go.th/captainsmok3r.jpg: 404 Client Error: Not Found for url: http://thakasuem.go.th/captainsmok3r.jpg
Failed to download page from http://thakasuem.go.th/captainsmok3r.jpg
Downloading page http://nongian.go.th/captainsmok3r.jpg...
Error when do



Error when dowwloading page http://cminhapim.mg.gov.br/mrz.php: 404 Client Error: Not Found for url: https://cminhapim.mg.gov.br/mrz.php
Failed to download page from http://cminhapim.mg.gov.br/mrz.php
Downloading page http://oscp.tacloban.gov.ph/mrz.html...
Error when dowwloading page http://oscp.tacloban.gov.ph/mrz.html: HTTPConnectionPool(host='oscp.tacloban.gov.ph', port=80): Max retries exceeded with url: /mrz.html (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e1483829d80>: Failed to resolve 'oscp.tacloban.gov.ph' ([Errno -5] No address associated with hostname)"))
Failed to download page from http://oscp.tacloban.gov.ph/mrz.html
Downloading page http://computerization.mhada.gov.in/m...




Error when dowwloading page http://computerization.mhada.gov.in/m: 404 Client Error: Not Found for url: https://computerization.mhada.gov.in:443/m
Failed to download page from http://computerization.mhada.gov.in/m
Downloading page http://paro12.dnp.go.th...




Text saved to output/deface/paro12.dnp.go.th.txt
Downloading page http://songkhwae.nan.police.go.th...
Error when dowwloading page http://songkhwae.nan.police.go.th: 502 Server Error: Bad Gateway for url: http://songkhwae.nan.police.go.th/
Failed to download page from http://songkhwae.nan.police.go.th
Downloading page http://munisanantoniodeputina.gob.pe/...
Error when dowwloading page http://munisanantoniodeputina.gob.pe/: HTTPConnectionPool(host='munisanantoniodeputina.gob.pe', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e14844655d0>: Failed to resolve 'munisanantoniodeputina.gob.pe' ([Errno -2] Name or service not known)"))
Failed to download page from http://munisanantoniodeputina.gob.pe/
Downloading page http://siga.valenca.ba.gov.br...
Error when dowwloading page http://siga.valenca.ba.gov.br: 404 Client Error: Not Found for url: http://siga.valenca.ba.gov.br/
Failed to download page from http://siga.v



Text saved to output/deface/bbdmp.gov.np.txt
Downloading page http://empalmevillaconstitucion.gob.ar...




Text saved to output/deface/empalmevillaconstitucion.gob.ar.txt
Downloading page http://durande.mg.gov.br/mrz.php...




Error when dowwloading page http://durande.mg.gov.br/mrz.php: 404 Client Error: Not Found for url: https://durande.mg.gov.br/mrz.php
Failed to download page from http://durande.mg.gov.br/mrz.php
Downloading page http://www.kukarkab.go.id/asu.txt...
Error when dowwloading page http://www.kukarkab.go.id/asu.txt: HTTPConnectionPool(host='www.kukarkab.go.id', port=80): Max retries exceeded with url: /asu.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e148406a740>, 'Connection to www.kukarkab.go.id timed out. (connect timeout=10)'))
Failed to download page from http://www.kukarkab.go.id/asu.txt
Downloading page http://dgaie.gov.bf/kurd.html...




Error when dowwloading page http://dgaie.gov.bf/kurd.html: HTTPSConnectionPool(host='dgaie.gov.bf', port=443): Read timed out. (read timeout=10)
Failed to download page from http://dgaie.gov.bf/kurd.html


# Text Preprocessing & TF-IDF Calculation

In [None]:
import os
import csv
import re
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Root of the downloaded corpus on the mounted Drive; each class lives in its own sub-folder
output_folder = '/content/drive/MyDrive/output'
safe_folder, deface_folder = (
    os.path.join(output_folder, class_dir) for class_dir in ('safe', 'deface')
)

# Function to get all .txt files in a folder
def get_text_files_from_folder(folder):
    """Recursively collect the paths of all ``.txt`` files under ``folder``.

    Args:
        folder: Directory to search; sub-directories are searched as well.

    Returns:
        list[str]: Paths formed by joining each visited directory with the
        file name, sorted so the order is the same on every run. The list is
        empty when ``folder`` does not exist or holds no ``.txt`` files.
    """
    text_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(folder)
        for name in names
        if name.endswith('.txt')  # Check if the file is a .txt file
    ]
    # os.walk yields entries in arbitrary filesystem order; sorting keeps the
    # document order (and anything seeded on top of it) stable between runs.
    text_files.sort()
    return text_files

# Preprocessing the text: removing punctuation and converting to lowercase
def preprocess_text(text):
    """Normalise raw text before vectorisation.

    The text is lowercased, every character that is neither a word character
    nor whitespace is dropped, and each run of whitespace is collapsed into a
    single space. Leading/trailing whitespace is collapsed, not stripped.

    Args:
        text: The string to clean.

    Returns:
        str: The cleaned string.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', without_punctuation)

# Collect the .txt paths for each class
safe_documents = get_text_files_from_folder(safe_folder)
deface_documents = get_text_files_from_folder(deface_folder)

# Cleaned texts and their class labels, kept index-aligned
all_documents = []
labels = []

print("Processing the documents...")

# Read the "safe" files first (label 0), then the "deface" files (label 1)
for class_label, class_paths in ((0, safe_documents), (1, deface_documents)):
    for path in class_paths:
        with open(path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()
        all_documents.append(preprocess_text(raw_text))
        labels.append(class_label)

# Fit TF-IDF over the whole corpus: unigrams and bigrams, vocabulary capped at 300 terms
print("Calculating TF-IDF...")
vectorizer = TfidfVectorizer(max_features=300, ngram_range=(1, 2), stop_words=None)
tfidf_matrix = vectorizer.fit_transform(all_documents)

# Densify the sparse (CSR) matrix into a DataFrame with one column per term
X_df = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=vectorizer.get_feature_names_out(),
)

Processing the documents...
Calculating TF-IDF...


# TRAIN MODELS

In [None]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.13-py2.py3-none-any.whl.metadata (12 kB)
Downloading lazypredict-0.2.13-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.13


In [None]:
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier  # Using LazyPredict
from sklearn.metrics import accuracy_score
# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_df, labels, test_size=0.2, random_state=15)

# Use LazyPredict to try different machine learning models.
# predictions=True is required for fit() to return the per-model test-set
# predictions; with the default (False) the second return value is just the
# scores table again, so the CSV below would hold metrics with the model
# names dropped instead of predictions.
print("Using LazyPredict to train models...")
clf = LazyClassifier(predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Print the results from LazyPredict
print(models)  # Metrics table, one row per model

# Save the predictions to a CSV file (one column per model, one row per test sample)
predictions.to_csv('lazy_predictions.csv', index=False)


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



Using LazyPredict to train models...


 97%|█████████▋| 30/31 [00:28<00:00,  4.49it/s]

[LightGBM] [Info] Number of positive: 250, number of negative: 862
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003699 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18855
[LightGBM] [Info] Number of data points in the train set: 1112, number of used features: 283
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.224820 -> initscore=-1.237794
[LightGBM] [Info] Start training from score -1.237794


100%|██████████| 31/31 [00:29<00:00,  1.06it/s]

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
RandomForestClassifier             0.94               0.89     0.89      0.94   
PassiveAggressiveClassifier        0.94               0.89     0.89      0.94   
ExtraTreesClassifier               0.94               0.89     0.89      0.94   
XGBClassifier                      0.94               0.88     0.88      0.94   
LogisticRegression                 0.94               0.88     0.88      0.93   
LinearSVC                          0.92               0.88     0.88      0.92   
LinearDiscriminantAnalysis         0.94               0.87     0.87      0.93   
RidgeClassifier                    0.94               0.87     0.87      0.93   
RidgeClassifierCV                  0.94               0.87     0.87      0.93   
LGBMClassifier                     0.93               0.87     0.87      0.93   
Perceptron                  




# RANDOM FOREST MODEL

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Create the RandomForestClassifier model.
# A fixed random_state makes the fitted forest, and the accuracy printed
# below, repeatable across runs; 15 matches the seed of the train/test split.
model = RandomForestClassifier(random_state=15)

# Train the model on the training set
print("Training the RandomForestClassifier model...")
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model (fraction of test samples whose label was predicted correctly)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the RandomForestClassifier model: {accuracy * 100:.2f}%")


Training the RandomForestClassifier model...
Accuracy of the RandomForestClassifier model: 93.53%


# SAVE MODELS

In [None]:
import joblib  # For saving the model and vectorizer

In [None]:

# Save the model and vectorizer
model_dir = "/content/drive/MyDrive/model"
# joblib.dump does not create missing folders, so make sure the target exists
os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, os.path.join(model_dir, "random_forest_model.pkl"))
joblib.dump(vectorizer, os.path.join(model_dir, "tfidf_vectorizer.pkl"))

['/content/drive/MyDrive/model/tfidf_vectorizer.pkl']

# PREDICT

In [None]:
# Function to predict the class of a new URL
def predict_url(url, model, vectorizer):
    """Classify the page behind ``url`` with the given model and vectorizer.

    The page is fetched with download_page(), its text is pulled out with
    extract_text_from_html(), normalised with preprocess_text(), turned into
    a TF-IDF vector and passed to ``model.predict``.

    Returns the first predicted label (0: safe, 1: deface), or None when
    download_page() yields nothing for the URL.
    """
    html_content = download_page(url)

    # Nothing downloaded: there is no text to classify.
    if not html_content:
        return None

    page_text = preprocess_text(extract_text_from_html(html_content))

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([page_text])

    # predict() returns one label per input document; we passed exactly one.
    return model.predict(features)[0]

In [None]:
# Example URL prediction
test_url = "https://vnexpress.net/"  # Replace this URL with the one you want to check
prediction = predict_url(test_url, model, vectorizer)

# predict_url returns None when the page could not be downloaded.
if prediction is None:
    print(f"Could not download the page from {test_url}")
else:
    print(f"URL: {test_url}")
    # Label 0 means safe; any other label is reported as defaced.
    print("Predicted class: Safe" if prediction == 0 else "Predicted class: Deface")

URL: https://vnexpress.net/
Predicted class: Safe


In [None]:
# Example URL prediction
test_url = "https://primarias.voto/index.html"  # Replace this URL with the one you want to check
prediction = predict_url(test_url, model, vectorizer)

# predict_url returns None when the page could not be downloaded.
if prediction is None:
    print(f"Could not download the page from {test_url}")
else:
    print(f"URL: {test_url}")
    # Label 0 means safe; any other label is reported as defaced.
    print("Predicted class: Safe" if prediction == 0 else "Predicted class: Deface")


URL: https://primarias.voto/index.html
Predicted class: Deface


# WEB


In [None]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


In [None]:
from flask import Flask, request, render_template, jsonify
from pyngrok import ngrok
import joblib
import requests
from bs4 import BeautifulSoup
import re
import os
# Ensure the URL has a proper scheme
from urllib.parse import urlparse, unquote

# Load the trained classifier and the fitted TF-IDF vectorizer from the same
# Drive paths the "SAVE MODELS" step wrote them to.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code - only load .pkl files you produced yourself.
model = joblib.load("/content/drive/MyDrive/model/random_forest_model.pkl")  # Drive must be mounted and the file must exist
vectorizer = joblib.load("/content/drive/MyDrive/model/tfidf_vectorizer.pkl")

# Flask app. Templates are looked up in template_folder, so the "index.html"
# rendered by the home() route has to be placed directly in this directory.
app = Flask(__name__, template_folder='/content/drive/MyDrive')  # Folder that contains index.html

def download_page(url):
    """Fetch ``url`` over HTTP and return the response body as text.

    The request uses a 10 second timeout. Any requests-level failure
    (including a non-2xx status, via raise_for_status) is printed and
    turned into a ``None`` return value instead of being raised.
    """
    try:
        page = requests.get(url, timeout=10)
        # Treat 4xx/5xx answers as failures rather than as page content.
        page.raise_for_status()
        return page.text
    except requests.exceptions.RequestException as err:
        print(f"Error when downloading page {url}: {err}")
        return None

# Preprocessing function
def clean_text(text):
    """Lowercase ``text``, drop punctuation and collapse whitespace runs.

    Every character that is neither a word character nor whitespace is
    removed, then each run of whitespace is replaced by a single space.
    Leading/trailing whitespace is collapsed but not stripped.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', without_punctuation)

# Extract text from HTML content
def extract_text_from_html(html_content):
    """Return the cleaned text of an HTML document.

    Text sources are tried in a fixed order and the first one that yields a
    non-empty string is used: <body>, <title>, content tags (<article>,
    <section>, <p>, <h1>, <h2>, <h3>), <header>/<footer>, the
    <meta name="description"> content, <main>, and finally the text of the
    whole parsed document. The selected text is passed through clean_text().
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    def text_of(node):
        # Text of one node, pieces stripped and joined with single spaces.
        return node.get_text(separator=' ', strip=True)

    def joined_text(tag_names):
        # Text of every tag with one of the given names, space separated.
        return ' '.join([text_of(tag) for tag in soup.find_all(tag_names)])

    def meta_description():
        meta_desc = soup.find('meta', {'name': 'description'})
        if meta_desc and meta_desc.get('content'):
            return meta_desc['content']
        return ""

    def main_text():
        main_content = soup.find('main')
        return text_of(main_content) if main_content else ""

    # Ordered fallbacks; each is evaluated only if all previous ones were empty.
    extractors = (
        lambda: text_of(soup.body) if soup.body else "",
        lambda: text_of(soup.title) if soup.title else "",
        lambda: joined_text(['article', 'section', 'p', 'h1', 'h2', 'h3']),
        lambda: joined_text(['header', 'footer']),
        meta_description,
        main_text,
        lambda: text_of(soup),
    )

    body_text = ""
    for extract in extractors:
        body_text = extract()
        if body_text:
            break

    return clean_text(body_text)

def preprocess_text(text):
    """Normalise ``text`` before it is handed to the TF-IDF vectorizer.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, and each whitespace run becomes one space.
    """
    result = text.lower()
    # Applied in order: strip punctuation first, then squeeze whitespace.
    substitutions = (
        (r'[^\w\s]', ''),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result



def ensure_https(url):
    """Return ``url`` trimmed and guaranteed to start with a scheme.

    Whitespace, both literal and percent-encoded (``%20``, ``%09``, ``%0A``,
    ``%0D``), is removed from the two ends only. Percent-escapes inside the
    URL are left untouched: decoding the whole URL would turn e.g. ``%26`` or
    ``%2F`` into ``&`` / ``/`` and silently change which resource is fetched.

    Args:
        url: URL text as typed or pasted by the user.

    Returns:
        The URL unchanged (apart from trimming) when it already starts with
        ``<scheme>://``; the decoded URL when the input was percent-encoded as
        a whole (``https%3A%2F%2F...``); otherwise the URL prefixed with
        ``https://`` (``//host/path`` becomes ``https://host/path``).
    """
    edge_junk = r'(?:\s|%20|%09|%0[AaDd])+'
    url = re.sub(rf'^{edge_junk}|{edge_junk}$', '', url)

    # An explicit "scheme://" prefix is required. urlparse() alone is not
    # enough here: it reports "example.com" as the scheme of "example.com:8080".
    has_scheme = re.compile(r'[A-Za-z][A-Za-z0-9+.\-]*://').match

    if has_scheme(url):
        return url

    # The whole URL may have been pasted percent-encoded; decode only then.
    decoded = unquote(url)
    if has_scheme(decoded):
        return decoded.strip()

    # Scheme-relative input already carries the "//" separator.
    if url.startswith('//'):
        return 'https:' + url

    return 'https://' + url



@app.route("/", methods=["GET", "POST"])
def home():
    """Serve the checker form and, on POST, classify the submitted URL.

    GET renders ``index.html`` with ``result=None``. POST reads the ``url``
    form field, downloads the page, vectorizes its text and renders
    ``index.html`` with a ``result`` message and a ``result_class`` of
    ``"safe"``, ``"deface"`` or ``"error"``.
    """
    if request.method != "POST":
        return render_template("index.html", result=None)

    # Strip before the emptiness check so a whitespace-only submission is
    # reported as missing instead of producing a download error for "https://".
    url = request.form.get("url", "").strip()
    if not url:
        return render_template("index.html", result="URL is required.", result_class="error")

    # Ensure URL has the correct scheme
    url = ensure_https(url)

    # NOTE(security): the URL comes from an untrusted client and is fetched from
    # this server, which the notebook exposes publicly through ngrok. Nothing
    # here stops requests to internal addresses (SSRF) - review before any
    # deployment beyond a demo.
    html_content = download_page(url)
    if not html_content:
        return render_template(
            "index.html",
            result=f"Could not download the page from {url}.",
            result_class="error",
        )

    body_text = extract_text_from_html(html_content)
    processed_text = preprocess_text(body_text)

    # The vectorizer expects an iterable of documents; predict returns one label each.
    tfidf_vector = vectorizer.transform([processed_text])
    prediction = model.predict(tfidf_vector)[0]

    # Label 0 is the safe class; any other label is reported as a defacement.
    if prediction == 0:
        result, result_class = "Safe (No Defacement Detected)", "safe"
    else:
        result, result_class = "Deface Detected!", "deface"

    return render_template("index.html", result=result, result_class=result_class)


if __name__ == "__main__":
    import getpass

    # Never hardcode the ngrok token in the notebook: anything committed or
    # shared leaks it. Read it from the environment, or prompt for it without
    # echoing. A token that was previously pasted here should be revoked in
    # the ngrok dashboard.
    ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok auth token: ")
    ngrok.set_auth_token(ngrok_token)

    # Use one port value for both the tunnel and the Flask server so they
    # cannot drift apart (5000 is also Flask's default).
    port = 5000
    public_url = ngrok.connect(port)
    print(f"Public URL: {public_url}")
    app.run(port=port)


Public URL: NgrokTunnel: "https://9006-34-16-186-74.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [10/Dec/2024 13:59:10] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Dec/2024 13:59:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [10/Dec/2024 13:59:15] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Dec/2024 13:59:28] "POST / HTTP/1.1" 200 -
