In [1]:
import pandas as pd
import concurrent.futures

In [2]:
# Load the raw dataset from CSV
df = pd.read_csv('dataset_B_05_2020.csv')

# Look up the names of the first and last columns
# (the preview printed below shows these are `url` and `status`)
first_column, last_column = df.columns[0], df.columns[-1]

# Keep only those two columns
df = df.loc[:, [first_column, last_column]]

# Preview the first rows
print(df.head())

                                                 url      status
0              http://www.crestonwood.com/router.php  legitimate
1  http://shadetreetechnology.com/V4/validation/a...    phishing
2  https://support-appleld.com.secureupdate.duila...    phishing
3                                 http://rgipt.ac.in  legitimate
4  http://www.iracing.com/tracks/gateway-motorspo...  legitimate


In [3]:
# Take the first 250 rows labelled 'legitimate' (in file order, not a random draw)
legitimate_df = df.loc[df['status'].eq('legitimate')].iloc[:250]

In [4]:
# Take the first 250 rows labelled 'phishing' (in file order, not a random draw)
phishing_df = df.loc[df['status'].eq('phishing')].iloc[:250]

In [5]:
# Stack the legitimate and phishing subsets into a single DataFrame
df = pd.concat([legitimate_df, phishing_df])

# Shuffle the rows and renumber the index from 0.
# A fixed random_state keeps the row order identical across
# "Restart Kernel -> Run All", so the outputs recorded in this notebook
# (and the CSV written at the end) can be reproduced.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [6]:
# Print the combined 500-row dataset (pandas elides the middle rows in its text repr)
print(df)

                                                   url      status
0    http://www.wealthdaily.com/articles/nokia-stoc...  legitimate
1    https://www.slideshare.net/gibsondaniel83/mark...  legitimate
2    http://www.game.co.uk/en/games/nintendo-switch...  legitimate
3            http://www.routeralley.com/guides/nat.pdf  legitimate
4    http://www.phoenixlocksmith-az.com/prole/?p[]=...    phishing
..                                                 ...         ...
495  http://signin.eday.co.uk.ws.edayisapi.dllsign....    phishing
496                        http://www.calculate.co.il/  legitimate
497                         http://www.codeboiler.net/  legitimate
498  http://www.orafaq.com/wiki/Roles_and_Responsib...  legitimate
499      https://www.hfunderground.com/wiki/Ionosphere  legitimate

[500 rows x 2 columns]


In [7]:
# 라이브러리 임포트
import requests
import regex # pip install regex
import ipaddress # pip install ipaddress
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import content_based_features as cbf # 컨텐트 기반 피처 알고리즘 코드

In [8]:
# # 병렬 처리를 위해 ThreadPoolExecutor 설정
# def apply_function_in_parallel(df, func_name, col_name):
#     def wrapper(url):
#         result = func_name(url)
#         print(f"{url} - {col_name} 피처 완료")
#         return result

#     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
#         results = list(executor.map(wrapper, df['url']))

#     df[col_name] = results
#     df.reset_index(drop=True, inplace=True)
#     if col_name in df.columns:
#         print(f"피처 '{col_name}' 추가 완료:")
#         print(df[[col_name]].head())
#     else:
#         print(f"피처 '{col_name}'이 데이터프레임에 존재하지 않습니다.")

# Compute one feature column by running an extractor over every URL
def apply(df, func_name, col_name):
    """Add a feature column to ``df`` by calling ``func_name`` on each URL.

    The extractor is invoked sequentially, once per value of ``df['url']``,
    and a progress line is printed after each call.

    Args:
        df: pandas DataFrame that has a ``'url'`` column. It is modified in
            place: ``col_name`` is added (or overwritten) and the index is
            reset to ``0..n-1``.
        func_name: Callable that takes a single URL value and returns the
            feature value for it.
        col_name: Name of the column that receives the results; also used
            in the progress messages.

    Returns:
        None. The result is delivered through the mutation of ``df``.

    Raises:
        Whatever ``func_name`` raises; nothing is caught here, so one
        failing URL aborts the run before the column is written.
    """
    results = []
    for url in df['url']:
        results.append(func_name(url))
        print(f"{url} - {col_name} 피처 완료")

    # A plain list is assigned positionally, so it lines up with the rows
    # regardless of the current index labels.
    df[col_name] = results
    df.reset_index(drop=True, inplace=True)

    # The column was assigned just above, so it is always present here;
    # the former "column missing" branch could never run and was removed.
    print(f"피처 '{col_name}' 추가 완료:")
    print(df[[col_name]].head())

In [9]:
# Fill the 'RightClick' column by running cbf.use_right_click on every URL in df
apply(df, func_name=cbf.use_right_click, col_name='RightClick')

http://www.wealthdaily.com/articles/nokia-stock/2147 - RightClick 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - RightClick 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - RightClick 피처 완료
RightClick HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c856710>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - RightClick 피처 완료
RightClick HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c854f40>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - RightClick 피처 완료
http:/

In [10]:
# Fill the 'popUpWidnow' column by running cbf.popup_window_text on every URL in df
# NOTE(review): 'popUpWidnow' reads like a typo of 'popUpWindow', but it may deliberately
# mirror an upstream dataset's column name — confirm before renaming.
apply(df, func_name=cbf.popup_window_text, col_name='popUpWidnow')

http://www.wealthdaily.com/articles/nokia-stock/2147 - popUpWidnow 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - popUpWidnow 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - popUpWidnow 피처 완료
popUpWidnow HTTP 요청 Error: HTTPConnectionPool(host='www.routeralley.com', port=80): Read timed out. (read timeout=10)
http://www.routeralley.com/guides/nat.pdf - popUpWidnow 피처 완료
popUpWidnow HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bf599f0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - popUpWidnow 피처 완료
http://cartetitolare-italy.www1.biz/portaleTitolares11/ - popUpWidnow 피처 완료
popUpWidnow HTTP 요청 Error: HTTPConnectionPool(host='kprealtors.com', port=80):

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - popUpWidnow 피처 완료
https://www.sportsengine.com/solutions/ - popUpWidnow 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - popUpWidnow 피처 완료
popUpWidnow HTTP 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bc9b250>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - popUpWidnow 피처 완료
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - popUpWidnow 피처 완료
https://www.iplocation.net/mac-address - popUpWidnow 피처 완료
http://www.henrikssonbygg.se/libraries/legacy/log/Update/ - popUpWidnow 피처 완료
http://www.shadetreetechnology.com/V4/validation/ba4b8bddd7958ecb8772c836c2969531 - popUpWidnow 피처 완료
http://www.grafikerler.org/ - popUpWidnow 피처 완료
http://secure-login-portal-outlook.el.r.appspot.com/c:/users/user/downloads - popUpWidnow 피처 완료
https:

In [11]:
# Fill the 'Iframe' column by running cbf.iFrame_redirection on every URL in df
apply(df, func_name=cbf.iFrame_redirection, col_name='Iframe')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Iframe 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Iframe 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Iframe 피처 완료
Iframe HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bd9b130>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Iframe 피처 완료
Iframe HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bd9baf0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - Iframe 피처 완료
http://cartetitolare-italy.www1.bi

In [12]:
# Fill the 'having_IPhaving_IP_Address' column by running cbf.using_ip on every URL in df
# NOTE(review): the doubled 'having_IP' prefix looks accidental, but it may deliberately
# mirror an upstream dataset's column name — confirm before renaming.
apply(df, func_name=cbf.using_ip, col_name='having_IPhaving_IP_Address')

http://www.wealthdaily.com/articles/nokia-stock/2147 - having_IPhaving_IP_Address 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - having_IPhaving_IP_Address 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - having_IPhaving_IP_Address 피처 완료
http://www.routeralley.com/guides/nat.pdf - having_IPhaving_IP_Address 피처 완료
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - having_IPhaving_IP_Address 피처 완료
http://cartetitolare-italy.www1.biz/portaleTitolares11/ - having_IPhaving_IP_Address 피처 완료
http://kprealtors.com/ve/ - having_IPhaving_IP_Address 피처 완료
https://www.skecherstanio.pl/ - having_IPhaving_IP_Address 피처 완료
https://7426fbe0d8676fde2cac756c0731ce57.udagwebspace.de/7e08701fe7e22e7e0b0fabe1c449c468ZjZiZDNjMmQxYTNlNWI4MmU2NzRhNjk2NWFmOWIwNTM=/signin/ - having_IPhaving_IP_Address 피처 완료
https://tekoma-my.sharepoint.com/:o:/g/personal/jglorie_cryonorm_com/EkKk_u3bn1pIllAzGyJIo1oBt6TRaNZ2_nrTiHKiV1tW-Q?e=ksIEcu - havin

In [13]:
# Fill the 'Favicon' column by running cbf.check_favicon on every URL in df
apply(df, func_name=cbf.check_favicon, col_name='Favicon')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Favicon 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Favicon 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Favicon 피처 완료
Favicon HTTP Exception Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c446020>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Favicon 피처 완료
Favicon HTTP Exception Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c445450>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - Favicon 피처 완료
Favicon HTTP 

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - Favicon 피처 완료
https://www.sportsengine.com/solutions/ - Favicon 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - Favicon 피처 완료
Favicon HTTP Exception Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bafdd50>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - Favicon 피처 완료
Favicon HTTP Exception Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - Favicon 피처 완료
https://www.iplocation.net/mac-address - Favicon 피처 완료
Favicon HTTP Exception Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
http://www.henrikssonbygg.se/libraries/legacy/log/Update/ - Favicon 피처 완료


In [14]:
# Fill the 'Request_URL' column by running cbf.check_request_url on every URL in df
apply(df, func_name=cbf.check_request_url, col_name='Request_URL')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Request_URL 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Request_URL 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Request_URL 피처 완료
Request_URL HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73ba32f80>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Request_URL 피처 완료
Request_URL HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73ba32800>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - Request_URL 피처 완료

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - Request_URL 피처 완료
https://www.sportsengine.com/solutions/ - Request_URL 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - Request_URL 피처 완료
Request_URL HTTP 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c3a6e00>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - Request_URL 피처 완료
Request_URL HTTP 요청 Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - Request_URL 피처 완료
https://www.iplocation.net/mac-address - Request_URL 피처 완료
Request_URL HTTP 요청 Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
http://www.henrikssonbygg.se/libraries/legacy/log/Update/ -

In [18]:
# Fill the 'URL_of_Anchor' column by running cbf.check_url_of_anchor on every URL in df
apply(df, func_name=cbf.check_url_of_anchor, col_name='URL_of_Anchor')

http://www.wealthdaily.com/articles/nokia-stock/2147 - URL_of_Anchor 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - URL_of_Anchor 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - URL_of_Anchor 피처 완료
URL_of_Anchor HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c0db820>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - URL_of_Anchor 피처 완료
URL_of_Anchor HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c0d87f0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - URL_o

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - URL_of_Anchor 피처 완료
https://www.sportsengine.com/solutions/ - URL_of_Anchor 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - URL_of_Anchor 피처 완료
URL_of_Anchor HTTP 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c8578b0>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - URL_of_Anchor 피처 완료
URL_of_Anchor HTTP 요청 Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - URL_of_Anchor 피처 완료
https://www.iplocation.net/mac-address - URL_of_Anchor 피처 완료
URL_of_Anchor HTTP 요청 Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
http://www.henrikssonbygg.se/libraries/le

In [19]:
# Fill the 'Links_in_tags' column by running cbf.has_meta_tags on every URL in df
apply(df, func_name=cbf.has_meta_tags, col_name='Links_in_tags')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Links_in_tags 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Links_in_tags 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Links_in_tags 피처 완료
Links_in_tags HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c444d00>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Links_in_tags 피처 완료
Links_in_tags HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c4464a0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - Links

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - Links_in_tags 피처 완료
https://www.sportsengine.com/solutions/ - Links_in_tags 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - Links_in_tags 피처 완료
Links_in_tags HTTP 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bcf1bd0>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - Links_in_tags 피처 완료
Links_in_tags HTTP 요청 Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - Links_in_tags 피처 완료
https://www.iplocation.net/mac-address - Links_in_tags 피처 완료
Links_in_tags HTTP 요청 Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
http://www.henrikssonbygg.se/libraries/le

In [20]:
# Fill the 'SFH' column by running cbf.check_sfh on every URL in df
apply(df, func_name=cbf.check_sfh, col_name='SFH')

http://www.wealthdaily.com/articles/nokia-stock/2147 - SFH 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - SFH 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - SFH 피처 완료
SFH HTTP 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c21d570>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - SFH 피처 완료
SFH HTTP 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c21e410>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - SFH 피처 완료
SFH HTTP 요청 Error: 404 Client Error: Not Found for url:

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - SFH 피처 완료
https://www.sportsengine.com/solutions/ - SFH 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - SFH 피처 완료
SFH HTTP 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c44bf70>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - SFH 피처 완료
SFH HTTP 요청 Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - SFH 피처 완료
https://www.iplocation.net/mac-address - SFH 피처 완료
SFH HTTP 요청 Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
http://www.henrikssonbygg.se/libraries/legacy/log/Update/ - SFH 피처 완료
SFH HTTP 요청 Error: 404 Client Error: Not Found for url: http:

In [21]:
# Fill the 'Submitting_to_email' column by running cbf.check_submit_email on every URL in df
apply(df, func_name=cbf.check_submit_email, col_name='Submitting_to_email')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Submitting_to_email 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Submitting_to_email 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Submitting_to_email 피처 완료
Submitting_to_email 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c5e0e20>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Submitting_to_email 피처 완료
Submitting_to_email 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c5e1ea0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.co

  soup = BeautifulSoup(response.content, 'lxml')


https://www.luxuryloft.eu/ - Submitting_to_email 피처 완료
https://www.sportsengine.com/solutions/ - Submitting_to_email 피처 완료
http://www.audioenglish.org/dictionary/resolution.htm - Submitting_to_email 피처 완료
Submitting_to_email 요청 Error: HTTPConnectionPool(host='www.j-net.cn', port=80): Max retries exceeded with url: /cms (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73ba08ac0>: Failed to resolve 'www.j-net.cn' ([Errno -2] Name or service not known)"))
http://www.j-net.cn/cms - Submitting_to_email 피처 완료
Submitting_to_email 요청 Error: 403 Client Error: Forbidden for url: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project - Submitting_to_email 피처 완료
https://www.iplocation.net/mac-address - Submitting_to_email 피처 완료
Submitting_to_email 요청 Error: 404 Client Error: Not Found for url: https://www.henrikssonbygg.se/libraries/legacy/log/Update/
ht

In [22]:
# Fill the 'Redirect' column by running cbf.check_redirect_count on every URL in df
apply(df, func_name=cbf.check_redirect_count, col_name='Redirect')

http://www.wealthdaily.com/articles/nokia-stock/2147 - Redirect 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - Redirect 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - Redirect 피처 완료
Redirect 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bb987c0>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - Redirect 피처 완료
Redirect 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73bb987c0>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - Redirect 피처 완료
http://cartetitolare-italy.www

In [23]:
# Fill the 'on_mouseover' column by running cbf.check_onmouseover_change on every URL in df
apply(df, func_name=cbf.check_onmouseover_change, col_name='on_mouseover')

http://www.wealthdaily.com/articles/nokia-stock/2147 - on_mouseover 피처 완료
https://www.slideshare.net/gibsondaniel83/market-segmentation-targeting-and-positioning - on_mouseover 피처 완료
http://www.game.co.uk/en/games/nintendo-switch/nintendo-switch/ - on_mouseover 피처 완료
on_mouseover 요청 Error: HTTPConnectionPool(host='no.access', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c62e560>: Failed to resolve 'no.access' ([Errno -2] Name or service not known)"))
http://www.routeralley.com/guides/nat.pdf - on_mouseover 피처 완료
on_mouseover 요청 Error: HTTPConnectionPool(host='www.phoenixlocksmith-az.com', port=80): Max retries exceeded with url: /prole/?p%5B%5D=prole (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ec73c62cf40>: Failed to resolve 'www.phoenixlocksmith-az.com' ([Errno -2] Name or service not known)"))
http://www.phoenixlocksmith-az.com/prole/?p[]=prole - on_mouseover 피처 완료
ht

In [24]:
# Rearrange the columns so that the 'status' label comes last:
# every other column keeps its current relative order.
columns = df.columns[df.columns != 'status'].tolist() + ['status']
df_reordered = df.loc[:, columns]

# Write the reordered frame to CSV without the index column
df_reordered.to_csv('dataset_content_based_features.csv', index=False)