In [3]:
import requests
import re
from urllib.parse import urlparse

# Function to check URL against the OpenPhish public feed
def check_phishtank(url):
    """Return True if *url* is listed verbatim in the OpenPhish feed.

    Despite its name (kept so existing callers keep working), this function
    does not query PhishTank: it downloads the OpenPhish plain-text feed and
    looks for an exact match of *url* among its lines.

    Args:
        url: The URL string to look up.

    Returns:
        True when the URL is listed in the feed. False when it is not listed
        or when the feed could not be retrieved (best-effort lookup).
    """
    feed_url = "https://openphish.com/feed.txt"
    try:
        # Without a timeout an unresponsive feed server would hang the scan.
        response = requests.get(feed_url, timeout=10)
        # An HTTP error page is not a feed; treat it like a failed download.
        response.raise_for_status()
    except requests.RequestException:
        return False

    # splitlines() + strip() tolerate CRLF line endings and stray whitespace;
    # blank lines are skipped so an empty string never counts as "listed".
    entries = (line.strip() for line in response.text.splitlines())
    return any(entry and entry == url for entry in entries)

# Function to detect suspicious patterns in URL
def detect_suspicious_patterns(url):
    """Return True if *url* matches any heuristic phishing pattern.

    The URL must start with ``http://`` or ``https://``; it is then flagged
    when it contains a run of five or more digits or one of the keywords
    ``free``, ``secure``, ``bank``, ``paypal`` or ``login``.

    Matching is case-insensitive, so mixed-case spellings such as
    ``PayPal`` or ``LOGIN`` cannot be used to slip past the keyword checks.

    Args:
        url: The URL string to inspect.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    phishing_patterns = [
        r"https?://.*\d{5,}",  # URLs with many numbers
        r"https?://.*free.*",  # Keywords like 'free'
        r"https?://.*secure.*",  # Fake secure references
        r"https?://.*bank.*",  # Banking phishing scams
        r"https?://.*paypal.*",  # PayPal phishing scams
        r"https?://.*login.*",  # Fake login pages
    ]
    # re.match anchors at the start, so only http(s) URLs can ever match.
    return any(
        re.match(pattern, url, re.IGNORECASE) for pattern in phishing_patterns
    )

# Function to check URL redirections
def check_redirections(url):
    """Return True if fetching *url* involves an excessive redirect chain.

    The URL is requested with redirects followed. More than two redirect
    hops is considered suspicious, as is a chain so long (or looping) that
    ``requests`` gives up with ``TooManyRedirects``.

    Args:
        url: The URL string to request.

    Returns:
        True when the redirect chain is excessive. False when it is not, or
        when the request fails for any other reason (best-effort check).
    """
    try:
        # stream=True: only the response headers are needed to count hops,
        # so the body of a possibly malicious page is not downloaded. The
        # context manager releases the connection afterwards.
        with requests.get(
            url, allow_redirects=True, timeout=5, stream=True
        ) as response:
            return len(response.history) > 2  # More than two redirects is suspicious
    except requests.TooManyRedirects:
        # Must be caught before RequestException (its base class): hitting
        # the redirect limit is the strongest "excessive redirects" signal.
        return True
    except requests.RequestException:
        return False

# Main function
def scan_url(url):
    """Run all phishing checks on *url* and return a verdict message.

    The URL is first validated: it must parse, use the ``http`` or ``https``
    scheme, and contain a host. Valid URLs are then checked, in order,
    against the known-phishing feed, the suspicious-pattern heuristics, and
    the redirect-chain check; the first check that fires decides the result.

    Args:
        url: The URL string to scan.

    Returns:
        A human-readable message: "Invalid URL format", one of the warning
        messages, or the "appears to be safe" message.
    """
    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unbalanced "[").
        return "Invalid URL format"

    # Require exactly http/https (a prefix test would accept e.g. "httpx")
    # and a non-empty host, so "http://" alone is rejected.
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        return "Invalid URL format"

    print(f"Scanning URL: {url}")

    if check_phishtank(url):
        return "⚠️ WARNING: This URL is a known phishing site!"

    if detect_suspicious_patterns(url):
        return "⚠️ WARNING: The URL contains suspicious patterns!"

    if check_redirections(url):
        return "⚠️ WARNING: The URL has excessive redirections!"

    return "✅ The URL appears to be safe, but always proceed with caution."

# Example usage: scan one sample URL and print the verdict message.
# Note that this runs at module level, so scan_url() (and any network
# requests it makes) executes as soon as this code is run.
url_to_scan = "http://www.mybankk.com"
print(scan_url(url_to_scan))


Scanning URL: http://www.mybankk.com
