In [None]:
!pip install whois

Collecting whois
  Downloading whois-1.20240129.2-py3-none-any.whl.metadata (1.3 kB)
Downloading whois-1.20240129.2-py3-none-any.whl (61 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: whois
Successfully installed whois-1.20240129.2


In [None]:
import cv2
import numpy as np
from pyzbar.pyzbar import decode

def scan_qr_code(image_path):
    """Decode the first QR code found in an image file and return its URL text.

    Args:
        image_path: Path of the image file to load with OpenCV.

    Returns:
        The decoded payload as a string, or ``None`` when the image cannot be
        read or no code is detected. If the payload contains whitespace (so it
        cannot be a bare URL) and embeds an ``http(s)://`` link, only that link
        is returned; otherwise the stripped payload is returned unchanged.
    """
    import re  # local import: in this cell the function is called before `import re` runs

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # handing that to decode() would raise instead of yielding "no code".
        return None

    qr_data = decode(image)
    if not qr_data:
        return None  # No QR code detected

    # errors="replace" keeps a non-UTF-8 payload from raising UnicodeDecodeError.
    payload = qr_data[0].data.decode("utf-8", errors="replace").strip()

    # Some codes carry extra text around the link (e.g. a pandas Series repr
    # such as "168    https://site\nName: url, dtype: object"). A URL never
    # contains whitespace, so in that case keep only the embedded link.
    if any(ch.isspace() for ch in payload):
        embedded = re.search(r"https?://\S+", payload, flags=re.IGNORECASE)
        if embedded:
            return embedded.group(0)

    return payload

# Example usage: decode one sample image and print the raw payload.
# `scanned_url` is kept at module level because the feature-extraction step
# further down in this cell reads it; it is None when no QR code was decoded.
image_path = "/content/benign_168.png"  # Change to your QR image path
scanned_url = scan_qr_code(image_path)
print("🔍 Extracted URL:", scanned_url)
import re
import socket
import requests
import whois
import tldextract
from urllib.parse import urlparse
from bs4 import BeautifulSoup

def extract_url_features(url):
    """Build the lexical, domain and page-content feature dict for one URL.

    The function performs live lookups (WHOIS, DNS, two HTTP GETs); every
    lookup is best-effort and falls back to 0 for its features on failure.

    Args:
        url: The URL string to analyse.

    Returns:
        dict with the keys, in this order: Have_IP, Have_At, URL_Length,
        URL_Depth, Redirection, https_Domain, TinyURL, Prefix/Suffix
        (lexical; booleans or counts), DNS_Record, Web_Traffic (0/1),
        Domain_Age (days since registration), Domain_End (days until
        expiry), iFrame, Mouse_Over, Right_Click, Web_Forwards (0/1).
    """
    from datetime import datetime  # local import so the cell's import block stays untouched

    parsed_url = urlparse(url)
    domain_info = tldextract.extract(url)

    # `netloc` still carries "user@" and ":port"; `hostname` is the bare host,
    # which is what the IP check, DNS, WHOIS and the hyphen check need.
    host = parsed_url.hostname or ""

    # NOTE(review): `whois.whois` is the python-whois API. If the installed
    # `whois` distribution does not provide it, the error is swallowed here
    # and both domain-date features stay 0.
    try:
        domain_whois = whois.whois(host) if host else None  # WHOIS Lookup
    except Exception:
        domain_whois = None

    # 1️⃣ Basic URL Features
    features = {
        # fullmatch: "1.2.3.4.evil.com" is a hostname, not an IP address
        "Have_IP": bool(re.fullmatch(r'\d+\.\d+\.\d+\.\d+', host)),
        "Have_At": "@" in url,
        "URL_Length": len(url),
        # depth = number of non-empty path segments (scheme slashes excluded)
        "URL_Depth": sum(1 for segment in parsed_url.path.split('/') if segment),
        "Redirection": "//" in url[7:],
        # NOTE(review): only the registered-domain label is inspected, not subdomains
        "https_Domain": "https" in domain_info.domain,
        "TinyURL": any(short in url.lower() for short in ["bit.ly", "tinyurl", "goo.gl"]),
        "Prefix/Suffix": "-" in host
    }

    # 2️⃣ Domain-Based Features
    # An empty host must not be resolved: gethostbyname("") succeeds and
    # would report a DNS record for input that has no host at all.
    features["DNS_Record"] = 0
    if host:
        try:
            socket.gethostbyname(host)  # Check if DNS record exists
            features["DNS_Record"] = 1
        except Exception:
            pass

    # NOTE(review): the Alexa site-info service may no longer be available, in
    # which case this status-code probe carries no signal — confirm or replace.
    try:
        alexa_status = requests.get(f"https://www.alexa.com/siteinfo/{host}", timeout=5).status_code
        features["Web_Traffic"] = 1 if alexa_status == 200 else 0
    except Exception:
        features["Web_Traffic"] = 0

    # Domain Age & Expiry
    features["Domain_Age"], features["Domain_End"] = 0, 0
    if domain_whois:
        try:
            created = domain_whois.creation_date
            expires = domain_whois.expiration_date
            # WHOIS records may hold a list of dates; use the first entry
            created = created[0] if isinstance(created, list) else created
            expires = expires[0] if isinstance(expires, list) else expires
            # datetime.now(tzinfo) matches naive and tz-aware WHOIS dates alike
            age_days = (datetime.now(created.tzinfo) - created).days if created else 0
            end_days = (expires - datetime.now(expires.tzinfo)).days if expires else 0
            features["Domain_Age"], features["Domain_End"] = age_days, end_days
        except Exception:
            features["Domain_Age"], features["Domain_End"] = 0, 0

    # 3️⃣ Web Content Features (if accessible)
    try:
        response = requests.get(url, timeout=5)
        page = response.text

        features["iFrame"] = 1 if "<iframe" in page else 0               # iFrame Detection
        features["Mouse_Over"] = 1 if "onmouseover" in page else 0       # Mouse Over Detection
        features["Right_Click"] = 1 if "event.button==2" in page else 0  # Right Click Disabled
        # 0/1 like the other content features (and like the failure branch)
        features["Web_Forwards"] = 1 if len(response.history) > 2 else 0  # If multiple redirects
    except Exception:
        features["iFrame"], features["Mouse_Over"], features["Right_Click"], features["Web_Forwards"] = 0, 0, 0, 0

    return features

# Build and display the feature vector for the payload decoded above
# (nothing is shown when no QR code was found).
if scanned_url:
    url_features = extract_url_features(scanned_url)
    print("\n===== Extracted URL Features =====")
    for key, value in url_features.items():
        print(key, value, sep=": ")


🔍 Extracted URL: 168    https://www.dailymail.co.uk
Name: url, dtype: object

===== Extracted URL Features =====
Have_IP: False
Have_At: False
URL_Length: 59
URL_Depth: 2
Redirection: True
https_Domain: True
TinyURL: False
Prefix/Suffix: False
DNS_Record: 1
Web_Traffic: 1
Domain_Age: 0
Domain_End: 0
iFrame: 0
Mouse_Over: 0
Right_Click: 0
Web_Forwards: 0


In [None]:
import cv2
import os
import pandas as pd
import re
import socket
import requests
import whois
import tldextract
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from pyzbar.pyzbar import decode
from tqdm import tqdm  # Progress bar

# 📌 Define dataset root path
# Each sub-directory of this root is processed as one category: a folder whose
# name contains "malicious" (case-insensitive) is labelled 1, any other folder 0.
DATASET_PATH = "/content/drive/MyDrive/QR codes"  # Change to your dataset path

# 📌 Function to scan QR code and extract URL
def scan_qr_code(image_path):
    """Decode the first QR code found in an image file and return its URL text.

    Args:
        image_path: Path of the image file to load with OpenCV.

    Returns:
        The decoded payload as a string, or ``None`` when the image cannot be
        read or no code is detected. If the payload contains whitespace (so it
        cannot be a bare URL) and embeds an ``http(s)://`` link, only that link
        is returned; otherwise the stripped payload is returned unchanged.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # handing that to decode() would raise and abort the whole batch run.
        return None

    qr_data = decode(image)
    if not qr_data:
        return None  # No QR code detected

    # errors="replace" keeps a non-UTF-8 payload from raising UnicodeDecodeError.
    payload = qr_data[0].data.decode("utf-8", errors="replace").strip()

    # Some codes carry extra text around the link (e.g. a pandas Series repr
    # such as "168    https://site\nName: url, dtype: object"). A URL never
    # contains whitespace, so in that case keep only the embedded link.
    if any(ch.isspace() for ch in payload):
        embedded = re.search(r"https?://\S+", payload, flags=re.IGNORECASE)
        if embedded:
            return embedded.group(0)

    return payload

# 📌 Function to extract URL features
def extract_url_features(url):
    """Build the lexical, domain and page-content feature dict for one URL.

    The function performs live lookups (WHOIS, DNS, two HTTP GETs); every
    lookup is best-effort and falls back to 0 for its features on failure.

    Args:
        url: The URL string to analyse.

    Returns:
        dict with the keys, in this order: Have_IP, Have_At, URL_Length,
        URL_Depth, Redirection, https_Domain, TinyURL, Prefix/Suffix
        (lexical; booleans or counts), DNS_Record, Web_Traffic (0/1),
        Domain_Age (days since registration), Domain_End (days until
        expiry), iFrame, Mouse_Over, Right_Click, Web_Forwards (0/1).
    """
    from datetime import datetime  # local import so the cell's import block stays untouched

    parsed_url = urlparse(url)
    domain_info = tldextract.extract(url)

    # `netloc` still carries "user@" and ":port"; `hostname` is the bare host,
    # which is what the IP check, DNS, WHOIS and the hyphen check need.
    host = parsed_url.hostname or ""

    # NOTE(review): `whois.whois` is the python-whois API. If the installed
    # `whois` distribution does not provide it, the error is swallowed here
    # and both domain-date features stay 0.
    try:
        domain_whois = whois.whois(host) if host else None  # WHOIS Lookup
    except Exception:
        domain_whois = None

    # 1️⃣ Basic URL Features
    features = {
        # fullmatch: "1.2.3.4.evil.com" is a hostname, not an IP address
        "Have_IP": bool(re.fullmatch(r'\d+\.\d+\.\d+\.\d+', host)),
        "Have_At": "@" in url,
        "URL_Length": len(url),
        # depth = number of non-empty path segments (scheme slashes excluded)
        "URL_Depth": sum(1 for segment in parsed_url.path.split('/') if segment),
        "Redirection": "//" in url[7:],
        # NOTE(review): only the registered-domain label is inspected, not subdomains
        "https_Domain": "https" in domain_info.domain,
        "TinyURL": any(short in url.lower() for short in ["bit.ly", "tinyurl", "goo.gl"]),
        "Prefix/Suffix": "-" in host
    }

    # 2️⃣ Domain-Based Features
    # An empty host must not be resolved: gethostbyname("") succeeds and
    # would report a DNS record for input that has no host at all.
    features["DNS_Record"] = 0
    if host:
        try:
            socket.gethostbyname(host)  # Check if DNS record exists
            features["DNS_Record"] = 1
        except Exception:
            pass

    # NOTE(review): the Alexa site-info service may no longer be available, in
    # which case this status-code probe carries no signal — confirm or replace.
    try:
        alexa_status = requests.get(f"https://www.alexa.com/siteinfo/{host}", timeout=5).status_code
        features["Web_Traffic"] = 1 if alexa_status == 200 else 0
    except Exception:
        features["Web_Traffic"] = 0

    # Domain Age & Expiry
    features["Domain_Age"], features["Domain_End"] = 0, 0
    if domain_whois:
        try:
            created = domain_whois.creation_date
            expires = domain_whois.expiration_date
            # WHOIS records may hold a list of dates; use the first entry
            created = created[0] if isinstance(created, list) else created
            expires = expires[0] if isinstance(expires, list) else expires
            # datetime.now(tzinfo) matches naive and tz-aware WHOIS dates alike
            age_days = (datetime.now(created.tzinfo) - created).days if created else 0
            end_days = (expires - datetime.now(expires.tzinfo)).days if expires else 0
            features["Domain_Age"], features["Domain_End"] = age_days, end_days
        except Exception:
            features["Domain_Age"], features["Domain_End"] = 0, 0

    # 3️⃣ Web Content Features (if accessible)
    try:
        response = requests.get(url, timeout=5)
        page = response.text

        features["iFrame"] = 1 if "<iframe" in page else 0               # iFrame Detection
        features["Mouse_Over"] = 1 if "onmouseover" in page else 0       # Mouse Over Detection
        features["Right_Click"] = 1 if "event.button==2" in page else 0  # Right Click Disabled
        # 0/1 like the other content features (and like the failure branch),
        # so the CSV column does not mix True/False with 0
        features["Web_Forwards"] = 1 if len(response.history) > 2 else 0  # If multiple redirects
    except Exception:
        features["iFrame"], features["Mouse_Over"], features["Right_Click"], features["Web_Forwards"] = 0, 0, 0, 0

    return features

# 📌 Process all QR code images in dataset
dataset_features = []
skipped_images = []  # (image path, error repr) for entries that could not be processed

# Loop through each folder inside the dataset root.
# sorted(): os.listdir order is unspecified, so sort for a reproducible row order.
for category_folder in sorted(os.listdir(DATASET_PATH)):
    folder_path = os.path.join(DATASET_PATH, category_folder)

    # Ignore non-directory files
    if not os.path.isdir(folder_path):
        continue

    # 📌 Assign label dynamically from folder name
    label = 1 if "malicious" in category_folder.lower() else 0  # 1 = Malicious, 0 = Safe

    for filename in tqdm(sorted(os.listdir(folder_path)), desc=f"Processing {category_folder} QR codes"):
        image_path = os.path.join(folder_path, filename)

        # Nested directories are not images
        if not os.path.isfile(image_path):
            continue

        # One unreadable or non-image file must not abort a run that takes
        # many minutes and only writes its CSV at the very end.
        try:
            scanned_url = scan_qr_code(image_path)
            features = extract_url_features(scanned_url) if scanned_url else None
        except Exception as exc:
            skipped_images.append((image_path, repr(exc)))
            continue

        # Images without a decodable QR code contribute no row
        if features is None:
            continue

        features["Label"] = label  # Assign label dynamically
        dataset_features.append(features)

# 📌 Convert to DataFrame and save as CSV
df = pd.DataFrame(dataset_features)
df.to_csv("qr_features.csv", index=False)

print("\n✅ Features extracted and saved to 'qr_features.csv'!")
if skipped_images:
    print(f"⚠️ Skipped {len(skipped_images)} file(s) that could not be processed; see `skipped_images`.")


Processing malicious QR codes: 100%|██████████| 2011/2011 [11:23<00:00,  2.94it/s]
Processing benign QR codes: 100%|██████████| 2001/2001 [11:10<00:00,  2.98it/s]


✅ Features extracted and saved to 'qr_features.csv'!





In [None]:
import cv2
import numpy as np
from pyzbar.pyzbar import decode

def scan_qr_code(image_path):
    """Decode the first QR code found in an image file and return its URL text.

    Args:
        image_path: Path of the image file to load with OpenCV.

    Returns:
        The decoded payload as a string, or ``None`` when the image cannot be
        read or no code is detected. If the payload contains whitespace (so it
        cannot be a bare URL) and embeds an ``http(s)://`` link, only that link
        is returned; otherwise the stripped payload is returned unchanged.
    """
    import re  # local import: in this cell the function is called before `import re` runs

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # handing that to decode() would raise instead of yielding "no code".
        return None

    qr_data = decode(image)
    if not qr_data:
        return None  # No QR code detected

    # errors="replace" keeps a non-UTF-8 payload from raising UnicodeDecodeError.
    payload = qr_data[0].data.decode("utf-8", errors="replace").strip()

    # Some codes carry extra text around the link (e.g. a pandas Series repr
    # such as "168    https://site\nName: url, dtype: object"). A URL never
    # contains whitespace, so in that case keep only the embedded link.
    if any(ch.isspace() for ch in payload):
        embedded = re.search(r"https?://\S+", payload, flags=re.IGNORECASE)
        if embedded:
            return embedded.group(0)

    return payload

# Example usage: decode one sample image and print the raw payload.
# `scanned_url` is kept at module level because the feature-extraction step
# further down in this cell reads it; it is None when no QR code was decoded.
image_path = "/content/benign_168.png"  # Change to your QR image path
scanned_url = scan_qr_code(image_path)
print("🔍 Extracted URL:", scanned_url)
import re
import socket
import requests
import whois
import tldextract
from urllib.parse import urlparse
from bs4 import BeautifulSoup

def extract_url_features(url):
    """Build the lexical, domain and page-content feature dict for one URL.

    The function performs live lookups (WHOIS, DNS, two HTTP GETs); every
    lookup is best-effort and falls back to 0 for its features on failure.

    Args:
        url: The URL string to analyse.

    Returns:
        dict with the keys, in this order: Have_IP, Have_At, URL_Length,
        URL_Depth, Redirection, https_Domain, TinyURL, Prefix/Suffix
        (lexical; booleans or counts), DNS_Record, Web_Traffic (0/1),
        Domain_Age (days since registration), Domain_End (days until
        expiry), iFrame, Mouse_Over, Right_Click, Web_Forwards (0/1).
    """
    from datetime import datetime  # local import so the cell's import block stays untouched

    parsed_url = urlparse(url)
    domain_info = tldextract.extract(url)

    # `netloc` still carries "user@" and ":port"; `hostname` is the bare host,
    # which is what the IP check, DNS, WHOIS and the hyphen check need.
    host = parsed_url.hostname or ""

    # NOTE(review): `whois.whois` is the python-whois API. If the installed
    # `whois` distribution does not provide it, the error is swallowed here
    # and both domain-date features stay 0.
    try:
        domain_whois = whois.whois(host) if host else None  # WHOIS Lookup
    except Exception:
        domain_whois = None

    # 1️⃣ Basic URL Features
    features = {
        # fullmatch: "1.2.3.4.evil.com" is a hostname, not an IP address
        "Have_IP": bool(re.fullmatch(r'\d+\.\d+\.\d+\.\d+', host)),
        "Have_At": "@" in url,
        "URL_Length": len(url),
        # depth = number of non-empty path segments (scheme slashes excluded)
        "URL_Depth": sum(1 for segment in parsed_url.path.split('/') if segment),
        "Redirection": "//" in url[7:],
        # NOTE(review): only the registered-domain label is inspected, not subdomains
        "https_Domain": "https" in domain_info.domain,
        "TinyURL": any(short in url.lower() for short in ["bit.ly", "tinyurl", "goo.gl"]),
        "Prefix/Suffix": "-" in host
    }

    # 2️⃣ Domain-Based Features
    # An empty host must not be resolved: gethostbyname("") succeeds and
    # would report a DNS record for input that has no host at all.
    features["DNS_Record"] = 0
    if host:
        try:
            socket.gethostbyname(host)  # Check if DNS record exists
            features["DNS_Record"] = 1
        except Exception:
            pass

    # NOTE(review): the Alexa site-info service may no longer be available, in
    # which case this status-code probe carries no signal — confirm or replace.
    try:
        alexa_status = requests.get(f"https://www.alexa.com/siteinfo/{host}", timeout=5).status_code
        features["Web_Traffic"] = 1 if alexa_status == 200 else 0
    except Exception:
        features["Web_Traffic"] = 0

    # Domain Age & Expiry
    features["Domain_Age"], features["Domain_End"] = 0, 0
    if domain_whois:
        try:
            created = domain_whois.creation_date
            expires = domain_whois.expiration_date
            # WHOIS records may hold a list of dates; use the first entry
            created = created[0] if isinstance(created, list) else created
            expires = expires[0] if isinstance(expires, list) else expires
            # datetime.now(tzinfo) matches naive and tz-aware WHOIS dates alike
            age_days = (datetime.now(created.tzinfo) - created).days if created else 0
            end_days = (expires - datetime.now(expires.tzinfo)).days if expires else 0
            features["Domain_Age"], features["Domain_End"] = age_days, end_days
        except Exception:
            features["Domain_Age"], features["Domain_End"] = 0, 0

    # 3️⃣ Web Content Features (if accessible)
    try:
        response = requests.get(url, timeout=5)
        page = response.text

        features["iFrame"] = 1 if "<iframe" in page else 0               # iFrame Detection
        features["Mouse_Over"] = 1 if "onmouseover" in page else 0       # Mouse Over Detection
        features["Right_Click"] = 1 if "event.button==2" in page else 0  # Right Click Disabled
        # 0/1 like the other content features (and like the failure branch)
        features["Web_Forwards"] = 1 if len(response.history) > 2 else 0  # If multiple redirects
    except Exception:
        features["iFrame"], features["Mouse_Over"], features["Right_Click"], features["Web_Forwards"] = 0, 0, 0, 0

    return features

# Build and display the feature vector for the payload decoded above
# (nothing is shown when no QR code was found).
if scanned_url:
    url_features = extract_url_features(scanned_url)
    print("\n===== Extracted URL Features =====")
    for key, value in url_features.items():
        print(key, value, sep=": ")


: 