<a href="https://colab.research.google.com/github/mhutama/snerd/blob/main/snerd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **#1** - Import required Python libraries

In [2]:
# Import libraries
import requests
import pandas as pd
from datetime import datetime

# **#2** - Please read the description before using it
*   use your own urlscan API key; register at https://urlscan.io/user/signup/
*   for search keyword guidance refer to https://urlscan.io/docs/search/
*   use option #2 if you want to export the output to a .csv file - you will find it in the Files menu in the left corner
*   use option #3 to display the output directly at the bottom of the Google Colab page

---
*   example of keyword : domain.keyword: *crowdstrike* AND NOT crowdstrike.com AND NOT crowdstrike.litmos.com AND NOT www.www* AND NOT youtube.com AND NOT godaddy.com AND NOT wikipedia AND NOT okta.com AND date:>now-9d



In [None]:
# Function to search urlscan.io with a given keyword
def search_urlscan(keyword, api_key):
    """Query the urlscan.io Search API and return the decoded JSON response.

    Args:
        keyword: Search query string in urlscan.io search syntax.
        api_key: urlscan.io API key, sent in the ``API-Key`` request header.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` otherwise, after printing an error message.
    """
    url = "https://urlscan.io/api/v1/search/"
    headers = {"API-Key": api_key}
    # Pass the query through ``params`` so requests percent-encodes it.
    # Splicing the raw keyword into the URL lets characters such as '&',
    # '#' or '+' truncate or alter the query.
    params = {"q": keyword, "size": 10000}
    try:
        # The timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors: print, return None.
        print(f"Error: {exc}")
        return None
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return None

# Function to export results to CSV with UTF-8 encoding
def export_to_csv(data, filename):
    """Write ``data`` to ``filename`` as a UTF-8 CSV file without an index column.

    Args:
        data: Records accepted by ``pd.DataFrame`` (``main`` passes a list of dicts).
        filename: Destination path of the CSV file.
    """
    pd.DataFrame(data).to_csv(filename, index=False, encoding='utf-8')
    print(f"Results exported to {filename}")

# Function to format the scan date
def format_scan_date(scan_date):
    """Convert a scan timestamp string into a readable UTC string.

    Args:
        scan_date: Timestamp such as ``"2024-07-28T12:01:37.123Z"``; the
            fractional-seconds part may be absent.

    Returns:
        The timestamp formatted like ``"28-Jul-2024 12:01:37 UTC"``, or
        ``"N/A"`` when the value is missing, is not a string, or matches
        neither accepted layout.
    """
    # Try the fractional-seconds layout first, then the same layout without it.
    for layout in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            dt = datetime.strptime(scan_date, layout)
        except (TypeError, ValueError):
            # TypeError: non-string input such as None; ValueError: layout mismatch.
            continue
        return dt.strftime("%d-%b-%Y %H:%M:%S UTC")
    return "N/A"

# Main function
def main():
    """Prompt for an API key and a search keyword, then export the matches to CSV.

    Reads both values interactively, calls ``search_urlscan``, and writes one
    row per result (domain, formatted scan time, page title) to
    ``urlscan-search-results.csv`` via ``export_to_csv``. Nothing is written
    when the search returns no data.
    """
    # Local import so this cell does not depend on a change to the imports cell.
    from getpass import getpass

    # getpass hides the typed key; input() would echo it into the saved cell output.
    api_key = getpass("Enter your urlscan.io API key: ").strip()
    keyword = input("Enter the keyword to search: ")
    results = search_urlscan(keyword, api_key)
    if not results:
        return

    # Extracting relevant data
    data = []
    for result in results.get("results", []):
        # Convert the scan date to the desired format
        scan_date_str = result.get("task", {}).get("time", "")
        try:
            scan_date = datetime.strptime(scan_date_str, "%Y-%m-%dT%H:%M:%S.%fZ")
            formatted_date = scan_date.strftime("%d-%b-%Y %H:%M:%S - UTC")
        except (TypeError, ValueError):
            # Missing or malformed timestamp: keep the row instead of aborting the export.
            formatted_date = "N/A"

        page = result.get("page", {})
        data.append({
            "Domain": page.get("domain", ""),
            "Check Date": formatted_date,
            "Page Title": page.get("title", "N/A")
        })

    # Exporting to CSV
    export_to_csv(data, "urlscan-search-results.csv")

# Entry point: start the interactive search-and-export flow when this code
# runs as the main program (which is the case when the cell is executed).
if __name__ == "__main__":
    main()


# **#3** - Display the output directly in Google Colab (not exported to a file)

In [3]:
# Function to search urlscan.io with a given keyword
def search_urlscan(keyword, api_key):
    """Query the urlscan.io Search API and return the decoded JSON response.

    Args:
        keyword: Search query string in urlscan.io search syntax.
        api_key: urlscan.io API key, sent in the ``API-Key`` request header.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` otherwise, after printing an error message.
    """
    url = "https://urlscan.io/api/v1/search/"
    headers = {"API-Key": api_key}
    # Pass the query through ``params`` so requests percent-encodes it.
    # Splicing the raw keyword into the URL lets characters such as '&',
    # '#' or '+' truncate or alter the query.
    params = {"q": keyword, "size": 10000}
    try:
        # The timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors: print, return None.
        print(f"Error: {exc}")
        return None
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return None

# Function to format the scan date
def format_scan_date(scan_date):
    """Convert a scan timestamp string into a readable UTC string.

    Args:
        scan_date: Timestamp such as ``"2024-07-28T12:01:37.123Z"``; the
            fractional-seconds part may be absent.

    Returns:
        The timestamp formatted like ``"28-Jul-2024 12:01:37 UTC"``, or
        ``"N/A"`` when the value is missing, is not a string, or matches
        neither accepted layout.
    """
    # Try the fractional-seconds layout first, then the same layout without it.
    for layout in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            dt = datetime.strptime(scan_date, layout)
        except (TypeError, ValueError):
            # TypeError: non-string input such as None; ValueError: layout mismatch.
            continue
        return dt.strftime("%d-%b-%Y %H:%M:%S UTC")
    return "N/A"

# Main function
def main():
    """Prompt for an API key and a search keyword, then print the matches.

    Reads both values interactively, calls ``search_urlscan``, and prints one
    numbered line per result containing the domain, the formatted scan time
    and the page title. Nothing is printed when the search returns no data.
    """
    # Local import so this cell does not depend on a change to the imports cell.
    from getpass import getpass

    # getpass hides the typed key; input() would echo it into the saved cell output.
    api_key = getpass("Enter your urlscan.io API key: ").strip()
    keyword = input("Enter the keyword to search: ")
    results = search_urlscan(keyword, api_key)
    if not results:
        return

    # Extracting relevant data
    data = []
    for result in results.get("results", []):
        # Convert the scan date to the desired format
        scan_date_str = result.get("task", {}).get("time", "")
        try:
            scan_date = datetime.strptime(scan_date_str, "%Y-%m-%dT%H:%M:%S.%fZ")
            formatted_date = scan_date.strftime("%d-%b-%Y %H:%M:%S - UTC")
        except (TypeError, ValueError):
            # Missing or malformed timestamp: keep the row instead of aborting the listing.
            formatted_date = "N/A"

        page = result.get("page", {})
        data.append({
            "Domain": page.get("domain", ""),
            "Check Date": formatted_date,
            "Page Title": page.get("title", "N/A")
        })

    # Displaying results as a numbered list
    for i, entry in enumerate(data, start=1):
        print(f"{i}. Domain: {entry['Domain']}, Check Date: {entry['Check Date']}, Page Title: {entry['Page Title']}")

# Entry point: start the interactive search-and-display flow when this code
# runs as the main program (which is the case when the cell is executed).
if __name__ == "__main__":
    main()


Enter your urlscan.io API key: [REDACTED]
Enter the keyword to search: domain.keyword: *crowdstrike* AND NOT crowdstrike.com AND NOT crowdstrike.litmos.com AND NOT www.www* AND NOT youtube.com AND NOT godaddy.com AND NOT wikipedia AND NOT okta.com AND date:>now-9d
1. Domain: gen.xyz, Check Date: 28-Jul-2024 12:01:37 - UTC, Page Title: N/A
2. Domain: crowdstrike.ru, Check Date: 28-Jul-2024 06:50:58 - UTC, Page Title: Домен продается. Купить в магазине доменов RU-CENTER
3. Domain: www.urbandictionary.com, Check Date: 28-Jul-2024 02:33:40 - UTC, Page Title: Urban Dictionary: Lose The Game
4. Domain: www.crowdstrike.develop.net, Check Date: 28-Jul-2024 01:10:05 - UTC, Page Title: develop.net
5. Domain: crowdstrikeupdate.com, Check Date: 28-Jul-2024 01:09:01 - UTC, Page Title: crowdstrikeupdate.com
6. Domain: crowdstrikeout.com, Check Date: 28-Jul-2024 01:08:44 - UTC, Page Title: porkbun.com | parked domain
7. Domain: www.crowdstrikeglitch.com, Check Date: 28-Jul-2