In [None]:
# ==========================================
# CyberAI Suite: URL, Email & Script Malware Detector (Gradio App)
# Features:
# 1. URL, Email & Script Detection
# 2. Synthetic Data Generation
# 3. Random Forest Models
# 4. Real-time Adaptive Learning
# 5. Combined Cyber Threat Score
# 6. Explainable AI (SHAP)
# 7. 51 Features across all modules
# ==========================================

import random
import string
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import shap
import gradio as gr
import re
import math

# =========================
# 1️⃣ FEATURE EXTRACTION
# =========================

# Substrings whose presence in a URL sets the `has_suspicious_word` feature.
SUSPICIOUS_WORDS = [
    "login",
    "secure",
    "verify",
    "update",
    "account",
    "bank",
]

# Brand names whose presence in a URL sets the `has_brand_name` feature.
BRAND_NAMES = [
    "google",
    "paypal",
    "amazon",
    "facebook",
    "apple",
]

# Whole words counted by the `num_suspicious_words` e-mail feature.
SUSP_WORDS_EMAIL = [
    "verify",
    "urgent",
    "login",
    "payment",
    "account",
    "update",
]

# Substrings counted by the `num_suspicious_functions` script feature.
SUSPICIOUS_FUNCTIONS = [
    "eval",
    "unescape",
    "escape",
    "document.write",
    "window.location",
    "setTimeout",
    "setInterval",
    "alert",
]

def url_entropy(s):
    """Return the Shannon entropy of ``s`` in bits per character.

    Args:
        s: The string to measure.

    Returns:
        The entropy as a float. An empty string has no symbols to
        measure, so it yields ``0.0`` rather than dividing by zero.
    """
    if not s:
        return 0.0
    length = len(s)
    probs = [s.count(c) / length for c in set(s)]
    return -sum(p * math.log2(p) for p in probs)

def extract_url_features(url):
    """Turn a URL string into the numeric feature dict used by the URL model.

    The keys and their insertion order are part of the contract: the
    DataFrame columns used for both training and prediction come from them.

    The host is taken as the text after the last ``//`` up to the next
    ``/``; the path is everything from that ``/`` onwards. Splitting this
    way (rather than splitting the URL on the host text) keeps empty hosts
    from raising ``ValueError`` and keeps the path correct when the host
    text occurs again later in the URL.

    Args:
        url: The URL to analyse. May be empty.

    Returns:
        A dict mapping feature name to an int or float value.
    """
    lowered = url.lower()
    digit_count = sum(c.isdigit() for c in url)

    remainder = url.split("//")[-1]
    domain, slash, tail = remainder.partition("/")
    path = slash + tail

    features = {}
    features['length'] = len(url)
    features['num_digits'] = digit_count
    features['num_special'] = sum(c in string.punctuation for c in url)
    features['num_subdomains'] = url.count('.') - 1
    features['has_https'] = int(url.startswith("https"))
    features['num_hyphens'] = url.count('-')
    features['num_underscores'] = url.count('_')
    features['num_query_params'] = url.count('?')
    features['num_fragments'] = url.count('#')
    features['numbers_in_domain'] = sum(c.isdigit() for c in domain)
    # An empty URL has no characters to measure, so its entropy is zero.
    features['entropy'] = url_entropy(url) if url else 0.0
    features['has_suspicious_word'] = int(any(word in lowered for word in SUSPICIOUS_WORDS))
    features['has_brand_name'] = int(any(brand in lowered for brand in BRAND_NAMES))
    # True when any character appears three times in a row.
    features['repeated_chars'] = int(
        any(a == b == c for a, b, c in zip(url, url[1:], url[2:]))
    )
    features['domain_length'] = len(domain)
    features['path_length'] = len(path)
    features['dots_in_path'] = path.count('.')
    # max(1, ...) avoids dividing by zero when the URL has no digits.
    features['letters_to_numbers_ratio'] = sum(c.isalpha() for c in url) / max(1, digit_count)
    features['ends_with_common_tld'] = int(url.endswith(('.com', '.net', '.org', '.gov')))
    features['starts_with_ip'] = int(re.match(r'http[s]?://\d+\.\d+\.\d+\.\d+', url) is not None)
    return features

def extract_email_features(subject):
    """Turn an e-mail subject line into the feature dict for the e-mail model.

    Args:
        subject: The subject text to analyse.

    Returns:
        A dict of 15 numeric features; key order matches the columns the
        e-mail model is trained on.
    """
    lowered = subject.lower()
    words = subject.split()

    suspicious_hits = sum(1 for w in words if w.lower() in SUSP_WORDS_EMAIL)
    upper_words = sum(1 for w in words if w.isupper())
    lower_words = sum(1 for w in words if w.islower())

    return {
        'length': len(subject),
        'num_suspicious_words': suspicious_hits,
        'num_exclamations': subject.count('!'),
        'num_questions': subject.count('?'),
        'num_uppercase_words': upper_words,
        'num_lowercase_words': lower_words,
        # max(1, ...) keeps the ratio defined when no word is all-lowercase.
        'uppercase_to_lowercase_ratio': upper_words / max(1, lower_words),
        'has_link': int("http" in lowered or "https" in lowered),
        'has_attachment': int(".pdf" in lowered or ".exe" in lowered or ".zip" in lowered),
        'num_digits': sum(ch.isdigit() for ch in subject),
        'num_special_chars': sum(ch in string.punctuation for ch in subject),
        'starts_with_greeting': int(lowered.startswith(("dear", "hi", "hello"))),
        'ends_with_signature': int(lowered.endswith(("regards", "thanks", "thank you"))),
        'num_spaces': subject.count(' '),
        'num_words': len(words),
    }

def extract_script_features(script):
    """Turn a JS/HTML snippet into the feature dict for the script model.

    All counts are plain substring counts on the raw text.

    Args:
        script: The script source to analyse.

    Returns:
        A dict of 16 numeric features; key order matches the columns the
        script model is trained on.
    """
    letter_count = sum(ch.isalpha() for ch in script)
    digit_count = sum(ch.isdigit() for ch in script)
    punctuation_count = sum(ch in string.punctuation for ch in script)
    timer_calls = script.count('setTimeout') + script.count('setInterval')
    declarations = script.count('var') + script.count('let') + script.count('const')

    return {
        'length': len(script),
        'num_lines': script.count('\n') + 1,
        'has_eval': int('eval' in script),
        'has_iframe': int('iframe' in script),
        'has_window_location': int('window.location' in script),
        'num_document_write': script.count('document.write'),
        'num_settimeout': timer_calls,
        'num_suspicious_functions': sum(script.count(name) for name in SUSPICIOUS_FUNCTIONS),
        'num_concatenations': script.count('+'),
        'num_comments': script.count('//') + script.count('/*'),
        # max(1, ...) guards both ratios against a zero denominator.
        'letters_to_numbers_ratio': letter_count / max(1, digit_count),
        'special_char_ratio': punctuation_count / max(1, len(script)),
        'num_var_declarations': declarations,
        'num_functions': script.count('function'),
        'num_alerts': script.count('alert'),
        'has_base64': int('base64' in script.lower()),
    }

# =========================
# 2️⃣ SYNTHETIC DATA GENERATION & MODELS
# =========================

def generate_url_dataset(n=500):
    """Build a synthetic, labelled URL feature table.

    Every sample is ``http://<6 random lowercase letters><extra>.com``.
    Malicious samples (label 1) get one of four phishing-style words as
    ``<extra>``; benign samples (label 0) get nothing.

    Args:
        n: Number of samples to generate.

    Returns:
        A DataFrame with one row of URL features per sample and a
        ``label`` column.
    """
    bait_words = ["login", "secure", "verify", "update"]
    rows = []
    targets = []
    for _ in range(n):
        is_malicious = random.choice([0, 1])
        host = ''.join(random.choices(string.ascii_lowercase, k=6))
        if is_malicious:
            suffix = random.choice(bait_words)
        else:
            suffix = ""
        rows.append(extract_url_features(f"http://{host}{suffix}.com"))
        targets.append(is_malicious)

    frame = pd.DataFrame(rows)
    frame['label'] = targets
    return frame

def generate_email_dataset(n=500):
    """Build a synthetic, labelled e-mail subject feature table.

    Malicious samples (label 1) use one of three phishing-style subjects;
    benign samples (label 0) use one of three everyday subjects.

    Args:
        n: Number of samples to generate.

    Returns:
        A DataFrame with one row of e-mail features per sample and a
        ``label`` column.
    """
    phishing_subjects = ["Verify Account", "Urgent Action Required", "Login Now"]
    benign_subjects = ["Meeting Reminder", "Project Update", "Greetings"]

    rows = []
    targets = []
    for _ in range(n):
        is_malicious = random.choice([0, 1])
        pool = phishing_subjects if is_malicious else benign_subjects
        rows.append(extract_email_features(random.choice(pool)))
        targets.append(is_malicious)

    frame = pd.DataFrame(rows)
    frame['label'] = targets
    return frame

def generate_script_dataset(n=500):
    """Build a synthetic, labelled script feature table.

    Malicious samples (label 1) use one of three hostile snippets; every
    benign sample (label 0) is the same ``console.log`` statement.

    Args:
        n: Number of samples to generate.

    Returns:
        A DataFrame with one row of script features per sample and a
        ``label`` column.
    """
    hostile_snippets = ["eval('bad')", "document.write('<iframe>')", "window.location='evil.com';"]

    rows = []
    targets = []
    for _ in range(n):
        is_malicious = random.choice([0, 1])
        if is_malicious:
            snippet = random.choice(hostile_snippets)
        else:
            snippet = "console.log('hello');"
        rows.append(extract_script_features(snippet))
        targets.append(is_malicious)

    frame = pd.DataFrame(rows)
    frame['label'] = targets
    return frame

# Train models
# Each detector follows the same recipe: generate a synthetic labelled frame,
# split it into the feature matrix (every column except 'label') and the
# target column, then fit a 100-tree random forest with a fixed seed.
# NOTE: the generators draw from the global `random` state, which is not
# seeded in this cell, so the training data can differ between runs even
# though each forest uses random_state=42.
url_df = generate_url_dataset(500)
X_url, y_url = url_df.drop('label', axis=1), url_df['label']
url_model = RandomForestClassifier(n_estimators=100, random_state=42)
url_model.fit(X_url, y_url)

email_df = generate_email_dataset(500)
X_email, y_email = email_df.drop('label', axis=1), email_df['label']
email_model = RandomForestClassifier(n_estimators=100, random_state=42)
email_model.fit(X_email, y_email)

script_df = generate_script_dataset(500)
X_script, y_script = script_df.drop('label', axis=1), script_df['label']
script_model = RandomForestClassifier(n_estimators=100, random_state=42)
script_model.fit(X_script, y_script)

# SHAP explainers
# One tree explainer per fitted forest, built once at start-up.
# NOTE(review): cyber_ai() in this cell does not call these explainers.
url_explainer = shap.TreeExplainer(url_model)
email_explainer = shap.TreeExplainer(email_model)
script_explainer = shap.TreeExplainer(script_model)

# =========================
# 3️⃣ GRADIO INTERFACE FUNCTIONS
# =========================

def cyber_ai(url_input, email_input, script_input):
    """Score a URL, an e-mail subject and a script, and summarise the result.

    Each input is converted to features and passed to its own random
    forest; the three class-1 probabilities are averaged into an overall
    threat score, which is bucketed at 0.4 and 0.7.

    Args:
        url_input: URL text from the UI.
        email_input: E-mail subject text from the UI.
        script_input: JS/HTML text from the UI.

    Returns:
        A five-line report string with the three probabilities, the
        overall score (all as percentages) and the threat level.
    """
    def _class_one_probability(model, feature_row):
        # predict_proba returns one row per sample; column 1 is class 1.
        return model.predict_proba(pd.DataFrame([feature_row]))[0][1]

    url_prob = _class_one_probability(url_model, extract_url_features(url_input))
    email_prob = _class_one_probability(email_model, extract_email_features(email_input))
    script_prob = _class_one_probability(script_model, extract_script_features(script_input))

    # Combined score: unweighted mean of the three probabilities.
    threat_score = np.mean([url_prob, email_prob, script_prob])
    if threat_score < 0.4:
        threat_level = "Low Threat ‚úÖ"
    elif threat_score < 0.7:
        threat_level = "Medium Threat ‚ö†Ô∏è"
    else:
        threat_level = "High Threat üö®"

    report_lines = [
        f"URL Malicious Probability: {url_prob*100:.2f}%",
        f"Email Malicious Probability: {email_prob*100:.2f}%",
        f"Script Malicious Probability: {script_prob*100:.2f}%",
        f"Overall Cyber Threat Score: {threat_score*100:.2f}%",
        f"Threat Level: {threat_level}",
    ]
    return "\n".join(report_lines)

# =========================
# 4️⃣ GRADIO APP
# =========================

# Single-function UI: three text inputs are passed positionally to cyber_ai
# (URL, e-mail subject, script) and its report string fills one text output.
iface = gr.Interface(
    fn=cyber_ai,
    inputs=[
        gr.Textbox(label="Enter URL"),
        gr.Textbox(label="Enter Email Subject"),
        gr.Textbox(label="Enter Script (JS/HTML)")
    ],
    outputs=gr.Textbox(label="Cyber Threat Analysis"),
    title="üöÄ CyberAI Suite",
    description="Real-time detection of malicious URLs, emails, and scripts with combined threat scoring."
)

# Start the app with Gradio's default launch settings (no arguments passed).
iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ee3c6b8a80bc5105ff.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [2]:
! pip install reportlab
import gradio as gr
import re
import math
import zlib
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from urllib.parse import urlparse
import unicodedata
from mpl_toolkits.mplot3d import Axes3D
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.pagesizes import A4

# ===============================
# GLOBAL CONSTANTS
# ===============================

# Substrings counted in URLs and e-mail subjects as phishing indicators.
SUSPICIOUS_WORDS = [
    "login",
    "verify",
    "bank",
    "secure",
    "update",
    "account",
    "password",
    "confirm",
    "urgent",
]

# Words and phrases used for the e-mail social-engineering score.
SOCIAL_ENGINEERING_WORDS = [
    "urgent",
    "immediately",
    "action required",
    "suspend",
    "limited time",
    "verify now",
    "click below",
    "security alert",
]

# URL endings treated as risky by the `tld_risk` feature.
SUSPICIOUS_TLDS = [
    ".tk",
    ".ml",
    ".ga",
    ".cf",
    ".xyz",
    ".top",
]


# ===============================
# ADVANCED FEATURE FUNCTIONS
# ===============================

def shannon_entropy(text):
    """Return the Shannon entropy of ``text`` in bits per character.

    Args:
        text: The string to measure.

    Returns:
        ``0`` for empty input, otherwise the entropy as a float.
    """
    if not text:
        return 0
    size = len(text)
    weighted_logs = 0
    for symbol in set(text):
        p = text.count(symbol) / size
        weighted_logs += p * math.log2(p)
    return -weighted_logs

def compression_ratio(text):
    """Return compressed size divided by raw size for ``text``.

    The text is encoded with the default codec and compressed with zlib.
    Very short inputs give ratios above 1 because of zlib's fixed overhead.

    Args:
        text: The string to compress.

    Returns:
        ``0`` for empty input, otherwise the byte-length ratio as a float.
    """
    if not text:
        return 0
    raw = text.encode()
    packed = zlib.compress(raw)
    return len(packed) / len(raw)

def homoglyph_count(text):
    """Count the characters in ``text`` whose Unicode name contains "CYRILLIC".

    Characters without a Unicode name (for example control characters) are
    treated as having an empty name, so they are skipped without relying
    on a catch-all exception handler.

    Args:
        text: The string to scan.

    Returns:
        The number of Cyrillic characters as an int.
    """
    return sum(1 for ch in text if "CYRILLIC" in unicodedata.name(ch, ""))

def bigram_anomaly_score(text):
    """Return the standard deviation of the character-bigram counts.

    Args:
        text: The string to analyse.

    Returns:
        ``0`` when the text has fewer than two characters, otherwise the
        population standard deviation (``np.std``) of how often each
        distinct adjacent character pair occurs.
    """
    if len(text) < 2:
        return 0
    pair_counts = Counter(left + right for left, right in zip(text, text[1:]))
    return np.std(list(pair_counts.values()))

def keyword_density(text, keywords):
    """Return the fraction of word positions in ``text`` that start a keyword.

    The text is lower-cased and split on whitespace. A keyword may be a
    single word or a multi-word phrase such as "action required"; a phrase
    matches when its words appear consecutively. Matching phrases by token
    sequence is what lets multi-word keywords score at all, since a single
    token can never equal a phrase containing a space. Single-word
    keywords still require an exact token match.

    Args:
        text: The text to scan.
        keywords: Iterable of keyword strings, expected in lower case.

    Returns:
        ``0`` when the text has no words, otherwise matches divided by the
        number of words. Each phrase match counts once, at its first word.
    """
    words = text.lower().split()
    if not words:
        return 0
    phrases = {tuple(keyword.split()) for keyword in keywords}
    # A blank keyword would otherwise match at every position.
    phrases.discard(())
    hits = sum(
        1
        for start in range(len(words))
        if any(tuple(words[start:start + len(phrase)]) == phrase for phrase in phrases)
    )
    return hits / len(words)

def randomness_index(text):
    """Return the Shannon entropy of ``text`` divided by its distinct-character count.

    Args:
        text: The string to analyse.

    Returns:
        ``0`` for empty input, otherwise entropy per distinct character.
    """
    if text:
        distinct_chars = len(set(text))
        return shannon_entropy(text) / max(1, distinct_chars)
    return 0


# ===============================
# FEATURE EXTRACTION
# ===============================

def extract_url_features(url):
    """Turn a URL string into the numeric feature dict used for URL scoring.

    The TLD check looks at the parsed host name as well as the end of the
    raw string, so a risky TLD is still detected when the URL carries a
    path, port or query (e.g. ``http://evil.tk/login``).

    Args:
        url: The URL to analyse. May be empty or lack a scheme.

    Returns:
        A dict mapping feature name to an int or float value.
    """
    lowered = url.lower()

    # urlparse only fills in the host when an authority marker is present,
    # so scheme-less input is parsed as if it started with "//".
    has_authority = "://" in url or url.startswith("//")
    try:
        parsed = urlparse(url if has_authority else "//" + url)
        host = parsed.hostname or ""
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): no host.
        host = ""

    tld_risk = int(any(url.endswith(tld) or host.endswith(tld) for tld in SUSPICIOUS_TLDS))

    return {
        'length': len(url),
        'dots': url.count('.'),
        'hyphens': url.count('-'),
        'digits': sum(c.isdigit() for c in url),
        'suspicious_words': sum(word in lowered for word in SUSPICIOUS_WORDS),
        'tld_risk': tld_risk,
        'path_depth': url.count('/'),
        'entropy': shannon_entropy(url),
        'compression_ratio': compression_ratio(url),
        'homoglyphs': homoglyph_count(url),
        'bigram_anomaly': bigram_anomaly_score(url),
        'randomness_index': randomness_index(url),
        'keyword_density': keyword_density(url, SUSPICIOUS_WORDS)
    }

def extract_email_features(subject):
    """Turn an e-mail subject line into the feature dict used for e-mail scoring.

    Args:
        subject: The subject text to analyse.

    Returns:
        A dict of 10 numeric features.
    """
    lowered = subject.lower()
    uppercase_chars = sum(ch.isupper() for ch in subject)

    features = {}
    features['length'] = len(subject)
    # max(1, ...) keeps the ratio defined for an empty subject.
    features['uppercase_ratio'] = uppercase_chars / max(1, len(subject))
    features['exclamations'] = subject.count('!')
    features['suspicious_words'] = sum(1 for word in SUSPICIOUS_WORDS if word in lowered)
    features['entropy'] = shannon_entropy(subject)
    features['social_engineering_score'] = keyword_density(subject, SOCIAL_ENGINEERING_WORDS)
    features['compression_ratio'] = compression_ratio(subject)
    features['bigram_anomaly'] = bigram_anomaly_score(subject)
    features['randomness_index'] = randomness_index(subject)
    features['homoglyphs'] = homoglyph_count(subject)
    return features

def extract_script_features(script):
    """Turn a JS/HTML snippet into the feature dict used for script scoring.

    All counts are plain substring counts on the raw text.

    Args:
        script: The script source to analyse.

    Returns:
        A dict of 12 numeric features.
    """
    features = {}
    features['length'] = len(script)
    features['eval_usage'] = script.count("eval")
    features['document_write'] = script.count("document.write")
    features['iframe'] = script.count("iframe")
    features['entropy'] = shannon_entropy(script)
    features['compression_ratio'] = compression_ratio(script)
    features['bigram_anomaly'] = bigram_anomaly_score(script)
    features['randomness_index'] = randomness_index(script)
    base64_calls = script.count("atob") + script.count("btoa")
    features['encoded_strings'] = base64_calls
    # Counts literal backslash-x sequences in the source text.
    features['hex_encoding'] = script.count("\\x")
    features['charcode_usage'] = script.count("charCodeAt")
    features['dynamic_execution'] = script.count("Function(")
    return features


# ===============================
# SCORING
# ===============================

def calculate_score(features):
    """Return the unweighted sum of every value in ``features``.

    Args:
        features: Dict mapping feature name to a numeric value.

    Returns:
        The total of all values; ``0`` for an empty dict.
    """
    total = 0
    for value in features.values():
        total += value
    return total


# ===============================
# VISUALIZATION
# ===============================

def create_visuals(url_f, email_f, script_f):
    """Build the radar, heatmap and 3D scatter figures for one analysis.

    The figures are created as stand-alone ``matplotlib.figure.Figure``
    objects instead of through ``pyplot``. pyplot keeps every figure it
    creates alive in a global registry until it is explicitly closed, so
    a long-running app that builds three figures per request would
    otherwise accumulate them.

    Args:
        url_f: URL feature dict.
        email_f: E-mail feature dict.
        script_f: Script feature dict.

    Returns:
        A tuple ``(radar_figure, heatmap_figure, scatter3d_figure)``.
    """
    # Local import: the file only imports matplotlib.pyplot at top level.
    from matplotlib.figure import Figure

    # Radar Graph: first six URL features, closed by repeating the first point.
    categories = list(url_f.keys())[:6]
    values = list(url_f.values())[:6]
    values += values[:1]
    angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
    angles += angles[:1]

    fig1 = Figure()
    ax = fig1.add_subplot(111, polar=True)
    ax.plot(angles, values)
    ax.fill(angles, values, alpha=0.25)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories)
    ax.set_title("URL Risk Radar")

    # Heatmap: one row per module. Rows are cut to a common width (at most
    # 10) so the array stays rectangular even if a dict has fewer entries.
    width = min(10, len(url_f), len(email_f), len(script_f))
    combined = np.array([
        list(url_f.values())[:width],
        list(email_f.values())[:width],
        list(script_f.values())[:width]
    ])

    fig2 = Figure()
    ax2 = fig2.add_subplot(111)
    image = ax2.imshow(combined)
    fig2.colorbar(image, ax=ax2)
    ax2.set_title("Threat Heatmap")

    # 3D Plot: a single point whose coordinates are the three module scores.
    fig3 = Figure()
    ax3 = fig3.add_subplot(111, projection='3d')
    ax3.scatter(
        calculate_score(url_f),
        calculate_score(email_f),
        calculate_score(script_f)
    )
    ax3.set_xlabel("URL Score")
    ax3.set_ylabel("Email Score")
    ax3.set_zlabel("Script Score")
    ax3.set_title("3D Threat Landscape")

    return fig1, fig2, fig3


# ===============================
# PDF REPORT
# ===============================

def generate_pdf(url_score, email_score, script_score, overall):
    """Write the threat scores to a one-page A4 PDF and return its path.

    The report is always written to ``Cyber_Threat_Report.pdf`` in the
    current working directory, replacing any earlier report.

    Args:
        url_score: URL module score.
        email_score: E-mail module score.
        script_score: Script module score.
        overall: Combined score.

    Returns:
        The path of the written PDF file.
    """
    styles = getSampleStyleSheet()
    body_style = styles['Normal']

    story = [
        Paragraph("PhD-Level Cyber Threat Intelligence Report", styles['Title']),
        Spacer(1, 12),
        Paragraph(f"URL Threat Score: {url_score}", body_style),
        Paragraph(f"Email Threat Score: {email_score}", body_style),
        Paragraph(f"Script Threat Score: {script_score}", body_style),
        Paragraph(f"Overall Cyber Threat Score: {overall}", styles['Heading2']),
    ]

    file_path = "Cyber_Threat_Report.pdf"
    SimpleDocTemplate(file_path, pagesize=A4).build(story)
    return file_path


# ===============================
# MAIN ANALYSIS FUNCTION
# ===============================

def analyze(url, subject, script):
    """Run the full analysis for one set of inputs.

    Extracts features for each input, sums them into per-module scores,
    adds the three scores into an overall score, then builds the figures
    and the PDF report.

    Args:
        url: URL text from the UI.
        subject: E-mail subject text from the UI.
        script: JS/HTML text from the UI.

    Returns:
        A 5-tuple: summary text, radar figure, heatmap figure, 3D figure
        and the PDF file path, in the order the UI outputs expect.
    """
    feature_sets = (
        extract_url_features(url),
        extract_email_features(subject),
        extract_script_features(script),
    )
    url_f, email_f, script_f = feature_sets

    url_score, email_score, script_score = [calculate_score(f) for f in feature_sets]
    overall = url_score + email_score + script_score

    figures = create_visuals(url_f, email_f, script_f)
    pdf_file = generate_pdf(url_score, email_score, script_score, overall)

    result_text = f"""
    URL Score: {url_score}
    Email Score: {email_score}
    Script Score: {script_score}
    OVERALL CYBER THREAT SCORE: {overall}
    """

    return (result_text, *figures, pdf_file)


# ===============================
# GRADIO APP
# ===============================

# Blocks layout: three text inputs, one button, and five outputs (summary
# text, three plots, downloadable PDF).
with gr.Blocks(title="PhD-Level Cyber Threat Intelligence System") as app:

    gr.Markdown("# üîê PhD-Level Cyber Threat Intelligence System")

    url_input = gr.Textbox(label="Enter URL")
    email_input = gr.Textbox(label="Enter Email Subject")
    script_input = gr.Textbox(label="Enter Script (JS/HTML)")

    analyze_btn = gr.Button("Analyze Threat")

    output_text = gr.Textbox(label="Threat Scores")
    radar_plot = gr.Plot(label="Radar Graph")
    heatmap_plot = gr.Plot(label="Heatmap")
    plot3d = gr.Plot(label="3D Threat Landscape")
    pdf_output = gr.File(label="Download PDF Report")

    # The outputs list must stay in the same order as analyze()'s return tuple.
    analyze_btn.click(
        analyze,
        inputs=[url_input, email_input, script_input],
        outputs=[output_text, radar_plot, heatmap_plot, plot3d, pdf_output]
    )

# Start the app with Gradio's default launch settings (no arguments passed).
app.launch()


Collecting reportlab
  Downloading reportlab-4.4.10-py3-none-any.whl.metadata (1.7 kB)
Downloading reportlab-4.4.10-py3-none-any.whl (2.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.0/2.0 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.10
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://55835b498c538d00c5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
import gradio as gr
import math
import zlib
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from collections import Counter
from urllib.parse import urlparse
import unicodedata
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.pagesizes import A4

# ===============================
# GLOBAL CONSTANTS
# ===============================

# Substrings counted in URLs and e-mail subjects as phishing indicators.
SUSPICIOUS_WORDS = [
    "login",
    "verify",
    "bank",
    "secure",
    "update",
    "account",
    "password",
    "confirm",
    "urgent",
]

# Words and phrases used for the e-mail "Social Engineering" feature.
SOCIAL_ENGINEERING_WORDS = [
    "urgent",
    "immediately",
    "action required",
    "suspend",
    "limited time",
    "verify now",
    "click below",
    "security alert",
]

# URL endings treated as risky by the "TLD Risk" feature.
SUSPICIOUS_TLDS = [
    ".tk",
    ".ml",
    ".ga",
    ".cf",
    ".xyz",
    ".top",
]

# ===============================
# ADVANCED FUNCTIONS
# ===============================

def shannon_entropy(text):
    """Return the Shannon entropy of ``text`` in bits per character.

    Args:
        text: The string to measure.

    Returns:
        ``0`` for empty input, otherwise the entropy as a float.
    """
    if not text:
        return 0
    n_chars = len(text)
    accumulated = 0
    for ch in set(text):
        share = text.count(ch) / n_chars
        accumulated += share * math.log2(share)
    return -accumulated

def compression_ratio(text):
    """Return compressed size divided by raw size for ``text``.

    The text is encoded with the default codec and compressed with zlib.
    Very short inputs give ratios above 1 because of zlib's fixed overhead.

    Args:
        text: The string to compress.

    Returns:
        ``0`` for empty input, otherwise the byte-length ratio as a float.
    """
    if not text:
        return 0
    payload = text.encode()
    return len(zlib.compress(payload)) / len(payload)

def homoglyph_count(text):
    """Count the characters in ``text`` whose Unicode name contains "CYRILLIC".

    Characters without a Unicode name (for example control characters) are
    treated as having an empty name, so they are skipped without relying
    on a catch-all exception handler.

    Args:
        text: The string to scan.

    Returns:
        The number of Cyrillic characters as an int.
    """
    return sum(1 for ch in text if "CYRILLIC" in unicodedata.name(ch, ""))

def bigram_anomaly_score(text):
    """Return the standard deviation of the character-bigram counts.

    Args:
        text: The string to analyse.

    Returns:
        ``0`` when the text has fewer than two characters, otherwise the
        population standard deviation (``np.std``) of how often each
        distinct adjacent character pair occurs.
    """
    if len(text) < 2:
        return 0
    occurrences = Counter()
    for first, second in zip(text, text[1:]):
        occurrences[first + second] += 1
    return np.std(list(occurrences.values()))

def keyword_density(text, keywords):
    """Return the fraction of word positions in ``text`` that start a keyword.

    The text is lower-cased and split on whitespace. A keyword may be a
    single word or a multi-word phrase such as "action required"; a phrase
    matches when its words appear consecutively. Matching phrases by token
    sequence is what lets multi-word keywords score at all, since a single
    token can never equal a phrase containing a space. Single-word
    keywords still require an exact token match.

    Args:
        text: The text to scan.
        keywords: Iterable of keyword strings, expected in lower case.

    Returns:
        ``0`` when the text has no words, otherwise matches divided by the
        number of words. Each phrase match counts once, at its first word.
    """
    words = text.lower().split()
    if not words:
        return 0
    phrases = {tuple(keyword.split()) for keyword in keywords}
    # A blank keyword would otherwise match at every position.
    phrases.discard(())
    hits = sum(
        1
        for start in range(len(words))
        if any(tuple(words[start:start + len(phrase)]) == phrase for phrase in phrases)
    )
    return hits / len(words)

# ===============================
# FEATURE EXTRACTION
# ===============================

def extract_url_features(url):
    """Turn a URL string into the labelled feature dict used for URL scoring.

    The keys double as axis labels in the charts. The TLD check looks at
    the parsed host name as well as the end of the raw string, so a risky
    TLD is still detected when the URL carries a path, port or query
    (e.g. ``http://evil.tk/login``).

    Args:
        url: The URL to analyse. May be empty or lack a scheme.

    Returns:
        A dict mapping feature label to an int or float value.
    """
    lowered = url.lower()

    # urlparse only fills in the host when an authority marker is present,
    # so scheme-less input is parsed as if it started with "//".
    has_authority = "://" in url or url.startswith("//")
    try:
        host = urlparse(url if has_authority else "//" + url).hostname or ""
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): no host.
        host = ""

    tld_risk = int(any(url.endswith(tld) or host.endswith(tld) for tld in SUSPICIOUS_TLDS))

    return {
        'Length': len(url),
        'Dots': url.count('.'),
        'Hyphens': url.count('-'),
        'Digits': sum(c.isdigit() for c in url),
        'Suspicious Words': sum(word in lowered for word in SUSPICIOUS_WORDS),
        'TLD Risk': tld_risk,
        'Path Depth': url.count('/'),
        'Entropy': shannon_entropy(url),
        'Compression': compression_ratio(url),
        'Homoglyphs': homoglyph_count(url),
        'Bigram Anomaly': bigram_anomaly_score(url),
        'Keyword Density': keyword_density(url, SUSPICIOUS_WORDS)
    }

def extract_email_features(subject):
    """Turn an e-mail subject line into the labelled feature dict for e-mail scoring.

    Args:
        subject: The subject text to analyse.

    Returns:
        A dict of 9 numeric features keyed by display label.
    """
    lowered = subject.lower()
    uppercase_chars = sum(ch.isupper() for ch in subject)

    features = {}
    features['Length'] = len(subject)
    # max(1, ...) keeps the ratio defined for an empty subject.
    features['Uppercase Ratio'] = uppercase_chars / max(1, len(subject))
    features['Exclamations'] = subject.count('!')
    features['Suspicious Words'] = sum(1 for word in SUSPICIOUS_WORDS if word in lowered)
    features['Entropy'] = shannon_entropy(subject)
    features['Social Engineering'] = keyword_density(subject, SOCIAL_ENGINEERING_WORDS)
    features['Compression'] = compression_ratio(subject)
    features['Bigram Anomaly'] = bigram_anomaly_score(subject)
    features['Homoglyphs'] = homoglyph_count(subject)
    return features

def extract_script_features(script):
    """Turn a JS/HTML snippet into the labelled feature dict for script scoring.

    All counts are plain substring counts on the raw text.

    Args:
        script: The script source to analyse.

    Returns:
        A dict of 9 numeric features keyed by display label.
    """
    features = {}
    features['Length'] = len(script)
    features['Eval Usage'] = script.count("eval")
    features['Iframe'] = script.count("iframe")
    features['Entropy'] = shannon_entropy(script)
    features['Compression'] = compression_ratio(script)
    features['Bigram Anomaly'] = bigram_anomaly_score(script)
    base64_calls = script.count("atob") + script.count("btoa")
    features['Encoded Strings'] = base64_calls
    # Counts literal backslash-x sequences in the source text.
    features['Hex Encoding'] = script.count("\\x")
    features['Dynamic Execution'] = script.count("Function(")
    return features

# ===============================
# NORMALIZATION TO PERCENTAGE
# ===============================

def normalize_score(score, max_possible=150):
    """Express ``score`` as a percentage of ``max_possible``, capped at 100.

    Args:
        score: Raw score to convert.
        max_possible: Raw score that corresponds to 100 %.

    Returns:
        The percentage, or ``100`` when the score exceeds ``max_possible``.
    """
    percentage = (score / max_possible) * 100
    if percentage < 100:
        return percentage
    return 100

def calculate_percentage(features):
    """Sum all feature values and convert the total to a capped percentage.

    Args:
        features: Dict mapping feature label to a numeric value.

    Returns:
        The result of ``normalize_score`` with its default maximum.
    """
    return normalize_score(sum(features.values()))

# ===============================
# VISUALIZATION (ALL INTERACTIVE) # Ensure plotly is imported as px, go
# ===============================

def create_visuals(url_f, email_f, script_f, url_p, email_p, script_p):
    """Build the four interactive Plotly figures for one analysis.

    Args:
        url_f: URL feature dict (labels double as chart axis names).
        email_f: E-mail feature dict.
        script_f: Script feature dict.
        url_p: URL risk percentage.
        email_p: E-mail risk percentage.
        script_p: Script risk percentage.

    Returns:
        A tuple ``(radar, heatmap, bar, fig3d)`` of Plotly figures.
    """
    widest = max(len(url_f), len(email_f), len(script_f))

    def _padded(items, filler):
        # Extend a row to the widest module so the heatmap grid is rectangular.
        row = list(items)
        return row + [filler] * (widest - len(row))

    # Radar: every URL feature on its own spoke.
    radar = go.Figure(data=[go.Scatterpolar(
        r=list(url_f.values()),
        theta=list(url_f.keys()),
        fill='toself',
        name='URL'
    )])
    radar.update_layout(title="URL Risk Radar", polar=dict(radialaxis=dict(visible=True)))

    # Heatmap: one row per module, zero-padded to a common width.
    heat_rows = [_padded(module.values(), 0) for module in (url_f, email_f, script_f)]
    # Column labels come from the URL feature names for every row.
    column_labels = _padded(url_f.keys(), '')
    heatmap = px.imshow(
        heat_rows,
        labels=dict(x="Features", y="Module", color="Risk"),
        x=column_labels,
        y=["URL","Email","Script"],
        title="Threat Heatmap",
    )

    # Bar graph: the three risk percentages side by side.
    bar = go.Figure(data=[go.Bar(
        x=["URL","Email","Script"],
        y=[url_p, email_p, script_p]
    )])
    bar.update_layout(title="Threat Percentage Comparison",
                      yaxis_title="Risk %")

    # Interactive 3D: a single marker positioned by the three percentages.
    fig3d = go.Figure(data=[go.Scatter3d(
        x=[url_p],
        y=[email_p],
        z=[script_p],
        mode='markers',
        marker=dict(size=10)
    )])
    fig3d.update_layout(
        title="Interactive 3D Threat Landscape",
        scene=dict(
            xaxis_title='URL %',
            yaxis_title='Email %',
            zaxis_title='Script %'
        )
    )

    return radar, heatmap, bar, fig3d

# ===============================
# PDF REPORT
# ===============================

def generate_pdf(url_p, email_p, script_p, overall):
    """Write the risk percentages to a one-page A4 PDF and return its path.

    The report is always written to ``Cyber_Threat_Report.pdf`` in the
    current working directory, replacing any earlier report.

    Args:
        url_p: URL risk percentage.
        email_p: E-mail risk percentage.
        script_p: Script risk percentage.
        overall: Overall threat percentage.

    Returns:
        The path of the written PDF file.
    """
    styles = getSampleStyleSheet()
    body_style = styles['Normal']

    story = [
        Paragraph("PhD-Level Cyber Threat Intelligence Report", styles['Title']),
        Spacer(1, 12),
        Paragraph(f"URL Risk: {url_p:.2f}%", body_style),
        Paragraph(f"Email Risk: {email_p:.2f}%", body_style),
        Paragraph(f"Script Risk: {script_p:.2f}%", body_style),
        Paragraph(f"Overall Threat Level: {overall:.2f}%", styles['Heading2']),
    ]

    file_path = "Cyber_Threat_Report.pdf"
    SimpleDocTemplate(file_path, pagesize=A4).build(story)
    return file_path

# ===============================
# MAIN ANALYSIS # This is the function called by Gradio
# ===============================

def analyze(url, subject, script):
    """Run the full analysis for one set of inputs.

    Extracts features for each input, converts each feature total to a
    capped percentage, averages the three percentages into an overall
    level, then builds the figures and the PDF report.

    Args:
        url: URL text from the UI.
        subject: E-mail subject text from the UI.
        script: JS/HTML text from the UI.

    Returns:
        A 6-tuple: summary text, radar, heatmap, bar chart, 3D figure and
        the PDF file path, in the order the UI outputs expect.
    """
    feature_sets = (
        extract_url_features(url),
        extract_email_features(subject),
        extract_script_features(script),
    )
    url_p, email_p, script_p = [calculate_percentage(f) for f in feature_sets]

    overall = (url_p + email_p + script_p) / 3

    figures = create_visuals(*feature_sets, url_p, email_p, script_p)
    pdf_file = generate_pdf(url_p, email_p, script_p, overall)

    result = f"""
    URL Risk: {url_p:.2f}%
    Email Risk: {email_p:.2f}%
    Script Risk: {script_p:.2f}%
    OVERALL CYBER THREAT LEVEL: {overall:.2f}%
    """

    radar, heatmap, bar, fig3d = figures
    return result, radar, heatmap, bar, fig3d, pdf_file

# ===============================
# GRADIO APP # Ensure this part remains for the Gradio interface
# ===============================

# Blocks layout: three text inputs, one button, and six outputs (summary
# text, four plots, downloadable PDF).
with gr.Blocks(title="PhD-Level Cyber Threat Intelligence System") as app:

    gr.Markdown("# üîê Advanced Cyber Threat Intelligence Dashboard")

    url_input = gr.Textbox(label="Enter URL")
    email_input = gr.Textbox(label="Enter Email Subject")
    script_input = gr.Textbox(label="Enter Script (JS/HTML)")

    analyze_btn = gr.Button("Analyze Threat")

    output_text = gr.Textbox(label="Threat Summary")
    radar_plot = gr.Plot(label="Radar")
    heatmap_plot = gr.Plot(label="Heatmap")
    bar_plot = gr.Plot(label="Bar Graph")
    plot3d = gr.Plot(label="Interactive 3D")
    pdf_output = gr.File(label="Download PDF Report")

    # The outputs list must stay in the same order as analyze()'s return tuple.
    analyze_btn.click(
        analyze,
        inputs=[url_input, email_input, script_input],
        outputs=[output_text, radar_plot, heatmap_plot, bar_plot, plot3d, pdf_output]
    )

# Start the app with Gradio's default launch settings (no arguments passed).
app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a5fbbd562773e29162.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


