# In [2]: notebook cell - single-domain classification GUI
# --------------------------------------------------------
# GUI for Domain Classification using SelectiveDeepForest
# --------------------------------------------------------

import tkinter as tk
from tkinter import messagebox
import numpy as np
import pickle, string, math
from collections import Counter

# ------------------------------------------
# Include SelectiveDeepForest CLASS
# ------------------------------------------
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

class SelectiveDeepForest:
    """Cascade of random forests that keeps only the best-scoring trees per layer.

    Every layer trains a ``RandomForestClassifier``, ranks its individual
    trees by ROC-AUC on the data the layer was trained on, and keeps the top
    ``top_ratio`` fraction. The class probabilities of a layer are appended
    to the feature matrix handed to the next layer.

    Only binary problems are supported: tree ranking and ``predict`` both
    read column 1 of ``predict_proba``.
    """

    def __init__(self, n_layers=5, n_estimators=100, top_ratio=0.7):
        """Store the hyper-parameters; no training happens here.

        Args:
            n_layers: Number of cascade layers built by ``fit``.
            n_estimators: Number of trees trained in each layer's forest.
            top_ratio: Fraction of each layer's trees that is kept.
        """
        self.n_layers = n_layers
        self.n_estimators = n_estimators
        self.top_ratio = top_ratio
        # One list of selected trees per fitted layer, in cascade order.
        self.layers = []

    def _select_trees(self, forest, X, y):
        """Return the trees of ``forest`` with the highest ROC-AUC on ``(X, y)``.

        Args:
            forest: Fitted forest exposing ``estimators_``.
            X: Feature matrix the trees are scored on.
            y: Binary labels aligned with the rows of ``X``.

        Returns:
            The best ``int(len(trees) * top_ratio)`` trees (at least one),
            ordered from highest to lowest AUC.
        """
        scored = [
            (roc_auc_score(y, tree.predict_proba(X)[:, 1]), idx)
            for idx, tree in enumerate(forest.estimators_)
        ]
        # Sort once after all trees are scored; AUC ties fall back to the
        # tree index because the tuples are compared element-wise.
        scored.sort(reverse=True)
        # Keep at least one tree so a small top_ratio cannot produce an empty
        # layer (averaging zero trees would yield NaN at prediction time).
        top_k = max(1, int(len(scored) * self.top_ratio))
        return [forest.estimators_[idx] for _, idx in scored[:top_k]]

    def fit(self, X, y):
        """Train ``n_layers`` forests, each on the features plus earlier outputs.

        Args:
            X: 2-D feature matrix.
            y: Binary labels aligned with the rows of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        # Start from scratch: layers left over from an earlier fit expect a
        # different input width and would break predict_proba.
        self.layers = []
        Xc = X
        for _ in range(self.n_layers):
            rf = RandomForestClassifier(n_estimators=self.n_estimators)
            rf.fit(Xc, y)
            self.layers.append(self._select_trees(rf, Xc, y))
            # NOTE(review): training augments with the FULL forest's
            # probabilities, while predict_proba averages only the selected
            # trees - confirm this train/inference difference is intended.
            Xc = np.hstack((Xc, rf.predict_proba(Xc)))
        return self

    def predict_proba(self, X):
        """Return the class probabilities produced by the last layer.

        Args:
            X: 2-D feature matrix with the same columns used in ``fit``.

        Returns:
            Array with one row per sample: the mean of the selected trees'
            ``predict_proba`` output in the final layer.

        Raises:
            ValueError: If the model has no fitted layers.
        """
        if not self.layers:
            raise ValueError(
                "SelectiveDeepForest is not fitted yet; call fit() first."
            )
        Xc = X
        for trees in self.layers:
            probs = np.mean([t.predict_proba(Xc) for t in trees], axis=0)
            # Each layer sees the original features plus all earlier outputs.
            Xc = np.hstack((Xc, probs))
        return probs

    def predict(self, X):
        """Return 0/1 labels by thresholding the class-1 probability at 0.5."""
        return (self.predict_proba(X)[:, 1] >= 0.5).astype(int)

# ------------------------------------------
# Load trained model
# ------------------------------------------
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load a model.pkl that comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# ------------------------------------------
# Feature Extractor
# ------------------------------------------
def shannon_entropy(domain):
    """Return the Shannon entropy, in bits, of the characters in ``domain``.

    Each distinct character contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency. An empty string yields 0.
    """
    total = len(domain)
    weighted_logs = 0
    # Counter keeps first-occurrence order, so terms are added in the same
    # order in which the characters first appear.
    for occurrences in Counter(domain).values():
        share = float(occurrences) / total
        weighted_logs += share * math.log2(share)
    return -weighted_logs

def extract_features(domain):
    """Compute the 23 lexical features of a domain name.

    The values are returned in insertion order of the ``features`` dict
    below, which is the same order as ``FEATURE_NAMES``.
    NOTE(review): presumably this is also the column order the pickled model
    was trained on - do not reorder or redefine features without retraining.

    Args:
        domain: Non-empty domain string, used as typed (only the vowel count
            is case-insensitive).

    Returns:
        list: 23 numeric feature values.

    Raises:
        ValueError: If ``domain`` is empty.
    """
    if not domain:
        # Fail with a clear message instead of an IndexError on domain[0].
        raise ValueError("domain must be a non-empty string")

    # Computed once and reused by several features below.
    digit_positions = [i for i, c in enumerate(domain) if c.isdigit()]
    letter_positions = [i for i, c in enumerate(domain) if c.isalpha()]
    char_counts = Counter(domain)
    splits = domain.split('.')
    vowels = set('aeiou')

    features = {}
    features['length'] = len(domain)
    features['num_digits'] = len(digit_positions)
    features['num_letters'] = len(letter_positions)
    features['num_hyphens'] = domain.count('-')
    features['num_dots'] = domain.count('.')
    features['entropy'] = shannon_entropy(domain)
    # The 1e-5 term in every ratio guards against division by zero.
    features['digit_ratio'] = features['num_digits'] / (features['length'] + 1e-5)
    features['letter_ratio'] = features['num_letters'] / (features['length'] + 1e-5)
    features['num_levels'] = features['num_dots'] + 1
    features['max_seg_len'] = max(len(s) for s in splits)
    features['min_seg_len'] = min(len(s) for s in splits)
    features['num_vowels'] = sum(c in vowels for c in domain.lower())
    features['vowel_ratio'] = features['num_vowels'] / (features['length'] + 1e-5)
    features['starts_num'] = int(domain[0].isdigit())
    features['ends_num'] = int(domain[-1].isdigit())
    # Count of character positions whose character occurs more than once,
    # divided by the number of distinct characters (so it can exceed 1).
    features['repeating_char_ratio'] = (
        sum(char_counts[c] > 1 for c in domain) / (len(char_counts) + 1e-5)
    )
    features['alpha_numeric_ratio'] = (
        (features['num_digits'] + features['num_letters']) / (features['length'] + 1e-5)
    )
    features['special_char_ratio'] = (
        (features['length'] - (features['num_letters'] + features['num_digits']))
        / (features['length'] + 1e-5)
    )
    # A trailing 0 is appended to each position list so mean/std are defined
    # even when there are no digits/letters; it is part of the feature
    # definition and must be kept as is.
    features['mean_pos_digit'] = np.mean(digit_positions + [0])
    features['mean_pos_letter'] = np.mean(letter_positions + [0])
    features['std_pos_letter'] = np.std(letter_positions + [0])
    features['std_pos_digit'] = np.std(digit_positions + [0])
    features['is_ip_like'] = int(all(c in string.digits + '.' for c in domain))
    return list(features.values())

# Display names for the feature values, one per entry and in the same order
# in which extract_features() inserts them into its result.
FEATURE_NAMES = [
    'length',
    'num_digits',
    'num_letters',
    'num_hyphens',
    'num_dots',
    'entropy',
    'digit_ratio',
    'letter_ratio',
    'num_levels',
    'max_seg_len',
    'min_seg_len',
    'num_vowels',
    'vowel_ratio',
    'starts_num',
    'ends_num',
    'repeating_char_ratio',
    'alpha_numeric_ratio',
    'special_char_ratio',
    'mean_pos_digit',
    'mean_pos_letter',
    'std_pos_letter',
    'std_pos_digit',
    'is_ip_like',
]

# ------------------------------------------
# GUI Prediction function
# ------------------------------------------
def predict_domain():
    """Classify the domain typed into ``entry`` and refresh the result widgets.

    Reads the module-level widgets ``entry``, ``result_text``,
    ``result_label`` and ``table_labels`` and the module-level ``model``.
    Shows an error dialog and returns early when the entry is blank.
    A prediction of 1 is displayed as malicious, anything else as benign.
    """
    d = entry.get().strip()
    if not d:
        messagebox.showerror("Error", "Please enter a domain.")
        return

    feat_values = extract_features(d)
    # The model expects a 2-D array: one row holding all features.
    feat = np.array(feat_values).reshape(1, -1)
    pred = model.predict(feat)[0]

    # Result text and colour
    if pred == 1:
        verdict, color = "❌  Malicious", "red"
    else:
        verdict, color = "✅  Benign", "green"
    result_text.set(verdict)
    result_label.config(fg=color)

    # Walk the grid row by row. The grid size comes from table_labels itself,
    # so this function no longer repeats the table's row/column counts.
    cells = [lbl for row in table_labels for lbl in row]
    for idx, lbl in enumerate(cells):
        if idx < len(feat_values):
            text = f"{idx+1}) {FEATURE_NAMES[idx]}: {feat_values[idx]:.4f}"
            lbl.config(text=text, fg=color, relief="solid")
        else:
            # Clear any unused cells
            lbl.config(text="", relief="flat")

# ------------------------------------------
# Build GUI
# ------------------------------------------
root = tk.Tk()
root.title("Domain Classification")
# Maximize the window. The 'zoomed' state is not accepted by Tk on X11 and
# raises TclError there, so fall back to the X11 '-zoomed' window attribute.
try:
    root.state('zoomed')
except tk.TclError:
    root.attributes('-zoomed', True)

# TOP TITLE
top_title = tk.Label(root, text="Malicious Domain Detection System Based On Distributed Deep Learning",
                     wraplength=1000, font=("Helvetica", 16, "bold"), justify="center")
top_title.pack(pady=(10,2))

heading = tk.Label(root, text="SHAH ABDUL LATIF UNIVERSITY, KHAIRPUR",
                   font=("Helvetica", 15, "bold"))
heading.pack(pady=3)

# Optional Logo: the GUI must still start when Pillow or logo.png is missing,
# so the import lives inside the try block as well.
try:
    from PIL import Image, ImageTk
    img = Image.open("logo.png")
    img = img.resize((140, 140))
    # 'logo' stays bound at module level for as long as the label uses it.
    logo = ImageTk.PhotoImage(img)
    logo_label = tk.Label(root, image=logo)
    logo_label.pack()
except Exception:
    # Best effort: skip the logo on any ordinary error, but no longer swallow
    # KeyboardInterrupt/SystemExit as the former bare "except:" did.
    pass

# Institute name under logo (bold)
inst_name = tk.Label(root, text="INSTITUTE OF COMPUTER SCIENCE", font=("Helvetica", 12, "bold"))
inst_name.pack(pady=(0,15))

# Entry
entry = tk.Entry(root, width=60, font=("Helvetica", 13))
entry.pack(pady=5)

btn = tk.Button(root, text="Predict", width=18, font=("Helvetica", 11),
                command=predict_domain)
btn.pack(pady=7)

result_text = tk.StringVar(value="")
result_label = tk.Label(root, textvariable=result_text,
                        font=("Helvetica", 14, "bold"))
result_label.pack(pady=10)

# ---- Feature Table (6 rows x 4 columns), borders hidden initially -----
table_frame = tk.Frame(root)
table_frame.pack(pady=10)

# table_labels[row][column] holds the Label shown in that grid cell.
table_labels = []
for r in range(6):
    row = []
    for c in range(4):
        lbl = tk.Label(table_frame, width=30, anchor="w", font=("Courier", 10),
                       relief="flat", borderwidth=1, text="")
        lbl.grid(row=r, column=c, sticky="nsew")
        row.append(lbl)
    table_labels.append(row)

# Footer sequence (bold). Packed with side="bottom", so the label packed
# first ends up lowest in the window.
footer_sup2 = tk.Label(root, text="Co-Supervisor : Professor Dr. Noor Ahmed Shaikh", font=("Helvetica", 11, "bold"))
footer_sup2.pack(side="bottom")

footer_sup = tk.Label(root, text="Supervisor : Professor Dr. Samina Rajper", font=("Helvetica", 11, "bold"))
footer_sup.pack(side="bottom")

footer_dev = tk.Label(root, text="Developed by : Samar Abbas Mangi", font=("Helvetica", 11, "bold"))
footer_dev.pack(side="bottom")

# Blocks until the window is closed.
root.mainloop()


# In [4]: notebook cell - multi-domain (batch) classification GUI
import tkinter as tk
from tkinter import messagebox, scrolledtext
import numpy as np
import pickle, string, math
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# ------------------------------------------
# Include SelectiveDeepForest CLASS
# ------------------------------------------
class SelectiveDeepForest:
    """Cascade of random forests that keeps only the best-scoring trees per layer.

    Every layer trains a ``RandomForestClassifier``, ranks its individual
    trees by ROC-AUC on the data the layer was trained on, and keeps the top
    ``top_ratio`` fraction. The class probabilities of a layer are appended
    to the feature matrix handed to the next layer.

    Only binary problems are supported: tree ranking and ``predict`` both
    read column 1 of ``predict_proba``.
    """

    def __init__(self, n_layers=5, n_estimators=100, top_ratio=0.7):
        """Store the hyper-parameters; no training happens here.

        Args:
            n_layers: Number of cascade layers built by ``fit``.
            n_estimators: Number of trees trained in each layer's forest.
            top_ratio: Fraction of each layer's trees that is kept.
        """
        self.n_layers = n_layers
        self.n_estimators = n_estimators
        self.top_ratio = top_ratio
        # One list of selected trees per fitted layer, in cascade order.
        self.layers = []

    def _select_trees(self, forest, X, y):
        """Return the trees of ``forest`` with the highest ROC-AUC on ``(X, y)``.

        Args:
            forest: Fitted forest exposing ``estimators_``.
            X: Feature matrix the trees are scored on.
            y: Binary labels aligned with the rows of ``X``.

        Returns:
            The best ``int(len(trees) * top_ratio)`` trees (at least one),
            ordered from highest to lowest AUC.
        """
        scored = [
            (roc_auc_score(y, tree.predict_proba(X)[:, 1]), idx)
            for idx, tree in enumerate(forest.estimators_)
        ]
        # AUC ties fall back to the tree index because the tuples are
        # compared element-wise.
        scored.sort(reverse=True)
        # Keep at least one tree so a small top_ratio cannot produce an empty
        # layer (averaging zero trees would yield NaN at prediction time).
        top_k = max(1, int(len(scored) * self.top_ratio))
        return [forest.estimators_[idx] for _, idx in scored[:top_k]]

    def fit(self, X, y):
        """Train ``n_layers`` forests, each on the features plus earlier outputs.

        Args:
            X: 2-D feature matrix.
            y: Binary labels aligned with the rows of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        # Start from scratch: layers left over from an earlier fit expect a
        # different input width and would break predict_proba.
        self.layers = []
        Xc = X
        for _ in range(self.n_layers):
            rf = RandomForestClassifier(n_estimators=self.n_estimators)
            rf.fit(Xc, y)
            self.layers.append(self._select_trees(rf, Xc, y))
            # NOTE(review): training augments with the FULL forest's
            # probabilities, while predict_proba averages only the selected
            # trees - confirm this train/inference difference is intended.
            Xc = np.hstack((Xc, rf.predict_proba(Xc)))
        return self

    def predict_proba(self, X):
        """Return the class probabilities produced by the last layer.

        Args:
            X: 2-D feature matrix with the same columns used in ``fit``.

        Returns:
            Array with one row per sample: the mean of the selected trees'
            ``predict_proba`` output in the final layer.

        Raises:
            ValueError: If the model has no fitted layers.
        """
        if not self.layers:
            raise ValueError(
                "SelectiveDeepForest is not fitted yet; call fit() first."
            )
        Xc = X
        for trees in self.layers:
            probs = np.mean([t.predict_proba(Xc) for t in trees], axis=0)
            # Each layer sees the original features plus all earlier outputs.
            Xc = np.hstack((Xc, probs))
        return probs

    def predict(self, X):
        """Return 0/1 labels by thresholding the class-1 probability at 0.5."""
        return (self.predict_proba(X)[:, 1] >= 0.5).astype(int)

# ------------------------------------------
# Load trained model
# ------------------------------------------
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load a model.pkl that comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# ------------------------------------------
# Feature Extractor
# ------------------------------------------
def shannon_entropy(domain):
    """Return the character-level Shannon entropy of ``domain`` in bits.

    Sums ``p * log2(p)`` over the distinct characters, where ``p`` is the
    character's relative frequency, and negates the total. An empty string
    yields 0.
    """
    n_chars = len(domain)
    # Counter preserves first-occurrence order of the characters.
    frequencies = (float(count) / n_chars for count in Counter(domain).values())
    return -sum(freq * math.log2(freq) for freq in frequencies)

def extract_features(domain):
    """Compute the 23 lexical features of a domain name.

    The values are returned in insertion order of the ``features`` dict
    below.
    NOTE(review): presumably this is the column order the pickled model was
    trained on - do not reorder or redefine features without retraining.

    Args:
        domain: Non-empty domain string, used as typed (only the vowel count
            is case-insensitive).

    Returns:
        list: 23 numeric feature values.

    Raises:
        ValueError: If ``domain`` is empty.
    """
    if not domain:
        # Fail with a clear message instead of an IndexError on domain[0].
        raise ValueError("domain must be a non-empty string")

    # Computed once and reused by several features below.
    digit_positions = [i for i, c in enumerate(domain) if c.isdigit()]
    letter_positions = [i for i, c in enumerate(domain) if c.isalpha()]
    char_counts = Counter(domain)
    splits = domain.split('.')
    vowels = set('aeiou')

    features = {}
    features['length'] = len(domain)
    features['num_digits'] = len(digit_positions)
    features['num_letters'] = len(letter_positions)
    features['num_hyphens'] = domain.count('-')
    features['num_dots'] = domain.count('.')
    features['entropy'] = shannon_entropy(domain)
    # The 1e-5 term in every ratio guards against division by zero.
    features['digit_ratio'] = features['num_digits'] / (features['length'] + 1e-5)
    features['letter_ratio'] = features['num_letters'] / (features['length'] + 1e-5)
    features['num_levels'] = features['num_dots'] + 1
    features['max_seg_len'] = max(len(s) for s in splits)
    features['min_seg_len'] = min(len(s) for s in splits)
    features['num_vowels'] = sum(c in vowels for c in domain.lower())
    features['vowel_ratio'] = features['num_vowels'] / (features['length'] + 1e-5)
    features['starts_num'] = int(domain[0].isdigit())
    features['ends_num'] = int(domain[-1].isdigit())
    # Count of character positions whose character occurs more than once,
    # divided by the number of distinct characters (so it can exceed 1).
    features['repeating_char_ratio'] = (
        sum(char_counts[c] > 1 for c in domain) / (len(char_counts) + 1e-5)
    )
    features['alpha_numeric_ratio'] = (
        (features['num_digits'] + features['num_letters']) / (features['length'] + 1e-5)
    )
    features['special_char_ratio'] = (
        (features['length'] - (features['num_letters'] + features['num_digits']))
        / (features['length'] + 1e-5)
    )
    # A trailing 0 is appended to each position list so mean/std are defined
    # even when there are no digits/letters; it is part of the feature
    # definition and must be kept as is.
    features['mean_pos_digit'] = np.mean(digit_positions + [0])
    features['mean_pos_letter'] = np.mean(letter_positions + [0])
    features['std_pos_letter'] = np.std(letter_positions + [0])
    features['std_pos_digit'] = np.std(digit_positions + [0])
    features['is_ip_like'] = int(all(c in string.digits + '.' for c in domain))
    return list(features.values())

# ------------------------------------------
# GUI Prediction function
# ------------------------------------------
def predict_domains():
    """Classify each domain entered in ``entry`` and list the verdicts.

    Reads one domain per line from the module-level text widget ``entry``,
    ignoring blank lines. Shows an error dialog and returns when nothing was
    entered. Otherwise the children of ``results_frame`` are replaced with one
    label per domain; only the first 12 domains are processed. A prediction of
    0 is displayed as benign, anything else as malicious.
    """
    raw_lines = entry.get("1.0", tk.END).strip().split("\n")
    domains = [line.strip() for line in raw_lines if line.strip()]
    if not domains:
        messagebox.showerror("Error", "Please enter at least one domain.")
        return

    # Remove the labels left over from the previous run.
    for stale in results_frame.winfo_children():
        stale.destroy()

    # Domains beyond the twelfth are ignored.
    for d in domains[:12]:
        # The model expects a 2-D array: one row holding all features.
        feat = np.array(extract_features(d)).reshape(1, -1)
        pred = model.predict(feat)[0]
        if pred == 0:
            result_text, color = "✅ Benign", "green"
        else:
            result_text, color = "❌ Malicious", "red"

        tk.Label(
            results_frame,
            text=f"{d} --------> {result_text}",
            font=("Helvetica", 14, "bold"),
            fg=color,
            anchor="w",
            padx=10,
        ).pack(fill='x', pady=2)

# ------------------------------------------
# Build GUI
# ------------------------------------------
root = tk.Tk()
root.title("Malicious Domain Detection")
# Maximize the window. The 'zoomed' state is not accepted by Tk on X11 and
# raises TclError there, so fall back to the X11 '-zoomed' window attribute.
try:
    root.state('zoomed')
except tk.TclError:
    root.attributes('-zoomed', True)

# Top Title
top_title = tk.Label(root, text="Malicious Domain Detection System Based On Distributed Deep Learning",
                     wraplength=1200, font=("Helvetica", 16, "bold"), justify="center")
top_title.pack(pady=(10,2))

heading = tk.Label(root, text="SHAH ABDUL LATIF UNIVERSITY, KHAIRPUR",
                   font=("Helvetica", 15, "bold"))
heading.pack(pady=3)

# Multi-line Entry
entry_label = tk.Label(root, text="Enter up to 12 domains (one per line):", font=("Helvetica", 12))
entry_label.pack(pady=5)

entry = scrolledtext.ScrolledText(root, width=80, height=10, font=("Helvetica", 12))
entry.pack(pady=5)

btn = tk.Button(root, text="Predict", width=20, font=("Helvetica", 12),
                command=predict_domains)
btn.pack(pady=7)

# Frame for the result labels; expands to fill the remaining window area.
# It is a plain Frame with no scrollbar attached.
results_frame = tk.Frame(root)
results_frame.pack(fill='both', expand=True, padx=20, pady=10)

# Footer. Packed with side="bottom", so the label packed first ends up lowest
# in the window.
footer_sup2 = tk.Label(root, text="Co-Supervisor : Professor Dr. Noor Ahmed Shaikh", font=("Helvetica", 11, "bold"))
footer_sup2.pack(side="bottom")
footer_sup = tk.Label(root, text="Supervisor : Professor Dr. Samina Rajper", font=("Helvetica", 11, "bold"))
footer_sup.pack(side="bottom")
footer_dev = tk.Label(root, text="Developed by : Samar Abbas Mangi", font=("Helvetica", 11, "bold"))
footer_dev.pack(side="bottom")

# Blocks until the window is closed.
root.mainloop()
