# Model Inspect

In [None]:
import ast
import html
import warnings

import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

warnings.filterwarnings('ignore')

class SpamDetectionHTMLReporter:
    """Run a sequence-classification spam model and render predictions as HTML.

    The tokenizer and the model are both loaded from ``model_dir``. Predicted
    class index 1 is reported as ``SPAM``; any other index is reported as
    ``HAM``.
    """

    # CSS class used to style each recognised special token. Tokens that are
    # not listed here are styled as 'normal'.
    _TOKEN_TYPES = {
        '[CLS]': 'cls',
        '[SEP]': 'sep',
        '[PAD]': 'pad',
        '[UNK]': 'unk',
        '[MASK]': 'special',
        '</s>': 'special',
        '<s>': 'special',
    }

    # Markers removed from the "tokens" summary returned by predict_single.
    _NON_MEANINGFUL_TOKENS = ('[PAD]', '<pad>', '</s>', '<s>')

    # NOTE(review): these IDs (and the matching legend in the report) are
    # hard-coded as 0=[UNK], 1=[SEP], 2=[PAD], 3=[CLS], 4=[MASK]; they are
    # assumed to match the loaded model's vocabulary -- confirm.
    _SPECIAL_INPUT_IDS = (0, 1, 2, 3, 4)

    def __init__(self, model_dir):
        """Load the tokenizer and the model from *model_dir*, in eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        self.model.eval()

    def predict_single(self, text):
        """Classify one text and return the prediction with tokenization details.

        The text is padded/truncated to 128 tokens before it is fed to the
        model.

        Args:
            text: The text to classify.

        Returns:
            A dict with the keys ``text``, ``tokens`` (tokens without padding
            and ``<s>``/``</s>`` markers), ``all_tokens``, ``input_ids``,
            ``token_id_mapping`` (one dict per position), ``attention_mask``,
            ``prediction`` (``"SPAM"`` or ``"HAM"``), ``confidence``
            (probability of the predicted class), ``spam_prob`` and
            ``ham_prob``.
        """
        encoded = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        )
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

        # Every token of the padded sequence, special tokens included.
        all_tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
        meaningful_tokens = [
            token for token in all_tokens
            if token not in self._NON_MEANINGFUL_TOKENS
        ]
        input_ids_list = input_ids[0].tolist()

        # Per-position record pairing each token with its ID and styling type.
        token_id_mapping = []
        for position, (token, input_id) in enumerate(zip(all_tokens, input_ids_list)):
            token_type = self._get_token_type(token)
            token_id_mapping.append({
                'position': position,
                'token': token,
                'input_id': input_id,
                # Every recognised special token has a non-'normal' type.
                'is_special': token_type != 'normal',
                'token_type': token_type,
            })

        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits[0]
            probabilities = torch.softmax(logits, dim=0)
            pred = torch.argmax(logits).item()
            confidence = probabilities[pred].item()

        label = "SPAM" if pred == 1 else "HAM"

        return {
            "text": text,
            "tokens": meaningful_tokens,
            "all_tokens": all_tokens,
            "input_ids": input_ids_list,
            "token_id_mapping": token_id_mapping,
            "attention_mask": attention_mask[0].tolist(),
            "prediction": label,
            "confidence": confidence,
            "spam_prob": probabilities[1].item(),
            "ham_prob": probabilities[0].item(),
        }

    def _get_token_type(self, token):
        """Return the CSS styling class for *token* ('normal' if not special)."""
        if not isinstance(token, str):
            return 'normal'
        return SpamDetectionHTMLReporter._TOKEN_TYPES.get(token, 'normal')

    @staticmethod
    def _coerce_sequence(value):
        """Return *value* as a list.

        Strings holding a list/tuple literal (e.g. a column read back from a
        CSV file) are parsed with ``ast.literal_eval`` -- never ``eval`` -- and
        any other string is split on whitespace. Non-iterable values such as
        ``None`` or NaN yield an empty list.
        """
        if isinstance(value, str):
            try:
                parsed = ast.literal_eval(value)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                parsed = None
            if isinstance(parsed, (list, tuple)):
                return list(parsed)
            return value.split()
        try:
            return list(value)
        except TypeError:
            return []

    def _render_prediction_card(self, idx, row):
        """Return the HTML card for one result row.

        Args:
            idx: Index label of the row; shown as ``Sample #{idx + 1}``.
            row: Row of the results frame, with the keys produced by
                ``predict_single`` (``all_tokens`` and ``attention_mask`` are
                optional).
        """
        confidence = row['confidence']
        if confidence >= 0.9:
            confidence_class, confidence_text = "high-confidence", "High"
        elif confidence >= 0.7:
            confidence_class, confidence_text = "medium-confidence", "Medium"
        else:
            confidence_class, confidence_text = "low-confidence", "Low"

        is_spam = row['prediction'] == "SPAM"
        class_name = "spam" if is_spam else "ham"
        label_class = "spam-label" if is_spam else "ham-label"
        emoji = "üö®" if is_spam else "‚úÖ"
        prediction_text = html.escape(str(row['prediction']))

        # Sequence columns may be real lists or their string representation.
        tokens = self._coerce_sequence(row['tokens'])
        if 'all_tokens' in row:
            all_tokens = self._coerce_sequence(row['all_tokens'])
        else:
            all_tokens = tokens
        input_ids = self._coerce_sequence(row['input_ids'])
        if 'attention_mask' in row:
            attention_mask = self._coerce_sequence(row['attention_mask'])
        else:
            attention_mask = [1] * len(input_ids)
        try:
            active_tokens = sum(attention_mask)
        except TypeError:
            # Unparseable mask entries: treat every position as active.
            active_tokens = len(input_ids)

        # Tokens such as <s> or </s> must be escaped or the browser would
        # interpret them as markup.
        tokens_html = ''.join(
            f'<span class="token {self._get_token_type(token)}">{html.escape(str(token))}</span>'
            for token in all_tokens
        )

        id_spans = []
        for position, input_id in enumerate(input_ids):
            id_class = 'special' if input_id in self._SPECIAL_INPUT_IDS else ''
            id_text = html.escape(str(input_id))
            id_spans.append(
                f'<span class="input-id {id_class}" title="Position {position}: ID {id_text}">{id_text}</span>'
            )
        input_ids_html = ''.join(id_spans)

        # Full text, no truncation; escaped for both element and attribute use.
        full_text = str(row['text'])
        safe_text = html.escape(full_text)
        search_text = html.escape(full_text.lower())

        # Classes consumed by the filter buttons of the report.
        filter_classes = [class_name]
        if confidence > 0.9:
            filter_classes.append('high-conf')

        return f"""
                <div class="prediction-card {class_name}" data-filter="{' '.join(filter_classes)}" data-text="{search_text}">
                    <div class="sample-number">Sample #{idx + 1}</div>

                    <div class="card-header">
                        <span class="prediction-label {label_class}">{emoji} {prediction_text}</span>
                        <span class="confidence-badge {confidence_class}">{confidence_text}: {confidence:.1%}</span>
                    </div>

                    <div class="text-content">
                        <strong>üìù Complete Text:</strong><br>
                        {safe_text}
                    </div>

                    <div class="probability-bars">
                        <div class="prob-bar">
                            <div class="prob-fill ham-bar" style="width: {row['ham_prob']*100:.1f}%">
                                HAM: {row['ham_prob']:.1%}
                            </div>
                        </div>
                        <div class="prob-bar">
                            <div class="prob-fill spam-bar" style="width: {row['spam_prob']*100:.1f}%">
                                SPAM: {row['spam_prob']:.1%}
                            </div>
                        </div>
                    </div>

                    <div class="tokens-section">
                        <div class="tokens-header">
                            <span>üî§ All Tokens (Including Special Tokens)</span>
                            <span>Total: {len(all_tokens)} tokens | Active: {active_tokens}</span>
                        </div>
                        <div class="tokens-display">
                            {tokens_html}
                        </div>
                        <div style="margin-top: 8px; font-size: 11px; color: #666;">
                            <strong>Legend:</strong>
                            <span class="token cls">CLS</span>
                            <span class="token sep">SEP</span>
                            <span class="token pad">PAD</span>
                            <span class="token unk">UNK</span>
                            <span class="token special">Other Special</span>
                            <span class="token normal">Normal</span>
                        </div>
                    </div>

                    <div class="input-ids-section">
                        <div class="ids-header">
                            <span>üî¢ Input IDs Sequence</span>
                            <span>Length: {len(input_ids)} | Max Length: 128</span>
                        </div>
                        <div class="ids-display">
                            {input_ids_html}
                        </div>
                        <div style="margin-top: 8px; font-size: 11px; color: #666;">
                            <strong>Special Token IDs:</strong>
                            <span class="input-id special">0=[UNK]</span>
                            <span class="input-id special">1=[SEP]</span>
                            <span class="input-id special">2=[PAD]</span>
                            <span class="input-id special">3=[CLS]</span>
                            <span class="input-id special">4=[MASK]</span>
                        </div>
                    </div>
                </div>
            """

    def create_full_html_report(self, results_df, output_file="spam_detection_full_report.html"):
        """Write a standalone HTML report for *results_df* to *output_file*.

        The page contains summary statistics, filter/search controls and one
        card per row showing the full text, class probabilities, tokens and
        input IDs. All data-derived text is HTML-escaped.

        Args:
            results_df: DataFrame with the columns produced by
                ``predict_single`` (``all_tokens`` and ``attention_mask`` are
                optional). List columns may hold real lists or their string
                representation. An empty frame yields a report without cards.
            output_file: Destination path; written as UTF-8.
        """
        page_head = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>Spam Detection Analysis - Complete Report</title>
            <meta charset="UTF-8">
            <style>
                body {
                    font-family: 'Segoe UI', Arial, sans-serif;
                    margin: 20px;
                    background-color: #f5f5f5;
                    line-height: 1.6;
                }
                .container { max-width: 100%; margin: 0 auto; }
                .header {
                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    color: white;
                    padding: 30px;
                    text-align: center;
                    border-radius: 15px;
                    margin-bottom: 30px;
                    box-shadow: 0 4px 15px rgba(0,0,0,0.1);
                }
                .stats {
                    display: grid;
                    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
                    gap: 20px;
                    margin-bottom: 30px;
                }
                .stat-card {
                    background: white;
                    padding: 25px;
                    border-radius: 12px;
                    text-align: center;
                    box-shadow: 0 4px 15px rgba(0,0,0,0.1);
                    transition: transform 0.3s ease;
                }
                .stat-card:hover { transform: translateY(-5px); }
                .stat-number { font-size: 32px; font-weight: bold; color: #333; margin-bottom: 8px; }
                .stat-label { color: #666; font-size: 16px; }

                .controls {
                    background: white;
                    padding: 20px;
                    border-radius: 12px;
                    margin-bottom: 20px;
                    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
                }
                .filter-btn {
                    background: #667eea;
                    color: white;
                    border: none;
                    padding: 10px 20px;
                    margin: 5px;
                    border-radius: 8px;
                    cursor: pointer;
                    transition: background 0.3s ease;
                }
                .filter-btn:hover { background: #5a6fd8; }
                .filter-btn.active { background: #764ba2; }

                .search-box {
                    width: 100%;
                    max-width: 400px;
                    padding: 12px;
                    border: 2px solid #ddd;
                    border-radius: 8px;
                    font-size: 16px;
                    margin: 10px 0;
                }

                .results-container {
                    background: white;
                    border-radius: 12px;
                    overflow: hidden;
                    box-shadow: 0 4px 15px rgba(0,0,0,0.1);
                }

                .prediction-card {
                    border-bottom: 1px solid #eee;
                    padding: 25px;
                    transition: background-color 0.3s ease;
                }
                .prediction-card:hover { background-color: #f8f9fa; }
                .prediction-card:last-child { border-bottom: none; }

                .spam { border-left: 6px solid #e74c3c; }
                .ham { border-left: 6px solid #27ae60; }

                .card-header {
                    display: flex;
                    justify-content: space-between;
                    align-items: center;
                    margin-bottom: 15px;
                    flex-wrap: wrap;
                }

                .prediction-label {
                    font-size: 18px;
                    font-weight: bold;
                    padding: 8px 16px;
                    border-radius: 20px;
                    color: white;
                }
                .spam-label { background: #e74c3c; }
                .ham-label { background: #27ae60; }

                .confidence-badge {
                    font-size: 14px;
                    padding: 6px 12px;
                    border-radius: 15px;
                    color: white;
                    font-weight: bold;
                }
                .high-confidence { background: #27ae60; }
                .medium-confidence { background: #f39c12; }
                .low-confidence { background: #e74c3c; }

                .text-content {
                    background: #f8f9fa;
                    padding: 20px;
                    border-radius: 8px;
                    margin: 15px 0;
                    font-family: 'Courier New', monospace;
                    font-size: 14px;
                    line-height: 1.8;
                    word-wrap: break-word;
                    white-space: pre-wrap;
                    border: 1px solid #dee2e6;
                }

                .probability-bars {
                    display: grid;
                    grid-template-columns: 1fr 1fr;
                    gap: 15px;
                    margin: 15px 0;
                }

                .prob-bar {
                    background: #f1f3f4;
                    border-radius: 10px;
                    overflow: hidden;
                    position: relative;
                    height: 35px;
                }

                .prob-fill {
                    height: 100%;
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    color: white;
                    font-weight: bold;
                    font-size: 12px;
                    transition: width 0.3s ease;
                }

                .ham-bar { background: #27ae60; }
                .spam-bar { background: #e74c3c; }

                .tokens-section {
                    background: #e3f2fd;
                    padding: 15px;
                    border-radius: 8px;
                    margin: 15px 0;
                    border: 1px solid #bbdefb;
                }

                .input-ids-section {
                    background: #fff3e0;
                    padding: 15px;
                    border-radius: 8px;
                    margin: 15px 0;
                    border: 1px solid #ffcc02;
                }

                .tokens-header, .ids-header {
                    font-weight: bold;
                    margin-bottom: 10px;
                    display: flex;
                    justify-content: space-between;
                    align-items: center;
                }

                .tokens-header { color: #1976d2; }
                .ids-header { color: #f57c00; }

                .tokens-display, .ids-display {
                    font-family: 'Courier New', monospace;
                    font-size: 12px;
                    background: white;
                    padding: 12px;
                    border-radius: 6px;
                    line-height: 1.6;
                    max-height: 200px;
                    overflow-y: auto;
                }

                .tokens-display {
                    border: 1px solid #90caf9;
                }

                .ids-display {
                    border: 1px solid #ffb74d;
                }

                .token {
                    display: inline-block;
                    color: white;
                    padding: 2px 6px;
                    margin: 2px;
                    border-radius: 4px;
                    font-size: 11px;
                }

                .token.normal { background: #2196f3; }
                .token.special { background: #e91e63; font-weight: bold; }
                .token.cls { background: #4caf50; }
                .token.sep { background: #ff9800; }
                .token.pad { background: #9e9e9e; }
                .token.unk { background: #f44336; }

                .input-id {
                    display: inline-block;
                    background: #ff9800;
                    color: white;
                    padding: 2px 6px;
                    margin: 2px;
                    border-radius: 4px;
                    font-size: 11px;
                    min-width: 25px;
                    text-align: center;
                }

                .input-id.special { background: #e91e63; font-weight: bold; }

                .sample-number {
                    color: #666;
                    font-size: 14px;
                    margin-bottom: 10px;
                }

                .hidden { display: none; }

                .pagination {
                    text-align: center;
                    padding: 20px;
                    background: white;
                    border-radius: 0 0 12px 12px;
                }

                .page-btn {
                    background: #667eea;
                    color: white;
                    border: none;
                    padding: 8px 15px;
                    margin: 0 5px;
                    border-radius: 5px;
                    cursor: pointer;
                }

                .page-btn:hover { background: #5a6fd8; }
                .page-btn.active { background: #764ba2; }
                .page-btn:disabled { background: #ccc; cursor: not-allowed; }

                @media (max-width: 768px) {
                    .container { margin: 10px; }
                    .card-header { flex-direction: column; align-items: flex-start; }
                    .probability-bars { grid-template-columns: 1fr; }
                }
            </style>
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>üîç Spam Detection Analysis Report</h1>
                    <p>Complete tokenization and prediction analysis with full text display</p>
                </div>
        """

        # Summary statistics. Ratios and the mean are guarded so that an empty
        # frame renders zeros instead of raising ZeroDivisionError / showing NaN.
        total_count = len(results_df)
        spam_count = len(results_df[results_df['prediction'] == 'SPAM'])
        ham_count = len(results_df[results_df['prediction'] == 'HAM'])
        high_conf_count = len(results_df[results_df['confidence'] > 0.9])
        if total_count:
            avg_confidence = results_df['confidence'].mean()
            spam_share = spam_count / total_count
            ham_share = ham_count / total_count
        else:
            avg_confidence = spam_share = ham_share = 0.0

        summary_html = f"""
                <div class="stats">
                    <div class="stat-card">
                        <div class="stat-number">{total_count:,}</div>
                        <div class="stat-label">Total Samples</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-number">{spam_count:,}</div>
                        <div class="stat-label">SPAM Detected ({spam_share:.1%})</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-number">{ham_count:,}</div>
                        <div class="stat-label">HAM Normal ({ham_share:.1%})</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-number">{avg_confidence:.1%}</div>
                        <div class="stat-label">Avg Confidence</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-number">{high_conf_count:,}</div>
                        <div class="stat-label">High Confidence (>90%)</div>
                    </div>
                </div>

                <div class="controls">
                    <h3>üéõÔ∏è Controls & Filters</h3>
                    <input type="text" id="searchBox" class="search-box" placeholder="Search in text content...">
                    <br>
                    <button class="filter-btn active" onclick="filterResults('all')">All ({total_count})</button>
                    <button class="filter-btn" onclick="filterResults('spam')">SPAM Only ({spam_count})</button>
                    <button class="filter-btn" onclick="filterResults('ham')">HAM Only ({ham_count})</button>
                    <button class="filter-btn" onclick="filterResults('high-conf')">High Confidence ({high_conf_count})</button>
                </div>

                <div class="results-container">
        """

        # One card per prediction, each with the full text.
        cards = [
            self._render_prediction_card(idx, row)
            for idx, row in results_df.iterrows()
        ]

        # Pagination container and the client-side filter/search/paging script.
        page_tail = """
                </div>

                <div class="pagination" id="pagination">
                    <!-- Pagination will be added by JavaScript -->
                </div>
            </div>

            <script>
                let currentFilter = 'all';
                let currentPage = 1;
                const itemsPerPage = 20;

                function filterResults(filter) {
                    currentFilter = filter;
                    currentPage = 1;

                    // Update button states
                    document.querySelectorAll('.filter-btn').forEach(btn => btn.classList.remove('active'));
                    event.target.classList.add('active');

                    applyFilters();
                }

                function applyFilters() {
                    const searchTerm = document.getElementById('searchBox').value.toLowerCase();
                    const cards = document.querySelectorAll('.prediction-card');
                    let visibleCards = [];

                    cards.forEach(card => {
                        const matchesFilter = currentFilter === 'all' || card.dataset.filter.includes(currentFilter);
                        const matchesSearch = card.dataset.text.includes(searchTerm);

                        if (matchesFilter && matchesSearch) {
                            visibleCards.push(card);
                        }
                    });

                    // Hide all cards first
                    cards.forEach(card => card.style.display = 'none');

                    // Show paginated results
                    const startIndex = (currentPage - 1) * itemsPerPage;
                    const endIndex = startIndex + itemsPerPage;
                    const pageCards = visibleCards.slice(startIndex, endIndex);

                    pageCards.forEach(card => card.style.display = 'block');

                    // Update pagination
                    updatePagination(visibleCards.length);
                }

                function updatePagination(totalItems) {
                    const totalPages = Math.ceil(totalItems / itemsPerPage);
                    const pagination = document.getElementById('pagination');

                    let paginationHTML = '';

                    if (totalPages > 1) {
                        // Previous button
                        paginationHTML += `<button class="page-btn" onclick="changePage(${currentPage - 1})" ${currentPage === 1 ? 'disabled' : ''}>Previous</button>`;

                        // Page numbers
                        for (let i = 1; i <= totalPages; i++) {
                            if (i === currentPage) {
                                paginationHTML += `<button class="page-btn active">${i}</button>`;
                            } else if (i === 1 || i === totalPages || (i >= currentPage - 2 && i <= currentPage + 2)) {
                                paginationHTML += `<button class="page-btn" onclick="changePage(${i})">${i}</button>`;
                            } else if (i === currentPage - 3 || i === currentPage + 3) {
                                paginationHTML += `<span>...</span>`;
                            }
                        }

                        // Next button
                        paginationHTML += `<button class="page-btn" onclick="changePage(${currentPage + 1})" ${currentPage === totalPages ? 'disabled' : ''}>Next</button>`;
                    }

                    paginationHTML += `<p>Showing ${Math.min((currentPage - 1) * itemsPerPage + 1, totalItems)}-${Math.min(currentPage * itemsPerPage, totalItems)} of ${totalItems} results</p>`;

                    pagination.innerHTML = paginationHTML;
                }

                function changePage(page) {
                    const searchTerm = document.getElementById('searchBox').value.toLowerCase();
                    const cards = document.querySelectorAll('.prediction-card');
                    let visibleCards = 0;

                    cards.forEach(card => {
                        const matchesFilter = currentFilter === 'all' || card.dataset.filter.includes(currentFilter);
                        const matchesSearch = card.dataset.text.includes(searchTerm);
                        if (matchesFilter && matchesSearch) visibleCards++;
                    });

                    const totalPages = Math.ceil(visibleCards / itemsPerPage);

                    if (page >= 1 && page <= totalPages) {
                        currentPage = page;
                        applyFilters();
                    }
                }

                // Search functionality
                document.getElementById('searchBox').addEventListener('input', function() {
                    currentPage = 1;
                    applyFilters();
                });

                // Initialize
                applyFilters();
            </script>
        </body>
        </html>
        """

        # Assemble once with join instead of growing a string inside the loop.
        html_content = ''.join([page_head, summary_html, *cards, page_tail])

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"üìä Complete HTML report saved: {output_file}")

def main(model_dir=None, data_files=None):
    """Classify every text of the input CSV files and write CSV + HTML reports.

    Each CSV file must contain a ``text`` column; rows with a missing text are
    dropped. Results are written to ``spam_detection_complete_results.csv`` and
    to the reporter's default HTML report file, and a summary is printed.

    Args:
        model_dir: Directory holding the tokenizer and model. Defaults to the
            original hard-coded model path.
        data_files: Iterable of CSV paths to analyze. Defaults to the original
            TikTok, Twitter and Instagram files.

    Returns:
        DataFrame with one row per analyzed text (the dicts returned by
        ``predict_single``); an empty DataFrame when there is no text to
        analyze.
    """
    if model_dir is None:
        model_dir = "/Users/rhd/Documents/Raihan/Dev/Model-ML/spam-detection-twitter/models/v2"
    if data_files is None:
        data_files = (
            "/Users/rhd/Documents/Raihan/Dev/Model-ML/spam-detection-twitter/models/NLP_explore/tiktok_posts.csv",
            "/Users/rhd/Documents/Raihan/Dev/Model-ML/spam-detection-twitter/models/NLP_explore/twitter_posts.csv",
            "/Users/rhd/Documents/Raihan/Dev/Model-ML/spam-detection-twitter/models/NLP_explore/instagram_comments.csv",
        )

    # Initialize analyzer
    reporter = SpamDetectionHTMLReporter(model_dir)

    # Load data: only the "text" column of each file is used.
    print("üì• Loading datasets...")
    frames = [pd.read_csv(path)[["text"]] for path in data_files]

    # Combine and clean
    df_all = pd.concat(frames, ignore_index=True)
    df_all = df_all.dropna(subset=["text"])

    print(f"üìä Total samples: {len(df_all)}")

    # Without any sample the report and the summary percentages below cannot
    # be computed, so stop early instead of crashing.
    if df_all.empty:
        print("No text samples found; nothing to analyze.")
        return pd.DataFrame()

    # Process all data
    print(f"‚ö° Processing all {len(df_all)} samples...")
    results = [
        reporter.predict_single(text)
        for text in tqdm(df_all["text"], desc="Processing")
    ]

    # Create DataFrame
    df_result = pd.DataFrame(results)

    # Save CSV
    csv_output = "spam_detection_complete_results.csv"
    df_result.to_csv(csv_output, index=False)
    print(f"‚úÖ CSV saved: {csv_output}")

    # Create the HTML report with the full text of every sample
    reporter.create_full_html_report(df_result)

    # Summary
    total_samples = len(df_result)
    spam_count = len(df_result[df_result['prediction'] == 'SPAM'])
    ham_count = len(df_result[df_result['prediction'] == 'HAM'])

    print(f"\nüìà SUMMARY:")
    print(f"Total samples: {total_samples:,}")
    print(f"SPAM detected: {spam_count:,} ({spam_count/total_samples:.1%})")
    print(f"HAM (normal): {ham_count:,} ({ham_count/total_samples:.1%})")
    print(f"Average confidence: {df_result['confidence'].mean():.2%}")

    return df_result

# Run the analysis when this code is executed as the main module and bind the
# DataFrame returned by main() to `results_df`.
if __name__ == "__main__":
    results_df = main()

In [4]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich import print as rprint
import warnings

warnings.filterwarnings("ignore")

# Initialize rich console
console = Console()

# Load the tokenizer and the model
# NOTE(review): the tokenizer is loaded by the name
# "indolem/indobertweet-base-uncased" while the model is loaded from
# `model_dir`; presumably the model in `model_dir` was fine-tuned from that
# checkpoint -- confirm both use the same vocabulary.
model_dir = "/Users/rhd/Documents/Raihan/Dev/Model-ML/spam-detection-twitter/models/v2"
tokenizer = AutoTokenizer.from_pretrained("indolem/indobertweet-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
model.eval()

console.print("ü§ñ [bold blue]Spam Detection Model Loaded![/bold blue]")


def analyze_single_sentence(text: str) -> dict:
    """Classify one sentence and print a detailed tokenization/prediction report.

    The text is tokenized with the module-level ``tokenizer`` (padded and
    truncated to 128 tokens), scored with the module-level ``model``, and the
    report is written to the module-level rich ``console``.

    Args:
        text: Sentence to analyze. It is echoed verbatim: rich markup and
            emoji codes inside it are not interpreted.

    Returns:
        A dict with the keys ``text``, ``prediction`` ("SPAM" when the argmax
        class index is 1, otherwise "HAM"), ``confidence`` (probability of the
        predicted class), ``spam_prob``, ``ham_prob``, ``all_tokens``,
        ``meaningful_tokens`` (tokens other than [PAD]/[CLS]/[SEP]),
        ``input_ids``, ``attention_mask``, ``total_tokens``,
        ``active_tokens`` (sum of the attention mask) and ``meaningful_count``.
    """
    rule = "=" * 80
    console.print(f"\n{rule}")
    console.print("üîç [bold blue]ANALYZING SENTENCE[/bold blue]")
    console.print(rule)

    # Display original text. It is arbitrary user input, so it must bypass
    # markup/emoji parsing: text such as "[/x]" would otherwise raise a
    # MarkupError and "[red]" would silently vanish from the output.
    console.print("üìù [bold]Original Text:[/bold]")
    console.print(f'   "{text}"', markup=False, emoji=False)

    # Tokenization (fixed length of 128, so short inputs are padded)
    encoded = tokenizer(
        text, padding="max_length", truncation=True, max_length=128, return_tensors="pt"
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # All tokens, including special tokens and padding
    all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    meaningful_tokens = [
        token for token in all_tokens if token not in ("[PAD]", "[CLS]", "[SEP]")
    ]

    input_ids_list = input_ids[0].tolist()
    attention_mask_list = attention_mask[0].tolist()

    # Count active tokens (positions where attention_mask == 1)
    active_tokens = sum(attention_mask_list)

    # Prediction with confidence (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits[0]
        probabilities = torch.softmax(logits, dim=0)
        pred = torch.argmax(logits).item()

    confidence = probabilities[pred].item()
    ham_prob = probabilities[0].item()
    spam_prob = probabilities[1].item()
    label = "SPAM" if pred == 1 else "HAM"

    # Display prediction results
    emoji = "üö®" if label == "SPAM" else "‚úÖ"
    color = "red" if label == "SPAM" else "green"
    console.print(f"\n{emoji} [bold {color}]PREDICTION: {label}[/bold {color}]")
    console.print(f"   Confidence: {confidence:.4f} ({confidence:.2%})")

    # Probability breakdown table (Dark mode optimized)
    prob_table = Table(
        title="üìä Probability Breakdown",
        show_header=True,
        header_style="bold bright_white",
    )
    prob_table.add_column("Class", style="bright_cyan", width=15)
    prob_table.add_column("Probability", style="bright_green", width=15)
    prob_table.add_column("Percentage", style="bright_yellow", width=15)
    prob_table.add_row("HAM (Normal)", f"{ham_prob:.6f}", f"{ham_prob:.2%}")
    prob_table.add_row("SPAM", f"{spam_prob:.6f}", f"{spam_prob:.2%}")
    console.print(prob_table)

    # Tokenization details (Dark mode optimized)
    console.print("\nüî§ [bold white]TOKENIZATION DETAILS[/bold white]")
    console.print(f"   [bright_cyan]Text length:[/bright_cyan] {len(text)} characters")
    console.print(
        f"   [bright_cyan]Total tokens:[/bright_cyan] {len(all_tokens)} (including special tokens)"
    )
    console.print(
        f"   [bright_cyan]Active tokens:[/bright_cyan] {active_tokens} (non-padding)"
    )
    console.print(
        f"   [bright_cyan]Meaningful tokens:[/bright_cyan] {len(meaningful_tokens)} (excluding special)"
    )

    # Display all tokens with special token highlighting (Dark mode optimized).
    # Tokens are appended to a Text object with an explicit style instead of
    # being spliced into a markup string: a token such as "\" would otherwise
    # escape the closing tag that follows it and corrupt the whole line.
    console.print("\nüéØ [bold white]ALL TOKENS (with special tokens):[/bold white]")
    bracketed_styles = {
        "[CLS]": "bold bright_green",
        "[SEP]": "bold bright_yellow",
        "[PAD]": "bright_black",
        "[UNK]": "bold bright_red",
    }
    token_line = Text("   ")
    for token in all_tokens:
        special_style = bracketed_styles.get(token)
        if special_style is None:
            token_line.append(token, style="bright_cyan")
        else:
            # Highlighted special tokens are shown wrapped in an extra pair of brackets.
            token_line.append(f"[{token}]", style=special_style)
        token_line.append(" ")
    console.print(token_line)

    # Display meaningful tokens only (Dark mode optimized)
    console.print("\nüìù [bold white]MEANINGFUL TOKENS ONLY:[/bold white]")
    meaningful_line = Text(" | ").join(
        Text(token, style="bright_cyan") for token in meaningful_tokens
    )
    console.print(Text.assemble("   ", meaningful_line))

    # Input IDs breakdown
    console.print("\nüî¢ [bold]INPUT IDS BREAKDOWN[/bold]")

    ids_table = Table(show_header=True, header_style="bold")
    ids_table.add_column("Position", style="dim", width=8)
    ids_table.add_column("Token", style="cyan", width=15)
    ids_table.add_column("Input ID", style="green", width=10)
    ids_table.add_column("Attention", style="yellow", width=10)
    ids_table.add_column("Special?", style="red", width=10)

    special_tokens = {"[CLS]", "[SEP]", "[PAD]", "[UNK]", "[MASK]"}
    cell_styles = {
        "[CLS]": "bold green",
        "[SEP]": "bold orange1",
        "[PAD]": "dim",
        "[UNK]": "bold red",
    }

    # Show only the first 20 positions in detail
    display_limit = min(20, len(all_tokens))
    detail_rows = zip(all_tokens[:display_limit], input_ids_list, attention_mask_list)
    for position, (token, input_id, mask_bit) in enumerate(detail_rows):
        ids_table.add_row(
            str(position),
            Text(token, style=cell_styles.get(token, "cyan")),
            str(input_id),
            "Active" if mask_bit == 1 else "Masked",
            "Yes" if token in special_tokens else "No",
        )

    if len(all_tokens) > display_limit:
        ids_table.add_row("...", "...", "...", "...", "...")
        console.print(
            f"   [dim](Showing first {display_limit} of {len(all_tokens)} tokens)[/dim]"
        )

    console.print(ids_table)

    # Complete input IDs sequence (Dark mode optimized), in chunks of 20
    console.print("\nüîó [bold white]COMPLETE INPUT IDS SEQUENCE:[/bold white]")
    chunk_size = 20
    last_index = len(input_ids_list) - 1
    for start in range(0, len(input_ids_list), chunk_size):
        chunk = input_ids_list[start : start + chunk_size]
        end = min(start + chunk_size - 1, last_index)
        console.print(
            f"   [bright_black][{start:3d}-{end:3d}]:[/bright_black] [bright_yellow]{chunk}[/bright_yellow]"
        )

    # Special token legend (Dark mode optimized). IDs are read from the loaded
    # tokenizer so the legend cannot drift from the vocabulary actually in use.
    console.print("\nüìö [bold white]SPECIAL TOKENS LEGEND:[/bold white]")
    legend_table = Table(show_header=True, header_style="bold bright_white")
    legend_table.add_column("Token", style="bright_cyan", width=10)
    legend_table.add_column("ID", style="bright_green", width=5)
    legend_table.add_column("Purpose", style="bright_yellow", width=30)

    legend_rows = (
        ("[CLS]", "Classification token (start of sequence)"),
        ("[SEP]", "Separator token (end of sequence)"),
        ("[PAD]", "Padding token (fill to max length)"),
        ("[UNK]", "Unknown token (out-of-vocabulary)"),
        ("[MASK]", "Mask token (for MLM tasks)"),
    )
    for special_token, purpose in legend_rows:
        token_id = tokenizer.convert_tokens_to_ids(special_token)
        legend_table.add_row(special_token, str(token_id), purpose)

    console.print(legend_table)

    return {
        "text": text,
        "prediction": label,
        "confidence": confidence,
        "spam_prob": spam_prob,
        "ham_prob": ham_prob,
        "all_tokens": all_tokens,
        "meaningful_tokens": meaningful_tokens,
        "input_ids": input_ids_list,
        "attention_mask": attention_mask_list,
        "total_tokens": len(all_tokens),
        "active_tokens": active_tokens,
        "meaningful_count": len(meaningful_tokens),
    }


# Interactive mode
def interactive_analysis():
    """Prompt repeatedly for a sentence and analyze each one.

    Every non-empty entry is handed to ``analyze_single_sentence``; afterwards
    the user may store that single result in a timestamped CSV file. The loop
    stops when the user types 'quit', 'exit' or 'q', or answers anything other
    than 'y' when asked whether to continue.
    """
    exit_words = ("quit", "exit", "q")
    separator = "=" * 60

    console.print("\nüéØ [bold blue]Single Sentence Spam Detection Analysis[/bold blue]")
    console.print("Enter 'quit' to exit")

    while True:
        console.print("\n" + separator)
        sentence = input("üìù Enter text to analyze: ").strip()

        # Exit keywords are matched case-insensitively.
        if sentence.lower() in exit_words:
            console.print("üëã [bold blue]Goodbye![/bold blue]")
            break

        # Blank input: ask again without running the model.
        if sentence == "":
            console.print("[red]Please enter some text![/red]")
            continue

        try:
            analysis = analyze_single_sentence(sentence)

            # Optionally persist this one result as a single-row CSV.
            wants_save = input("\nüíæ Save results to CSV? (y/n): ").strip().lower() == "y"
            if wants_save:
                stamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
                out_name = f"single_analysis_{stamp}.csv"
                pd.DataFrame([analysis]).to_csv(out_name, index=False)
                console.print(f"‚úÖ Results saved to: [cyan]{out_name}[/cyan]")
        except Exception as err:
            # Any failure in analysis or saving is reported and the loop goes on.
            console.print(f"[red]Error analyzing text: {err!s}[/red]")

        # Only an explicit "y" keeps the session going.
        answer = input("\nüîÑ Analyze another sentence? (y/n): ").strip().lower()
        if answer != "y":
            console.print("üëã [bold blue]Analysis complete![/bold blue]")
            break


# Example usage: when executed directly, analyze one fixed sample sentence.
if __name__ == "__main__":
    # You can either run interactive mode (interactive_analysis()) or analyze
    # a specific sentence; only the single-sentence path is run here.

    # NOTE(review): the sample appears to be an emoji-only string — confirm it is intentional.
    example_text = "üòÄüòÄ"
    result = analyze_single_sentence(example_text)