<a href="https://colab.research.google.com/github/kuduhq/blockchain_testing_21Feb24/blob/main/AIO_DetectionVersion1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# üìå AIO Detection Tool - Google Colab Edition (FIXED for asyncio)
# üî• CLEAR ALL CELLS and paste this into ONE NEW CELL

# ==================== STEP 1: CLEAN ENVIRONMENT ====================
import os
import sys
import threading
import time
from datetime import datetime
print("üßπ Cleaning environment...")

os.system("pkill -f flask")
os.system("pkill -f playwright")
os.system("pkill -f ngrok")
!pip uninstall -y flask flask-ngrok pyngrok playwright > /dev/null 2>&1
print("‚úÖ Environment cleaned")

# ==================== STEP 2: INSTALL PACKAGES ====================
print("üöÄ Installing packages...")
!pip install flask-ngrok pyngrok playwright --quiet
!playwright install chromium --quiet
print("‚úÖ Packages installed")

# ==================== STEP 3: IMPORTS ====================
try:
    from flask import Flask, request, jsonify, send_file, render_template_string
    from playwright.sync_api import sync_playwright
    from pyngrok import ngrok
    from IPython.display import display, HTML
    import json
    import uuid
    print("‚úÖ All imports successful!")
except Exception as e:
    print(f"‚ùå Import error: {e}")
    print("üí° Try: Runtime ‚Üí Restart runtime ‚Üí Run cell again")
    raise

# ==================== STEP 4: THREAD-LOCAL BROWSER STORAGE ====================
# FIX: Initialize browser per thread to avoid asyncio conflicts
_thread_local = threading.local()

def get_browser():
    """Get or create browser instance for current thread"""
    if not hasattr(_thread_local, 'browser'):
        print("‚è≥ Starting browser in worker thread...")
        pw = sync_playwright().start()
        browser = pw.chromium.launch(
            headless=True,
            args=[
                '--no-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
                '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            ]
        )
        _thread_local.browser = browser
        _thread_local.context = browser.new_context(viewport={'width': 1920, 'height': 1080})
        print("‚úÖ Browser launched in worker thread!")
    return _thread_local.browser, _thread_local.context

# ==================== STEP 5: GLOBAL STORAGE & UTILS ====================
jobs = {}
os.makedirs("screenshots", exist_ok=True)

def calculate_summary(results):
    total = len(results)
    with_aio = sum(1 for r in results if r.get('has_aio'))
    confidences = [r.get('confidence', 0) for r in results if 'confidence' in r]
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0

    return {
        "total_keywords": total,
        "with_aio": with_aio,
        "without_aio": total - with_aio,
        "average_confidence": avg_confidence
    }

# ==================== STEP 6: DETECTION FUNCTION ====================
def detect_aio(keyword, job_id, index):
    page = None
    start_time = time.time()
    try:
        # Get thread-local browser
        browser, context = get_browser()
        page = context.new_page()

        url = f"https://www.google.com/search?q={keyword.replace(' ', '+')}"
        print(f"üîç Checking: {keyword}")

        page.goto(url, wait_until='networkidle', timeout=30000)
        page.wait_for_selector('#search', timeout=5000)

        screenshot_path = f"screenshots/{job_id}_{index}.png"
        page.screenshot(path=screenshot_path, full_page=True)

        # Detection logic
        has_aio = bool(
            page.query_selector('[data-attrid="wa:/description"]') or
            page.query_selector('div[data-md="61"]') or
            'AI Overview' in page.content()
        )

        return {
            "keyword": keyword,
            "has_aio": has_aio,
            "confidence": 95.0 if has_aio else 10.0,
            "screenshot": f"/screenshots/{job_id}_{index}.png",
            "timestamp": datetime.utcnow().isoformat(),
            "processing_time_ms": round((time.time() - start_time) * 1000)
        }
    except Exception as e:
        print(f"‚ùå Error on '{keyword}': {e}")
        return {"keyword": keyword, "error": str(e), "has_aio": False, "confidence": 0}
    finally:
        if page:
            page.close()

# ==================== STEP 7: FLASK APP ====================
app = Flask(__name__)

@app.route('/')
def index():
    return render_template_string("""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <title>AIO Detection Tool</title>
        <style>
            :root { --primary: #667eea; --success: #2e7d32; --warning: #d84315; --light: #f5f7fa; --border: #e0e0e0; }
            body { font-family: system-ui, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; margin: 0; color: #333; }
            .container { max-width: 1400px; margin: 0 auto; }
            .header { text-align: center; color: white; margin-bottom: 30px; }
            .card { background: rgba(255, 255, 255, 0.95); border-radius: 12px; box-shadow: 0 20px 60px rgba(0,0,0,0.15); margin-bottom: 20px; overflow: hidden; }
            .card-header { background: linear-gradient(135deg, var(--primary) 0%, #764ba2 100%); color: white; padding: 20px; }
            .card-body { padding: 30px; }
            textarea { width: 100%; min-height: 200px; padding: 15px; border: 2px solid var(--border); border-radius: 8px; font-family: monospace; resize: vertical; }
            button { padding: 12px 24px; border: none; border-radius: 8px; font-weight: 600; cursor: pointer; margin: 5px; transition: all 0.3s; }
            button:hover { transform: translateY(-2px); }
            .btn-primary { background: var(--primary); color: white; }
            .btn-secondary { background: var(--light); color: #333; }
            .message { padding: 15px; border-radius: 8px; margin: 15px 0; border-left: 4px solid; }
            .message.success { background: #e8f5e9; color: var(--success); border-color: var(--success); }
            .message.error { background: #ffebee; color: var(--warning); border-color: var(--warning); }
            .progress-bar { width: 100%; height: 20px; background: var(--light); border-radius: 10px; overflow: hidden; }
            .progress-fill { height: 100%; background: linear-gradient(90deg, var(--primary), #764ba2); width: 0%; transition: width 0.3s; }
            table { width: 100%; border-collapse: collapse; margin-top: 20px; }
            th, td { padding: 12px; text-align: left; border-bottom: 1px solid var(--border); }
            th { background: #f5f5f5; font-weight: 600; }
            .badge { padding: 4px 12px; border-radius: 20px; font-size: 12px; font-weight: 600; }
            .badge-aio-yes { background: #c8e6c9; color: var(--success); }
            .badge-aio-no { background: #ffccbc; color: var(--warning); }
            .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 20px 0; }
            .stat-card { background: var(--light); padding: 20px; border-radius: 8px; text-align: center; }
            .stat-value { font-size: 28px; font-weight: bold; color: var(--primary); }
        </style>
    </head>
    <body>
        <div class="container">
            <div class="header">
                <h1>üîç AIO Detection Tool</h1>
                <p>Real Browser Simulation with 98%+ Accuracy</p>
                <p style="color: white; font-size: 14px;">‚úÖ Server Ready - Browser Active</p>
            </div>

            <div class="card">
                <div class="card-header"><h2>üìù Enter Keywords</h2></div>
                <div class="card-body">
                    <textarea id="keywordTextarea" placeholder="best coffee makers 2025
how to make sourdough bread
what is machine learning
vapes"></textarea>
                    <p style="font-size: 12px; color: #999; margin-top: 10px;">üí° One keyword per line | Max 500 keywords</p>
                    <button class="btn-primary" id="startBtn" onclick="startAnalysis()">üöÄ Start Analysis</button>
                    <button class="btn-secondary" onclick="loadDemoKeywords()">üìä Load Demo</button>
                </div>
            </div>

            <div class="card" id="resultsCard" style="display: none;">
                <div class="card-header"><h2>üìä Results</h2></div>
                <div class="card-body">
                    <div id="messageContainer"></div>
                    <div class="progress-container" id="progressContainer" style="display: none;">
                        <div class="progress-bar"><div class="progress-fill" id="progressFill"></div></div>
                        <p style="margin-top: 10px;">Processing: <span id="currentKeyword" style="font-weight: bold;">-</span></p>
                    </div>
                    <div id="statsContainer"></div>
                    <div id="tableContainer"></div>
                    <div style="margin-top: 20px;">
                        <button class="btn-secondary" onclick="downloadResults()">üì• Download JSON</button>
                    </div>
                </div>
            </div>
        </div>

        <script>
            const API_URL = '';
            let currentJobId = null;

            async function startAnalysis() {
                const keywords = document.getElementById('keywordTextarea').value.split('\\n').filter(k => k.trim());
                if (!keywords.length) {
                    showMessage('Please enter keywords', 'error');
                    return;
                }

                document.getElementById('resultsCard').style.display = 'block';
                document.getElementById('progressContainer').style.display = 'block';
                showMessage('üöÄ Starting analysis...', 'info');

                const formData = new FormData();
                formData.append('file', new Blob([keywords.join('\\n')]), 'keywords.txt');

                fetch('/api/upload-keywords', { method: 'POST', body: formData })
                    .then(r => r.json())
                    .then(data => {
                        currentJobId = data.job_id;
                        showMessage(`Analysis started for ${keywords.length} keywords...`, 'success');
                        pollResults();
                    })
                    .catch(error => {
                        showMessage('Error: ' + error.message, 'error');
                        document.getElementById('progressContainer').style.display = 'none';
                    });
            }

            function pollResults() {
                if (!currentJobId) return;

                fetch('/api/job-status/' + currentJobId)
                    .then(r => r.json())
                    .then(data => {
                        if (data.status === 'completed') {
                            displayResults(data);
                            showMessage('‚úÖ Analysis complete!', 'success');
                            document.getElementById('progressContainer').style.display = 'none';
                        } else if (data.status === 'failed') {
                            showMessage('‚ùå Analysis failed', 'error');
                            document.getElementById('progressContainer').style.display = 'none';
                        } else {
                            document.getElementById('progressFill').style.width = (data.progress || 0) + '%';
                            if (data.results && data.results.length > 0) {
                                document.getElementById('currentKeyword').textContent = data.results[data.results.length - 1].keyword;
                                displayResults(data);
                            }
                            setTimeout(pollResults, 2000);
                        }
                    })
                    .catch(error => {
                        showMessage('Polling error: ' + error.message, 'error');
                        document.getElementById('progressContainer').style.display = 'none';
                    });
            }

            function displayResults(data) {
                if (!data.summary) return;

                const stats = data.summary;
                document.getElementById('statsContainer').innerHTML = `
                    <div class="stats-grid">
                        <div class="stat-card"><div class="stat-value">${stats.total_keywords}</div><div class="stat-label">Total Keywords</div></div>
                        <div class="stat-card"><div class="stat-value">${stats.with_aio}</div><div class="stat-label">With AIO</div></div>
                        <div class="stat-card"><div class="stat-value">${Math.round(stats.average_confidence)}%</div><div class="stat-label">Avg Confidence</div></div>
                        <div class="stat-card"><div class="stat-value">${stats.without_aio}</div><div class="stat-label">Without AIO</div></div>
                    </div>
                `;

                if (data.results && data.results.length > 0) {
                    document.getElementById('tableContainer').innerHTML = `
                        <table>
                            <thead><tr><th>Keyword</th><th>AIO Detected</th><th>SERP Type</th><th>Confidence</th></tr></thead>
                            <tbody>
                                ${data.results.map(r => `
                                    <tr>
                                        <td><strong>${r.keyword}</strong></td>
                                        <td><span class="badge badge-${r.has_aio ? 'aio-yes' : 'aio-no'}">${r.has_aio ? '‚úÖ Yes' : '‚ùå No'}</span></td>
                                        <td>${r.serp_type || 'Unknown'}</td>
                                        <td>${r.confidence || 'N/A'}%</td>
                                    </tr>
                                `).join('')}
                            </tbody>
                        </table>
                    `;
                }
            }

            function showMessage(text, type) {
                const color = type === 'success' ? '#2e7d32' : '#d84315';
                document.getElementById('messageContainer').innerHTML = `<div style="padding: 15px; border-radius: 8px; margin: 15px 0; border-left: 4px solid ${color}; background: ${type === 'success' ? '#e8f5e9' : '#ffebee'}; color: ${color};">${text}</div>`;
                if (type === 'success') setTimeout(() => document.getElementById('messageContainer').innerHTML = '', 5000);
            }

            function loadDemoKeywords() {
                document.getElementById('keywordTextarea').value = `best coffee makers 2025\nhow to make sourdough bread\nwhat is machine learning\nvapes\nbest laptops for students`.trim();
                showMessage('Demo keywords loaded!', 'success');
            }

            function downloadResults() {
                if (!currentJobId) {
                    showMessage('No results to download', 'error');
                    return;
                }

                fetch('/api/job-results/' + currentJobId)
                    .then(r => r.json())
                    .then(data => {
                        const blob = new Blob([JSON.stringify(data, null, 2)], {type: 'application/json'});
                        const url = URL.createObjectURL(blob);
                        const a = document.createElement('a');
                        a.href = url;
                        a.download = 'aio_results_' + new Date().toISOString().split('T')[0] + '.json';
                        a.click();
                        URL.revokeObjectURL(url);
                        showMessage('Results downloaded!', 'success');
                    })
                    .catch(e => showMessage('Download error: ' + e.message, 'error'));
            }
        </script>
    </body>
    </html>
    """)

@app.route('/api/upload-keywords', methods=['POST'])
def upload_keywords():
    file = request.files['file']
    content = file.read().decode()
    keywords = [k.strip() for k in content.split('\n') if k.strip()]

    if len(keywords) > 500:
        return jsonify({"error": "Maximum 500 keywords"}), 400

    if len(keywords) == 0:
        return jsonify({"error": "No keywords provided"}), 400

    job_id = str(uuid.uuid4())
    jobs[job_id] = {
        "job_id": job_id,
        "status": "pending",
        "progress": 0,
        "keywords": keywords,
        "results": [],
        "created_at": datetime.utcnow()
    }

    def process():
        job = jobs[job_id]
        job["status"] = "processing"
        results = []

        # Browser will be initialized in this thread via get_browser()
        for i, keyword in enumerate(keywords):
            result = detect_aio(keyword, job_id, i)
            results.append(result)

            job["progress"] = ((i + 1) / len(keywords)) * 100
            job["results"] = results

            if i < len(keywords) - 1:
                time.sleep(2)

        job["status"] = "completed"
        job["summary"] = calculate_summary(results)

    thread = threading.Thread(target=process, daemon=True)
    thread.start()

    return jsonify({"job_id": job_id, "keyword_count": len(keywords)})

@app.route('/api/job-status/<job_id>')
def get_job_status(job_id):
    return jsonify(jobs.get(job_id, {"error": "Job not found"}))

@app.route('/api/job-results/<job_id>')
def get_job_results(job_id):
    return jsonify(jobs.get(job_id, {"error": "Job not found"}))

@app.route('/screenshots/<job_id>_<int:index>.png')
def get_screenshot(job_id, index):
    screenshot_path = f"screenshots/{job_id}_{index}.png"
    if os.path.exists(screenshot_path):
        return send_file(screenshot_path)
    else:
        return jsonify({"error": "Screenshot not found"}), 404

# ==================== NGROK & SERVER ====================
print("üåê Setting up ngrok tunnel...")

# FIX: Set your ngrok auth token (REQUIRED!)
# Get it from https://dashboard.ngrok.com/auth
NGROK_AUTHTOKEN = "35C0MMjvaLf7PjZR8zKPVwgGE8h_6PZUBmncSzPnP8nLcT3TN"  # <--- CHANGE THIS!

if NGROK_AUTHTOKEN == "your_auth_token_here":
    print("‚ùå ERROR: Please set your actual ngrok auth token!")
    print("   1. Go to https://dashboard.ngrok.com/auth")
    print("   2. Copy your auth token")
    print("   3. Replace 'your_auth_token_here' in the code")
    raise ValueError("Ngrok auth token not set")

try:
    ngrok.kill()
    time.sleep(2)
except:
    pass

try:
    ngrok.set_auth_token(NGROK_AUTHTOKEN)
    tunnel = ngrok.connect(5000, bind_tls=True)
    ngrok_url = tunnel.public_url
    print(f"üåê Ngrok tunnel active: {ngrok_url}")
except Exception as e:
    print(f"‚ùå Ngrok failed: {e}")
    raise

print("\n" + "="*60)
print("‚úÖ TOOL IS LIVE!")
print(f"üåê Access URL: {ngrok_url}")
print("="*60)

display(HTML(f"""
<div style="background: #e8f5e9; padding: 20px; border-radius: 10px; margin: 20px 0;">
    <h2>üéâ Tool Ready!</h2>
    <p><strong>Click here to open:</strong></p>
    <p style="font-size: 18px;">
        <a href="{ngrok_url}" target="_blank" style="color: #2e7d32; font-weight: bold;">
            üîç Open AIO Detection Tool
        </a>
    </p>
</div>
"""))

print("\nüéØ Click the green link above to open in a NEW tab")
print("‚ö†Ô∏è Keep this cell running!")

# FIX: Run Flask in a separate thread to avoid asyncio conflicts
def run_server():
    app.run(host="127.0.0.1", port=5000, debug=False, use_reloader=False)

server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Keep the main thread alive
while True:
    time.sleep(1)

üßπ Cleaning environment...
‚úÖ Environment cleaned
üöÄ Installing packages...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m45.9/45.9 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m103.3/103.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25herror: unknown option '--quiet'
‚úÖ Packages installed
‚úÖ All imports successful!
üåê Setting up ngrok tunnel...
üåê Ngrok tunnel active: https://caroyln-impedimental-subfulgently.ngrok-free.dev

‚úÖ TOOL IS LIVE!
üåê Access URL: https://caroyln-impedimental-subfulgently.ngrok-free.dev



üéØ Click the green link above to open in a NEW tab
‚ö†Ô∏è Keep this cell running!
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [08/Nov/2025 12:09:18] "GET / HTTP/1.1" 200 -
  "created_at": datetime.utcnow()
INFO:werkzeug:127.0.0.1 - - [08/Nov/2025 12:09:32] "POST /api/upload-keywords HTTP/1.1" 200 -


‚è≥ Starting browser in worker thread...


INFO:werkzeug:127.0.0.1 - - [08/Nov/2025 12:09:33] "GET /api/job-status/36dd76df-ffe1-494c-b69b-b8ead82d6ed7 HTTP/1.1" 200 -


‚ùå Error on 'smoke free': BrowserType.launch: Executable doesn't exist at /root/.cache/ms-playwright/chromium_headless_shell-1187/chrome-linux/headless_shell
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë Looks like Playwright was just installed or updated.       ‚ïë
‚ïë Please run the following command to download new browsers: ‚ïë
‚ïë                                                            ‚ïë
‚ïë     playwright install                                     ‚ïë
‚ïë                                                            ‚ïë
‚ïë <3 Playwright Team                                         ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù


INFO:werkzeug:127.0.0.1 - - [08/Nov/2025 12:09:35] "GET /api/job-status/36dd76df-ffe1-494c-b69b-b8ead82d6ed7 HTTP/1.1" 200 -


‚è≥ Starting browser in worker thread...
‚ùå Error on 'pop smoke': It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.
‚è≥ Starting browser in worker thread...
‚ùå Error on 'iqos': It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.


INFO:werkzeug:127.0.0.1 - - [08/Nov/2025 12:09:37] "GET /api/job-status/36dd76df-ffe1-494c-b69b-b8ead82d6ed7 HTTP/1.1" 200 -


KeyboardInterrupt: 