In [None]:
# Step 1: Install necessary libraries
!pip install flask pyngrok pandas nltk scikit-learn joblib

# Step 2: Imports
import pandas as pd
import string
import nltk
import joblib
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from flask import Flask, request, render_template_string
from pyngrok import ngrok
import os

# Step 3: Make sure the NLTK English stopword corpus is available locally
def _ensure_stopwords_available():
    """Download the NLTK 'stopwords' corpus only if looking it up fails.

    Probing with ``stopwords.words('english')`` raises ``LookupError`` when
    the corpus has not been downloaded yet; in that case it is fetched.
    """
    try:
        stopwords.words('english')
    except LookupError:
        nltk.download('stopwords')


_ensure_stopwords_available()

# Step 4: File paths (ensure your .pkl files are named this way)
model_path = 'news_model.pkl'
vectorizer_path = 'vectorizer.pkl'

# --- Check if files exist, if not, prompt for upload ---
# Remember exactly which artifacts are absent so we can re-check them after the upload.
missing_files = [p for p in (model_path, vectorizer_path) if not os.path.exists(p)]

if missing_files:
    print(f"'{model_path}' and/or '{vectorizer_path}' not found in the current Colab session directory.")
    print("Please upload your 'news_model.pkl' and 'vectorizer.pkl' files.")
    try:
        # google.colab is only importable inside a Colab runtime; guard the import so that
        # running this script elsewhere reports the missing files instead of an ImportError.
        from google.colab import files
    except ImportError:
        print("ℹ️ google.colab is not available here; "
              "place the .pkl files in the working directory and re-run.")
    else:
        uploaded = files.upload()
        # Verify they are uploaded
        for filename, content in uploaded.items():
            print(f'Uploaded file "{filename}" with length {len(content)} bytes')
        # The upload is only useful if the files landed under the expected names
        # (presumably files.upload() saves into the working directory -- verify in Colab docs).
        still_missing = [p for p in missing_files if not os.path.exists(p)]
        if still_missing:
            print(f"⚠️ Still missing after upload: {', '.join(still_missing)}. "
                  "Upload the files using exactly these names.")
else:
    print(f"'{model_path}' and '{vectorizer_path}' found. Skipping upload prompt.")


# Step 5: Load the model and vectorizer (GLOBAL - loaded once when app starts)
# Ensure this happens AFTER the potential upload step
#
# SECURITY NOTE: joblib.load() unpickles the file, and unpickling can execute arbitrary
# code. Only load .pkl files that you created yourself or obtained from a trusted source.
try:
    model = joblib.load(model_path)
    vectorizer = joblib.load(vectorizer_path)
except FileNotFoundError as exc:
    print(f"❌ ERROR: Could not load '{model_path}' or '{vectorizer_path}'.")
    print("Please ensure the files are uploaded with the correct names and re-run the cell.")
    print("Stopping execution as the app cannot function without the model and vectorizer.")
    # Stop explicitly so Flask never starts without its model; chain the cause so the
    # original traceback (which file was missing) stays available for debugging.
    raise SystemExit("Model/Vectorizer not found. Please upload and restart.") from exc
except Exception as e:
    # Anything else (corrupt pickle, version mismatch, ...) is equally fatal for the app.
    print(f"❌ Error loading model/vectorizer: {e}")
    raise SystemExit(f"Error loading model/vectorizer: {e}") from e
else:
    print("✅ Model and vectorizer loaded successfully.")


# Step 6: Text cleaning function (GLOBAL)

# Translation table that deletes every ASCII punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-filled cache of the English stopwords; a frozenset gives O(1) membership tests.
_STOP_WORDS_CACHE = None


def _english_stop_words():
    """Return the NLTK English stopwords as a frozenset, loading them only once."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


def clean_text(text):
    """Normalize raw text before it is handed to the vectorizer.

    The input is converted to ``str``, lower-cased, stripped of every
    character in ``string.punctuation``, split on whitespace, and filtered
    against the NLTK English stopword list.

    Args:
        text: Any object; it is passed through ``str()`` first.

    Returns:
        The remaining tokens joined by single spaces (possibly an empty string).

    Raises:
        LookupError: If the NLTK 'stopwords' corpus is not installed.
    """
    lowered = str(text).lower()
    without_punctuation = lowered.translate(_PUNCTUATION_TABLE)
    stop_words_english = _english_stop_words()
    return ' '.join(w for w in without_punctuation.split() if w not in stop_words_english)

# Step 7: Flask App Setup
# Create the Flask application object; the home() route below is registered on it.
app = Flask(__name__)

# Define the HTML template with improved styling
# Single-page Jinja template that home() passes to render_template_string().
# Template variables:
#   S_TEXT      - the text the user submitted; echoed back into the textarea and,
#                 when a result is shown, into the "You analyzed" box.
#   PRED_RESULT - 'REAL' or 'FAKE' select the matching message; any other truthy
#                 value renders the generic error message. When it is falsy the
#                 whole result section is omitted.
# Note: every PRED_RESULT other than 'REAL' receives the 'fake' CSS class,
# so the error message is styled like a FAKE verdict.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>News Authenticity Checker</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f0f2f5;
            color: #1c1e21;
            display: flex;
            flex-direction: column;
            align-items: center;
            min-height: 100vh;
            box-sizing: border-box;
        }
        .container {
            background-color: #ffffff;
            padding: 30px 40px;
            border-radius: 12px;
            box-shadow: 0 8px 24px rgba(0, 0, 0, 0.1);
            width: 100%;
            max-width: 700px; /* Increased max-width slightly */
            text-align: center;
            margin-top: 20px;
        }
        header h1 {
            color: #1877f2; /* A vibrant blue */
            margin-bottom: 10px;
            font-size: 32px;
            font-weight: 600;
        }
        header p {
            color: #606770;
            font-size: 16px;
            margin-bottom: 30px;
        }
        textarea {
            width: calc(100% - 30px); /* Full width minus padding */
            padding: 15px;
            margin-bottom: 20px;
            border: 1px solid #dddfe2;
            border-radius: 8px;
            font-size: 16px;
            min-height: 150px;
            resize: vertical;
            box-sizing: border-box;
            line-height: 1.5;
        }
        textarea:focus {
            border-color: #1877f2;
            outline: none;
            box-shadow: 0 0 0 2px rgba(24, 119, 242, 0.2);
        }
        input[type="submit"] {
            background-color: #1877f2;
            color: white;
            padding: 14px 28px;
            border: none;
            border-radius: 8px;
            cursor: pointer;
            font-size: 18px;
            font-weight: bold;
            transition: background-color 0.2s ease-in-out, transform 0.1s ease;
        }
        input[type="submit"]:hover {
            background-color: #166fe5;
        }
        input[type="submit"]:active {
            transform: scale(0.98);
        }
        .result-section {
             margin-top: 30px;
        }
        .result {
            padding: 20px;
            border-radius: 8px;
            font-size: 20px;
            font-weight: 500;
            display: flex;
            align-items: center;
            justify-content: center;
        }
        .result .icon {
            font-size: 28px;
            margin-right: 12px;
        }
        .real {
            background-color: #e7f3ff; /* Light blue for real news (can be green too) */
            color: #1877f2; /* Corresponds to the theme blue */
            border: 1px solid #bde0fe;
        }
        .fake {
            background-color: #ffebee; /* Light red */
            color: #c62828; /* Darker red for text */
            border: 1px solid #ffcdd2;
        }
        .submitted-text-container {
            margin-top: 25px;
            padding: 20px;
            background-color: #f7f7f7;
            border: 1px solid #e9ecef;
            border-radius: 8px;
            text-align: left;
        }
        .submitted-text-container strong {
            display: block;
            margin-bottom: 10px;
            color: #333;
            font-size: 16px;
        }
        .submitted-text {
            font-size: 15px;
            color: #555;
            line-height: 1.6;
            word-wrap: break-word;
            max-height: 200px; /* Allow more text to be visible */
            overflow-y: auto;
            background-color: #fff;
            padding: 10px;
            border-radius: 4px;
            border: 1px solid #eee;
        }
        .footer {
            margin-top: 40px;
            font-size: 13px;
            color: #8a8d91;
        }
        .footer a {
            color: #1877f2;
            text-decoration: none;
        }
        .footer a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>📰 News Authenticity Checker</h1>
            <p>Enter a news headline or article text below to check if it's likely real or fake.</p>
        </header>

        <form method="POST">
            <textarea name="news_text" placeholder="Paste news text here...">{{ S_TEXT if S_TEXT else '' }}</textarea><br>
            <input type="submit" value="Analyze News">
        </form>

        {% if PRED_RESULT %}
            <div class="result-section">
                <div class="result {{ 'real' if PRED_RESULT == 'REAL' else 'fake' }}">
                    {% if PRED_RESULT == 'REAL' %}
                        <span class="icon">🟢</span> This news appears to be <strong>REAL</strong>.
                    {% elif PRED_RESULT == 'FAKE' %}
                        <span class="icon">🔴</span> This news appears to be <strong>FAKE</strong>.
                    {% else %}
                        <span class="icon">⚠️</span> Error processing the request.
                    {% endif %}
                </div>

                {% if S_TEXT %}
                <div class="submitted-text-container">
                    <strong>You analyzed:</strong>
                    <div class="submitted-text">{{ S_TEXT }}</div>
                </div>
                {% endif %}
            </div>
        {% endif %}
    </div>
    <footer class="footer">
        <p>Disclaimer: This tool provides an automated prediction based on a machine learning model. Always cross-verify information with multiple reliable sources.</p>
        <p>Powered by Python, Flask & NLTK in Google Colab.</p>
    </footer>
</body>
</html>
"""

@app.route('/', methods=['GET', 'POST'])
def home():
    """Serve the single page of the app and classify submitted text.

    GET renders the empty form. POST reads the ``news_text`` form field and,
    when it is not blank, cleans it, vectorizes it and asks the model for a
    label: a label equal to 1 is reported as "REAL", anything else as "FAKE".
    Any exception raised while predicting is printed and reported as "ERROR".

    Returns:
        The rendered HTML page, or a plain-text message with status 500 when
        the global ``model`` / ``vectorizer`` objects are not defined.
    """
    # Plain page load: nothing submitted, nothing to show.
    if request.method != 'POST':
        return render_template_string(HTML_TEMPLATE, PRED_RESULT=None, S_TEXT=None)

    user_text = request.form.get('news_text', '')
    verdict = None  # stays None for blank submissions, so only the form is re-rendered

    if user_text.strip():
        try:
            # Both objects are expected to be loaded at startup; bail out if they are not.
            if not ('model' in globals() and 'vectorizer' in globals()):
                return "Error: Model or vectorizer not loaded.", 500

            features = vectorizer.transform([clean_text(user_text)])
            label = model.predict(features)[0]
            verdict = "REAL" if label == 1 else "FAKE"
        except Exception as e:
            print(f"Error during prediction: {e}")
            verdict = "ERROR"  # the template shows a generic error message for this value

    # The submitted text is echoed back so the user can see what was analyzed.
    return render_template_string(HTML_TEMPLATE, PRED_RESULT=verdict, S_TEXT=user_text)

# Step 8: Run with ngrok
# Get an authtoken from your ngrok dashboard (https://dashboard.ngrok.com/get-started/your-authtoken)
# and expose it to this process through the NGROK_AUTH_TOKEN environment variable, e.g. run
#     os.environ["NGROK_AUTH_TOKEN"] = "<your token>"
# in a separate cell that you do not share or commit.
#
# SECURITY: a real authtoken used to be hard-coded on this line. Secrets must never live in
# source; any token that was committed or shared this way should be treated as leaked and
# revoked in the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN")
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    print("⚠️ NGROK_AUTH_TOKEN is not set; ngrok may refuse to open a tunnel without it.")

# Terminate any existing ngrok tunnels (good practice if re-running the cell)
ngrok.kill()

# Port shared by the ngrok tunnel and the Flask development server.
FLASK_PORT = 5000

# Set up the ngrok tunnel to the Flask app
try:
    # If you have a paid ngrok plan, you can use a custom subdomain.
    # For free tier, it will be a random URL.
    public_url = ngrok.connect(FLASK_PORT)
    print("🎉 Your News Classifier Website is LIVE! 🎉")
    # Print the URL itself rather than the NgrokTunnel object's repr.
    print(f"👉 Access it here: {public_url.public_url}")
    print("ℹ️ Keep this Colab cell running to keep the website active.")
    print("ℹ️ Close this cell or disconnect from Colab to stop the website.")
    app.run(port=FLASK_PORT)  # Blocks until the server is stopped
except Exception as e:
    print(f"❌ Could not start ngrok or Flask app: {e}")
    print("   Possible reasons:")
    print("   - ngrok authtoken issue (if you set one that's invalid or missing).")
    print(f"   - port {FLASK_PORT} is already in use by another process.")
finally:
    # Tear the tunnel down once Flask stops so the public URL does not outlive the app.
    ngrok.kill()

'news_model.pkl' and 'vectorizer.pkl' found. Skipping upload prompt.
✅ Model and vectorizer loaded successfully.
🎉 Your News Classifier Website is LIVE! 🎉
👉 Access it here: NgrokTunnel: "https://8578-34-106-62-96.ngrok-free.app" -> "http://localhost:5000"
ℹ️ Keep this Colab cell running to keep the website active.
ℹ️ Close this cell or disconnect from Colab to stop the website.
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:16:41] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:16:42] "GET /favicon.ico HTTP/1.1" 404 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:17:16] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:18:16] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:18:52] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:21:10] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:21:34] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:21:37] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/May/2025 10:22:24] "POST / HTTP/1.1" 200 -
