In [3]:
!pip install flask transformers torch


Defaulting to user installation because normal site-packages is not writeable


In [4]:
from nltk.tokenize import sent_tokenize


In [5]:
!pip install flask python-docx PyMuPDF


Defaulting to user installation because normal site-packages is not writeable


In [6]:
pip install sumy


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [13]:
import logging
import os, re
import uuid
from collections import Counter
from threading import Thread

import docx
import fitz
import numpy as np
from flask import Flask, request, render_template_string
from transformers import pipeline
from werkzeug.utils import secure_filename
# Root logger: timestamped INFO-level records for everything in this notebook.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Upload settings: where files are written and which extensions are accepted.
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'.txt', '.pdf', '.docx'}
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Flask application object; the upload directory is also exposed through app.config.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
class FallbackExtractiveSummarizer:
    """
    Dependency-free extractive summarizer used when the Hugging Face
    pipeline cannot be created.

    Sentences are ranked by the average corpus frequency of their words and
    the highest-ranked ones are concatenated until a character budget is hit.
    Instances are callable with the same keyword arguments and return shape
    the web handler uses for the Hugging Face pipeline
    (``obj(text, min_length=..., max_length=...)[0]['summary_text']``).
    """

    def sent_tokenize(self, text):
        """Split text after '.', '!' or '?' followed by whitespace; empty pieces are dropped."""
        # Filtering matters: re.split('') yields [''], which would otherwise
        # count as one (empty) sentence and bypass the "no sentences" guard.
        return [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

    def summarize(self, text, min_length_chars=30, max_length_chars=150):
        """
        Build an extractive summary of ``text``.

        Args:
            text: Source text.
            min_length_chars: Target minimum summary length in characters.
            max_length_chars: Character budget for the first selection pass;
                the top-up pass may go up to 1.5x this value.

        Returns:
            The summary string, or a short explanatory message when nothing
            could be extracted.
        """
        sentences = self.sent_tokenize(text)
        if not sentences:
            return "No sentences found to summarize."
        words = re.findall(r'\w+', text.lower())
        if not words:
            return "No meaningful words found for summarization."
        freq = Counter(words)

        # Score = mean word frequency, so long sentences are not favoured
        # merely for containing more words.
        scores = []
        for sentence in sentences:
            word_list = re.findall(r'\w+', sentence.lower())
            if not word_list:
                scores.append(0)
                continue
            scores.append(sum(freq[word] for word in word_list) / len(word_list))
        sorted_sentences = [s for _, s in sorted(zip(scores, sentences), reverse=True)]

        # First pass: take best-ranked sentences until the next one would not fit.
        summary_text = ""
        for s in sorted_sentences:
            if len(summary_text) + len(s) + 1 > max_length_chars:
                break
            summary_text += s + " "

        # Second pass: if still below the minimum, top up with unused
        # sentences, allowing the budget to stretch to 1.5x the maximum.
        if len(summary_text.strip()) < min_length_chars:
            for s in sorted_sentences:
                if s not in summary_text and len(summary_text) + len(s) + 1 <= max_length_chars * 1.5:
                    summary_text += s + " "
                    if len(summary_text.strip()) >= min_length_chars:
                        break
        return summary_text.strip() if summary_text.strip() else "Could not generate a summary."

    def __call__(self, text, min_length=30, max_length=150, **kwargs):
        """
        Pipeline-compatible entry point.

        ``min_length`` / ``max_length`` are forwarded unchanged to
        ``summarize`` and are therefore interpreted as character counts here.
        Any other keyword (e.g. ``do_sample``) is accepted and ignored.

        Returns:
            A one-element list ``[{'summary_text': <str>}]``.
        """
        summary = self.summarize(text, min_length_chars=min_length, max_length_chars=max_length)
        return [{'summary_text': summary}]


try:
    summarizer_pipeline = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
    logging.info("✅ Hugging Face summarization pipeline initialized.")
except Exception as e:
    # Model download / backend import problems should not prevent the app
    # from starting; degrade to the extractive fallback instead.
    logging.error(f"❌ Failed to initialize Hugging Face pipeline: {e}")
    summarizer_pipeline = FallbackExtractiveSummarizer()
    logging.warning(" Falling back to simple extractive summarizer due to ML pipeline initialization failure.")

def read_file(filepath: str, ext: str) -> str:
    """
    Reads content from a file based on its extension.
    Supports .txt, .pdf (using PyMuPDF), and .docx (using python-docx).

    Args:
        filepath: Path of the file to read.
        ext: Lower-case extension including the dot ('.txt', '.pdf', '.docx').

    Returns:
        The extracted text. An empty string is returned when the extension is
        not supported or when reading fails; failures are logged, not raised.
    """
    try:
        if ext == '.txt':
            logging.info(f"Attempting to read TXT file: {filepath}")
            # utf-8-sig strips a leading BOM if present; errors='replace' keeps
            # the rest of a file readable when it contains a few invalid bytes
            # instead of discarding the whole document.
            with open(filepath, 'r', encoding='utf-8-sig', errors='replace') as f:
                content = f.read()
            logging.info(f"Successfully read TXT file. Length: {len(content)}")
            return content
        elif ext == '.pdf':
            logging.info(f"Attempting to read PDF file: {filepath}")
            page_texts = []
            try:
                with fitz.open(filepath) as doc:
                    logging.info(f"PDF opened successfully. Number of pages: {doc.page_count}")
                    for i, page in enumerate(doc):
                        page_text = page.get_text()
                        page_texts.append(page_text)
                        logging.info(f"Read page {i+1}. Length of text from this page: {len(page_text)} chars.")
                # Join once at the end rather than growing a string per page.
                text = ''.join(page_texts)
                logging.info(f"Finished reading all PDF pages. Total text length: {len(text)} chars.")
                return text
            except Exception as pdf_e:
                logging.error(f" Error specific to PDF processing with fitz: {pdf_e}", exc_info=True)
                return ''
        elif ext == '.docx':
            logging.info(f"Attempting to read DOCX file: {filepath}")
            doc = docx.Document(filepath)
            content = '\n'.join([para.text for para in doc.paragraphs])
            logging.info(f"Successfully read DOCX file. Length: {len(content)}")
            return content
        # Unknown extension: return an empty string (not None) so callers can
        # always treat the result as text.
        logging.warning(f"Unsupported file extension {ext!r} for {filepath}")
        return ''
    except Exception as e:
        logging.error(f" General error reading file {filepath} with extension {ext}: {e}", exc_info=True)
        return ''

# Single-page template for the app, passed to render_template_string() by
# upload_file(). It contains:
#   * a multipart/form-data POST form with one file input named "text_file"
#     (the handler reads request.files["text_file"]);
#   * a result area driven by two template variables: `summary` (shown in
#     .summary-box when truthy) and, otherwise, `error` (shown in
#     .error-message when truthy).
# All styling is inline CSS; the only external resource is the Google Fonts
# stylesheet linked in <head>.
HTML_PAGE = '''
<!DOCTYPE html>
<html>
<head>
    <title>Quantum Summarizer</title>
    <link href="https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
    <style>
        :root {
            --bg-color: #0d1117; /* Dark charcoal */
            --primary-text-color: #e6edf3; /* Light gray */
            --accent-color-1: #00bcd4; /* Cyan */
            --accent-color-2: #8e24aa; /* Deep Purple */
            --border-color: #21262d; /* Darker gray for borders */
            --glow-color: rgba(0, 188, 212, 0.5); /* Cyan glow */
            --error-bg: #2d0c0c; /* Dark red background */
            --error-text: #ff6b6b; /* Light red text */
            --box-shadow-dark: 0 5px 15px rgba(0, 0, 0, 0.5);
            --box-shadow-light: 0 2px 10px rgba(0, 0, 0, 0.2);
        }

        body {
            font-family: 'Inter', sans-serif;
            background-color: var(--bg-color);
            color: var(--primary-text-color);
            padding: 20px;
            line-height: 1.6;
            display: flex;
            flex-direction: column;
            align-items: center;
            min-height: 100vh;
            box-sizing: border-box;
        }

        h2 {
            font-family: 'Orbitron', sans-serif;
            color: var(--accent-color-1);
            text-align: center;
            margin-bottom: 40px;
            font-weight: 700;
            font-size: 2.5em;
            text-shadow: 0 0 10px var(--glow-color);
        }

        form {
            background-color: #161b22; /* Slightly lighter dark */
            padding: 40px;
            border-radius: 12px;
            box-shadow: var(--box-shadow-dark);
            max-width: 600px;
            width: 100%;
            margin-bottom: 30px;
            border: 1px solid var(--border-color);
            position: relative;
            overflow: hidden;
        }
        form::before {
            content: '';
            position: absolute;
            top: -5px; left: -5px; right: -5px; bottom: -5px;
            background: linear-gradient(45deg, var(--accent-color-1), var(--accent-color-2));
            z-index: -1;
            filter: blur(15px);
            opacity: 0.3;
            border-radius: 15px;
        }

        .form-group {
            width: 100%;
            margin-bottom: 25px;
        }

        .form-group label {
            display: block;
            margin-bottom: 10px;
            font-weight: 600;
            color: var(--primary-text-color);
            font-size: 1.1em;
        }

        input[type="file"] {
            width: 100%;
            padding: 12px;
            border: 1px solid var(--border-color);
            border-radius: 8px;
            background-color: #0d1117;
            color: var(--primary-text-color);
            font-family: 'Inter', sans-serif;
            font-size: 1em;
            box-sizing: border-box;
            transition: border-color 0.3s ease, box-shadow 0.3s ease;
        }
        input[type="file"]:hover {
            border-color: var(--accent-color-1);
            box-shadow: 0 0 8px var(--glow-color);
        }
        input[type="file"]::-webkit-file-upload-button {
            background-color: var(--accent-color-1);
            color: white;
            padding: 8px 15px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s ease;
        }
        input[type="file"]::-webkit-file-upload-button:hover {
            background-color: var(--accent-color-2);
        }

        input[type="submit"] {
            background: linear-gradient(90deg, var(--accent-color-1) 0%, var(--accent-color-2) 100%);
            color: white;
            padding: 15px 30px;
            border: none;
            border-radius: 8px;
            cursor: pointer;
            font-size: 1.2em;
            font-weight: 700;
            font-family: 'Orbitron', sans-serif;
            transition: transform 0.3s ease, box-shadow 0.3s ease;
            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.4);
        }
        input[type="submit"]:hover {
            transform: translateY(-3px);
            box-shadow: 0 6px 20px rgba(0, 0, 0, 0.6), 0 0 20px var(--glow-color);
        }
        input[type="submit"]:active {
            transform: translateY(0);
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3);
        }

        h3 {
            font-family: 'Orbitron', sans-serif;
            color: var(--accent-color-1);
            margin-top: 40px;
            text-align: center;
            font-weight: 700;
            font-size: 1.8em;
            text-shadow: 0 0 8px var(--glow-color);
        }

        .summary-box {
            background-color: #161b22;
            padding: 30px;
            border-radius: 12px;
            border: 1px solid var(--border-color);
            margin-top: 20px;
            word-wrap: break-word;
            white-space: pre-wrap;
            max-width: 600px;
            width: 100%;
            box-shadow: var(--box-shadow-dark);
            font-size: 1.1em;
            color: var(--primary-text-color);
            position: relative;
        }
        .summary-box::before {
            content: '';
            position: absolute;
            top: -2px; left: -2px; right: -2px; bottom: -2px;
            background: linear-gradient(45deg, var(--accent-color-2), var(--accent-color-1));
            z-index: -1;
            filter: blur(10px);
            opacity: 0.2;
            border-radius: 15px;
        }

        .error-message {
            color: var(--error-text);
            background-color: var(--error-bg);
            padding: 15px;
            border-radius: 8px;
            border: 1px solid var(--error-text);
            margin-top: 20px;
            max-width: 600px;
            width: 100%;
            text-align: center;
            font-weight: 600;
            box-shadow: var(--box-shadow-dark);
        }
    </style>
</head>
<body>
    <h2>Quantum Summarizer Interface</h2>
    <form method="POST" enctype="multipart/form-data">
        <div class="form-group">
            <label for="text_file">Upload Data Stream (.txt, .pdf, .docx):</label>
            <input type="file" name="text_file" id="text_file" accept=".txt,.pdf,.docx" required>
        </div>
        <input type="submit" value="Process & Summarize">
    </form>
    {% if summary %}
        <h3>Generated Summary:</h3>
        <div class="summary-box">{{ summary }}</div>
    {% elif error %}
        <p class="error-message">{{ error }}</p>
    {% endif %}
</body>
</html>
'''


@app.route("/", methods=["GET", "POST"])
def upload_file():
    """
    Serve the upload form (GET) and summarize an uploaded document (POST).

    On POST the file from the ``text_file`` form field is validated against
    ALLOWED_EXTENSIONS, stored under a server-generated name in the upload
    folder, read with read_file(), summarized, and then deleted again.

    Returns:
        The rendered HTML_PAGE with either ``summary`` or ``error`` filled in
        (both empty for a plain GET).
    """
    summary = ''
    error = ''
    logging.info(" Request received")
    if request.method != "POST":
        return render_template_string(HTML_PAGE, summary=summary, error=error)

    file = request.files.get("text_file")

    if not file or file.filename == '':
        error = "❌ No file selected. Please choose a file to upload."
        logging.warning(error)
        return render_template_string(HTML_PAGE, summary=summary, error=error)

    # The sanitized name is used for logging only.
    filename = secure_filename(file.filename)
    # Take the extension from the raw client filename: secure_filename() drops
    # non-ASCII characters and leading dots, so a name with a non-ASCII stem
    # would lose its extension and be rejected. The value is only ever
    # compared against the whitelist below.
    ext = os.path.splitext(file.filename)[1].lower()

    logging.info(f" Uploaded file: {filename}, extension: {ext}")

    if ext not in ALLOWED_EXTENSIONS:
        error = " Unsupported file type. Please upload a .txt, .pdf, or .docx file."
        logging.warning(error)
        return render_template_string(HTML_PAGE, summary=summary, error=error)

    # Store under a unique generated name so that two requests uploading files
    # with the same name cannot overwrite or delete each other's data.
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{uuid.uuid4().hex}{ext}")
    try:
        try:
            file.save(filepath)
            logging.info(f" File saved to {filepath}")
        except Exception as e:
            error = f" Failed to save file: {e}. Please check server permissions."
            logging.error(error, exc_info=True)
            return render_template_string(HTML_PAGE, summary=summary, error=error)

        # Guard against a None result so the length/strip calls below are safe.
        content = read_file(filepath, ext) or ''
        logging.info(f" Content length after reading: {len(content)} characters.")

        if not content.strip():
            error = f" The file appears to be empty or unreadable ({ext.upper()}). No text could be extracted."
            logging.warning(error)
        else:
            try:
                if callable(summarizer_pipeline):
                    # Pipeline-style call. min_length / max_length bound the
                    # generated summary; truncation=True clips inputs longer
                    # than the model accepts instead of failing on them.
                    summary_result = summarizer_pipeline(
                        content, min_length=30, max_length=150, do_sample=False, truncation=True
                    )
                    summary = summary_result[0]['summary_text']
                    logging.info(" Summary generated successfully using ML pipeline.")
                elif hasattr(summarizer_pipeline, 'summarize'):
                    # Extractive fallback object exposing only .summarize(text).
                    summary = summarizer_pipeline.summarize(content)
                    logging.info(" Summary generated using fallback extractive summarizer.")
                else:
                    error = " Summarization service is unavailable. ML pipeline failed to load."
                    logging.error(error)
            except Exception as e:
                error = f" Error during ML summarization: {e}. Please try a different file or text."
                logging.error(error, exc_info=True)
    finally:
        # Clean up the uploaded file on every exit path, including failed saves.
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
                logging.info(f" Removed temporary file: {filepath}")
        except Exception as e:
            logging.error(f"Failed to remove file {filepath}: {e}")

    return render_template_string(HTML_PAGE, summary=summary, error=error)

def run_app(host='0.0.0.0', port=5000):
    """
    Runs the Flask application (blocks until the server stops).

    Args:
        host: Interface to bind. The default '0.0.0.0' makes the server
            accessible externally (e.g., from other devices on your network);
            pass '127.0.0.1' to restrict it to this machine.
        port: TCP port to listen on.
    """
    # The reloader is disabled because it needs the main thread, and this
    # server is started from a background thread below.
    app.run(host=host, port=port, debug=False, use_reloader=False)
# Start the server in a background thread so the notebook cell returns.
Thread(target=run_app).start()


2025-07-03 04:09:00,737 - ERROR - ❌ Failed to initialize Hugging Face pipeline: Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.29.48:5000
2025-07-03 04:09:00,749 - INFO - [33mPress CTRL+C to quit[0m
2025-07-03 04:09:02,203 - INFO - 192.168.29.48 - - [03/Jul/2025 04:09:02] "GET / HTTP/1.1" 200 -
2025-07-03 04:09:07,625 - INFO - Attempting to read TXT file: uploads\home.txt
2025-07-03 04:09:07,628 - INFO - Successfully read TXT file. Length: 718
2025-07-03 04:09:07,658 - INFO - 192.168.29.48 - - [03/Jul/2025 04:09:07] "POST / HTTP/1.1" 200 -


In [14]:
from rouge_score import rouge_scorer

# Toy reference / candidate pair used to sanity-check the ROUGE scorer.
ref = "The quick brown fox jumps over the lazy dog."
gen = "A fast brown fox leaps over a lazy dog."

# Unigram overlap (rouge1) and longest-common-subsequence (rougeL), with stemming.
scorer = rouge_scorer.RougeScorer(
    rouge_types=['rouge1', 'rougeL'],
    use_stemmer=True,
)
scores = scorer.score(target=ref, prediction=gen)
print(scores)


2025-07-03 04:15:28,112 - INFO - Using default tokenizer.


{'rouge1': Score(precision=0.5555555555555556, recall=0.5555555555555556, fmeasure=0.5555555555555556), 'rougeL': Score(precision=0.5555555555555556, recall=0.5555555555555556, fmeasure=0.5555555555555556)}
