<a href="https://colab.research.google.com/github/kpranati2006-png/my-demo/blob/main/final_try_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Step 1: Install required libraries ---
!pip install --quiet PyMuPDF language-tool-python textdistance

# --- Step 2: Import libraries ---
import fitz  # PDF reader
from google.colab import files
import language_tool_python
import textdistance

# --- Step 3: Upload PDF ---
import re  # sentence / word splitting for the readability and repetition checks

uploaded = files.upload()
file_name = list(uploaded.keys())[0]
print(f"Uploaded file: {file_name}")

# --- Step 4: Extract text from PDF ---
# The context manager closes the document as soon as the text has been read.
with fitz.open(file_name) as doc:
    text = "".join(page.get_text() for page in doc)

if not text.strip():
    # Nothing to evaluate: stop here. Running the remaining steps on empty
    # text would award a perfect 100/100 to a document with no content.
    print("❌ The PDF has no extractable text.")
else:
    print("✅ Text extracted successfully!")
    print("\n--- Text ---")
    print(text)

    # --- Step 5: Readability check ---
    # Split after sentence-ending punctuation that is followed by whitespace,
    # so dots inside tokens such as URLs do not start a new "sentence" and no
    # blank trailing fragment is counted.
    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]
    words = text.split()
    avg_sentence_length = len(words) / max(len(sentences), 1)

    if avg_sentence_length > 25:
        readability_feedback = "⚠️ Sentences are long; consider breaking them up for readability."
    else:
        readability_feedback = "✅ Sentence length is good."

    # --- Step 6: Grammar check ---
    tool = language_tool_python.LanguageTool('en-US')
    try:
        matches = tool.check(text)
    finally:
        # Release the LanguageTool instance once the check is done.
        tool.close()

    if not matches:
        grammar_feedback = "✅ No major grammar issues found."
    else:
        grammar_feedback = f"⚠️ {len(matches)} grammar issues found. Sample corrections:\n"
        for match in matches[:5]:
            grammar_feedback += f"- {match.context} → Suggestion: {match.replacements}\n"

    # --- Step 7: Basic plagiarism/repetition check ---
    # Compare neighbouring sentences by their lower-cased word tokens.
    # Passing raw strings to jaccard compares individual characters, which
    # says little about whether the wording is actually repeated.
    token_lists = [re.findall(r"\w+", s.lower()) for s in sentences]
    token_lists = [tokens for tokens in token_lists if tokens]
    similarity_scores = [
        textdistance.jaccard(current, following)
        for current, following in zip(token_lists, token_lists[1:])
    ]

    avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0

    if avg_similarity > 0.5:
        plagiarism_feedback = "⚠️ Some sentences are very similar. Check for possible repetition."
    else:
        plagiarism_feedback = "✅ No major repetition detected."

    # --- Step 8: Grading system ---
    # Readability: 30 points, minus one point per word of average sentence
    # length above 25.
    readability_score = 30 if avg_sentence_length <= 25 else max(0, 30 - int(avg_sentence_length - 25))
    # Grammar: 40 points, minus one point per reported issue.
    grammar_score = max(0, 40 - len(matches))
    # Originality: 30 points up to a similarity of 0.5, then falling linearly
    # to 0 at a similarity of 1.0. (The previous formula, 30 - similarity*60,
    # was already <= 0 at the threshold, so the scaling never took effect.)
    originality_score = 30 if avg_similarity <= 0.5 else max(0, int(30 - (avg_similarity - 0.5) * 60))

    total_score = readability_score + grammar_score + originality_score

    # Letter grade
    if total_score >= 90:
        grade = "A+"
    elif total_score >= 80:
        grade = "A"
    elif total_score >= 70:
        grade = "B"
    elif total_score >= 60:
        grade = "C"
    else:
        grade = "F"

    # --- Step 9: Display feedback ---
    print("\n📊 Readability Feedback:")
    print(readability_feedback)

    print("\n📌 Grammar Feedback:")
    print(grammar_feedback)

    print("\n📌 Repetition/Originality Feedback:")
    print(plagiarism_feedback)

    print(f"\n🏆 Total Score: {total_score}/100")
    print(f"Letter Grade: {grade}")

    print("\n📝 Overall Feedback:")
    if not matches and avg_similarity <= 0.5 and avg_sentence_length <= 25:
        print("Great! The document is readable, grammatically correct, and original.")
    else:
        print("Some improvements are recommended. Check above feedback for details.")


Saving kome-text (2).pdf to kome-text (2).pdf
Uploaded file: kome-text (2).pdf
✅ Text extracted successfully!

--- Text ---
The sun rises in the east and sets in the west and it is very very bright and hot which is 
sometimes uncomfortable. The sun rises in the east and sets in the west and it is very very 
bright and hot which is sometimes uncomfortable. 
Reading books sometimes helps. Reading books sometimes helps. Reading books sometimes
helps. 
Generated with https://kome.ai


📊 Readability Feedback:
✅ Sentence length is good.

📌 Grammar Feedback:
⚠️ 4 grammar issues found. Sample corrections:
- ...n rises in the east and sets in the west and it is very very bright and hot which is... → Suggestion: [', and']
- ...the east and sets in the west and it is very very bright and hot which is  sometimes unco... → Suggestion: ['very, very', 'very']
- ...n rises in the east and sets in the west and it is very very  bright and hot which i... → Suggestion: [', and']
- ...the east and sets in 

In [None]:
# --- Step 1: Install libraries ---
!pip install --quiet gradio PyMuPDF language-tool-python textdistance

# --- Step 2: Import libraries ---
import fitz
import language_tool_python
import textdistance
import gradio as gr

# --- Step 3: Define PDF evaluation function with colored feedback ---
def evaluate_pdf(file):
    """Grade an uploaded PDF for readability, grammar and repetition.

    Args:
        file: The upload value handed over by the Gradio ``File`` input.
            Accepts either a path string or an object that exposes the path
            as ``.name``; ``None`` means nothing was uploaded.

    Returns:
        str: An HTML fragment with the three feedback sections, the total
        score out of 100, the letter grade and a progress bar, or a single
        red error line when there is no file or no extractable text.
    """
    import html
    import re

    if file is None:
        return "<span style='color:red;'>❌ Please upload a PDF file.</span>"

    # Works for both plain path strings and file-like wrappers with `.name`.
    pdf_path = getattr(file, "name", file)

    # Extract text; the context manager closes the document afterwards.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)

    if not text.strip():
        return "<span style='color:red;'>❌ The PDF has no extractable text.</span>"

    # Readability
    # Split after sentence-ending punctuation followed by whitespace so dots
    # inside tokens (e.g. URLs) do not start a new sentence and no blank
    # trailing fragment is counted.
    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]
    words = text.split()
    avg_sentence_length = len(words) / max(len(sentences), 1)
    if avg_sentence_length <= 25:
        readability_feedback = "<span style='color:green;'>✅ Sentence length is good.</span>"
    else:
        readability_feedback = "<span style='color:red;'>⚠️ Sentences are long; consider breaking them up.</span>"

    # Grammar
    tool = language_tool_python.LanguageTool('en-US')
    try:
        matches = tool.check(text)
    finally:
        # Release the LanguageTool instance instead of leaking one per call.
        tool.close()

    if not matches:
        grammar_feedback = "<span style='color:green;'>✅ No major grammar issues found.</span>"
    else:
        grammar_feedback = f"<span style='color:red;'>⚠️ {len(matches)} grammar issues found. Sample corrections:</span><br>"
        for match in matches[:5]:
            # Context and suggestions originate from the uploaded document,
            # so escape them before placing them into the HTML output.
            context = html.escape(str(match.context))
            suggestions = html.escape(str(match.replacements))
            grammar_feedback += f"- {context} → Suggestion: {suggestions}<br>"

    # Repetition
    # Compare neighbouring sentences by their lower-cased word tokens.
    # Passing raw strings to jaccard compares individual characters, which
    # says little about whether the wording is actually repeated.
    token_lists = [re.findall(r"\w+", s.lower()) for s in sentences]
    token_lists = [tokens for tokens in token_lists if tokens]
    similarity_scores = [
        textdistance.jaccard(current, following)
        for current, following in zip(token_lists, token_lists[1:])
    ]
    avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0
    if avg_similarity <= 0.5:
        plagiarism_feedback = "<span style='color:green;'>✅ No major repetition detected.</span>"
    else:
        plagiarism_feedback = "<span style='color:red;'>⚠️ Some sentences are very similar. Check for repetition.</span>"

    # Grading
    # Readability: 30 points, minus one per word of average length above 25.
    readability_score = 30 if avg_sentence_length <= 25 else max(0, 30 - int(avg_sentence_length - 25))
    # Grammar: 40 points, minus one per reported issue.
    grammar_score = max(0, 40 - len(matches))
    # Originality: 30 points up to a similarity of 0.5, then falling linearly
    # to 0 at 1.0. (The previous formula, 30 - similarity*60, was already
    # <= 0 at the threshold, so the scaling never took effect.)
    originality_score = 30 if avg_similarity <= 0.5 else max(0, int(30 - (avg_similarity - 0.5) * 60))
    total_score = readability_score + grammar_score + originality_score

    if total_score >= 90:
        grade = "A+"
    elif total_score >= 80:
        grade = "A"
    elif total_score >= 70:
        grade = "B"
    elif total_score >= 60:
        grade = "C"
    else:
        grade = "F"

    # --- Build colored HTML feedback ---
    feedback_html = f"""
    <h3>PDF Evaluation Results</h3>
    <b>Readability Feedback:</b> {readability_feedback}<br><br>
    <b>Grammar Feedback:</b> {grammar_feedback}<br>
    <b>Repetition/Originality Feedback:</b> {plagiarism_feedback}<br><br>
    <b>Total Score:</b> {total_score}/100<br>
    <b>Letter Grade:</b> {grade}<br>
    <b>Score Progress:</b> <progress value="{total_score}" max="100"></progress>
    """
    return feedback_html

# --- Step 4: Launch Gradio Interface ---
# Create the input and output components first, then wire them to evaluate_pdf.
pdf_upload = gr.File(label="Upload PDF")
html_report = gr.HTML()  # HTML output so the colored feedback is rendered

iface = gr.Interface(
    fn=evaluate_pdf,
    inputs=pdf_upload,
    outputs=html_report,
    title="PDF Evaluation Tool",
    description="Upload your PDF to get readability, grammar, and originality feedback along with a score and grade.",
)

# Start the web UI for the interface defined above.
iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0952b6b473a6b83fd9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Install Java 17
# NOTE(review): presumably required by language_tool_python, which is used in
# the cells above — confirm; if so, this cell needs to run before them.
# Refresh the apt package index, then install the OpenJDK 17 JDK
# (-y answers the confirmation prompt automatically).
!apt-get update
!apt-get install openjdk-17-jdk -y
import os
# Set JAVA_HOME for this process to the JDK 17 directory.
# NOTE(review): assumes the package installs to this path — confirm.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-17-openjdk-amd64"
# Print the Java version that is now on the PATH as a sanity check.
!java -version

0% [Working]            Hit:1 https://cli.github.com/packages stable InRelease
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Connected                                                                               Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
                                                                               Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Connected                                                                               Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to ppa.launchpadconte                                                                               Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 https://r2u.stat.illinois.edu/ub