In [None]:

import html
import re
from collections import Counter

import easyocr
import nltk
import numpy as np
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from PIL import Image

# Download required NLTK resources.
# 'punkt' and 'averaged_perceptron_tagger' are the resource names older NLTK
# releases load; NLTK 3.9+ looks up 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead, so both generations are fetched to
# keep sent_tokenize / word_tokenize / pos_tag working on either.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

# OCR Function
def extract_text(image, output_file):
    """Run English OCR on an image and save the recognised text to a file.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object).
        output_file: Path of the text file that is (over)written with the
            OCR result.

    Returns:
        A ``(extracted_text, output_file)`` tuple: the recognised text
        fragments joined by single spaces, and the path that was written.
    """
    image = Image.open(image).convert("RGB")
    image_np = np.array(image)
    # NOTE: a new Reader is constructed on every call.
    reader = easyocr.Reader(['en'])
    # detail=0 makes readtext return plain strings, which are joined below.
    results = reader.readtext(image_np, detail=0)
    extracted_text = " ".join(results)

    # Explicit UTF-8: the locale default encoding (e.g. cp1252 on Windows)
    # can raise UnicodeEncodeError on non-ASCII characters in the OCR output.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(extracted_text)
    return extracted_text, output_file

# Text summarization logic using POS tagging
def preprocess_text(text):
    """Split *text* into sentences and a filtered list of lowercased words.

    Returns:
        A ``(words, sentences)`` tuple. ``sentences`` is the result of
        ``sent_tokenize(text)``; ``words`` contains the lowercased tokens of
        every sentence that are alphanumeric and are not English stopwords.
    """
    sentences = sent_tokenize(text)

    lowered_tokens = []
    for sentence in sentences:
        for token in word_tokenize(sentence):
            lowered_tokens.append(token.lower())

    english_stopwords = set(stopwords.words('english'))

    words = []
    for token in lowered_tokens:
        # Skip punctuation / mixed-symbol tokens and stopwords.
        if not token.isalnum() or token in english_stopwords:
            continue
        words.append(token)

    return words, sentences

def get_pos_tags(words):
    """Return the ``(word, tag)`` pairs produced by ``nltk.pos_tag`` for *words*."""
    tagged_words = nltk.pos_tag(words)
    return tagged_words

def get_key_sentences(sentences, pos_tags, num_sentences=3):
    """Pick the highest-scoring sentences based on key-word frequency.

    Words whose tag starts with ``N``, ``V`` or ``J`` are counted; a
    sentence's score is the sum of those counts over its lowercased tokens.

    Args:
        sentences: Candidate sentences.
        pos_tags: Iterable of ``(word, tag)`` pairs.
        num_sentences: How many sentences to keep (slice upper bound).

    Returns:
        The sentences ordered by descending score, truncated to
        ``num_sentences``; ties keep their original relative order.
    """
    key_tag_prefixes = ('N', 'V', 'J')
    word_freq = Counter(
        token for token, tag in pos_tags if tag.startswith(key_tag_prefixes)
    )

    def sentence_score(sentence):
        total = 0
        for token in word_tokenize(sentence.lower()):
            if token in word_freq:
                total += word_freq[token]
        return total

    ranked = sorted(
        ((sentence, sentence_score(sentence)) for sentence in sentences),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [sentence for sentence, _ in ranked[:num_sentences]]

def summarize_text(text, num_sentences=3):
    """Build an extractive summary of *text*.

    Chains ``preprocess_text``, ``get_pos_tags`` and ``get_key_sentences``.

    Returns:
        A ``(summary, key_sentences)`` tuple: the selected sentences joined
        by single spaces, and the list of selected sentences itself.
    """
    words, sentences = preprocess_text(text)
    key_sentences = get_key_sentences(sentences, get_pos_tags(words), num_sentences)
    summary = ' '.join(key_sentences)
    return summary, key_sentences

# Highlight summary sentences in the original text
def highlight_sentences(full_text, summary_sentences):
    """Wrap every occurrence of the summary sentences in ``<mark>`` tags.

    The result is meant to be rendered as HTML, so all text taken from
    ``full_text`` is HTML-escaped (``&``, ``<``, ``>``); only the inserted
    ``<mark>`` tags are live markup.

    Args:
        full_text: The original text.
        summary_sentences: Sentences to highlight; surrounding whitespace is
            ignored, and empty or duplicate entries are skipped.

    Returns:
        The escaped text with each matched sentence wrapped in
        ``<mark>...</mark>``. Sentences that do not occur in ``full_text``
        are ignored.
    """
    targets = []
    for sentence in summary_sentences:
        stripped = sentence.strip()
        # An empty needle would match between every character; a duplicate
        # would only produce a redundant alternative.
        if stripped and stripped not in targets:
            targets.append(stripped)

    if not targets:
        return html.escape(full_text, quote=False)

    # Longest first, so a sentence that contains another selected sentence is
    # matched as a whole instead of being split by the shorter one.
    targets.sort(key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(target) for target in targets))

    # A single left-to-right pass guarantees marks never overlap or nest and
    # that the inserted tags themselves are never searched.
    pieces = []
    cursor = 0
    for match in pattern.finditer(full_text):
        pieces.append(html.escape(full_text[cursor:match.start()], quote=False))
        pieces.append(f"<mark>{html.escape(match.group(0), quote=False)}</mark>")
        cursor = match.end()
    pieces.append(html.escape(full_text[cursor:], quote=False))
    return "".join(pieces)

# Streamlit UI: choose an input source, preview its text, then summarize it
# and show the summary sentences highlighted inside the original text.
st.title("Legal Text Summarization System")

option = st.radio("Choose an action:", ("Upload Image and Summarize", "Upload Text File and Summarize"))

if option == "Upload Image and Summarize":
    uploaded_image = st.file_uploader("Upload a JPG/PNG image", type=["jpg", "jpeg", "png"])
    if uploaded_image:
        st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)
        # extract_text also writes the OCR result to this file in the working directory.
        output_txt_file = "ocr_output.txt"

        with st.spinner("Performing OCR..."):
            extracted_text, file_path = extract_text(uploaded_image, output_txt_file)
            st.success("OCR Completed!")
            st.write("Extracted Text:")
            st.text_area("Preview Text", value=extracted_text, height=200)

        st.download_button(
            label="Download Extracted Text File",
            data=extracted_text,
            file_name="ocr_output.txt",
            mime="text/plain",
        )

        st.subheader("Text Summarization")
        num_sentences = st.slider("Select Number of Summary Sentences", min_value=1, max_value=10, value=3)

        if st.button("Summarize Extracted Text"):
            with st.spinner("Summarizing Text..."):
                summary, key_sentences = summarize_text(extracted_text, num_sentences)
                st.success("Summarization Completed!")
                st.subheader("Highlighted Summary in Text:")

                # The highlighted text contains <mark> tags, hence unsafe_allow_html.
                highlighted = highlight_sentences(extracted_text, key_sentences)
                st.markdown(highlighted, unsafe_allow_html=True)

                with st.expander("Show Only Summary"):
                    st.text(summary)

                st.download_button(
                    label="Download Summarized Text File",
                    data=summary,
                    file_name="summary.txt",
                    mime="text/plain",
                )

elif option == "Upload Text File and Summarize":
    uploaded_text_file = st.file_uploader("Upload a Text File", type=["txt"])
    if uploaded_text_file:
        # getvalue() returns the whole buffer independent of the stream's
        # current read position. "utf-8-sig" drops a leading BOM so it does
        # not end up glued to the first word, and errors="replace" keeps a
        # non-UTF-8 upload from aborting the app with UnicodeDecodeError.
        text = uploaded_text_file.getvalue().decode("utf-8-sig", errors="replace")
        st.write("Uploaded Text:")
        st.text_area("Preview Text", value=text, height=300)

        num_sentences = st.slider("Select Number of Summary Sentences", min_value=1, max_value=10, value=3)

        if st.button("Summarize Uploaded Text"):
            with st.spinner("Summarizing with NLTK..."):
                progress = st.progress(0)
                progress.progress(10)

                summary, key_sentences = summarize_text(text, num_sentences)

                progress.progress(90)
                progress.progress(100)
                st.success("Summarization Completed!")

                st.subheader("Highlighted Summary in Text:")
                # The highlighted text contains <mark> tags, hence unsafe_allow_html.
                highlighted = highlight_sentences(text, key_sentences)
                st.markdown(highlighted, unsafe_allow_html=True)

                with st.expander("Show Only Summary"):
                    st.text(summary)

                st.download_button(
                    label="Download Summarized Text File",
                    data=summary,
                    file_name="summary.txt",
                    mime="text/plain",
                )
