In [14]:
import io
import os
import re
import tempfile

import docx
import spacy
import streamlit as st
from docx import Document
from docx.shared import Pt
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
@st.cache_resource
def _load_nlp_model(model_name="en_core_web_md"):
    """Load the spaCy pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction, so a
    bare ``spacy.load`` at module level would reload the model each time.
    ``st.cache_resource`` keeps a single shared instance instead.

    Args:
        model_name: Name of the installed spaCy model package to load.

    Returns:
        The loaded spaCy ``Language`` pipeline.
    """
    return spacy.load(model_name)


# Shared pipeline used by preprocess_text() and find_closest_match_nlp().
nlp = _load_nlp_model()

In [4]:
def preprocess_text(text):
    """Normalize ``text`` for similarity matching.

    The text is lowercased and run through the module-level ``nlp`` pipeline;
    tokens flagged as stop words or punctuation are discarded and the
    remaining token texts are joined with single spaces.

    Args:
        text: Raw input string.

    Returns:
        The space-joined string of kept tokens (empty if none survive).
    """
    kept_tokens = []
    for tok in nlp(text.lower()):
        # Skip anything that is a stop word or punctuation mark.
        if tok.is_stop or tok.is_punct:
            continue
        kept_tokens.append(tok.text)
    return ' '.join(kept_tokens)

In [5]:
def find_closest_match_nlp(user_input, dataset, threshold=0.2):
    """Return the key of ``dataset`` most similar to ``user_input``.

    Similarity is the cosine similarity between the ``nlp`` document vector of
    ``user_input`` (used as given; it is not preprocessed here) and the vector
    of each key after ``preprocess_text``. On ties the earliest key wins.

    Args:
        user_input: Query string to match.
        dataset: Mapping whose keys are the candidate phrases.
        threshold: Minimum similarity the best candidate must reach.

    Returns:
        The best-matching key, or ``None`` when the best similarity is below
        ``threshold`` (which includes the case of an empty ``dataset`` with the
        default threshold).
    """
    query_vec = nlp(user_input).vector
    best_key, best_score = None, -1

    for candidate in dataset.keys():
        candidate_vec = nlp(preprocess_text(candidate)).vector
        score = cosine_similarity([query_vec], [candidate_vec])[0][0]
        # Strict comparison: a later candidate must beat, not equal, the best.
        if score > best_score:
            best_key, best_score = candidate, score

    if best_score < threshold:
        return None
    return best_key

In [6]:
def fill_template(template_text, user_input):
    """Substitute ``{{ key }}`` placeholders in ``template_text``.

    Placeholders are matched regardless of the whitespace inside the braces
    (``{{key}}``, ``{{ key }}``, ``{{  key  }}``), and keys in ``user_input``
    are compared after stripping surrounding whitespace, so keys extracted
    verbatim from between the braces still match.

    Substitution happens in a single pass: text inserted as a value is never
    re-scanned, so a value that itself contains ``{{ ... }}`` is kept
    literally. Placeholders with no corresponding key are left unchanged.

    Args:
        template_text: Text containing zero or more placeholders.
        user_input: Mapping of placeholder name to replacement value; values
            are converted with ``str``.

    Returns:
        ``template_text`` with every known placeholder replaced.
    """
    values = {str(key).strip(): str(value) for key, value in user_input.items()}

    def _substitute(match):
        # Fall back to the original placeholder text when the name is unknown.
        return values.get(match.group(1).strip(), match.group(0))

    # A callable replacement avoids re.sub interpreting backslashes in values.
    return re.sub(r"\{\{(.*?)\}\}", _substitute, template_text)

In [12]:
def download_document(doc_content, filename='generated_document.docx'):
    """Render a Streamlit download button serving ``doc_content``.

    The document bytes are handed to ``st.download_button`` directly. Passing
    a temporary file *path* as ``data`` would make the download contain the
    path string rather than the document, and the temporary directory would
    be removed as soon as this function returned.

    Args:
        doc_content: The document payload to offer for download (the bytes of
            a .docx file).
        filename: File name suggested to the browser.
    """
    st.download_button(
        label="Download Document",
        data=doc_content,
        file_name=filename,
        # Registered MIME type for Word .docx files.
        mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )

In [20]:
# Known queries mapped to the preview image and .docx template that serve them.
# Insertion order matters: find_closest_match_nlp keeps the earliest key on ties.
dataset = {
    "document required to buy a house": {
        "image": "purchase_template.jpg",
        "template": "purchase_template.docx",
    },
    "how to create a will": {
        "image": "will_template.png",
        "template": "will_template.docx",
    },
    "steps for filing a divorce": {
        "image": "Divorce-Paper-template.jpg",
        "template": "Divorce-Paper-template.docx",
    },
}

In [8]:
# Main page heading of the Streamlit app.
st.title("Document Recommendation and Generation System")

2024-02-28 11:13:47.027 
  command:

    streamlit run C:\Users\Mohanrao\anaconda3\envs\python_sem4\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [21]:
# Sidebar: heading plus the free-text query box that drives the search.
with st.sidebar:
    st.title("Input")
    user_input = st.text_input("Enter your query:")

# Main area: short usage hint.
st.write("Welcome to the automated tool! Please enter your query in the sidebar to search for similar documents.")

In [10]:
# Streamlit reruns this on every interaction, so skip the spaCy work (one
# pipeline pass per dataset key) when there is nothing to match. An empty
# query yields no match either way.
preprocessed_input = preprocess_text(user_input) if user_input else ""
closest_match = (
    find_closest_match_nlp(preprocessed_input, dataset)
    if preprocessed_input
    else None
)

In [22]:
def _extract_placeholders(text):
    """Return the distinct ``{{ name }}`` placeholder names found in ``text``.

    Names are stripped of surrounding whitespace, empty names are dropped, and
    each name appears once, in order of first appearance.
    """
    stripped = (raw.strip() for raw in re.findall(r"\{\{(.*?)\}\}", text))
    return list(dict.fromkeys(name for name in stripped if name))


if user_input:
    if closest_match:
        st.image(dataset[closest_match]["image"], caption=f"Closest match: {closest_match}")

        # Load document template
        template_path = dataset[closest_match]["template"]
        with open(template_path, "rb") as file:
            template_doc = Document(file)
        template_text = "\n".join([paragraph.text for paragraph in template_doc.paragraphs])

        # Display document template and input fields
        st.header("Document Template:")
        filled_template_text = st.text_area("Edit Document", template_text, height=300)

        # One input per distinct placeholder. Every placeholder in a paragraph
        # is found (not just the first), names are stripped so they match the
        # "{{ name }}" form fill_template looks for, and de-duplication keeps
        # the widget keys unique when a placeholder is repeated.
        user_input_fields = {}
        for blank in _extract_placeholders(template_text):
            user_input_fields[blank] = st.text_input(f"Enter value for '{blank}':", key=blank)

        # Generate new document
        if st.button("Generate Document"):
            filled_text = fill_template(filled_template_text, user_input_fields)
            st.write(filled_text)

    else:
        st.warning("No similar document found.")