# Problem Statement:
### Design and implement a prototype of an AI-powered summarization bot that can generate concise summaries of startup applications for investors' review.

In [1]:
import dash
from dash import dcc, html, Input, Output, State
from dash.dependencies import Input, Output
from transformers import pipeline
import pdfplumber
import base64
import io
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from docx import Document
nltk.download('stopwords')
nltk.download('punkt')

# Dash application object; the layout is attached to it further down.
app = dash.Dash(__name__)

# Summarization pipeline using the BART CNN checkpoint.
# NOTE(review): only the model name is passed; no revision is pinned here.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Text shown inside the upload drop zone.
_upload_prompt = html.Div([
    'Drag and Drop or ',
    html.A('Select a Startup Application Document')
])

# Dashed, centered box used as the drop zone.
_upload_box_style = {
    'width': '50%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

# Page structure: title, upload control, read-only summary box.
_page_title = html.H1(
    "AI-Powered Startup Application Summarizer",
    style={'color': 'blue'},
)
_upload_control = dcc.Upload(
    id='upload-data',
    children=_upload_prompt,
    style=_upload_box_style,
    # Single-file upload: `contents`/`filename` are scalars, not lists.
    multiple=False
)
_summary_box = dcc.Textarea(
    id='summary-output',
    readOnly=True,
    style={'width': '100%', 'height': '200px'}
)

app.layout = html.Div([_page_title, _upload_control, _summary_box])

# Callback to generate and display the summary
@app.callback(
    Output('summary-output', 'value'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename')
)
def generate_summary(contents, filename):
    """Summarize the uploaded document for the summary textarea.

    Args:
        contents: The upload component's ``contents`` value, or ``None``
            when no file has been provided.
        filename: The upload component's ``filename`` value; forwarded to
            the text extractor together with ``contents``.

    Returns:
        The summary text produced by the summarizer, an empty string when
        there is no upload, or a short notice when the document yields no
        text to summarize.
    """
    # Nothing uploaded: return an explicit empty value instead of an
    # implicit None.
    if contents is None:
        return ''

    document_text = extract_text_from_document(contents, filename)
    if not document_text:
        return 'No text could be extracted from the uploaded document.'

    # Preprocess the text before summarization
    preprocessed_text = preprocess_text(document_text)
    if not preprocessed_text:
        return 'No text could be extracted from the uploaded document.'

    # truncation=True clips over-long input to the model's maximum input
    # length; without it, long documents can make the pipeline fail rather
    # than produce a summary.
    summary = summarizer(
        preprocessed_text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']

# ... (rest of your code)

if __name__ == '__main__':
    # Newer Dash releases expose `app.run` and have retired `run_server`;
    # prefer `run` when present and fall back for older installs. The
    # short-circuit avoids touching `run_server` where it no longer exists.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rohit\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rohit\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
