|
6 | 6 | import json
|
7 | 7 | import logging
|
8 | 8 | import os
|
| 9 | +from urllib.parse import urlparse |
9 | 10 |
|
10 | 11 | import gradio as gr
|
11 | 12 | import requests
|
12 | 13 | import uvicorn
|
13 | 14 | from fastapi import FastAPI
|
14 |
| -from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader |
| 15 | +from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, UnstructuredURLLoader |
15 | 16 |
|
16 | 17 | # Configure logging
|
17 | 18 | logging.basicConfig(level=logging.INFO)
|
@@ -91,6 +92,42 @@ def read_video_file(self, file):
|
91 | 92 | base64_str = self.encode_file_to_base64(file)
|
92 | 93 | return self.generate_summary(base64_str, document_type="video")
|
93 | 94 |
|
def is_valid_url(self, url):
    """Check whether ``url`` is an absolute URL that names a host.

    Args:
        url: The candidate URL. Anything that is not a string is rejected.

    Returns:
        bool: True when the URL has both a scheme and a host name,
            False otherwise (including malformed or non-string input).
    """
    # urlparse() fails with AttributeError on arbitrary non-string objects;
    # treat those as "not a URL" instead of crashing the caller.
    if not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
    except ValueError:
        # Raised for malformed authorities, e.g. an unbalanced IPv6 bracket.
        return False

    # Check hostname rather than netloc: a netloc such as ":8080" or "user@"
    # is non-empty but does not identify any host to connect to.
    return bool(parsed.scheme and parsed.hostname)
| 101 | + |
def read_url(self, url):
    """Read and process the content of a url.

    Args:
        url: The url to be read as a document. Surrounding whitespace is ignored.

    Returns:
        str: The content of the website, or an error message if the url is
            invalid or its content could not be loaded.
    """
    self.page_content = ""

    # Text pasted into the UI frequently carries stray spaces or a trailing newline.
    if isinstance(url, str):
        url = url.strip()

    logger.info(">>> Reading url: %s", url)
    if not self.is_valid_url(url=url):
        msg = f"Invalid URL '{url}'. Make sure the link provided is a valid URL"
        logger.error(msg)
        return msg

    # no_proxy entries are matched against host names, so register the host
    # (not the full URL), and only once so the variable does not grow per call.
    host = urlparse(url).hostname
    if host:
        current = os.environ.get("no_proxy", "")
        known_hosts = {entry.strip() for entry in current.split(",")}
        if host not in known_hosts:
            os.environ["no_proxy"] = f"{current},{host}".strip(",")

    try:
        loader = UnstructuredURLLoader([url])
        documents = loader.load()
    except Exception as e:
        msg = f"There was an error trying to read '{url}' --> '{e}'\nTry adding the domain name to your `no_proxy` variable and try again. Example: example.com*"
        logger.error(msg)
        # Surface the failure to the caller, as the invalid-URL branch does.
        return msg

    if documents:
        # Only the first loaded document is used as the page content.
        self.page_content = documents[0].page_content
    else:
        logger.warning("No content could be extracted from url: %s", url)

    return self.page_content
| 130 | + |
94 | 131 | def generate_summary(self, doc_content, document_type="text"):
|
95 | 132 | """Generate a summary for the given document content.
|
96 | 133 |
|
@@ -201,6 +238,25 @@ def render(self):
|
201 | 238 | )
|
202 | 239 | submit_btn.click(fn=self.generate_summary, inputs=[input_text], outputs=[generated_text])
|
203 | 240 |
|
| 241 | + with gr.Blocks() as url_ui: |
| 242 | + # URL text UI |
| 243 | + with gr.Row(): |
| 244 | + with gr.Column(): |
| 245 | + input_text = gr.TextArea( |
| 246 | + label="Please paste a URL for summarization", |
| 247 | + placeholder="Paste a URL for the information you need to summarize", |
| 248 | + ) |
| 249 | + submit_btn = gr.Button("Generate Summary") |
| 250 | + with gr.Column(): |
| 251 | + generated_text = gr.TextArea( |
| 252 | + label="Text Summary", placeholder="Summarized text will be displayed here" |
| 253 | + ) |
| 254 | + submit_btn.click( |
| 255 | + lambda input_text: self.generate_summary(self.read_url(input_text)), |
| 256 | + inputs=input_text, |
| 257 | + outputs=generated_text, |
| 258 | + ) |
| 259 | + |
204 | 260 | # File Upload UI
|
205 | 261 | file_ui = self.create_upload_ui(
|
206 | 262 | label="Please upload a document (.pdf, .doc, .docx)",
|
@@ -232,6 +288,8 @@ def render(self):
|
232 | 288 | audio_ui.render()
|
233 | 289 | with gr.TabItem("Upload Video"):
|
234 | 290 | video_ui.render()
|
| 291 | + with gr.TabItem("Enter URL"): |
| 292 | + url_ui.render() |
235 | 293 |
|
236 | 294 | return self.demo
|
237 | 295 |
|
|
0 commit comments