In [67]:
# I will use the ollama library directly to summarize an arbitrary web page.
# Using BeautifulSoup, I will strip all the tags, keep the raw text, and pass that to the Llama 3.2 model.
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import ollama
import gradio as gr

In [68]:
# A class to represent a Webpage

class Website:
    """A web page fetched over HTTP and reduced to its title and visible text.

    Attributes:
        url: The address that was requested.
        body: Raw response bytes returned by the server.
        title: Contents of the page's <title> tag, or "No title found".
        text: Page text with script/style/img/input elements removed,
            fragments separated by newlines.
    """
    url: str
    body: bytes
    title: str
    text: str

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title and readable text.

        Args:
            url: Absolute URL including the http:// or https:// scheme.
            timeout: Seconds passed to ``requests.get`` so an unresponsive
                server cannot block the notebook (or the UI) forever.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # <title> may be missing, empty, or contain nested tags; in the last
        # two cases `.string` is None, so fall back to the placeholder then too.
        title_tag = soup.title
        title_text = title_tag.string if title_tag is not None else None
        self.title = title_text or "No title found"

        # html.parser does not synthesize a <body>; for fragments or plain-text
        # responses use the whole document instead of crashing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root.find_all(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

    def get_contents(self):
        """Return the title and text formatted as a single prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [69]:
# System prompt sent to the model. Written as adjacent string literals so each
# sentence boundary keeps its space: a trailing backslash inside a literal joins
# the lines with nothing in between, which previously produced "markdown.Include".
systemPrompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short humorous, entertaining, jokey summaries about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a plain, non-humorous brochure - this demonstrates how easy it is to change 'tone':

# systemPrompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

In [70]:
# A function that writes a user prompt based on the URL being passed
def userPrompt(company,url):
    """Build the user message for the model from a company name and its page.

    Args:
        company: Company name to mention in the instructions.
        url: Address of the page to fetch; it is loaded via ``Website``.

    Returns:
        A string holding the page title, the summarization instructions and,
        after a blank line, the text extracted from the page.
    """
    webSite = Website(url)
    # End the title sentence with a newline so it does not run into the next one.
    user_prompt = f"You are looking at a website titled {webSite.title}\n"
    user_prompt += (
        f"The contents of the website are for the company : {company}; "
        "please provide a short summary of this company in markdown. "
        "If there are any news , then summarize them as well.\n\n"
    )
    user_prompt += webSite.text
    return user_prompt

In [71]:
def streamOllama(company,url,model):
    """Stream a chat completion from Ollama, yielding the text accumulated so far.

    The system message is ``systemPrompt`` and the user message comes from
    ``userPrompt(company, url)``. After every streamed chunk the generator
    yields the full response received up to that point, not just the new piece.
    """
    conversation = [
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": userPrompt(company,url)},
    ]

    transcript = ""
    for part in ollama.chat(model=model, messages=conversation, stream=True):
        delta = part['message']['content']
        # Chunks with empty or missing content add nothing but still yield.
        if delta:
            transcript += delta
        yield transcript
    

In [72]:
def stream_brochure(company_name, url, model):
    """Yield the progressively growing brochure text for the chosen model.

    Raises:
        ValueError: When ``model`` is not one of the supported Ollama models.
            Because this is a generator, the error surfaces on first iteration.
    """
    if model not in ("llama3.2", "llava", "mistral"):
        raise ValueError("Unknown model")
    yield from streamOllama(company_name,url, model)

In [73]:
#runAndDisplay()

In [74]:
# gradio time

In [75]:
# Dark mode for Gradio: a JavaScript snippet that checks the page URL's `__theme`
# query parameter and, if it is not 'dark', sets it and reloads the page at the
# updated URL.
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""
# To use it, pass js=force_dark_mode inside the gr.Interface(...) call.


In [76]:
# Gradio front end: company name, landing-page URL and model choice are passed
# to stream_brochure, and its streamed output is rendered as Markdown.
view = gr.Interface(
    fn=stream_brochure,
    inputs=[
        gr.Textbox(label="Company name:"),
        gr.Textbox(label="Landing page URL including http:// or https://"),
        gr.Dropdown(choices=["llama3.2", "llava", "mistral"], label="Select model"),
    ],
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never",
)
view.launch()