In [1]:
import os, requests, json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# The OpenAI SDK looks for OPENAI_API_KEY. The original lookup used
# "OPEN_API_KEY", which looks like a typo; it is kept as a fallback so an
# existing .env that uses that spelling keeps working.
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPEN_API_KEY")

MODEL = 'gpt-4o-mini'
# Hand the key to the client explicitly so the value read above is actually used.
# When it is None the client falls back to its own environment lookup.
openai = OpenAI(api_key=api_key)

In [3]:
class Website:
    """A fetched web page reduced to its title, body text and link targets.

    Constructing an instance performs a blocking HTTP GET of the given URL.
    """

    url: str
    title: str
    text: str
    body: str  # annotation kept for compatibility; __init__ does not assign it
    links: List[str]

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled server raises instead of hanging the notebook.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        # soup.title.string can be None even when a <title> tag exists; fall
        # back to the placeholder instead of rendering the text "None".
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title!"
        if soup.body:
            # Remove elements whose content is not wanted in the extracted text.
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ""
        # Keep only anchors that actually carry a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as a prompt section."""
        return f"Webpage title:\n{self.title}\n\nWebpage Content:\n{self.text}\n\n"

In [4]:
# Fetch a sample page (performs a live HTTP request) and print its title and
# extracted text as a quick check of the Website class.
ed = Website("https://edwarddonner.com")
print(ed.get_contents())

Webpage title:
Home - Edward Donner

Webpage Content:
Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers a

In [5]:
# Show the non-empty href values collected from the page's <a> tags.
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'ht

In [6]:
# System prompt for the link-selection request. Adjacent string literals are
# joined inside parentheses and each piece ends with an explicit space; the
# previous backslash continuations ran the sentences together
# ("webpage.You", "company,such", "pages.You").
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about a company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages. "
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links":[
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
# Print the assembled system prompt to check its wording and the JSON example.
print(link_system_prompt)

You are provided with a list of links found on a webpage.You are able to decide which of the links would be most relevant to include in a brochure about a company,such as links to an About page, or a Company page, or Careers/Jobs pages.You should respond in JSON as in this example:
{
    "links":[
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}



In [8]:
def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-relevant links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instructions followed by one link per line.
    """
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    # Implicit literal concatenation with explicit spaces; the previous
    # backslash continuation produced "URL:Do not".
    user_prompt += (
        "Please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL: "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [9]:
# Preview the user prompt built from the links of the page fetched earlier.
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - Please decide which of these are relevant web links for a brochure about the company, respond with the full https URL:Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2024/12/21/llm-resources-su

In [10]:
def get_links(url):
    """Fetch ``url`` and ask the chat model which of its links suit a brochure.

    Returns:
        The model's JSON reply decoded with ``json.loads``.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    reply = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Ask the API for a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    return json.loads(reply.choices[0].message.content)

In [11]:
# Try link selection on a real site (fetches the page and calls the chat model).
get_links("https://www.anthropic.com/")

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'news page', 'url': 'https://www.anthropic.com/news'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'}]}

### Making a Brochure

In [12]:
def get_all_details(url):
    """Collect the text of the landing page and of each model-selected link.

    Args:
        url: Landing page of the site to summarise.

    Returns:
        One string containing the landing page contents followed by a titled
        section for every selected link.
    """
    from urllib.parse import urljoin

    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print(f"Found links: {links}")
    # The model's JSON is not guaranteed to contain a usable "links" list.
    for link in links.get('links') or []:
        sections.append(f"\n\n{link['type']}\n")
        # Resolve against the landing page so a relative URL returned by the
        # model can still be fetched; absolute URLs pass through unchanged.
        sections.append(Website(urljoin(url, link['url'])).get_contents())
    return "".join(sections)

In [13]:
# Print the combined page text that will be fed into the brochure prompt.
print(get_all_details("https://www.anthropic.com/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'api page', 'url': 'https://www.anthropic.com/api'}]}
Landing page:
Webpage title:
Home \ Anthropic

Webpage Content:
Claude
Overview
Team
Enterprise
API
Pricing
Research
Company
Careers
News
Try Claude
AI
research
and
products
that put safety at the frontier
Claude.ai
Meet Claude 3.7 Sonnet
Claude 3.7 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Create AI-powered applications and custom experiences using Claude.
Learn more
Claude 3.7 Sonnet and Claude Code
Introducing Claude 3.7 Sonnet, our most intelligent model yet and the first hybrid reasoning model. We’re also launchi

In [None]:
# Brochure system prompt. Adjacent literals with explicit trailing spaces
# replace the backslash continuations, which produced "websiteand" and
# "markdown.Include".
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant webpages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information"
)

# system_prompt = "You are an assistant that analyzes the contents of several relevant webpages from a company website\
# and creates a short humorous, entertaining and jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information"

In [30]:
def get_brochure_system_prompt(expression = "formal"):
    """Return the brochure system prompt written for the requested tone.

    Args:
        expression: Adjective describing the brochure's tone (e.g. "formal",
            "funny"); it is inserted before "short brochure".

    Returns:
        The system prompt string.
    """
    # The middle literal must be an f-string: without the prefix the model
    # received the literal text "{expression}". Separate literals also avoid
    # the runs of spaces that indented backslash continuations embedded.
    return (
        "You are an assistant that analyzes the contents of several relevant webpages from a company website "
        f"and creates a {expression} short brochure about the company for prospective customers, investors and recruits. "
        "Respond in markdown. "
        "Include details of company culture, customers and careers/jobs if you have the information"
    )

In [31]:
def get_brochure_user_prompt(company_name, url, language = "english", max_chars = 20_000):
    """Build the user message containing the scraped site contents.

    Args:
        company_name: Name of the company, used in the prompt's first line.
        url: Landing page whose contents (and selected sub-pages) are included.
        language: Language the brochure should be written in.
        max_chars: Upper bound on the length of the returned prompt.

    Returns:
        The prompt, cut to at most ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown in {language} language.\n"
    user_prompt += get_all_details(url)
    # Truncate rather than append: the previous `+=` added a second copy of
    # the first 20,000 characters, making the prompt longer instead of capped.
    return user_prompt[:max_chars]

In [None]:
def create_brochure(company_name, url, language = "english", expression = "formal"):
    """Generate a brochure in a single request and render it as Markdown.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page of the company's website.
        language: Language the brochure should be written in.
        expression: Tone of the brochure, forwarded to the system prompt.

    The two optional arguments mirror ``stream_brochure`` so language and tone
    can be chosen per call; the defaults reproduce the previous behaviour.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": get_brochure_system_prompt(expression)},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url, language)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [17]:
# Generate and render a brochure for a sample company (fetches pages and calls the chat model).
create_brochure("VijayShree Toolings","https://www.vjtl.in/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.vjtl.in/aboutus.html'}, {'type': 'careers page', 'url': 'https://www.vjtl.in/careers.html'}, {'type': 'products page', 'url': 'https://www.vjtl.in/products.html'}, {'type': 'facilities page', 'url': 'https://www.vjtl.in/facilities.html'}]}


# **Vijayshree Wear Parts Pvt Ltd**

## **会社紹介**

Vijayshree Wear Parts Pvt Ltdは、タングステンカーバイドの工具と摩耗部品の製造および供給において30年以上の経験を持つエンジニアチームによって設立されました。2009年にVijayshree Toolingsとして設立され、2017年にVijayshree Wear Parts Pvt Ltdとして新たに構成されました。さまざまな用途に対応するタングステンカーバイドの摩耗部品と工具を提供し、一般工学、石油・ガス、電子スタンピング、金属成形、および粉末圧縮業界で使用されています。

## **製品ライン**

- **スリッティングカッター**
- **成形コアロッド**
- **ピアシングパンチ**
- **ヘッディングペレット**
  
当社は、50年以上の経験を持つ日本のカーバイドメーカーと提携しており、高度な技術的および製品サポートを提供しています。

## **ビジョンとミッション**

当社のミッションは、最新のタングステンカーバイド材料を使用して、お客様の部品コストを削減することです。顧客からの信頼と支持は、私たちが最高のサービスを提供し、顧客にとっての価値を創出する原動力です。

## **製造施設**

当社は、プネとナシックに複数の製造クラスターを持ち、ワークオーダー契約に基づいて運営しています。各クラスターはタングステンカーバイド部品の扱いに特化しており、寸法公差を厳守しています。これにより、コストと納期を厳しく管理することができます。

## **会社文化**

Vijayshreeでは、社員が仕事に情熱を持ち、自己の最大の潜在能力を開花させることができる支援的な環境を提供しています。社員は、自分の仕事のやり方をコントロールし、意思決定に大きな責任を持つことが期待されます。

## **キャリア情報**

Vijayshree Wear Partsでは、営業やマーケティングの職に関するオープニングがあります。やる気のある人材を求めており、履歴書は以下のメールアドレスまで送付してください。
- **メール:** pune@vjtl.in

## **お問合せ**

興味をお持ちいただけましたら、ぜひご連絡ください。

- **電話番号**: +91 0253 6693924 / +91 8149369430

--- 

Vijayshree Wear Parts Pvt Ltdでは、卓越した製品とサービスを通じて、お客様に価値を提供し、共に成長していくことを目指します。

In [32]:
def stream_brochure(company_name, url, language = "english", expression = "formal"):
    """Generate a brochure and yield the accumulated markdown as it streams in.

    This is a generator: no request is made until it is iterated.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page of the company's website.
        language: Language the brochure should be written in.
        expression: Tone of the brochure, forwarded to the system prompt.

    Yields:
        The brochure text received so far, with code-fence markers removed.
    """
    import re

    stream = openai.chat.completions.create(
        model = MODEL,
        messages = [
            {"role": "system", "content": get_brochure_system_prompt(expression)},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url, language)}
        ],
        stream = True
    )

    # The display handle created here previously was never updated, so it is
    # gone; callers render the yielded text themselves.
    raw = ""
    for chunk in stream:
        # A chunk may arrive with an empty choices list; indexing it would raise.
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ""
        # Strip fence markers (optionally tagged "markdown") from the text so
        # far. Only the tag attached to a fence is removed, so the word
        # "markdown" in the brochure body is preserved.
        yield re.sub(r"```(?:markdown)?", "", raw)

In [None]:
# stream_brochure is a generator and needs all four arguments: calling it with
# three raised TypeError, and an un-iterated generator renders nothing.
# Iterate it and redraw one display slot so the brochure grows in place.
brochure_display = display(Markdown(""), display_id=True)
for partial_brochure in stream_brochure("VijayShree Toolings", "https://www.vjtl.in/", "marathi", "formal"):
    update_display(Markdown(partial_brochure), display_id=brochure_display.display_id)

Found links: {'links': [{'type': 'about page', 'url': 'https://www.vjtl.in/aboutus.html'}, {'type': 'careers page', 'url': 'https://www.vjtl.in/careers.html'}, {'type': 'facilities page', 'url': 'https://www.vjtl.in/facilities.html'}, {'type': 'products page', 'url': 'https://www.vjtl.in/products.html'}]}



# ヴィジャイシュリー・ウェアパーツ株式会社のご案内

## 会社概要
ヴィジャイシュリー・ウェアパーツ株式会社は、タングステンカーバイド工具および摩耗部品の製造と供給において30年以上の経験を持つ熟練したエンジニアチームから成る企業です。2009年にヴィジャイシュリーツールズとして設立され、2017年に現在の法人名に改称しました。

当社は、一般工学、石油・ガス、電気スタンピング、金属成形および粉体圧縮産業において使用されるタングステンカーバイドの摩耗部品と工具の製造に多様な応用経験を持っています。日本のカーバイドメーカーと提携し、革新的な技術と40年以上の経験を持つサポートを受けております。

## 製品群
- **金属切断用**
- **電気スタンピング用**
- **ファスナー産業**
- **粉体圧縮産業**
- **自動車産業**
- **包装産業**
- **一般的な摩耗部品**
- **プラスチック産業**
- **ベアリング産業**
- **スプリング産業**
- **食品産業**

## 会社の使命
「先進的なタングステンカーバイド材料を用いて、貴社の部品コストを削減すること」

## 会社文化
当社では、従業員が自分の能力を最大限に発揮できるようにサポートする環境を整えています。業務を効率的に行うことが奨励され、各自がより大きな責任を持ち、キャリアを進める機会が与えられます。私たちは技術サポート、品質保証、納期厳守、アフターサービスに力を入れています。

## キャリア機会
私たちのチームに参加することを希望される方へ:
- **募集職種**:
  - 営業
  - マーケティング
- ご興味のある方は履歴書を以下のメールアドレスに送付してください: [pune@vjtl.in](mailto:pune@vjtl.in)

## 製造施設
当社はプネとナシクに様々な製造クラスターを有し、タングステンカーバイド部品の精密加工を行なっています。最新の機器を備えた施設で、寸法公差を維持し、コストと納期の厳格な管理が可能です。

## お問い合わせ
さらに詳しい情報や見積もりをご希望の場合は、以下の連絡先までお問い合わせください。

- 電話: +91 0253 6693924 / +91 8149369430
- ウェブサイト: [Vijayshree Wear Parts](http://www.vjtl.in)

---

**ヴィジャイシュリー・ウェアパーツ株式会社**は、ISO 9001:2015 認証を受けています。お客様の期待に応えるため、常に企業理念に基づいて進化を続けています。


In [None]:
import gradio as gr

# Input widgets, listed in the positional order stream_brochure expects.
brochure_inputs = [
    gr.Textbox(label="Enter Company Name"),
    gr.Textbox(label="Enter company URL"),
    gr.Dropdown(["english","japanese","marathi"], label="Select language", value="english"),
    gr.Dropdown(["formal","funny","playful","loud"], label="Select expression", value="formal"),
]
brochure_app = gr.Interface(
    fn=stream_brochure,
    inputs=brochure_inputs,
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never",
)
# share=True requests a public share link in addition to the local URL.
brochure_app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7865
* Running on public URL: https://5ddd60a33b297f09d4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


