In [1]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import AzureOpenAI

In [2]:
# Load Azure OpenAI credentials from a dotenv file into the process environment.
# NOTE(review): this is an absolute path on one specific Windows machine, so the
# notebook will not find its credentials anywhere else — consider a relative path
# or an environment variable.
load_dotenv(r"C:\Users\HP\OneDrive\Documents\credentials.env")
# Each lookup raises KeyError if the variable is missing from the environment.
api_key = os.environ['API_KEY']
api_base = os.environ['RESOURCE_ENDPOINT']
# chat_model_id is what every chat.completions.create call below passes as `model`.
chat_model_id=os.environ['chat_model_id']
# NOTE(review): chat_model and api_type are not referenced by any cell shown in this notebook.
chat_model=os.environ['chat_model']
api_type = "azure"
api_version = "2023-06-01-preview"

# Shared client used by every cell below. The instance is bound to the name `openai`.
openai = AzureOpenAI(
    azure_endpoint=api_base,
    api_key=api_key, 
    api_version=api_version,
)

In [3]:
# HTTP headers sent with every page request made by Website: a desktop Chrome
# User-Agent string (presumably so sites that reject default client agents still respond).
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A scraped web page: its title, its visible body text and the links it contains.

    Attributes:
        url: the address that was requested.
        body: the raw response bytes.
        title: contents of the <title> tag, or "No title found".
        text: newline-separated text of <body> with script/style/img/input
            elements removed; "" when the page has no <body>.
        links: every non-empty href found on an <a> tag, in document order.
            Values are kept exactly as written in the page, so they may be
            relative and may repeat.
    """

    def __init__(self, url, timeout=10):
        """
        Download `url` and parse it.

        Args:
            url: address of the page to fetch.
            timeout: seconds passed to requests.get as its timeout. Without one,
                requests waits indefinitely on an unresponsive server.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        self.url = url
        # The HTTP status is not checked: an error page is parsed like any other page.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # soup.title.string is None when <title> is empty or contains nested tags,
        # so fall back in that case as well as when the tag is missing.
        self.title = (soup.title.string if soup.title else None) or "No title found"
        if soup.body:
            # Drop elements that contribute no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        # Anchors without an href (or with an empty one) are skipped.
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text as one labelled block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
# Scrape the AMS landing page and inspect the raw hrefs collected from its <a> tags.
data = Website("https://amspredict.com/")
data.links

['#content',
 '/',
 '/predict-platform/',
 '/payment-integrity/',
 '/risk-management/',
 '/business-intelligence',
 '/company',
 '/insights-updates',
 'https://amspredict.com/resources/',
 'https://amspredict.com/book-demo/',
 'https://predict.mdstrat.com',
 'https://amspredict.com',
 '/',
 '/predict-platform/',
 '/payment-integrity/',
 '/risk-management/',
 '/business-intelligence',
 '/company',
 '/insights-updates',
 'https://amspredict.com/resources/',
 'https://amspredict.com/book-demo/',
 'https://predict.mdstrat.com',
 'https://amspredict.com',
 '/book-demo',
 'https://predict.mdstrat.com',
 '/predict-platform/',
 '/book-demo',
 '/book-demo',
 '/payment-integrity',
 '/risk-management',
 '/business-intelligence',
 '/company',
 '/predict-platform',
 '/book-demo',
 '/',
 'https://amspredict.com/payment-integrity/',
 'https://amspredict.com/risk-management/',
 'https://amspredict.com/business-intelligence/',
 'https://amspredict.com/payment-integrity/',
 'https://amspredict.com/risk-

In [5]:
# System prompt for the link-selection call: describes the task and shows the
# JSON shape the reply must follow ({"links": [{"type": ..., "url": ...}, ...]}).
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
# The example must itself be valid JSON, since the model is told to imitate it.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
# Show the assembled system prompt; print() renders its newlines instead of the repr.
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [7]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: object exposing `url` (the page address) and `links`
            (an iterable of href strings).

    Returns:
        The instruction text followed by the links, one per line, exactly as
        they appear in `website.links`.
    """
    pieces = (
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    )
    return "".join(pieces)

In [9]:
# Preview the user prompt built from the links scraped into `data`.
print(get_links_user_prompt(data))

Here is the list of links on the website of https://amspredict.com/ - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
#content
/
/predict-platform/
/payment-integrity/
/risk-management/
/business-intelligence
/company
/insights-updates
https://amspredict.com/resources/
https://amspredict.com/book-demo/
https://predict.mdstrat.com
https://amspredict.com
/
/predict-platform/
/payment-integrity/
/risk-management/
/business-intelligence
/company
/insights-updates
https://amspredict.com/resources/
https://amspredict.com/book-demo/
https://predict.mdstrat.com
https://amspredict.com
/book-demo
https://predict.mdstrat.com
/predict-platform/
/book-demo
/book-demo
/payment-integrity
/risk-management
/business-intelligence
/company
/predict-platform
/book-demo
/
https://amspredict.com/payment-integrity/
https://amspredic

In [10]:
def get_links(url):
    """
    Ask the chat model which links on `url` are relevant for a company brochure.

    The page is scraped with Website, its links are sent to the model together
    with link_system_prompt, and the reply is parsed as JSON.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The parsed JSON reply. The system prompt asks for the form
        {"links": [{"type": ..., "url": ...}, ...]}, but the shape is not validated here.

    Raises:
        ValueError: if the reply contains no JSON object. A malformed object
            raises json.JSONDecodeError, which is a ValueError subclass.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=chat_model_id,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ]
    )
    # content can be None (e.g. a filtered reply); treat that as an empty reply.
    result = response.choices[0].message.content or ""
    # Chat models often wrap JSON in a ```json fence or add a sentence around it,
    # so parse only the outermost {...} span rather than the whole reply.
    start, end = result.find("{"), result.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"Model reply contains no JSON object: {result!r}")
    return json.loads(result[start:end + 1])

In [11]:
# Scrape the AMS landing page again and list its links.
# NOTE(review): the variable is named `huggingface` but holds the amspredict.com
# page; this repeats the earlier `data = Website(...)` cell.
huggingface = Website("https://amspredict.com/")
huggingface.links

['#content',
 '/',
 '/predict-platform/',
 '/payment-integrity/',
 '/risk-management/',
 '/business-intelligence',
 '/company',
 '/insights-updates',
 'https://amspredict.com/resources/',
 'https://amspredict.com/book-demo/',
 'https://predict.mdstrat.com',
 'https://amspredict.com',
 '/',
 '/predict-platform/',
 '/payment-integrity/',
 '/risk-management/',
 '/business-intelligence',
 '/company',
 '/insights-updates',
 'https://amspredict.com/resources/',
 'https://amspredict.com/book-demo/',
 'https://predict.mdstrat.com',
 'https://amspredict.com',
 '/book-demo',
 'https://predict.mdstrat.com',
 '/predict-platform/',
 '/book-demo',
 '/book-demo',
 '/payment-integrity',
 '/risk-management',
 '/business-intelligence',
 '/company',
 '/predict-platform',
 '/book-demo',
 '/',
 'https://amspredict.com/payment-integrity/',
 'https://amspredict.com/risk-management/',
 'https://amspredict.com/business-intelligence/',
 'https://amspredict.com/payment-integrity/',
 'https://amspredict.com/risk-

In [12]:
# Ask the model to pick the brochure-relevant links for the AMS site (makes a live API call).
get_links("https://amspredict.com/")

{'links': [{'type': 'predict platform',
   'url': 'https://amspredict.com/predict-platform/'},
  {'type': 'payment integrity',
   'url': 'https://amspredict.com/payment-integrity/'},
  {'type': 'risk management',
   'url': 'https://amspredict.com/risk-management/'},
  {'type': 'business intelligence',
   'url': 'https://amspredict.com/business-intelligence/'},
  {'type': 'company', 'url': 'https://amspredict.com/company/'},
  {'type': 'insights updates',
   'url': 'https://amspredict.com/insights-updates/'},
  {'type': 'resources', 'url': 'https://amspredict.com/resources/'},
  {'type': 'book demo', 'url': 'https://amspredict.com/book-demo/'},
  {'type': 'careers', 'url': 'https://amspredict.com/careers/'},
  {'type': 'linkedin',
   'url': 'https://www.linkedin.com/company/advanced-medical-strategies-llc/'},
  {'type': 'twitter', 'url': 'https://twitter.com/AMSmdstratcom'}]}

In [13]:
def get_all_details(url):
    """
    Collect the text of the landing page and of every link the model selected.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: "Landing page:" followed by the landing page contents, then
        for each selected link its type on its own line followed by that page's
        contents. Pages that cannot be downloaded are left out.
    """
    from urllib.parse import urljoin  # local import keeps this cell self-contained

    result = "Landing page:\n"
    result += Website(url).get_contents()
    # NOTE: get_links builds its own Website(url), so the landing page is downloaded twice.
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        # The model is asked for absolute URLs but may echo a relative href;
        # urljoin leaves absolute URLs unchanged and resolves relative ones.
        page_url = urljoin(url, link["url"])
        try:
            page = Website(page_url)
        except requests.RequestException as exc:
            # One unreachable (or invented) link should not abort the whole brochure.
            print(f"Skipping {page_url}: {exc}")
            continue
        result += f"\n\n{link['type']}\n"
        result += page.get_contents()
    return result

In [29]:
# System prompt for the brochure-writing call. Each fragment ends with a space so
# the sentences do not run together when the pieces are concatenated.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)
# Alternative tone (swap in to get a humorous brochure):
# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )

In [15]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name of the company, quoted in the first line of the prompt.
        url: landing page address; its contents and those of the selected links
            are gathered with get_all_details.
        max_chars: maximum length of the returned prompt in characters. Anything
            beyond it is cut off. Pass None to disable truncation.

    Returns:
        The prompt text, at most `max_chars` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Hard cut that keeps the request size bounded; may end mid-sentence.
    return user_prompt[:max_chars]

In [16]:
# Preview the full brochure prompt for AMS (scrapes the site and calls the model to pick links).
get_brochure_user_prompt("AMS", "https://amspredict.com/")

Found links: {'links': [{'type': 'Predict Platform', 'url': 'https://amspredict.com/predict-platform/'}, {'type': 'Payment Integrity', 'url': 'https://amspredict.com/payment-integrity/'}, {'type': 'Risk Management', 'url': 'https://amspredict.com/risk-management/'}, {'type': 'Business Intelligence', 'url': 'https://amspredict.com/business-intelligence/'}, {'type': 'Company', 'url': 'https://amspredict.com/company/'}, {'type': 'Resources', 'url': 'https://amspredict.com/resources/'}, {'type': 'Book Demo', 'url': 'https://amspredict.com/book-demo/'}, {'type': 'Careers', 'url': 'https://amspredict.com/careers/'}]}


'You are looking at a company called: AMS\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nAdvanced Medical Strategies (AMS) | Healthcare Analytics\nWebpage Contents:\nSkip to content\nSolutions\nPREDICT Platform\nDiscover how our cutting-edge technology delivers unrivaled predictive insights for health plans, payers, underwriters, and more.\nExplore now\nPayment integrity\nAccelerate savings with automated workflows.\nRisk management\nPredict and plan for the highest-cost claims.\nBusiness intelligence\nGet data-driven contract and policy insights.\nCompany\nOur team\nMeet our world-class experts & innovators.\nUpdates & insights\nGet the latest news & insights from AMS.\nResources\nBook demo\nMembers\nX\nSolutions\nPREDICT Platform\nDiscover how our cutting-edge technology delivers unrivaled predictive insights for health plans, payers, underwriters, and more

In [17]:
def create_brochure(company_name, url):
    """
    Generate a brochure for a company and render it in the notebook.

    Sends system_prompt plus the prompt built by get_brochure_user_prompt to the
    chat model and displays the reply as Markdown. Nothing is returned.

    Args:
        company_name: name of the company.
        url: address of the company's landing page.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=chat_model_id, messages=conversation)
    brochure_md = completion.choices[0].message.content
    display(Markdown(brochure_md))

In [25]:
# Generate and render the AMS brochure in one shot (live scraping and API calls).
create_brochure("AMS", "https://amspredict.com/")

Found links: {'links': [{'type': 'predict platform', 'url': 'https://amspredict.com/predict-platform/'}, {'type': 'payment integrity', 'url': 'https://amspredict.com/payment-integrity/'}, {'type': 'risk management', 'url': 'https://amspredict.com/risk-management/'}, {'type': 'business intelligence', 'url': 'https://amspredict.com/business-intelligence/'}, {'type': 'company', 'url': 'https://amspredict.com/company/'}, {'type': 'insights and updates', 'url': 'https://amspredict.com/insights-updates/'}, {'type': 'resources', 'url': 'https://amspredict.com/resources/'}, {'type': 'book demo', 'url': 'https://amspredict.com/book-demo/'}, {'type': 'careers', 'url': 'https://amspredict.com/careers/'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/advanced-medical-strategies-llc/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/AMSmdstratcom'}]}


# AMS - Healthcare Analytics for Optimizing Costs and Quality

AMS is a premier provider of payment integrity, risk management, and business intelligence solutions for identifying and addressing excessive claims, preventing and recouping overpayments, and effectively managing the risks associated with high-cost claimants and group health underwriting. 

## Solutions

AMS offers cutting-edge solutions to support healthcare cost and quality optimization, which include:

### PREDICT Platform

AMS's industry-leading solutions are built on one of the most versatile and comprehensive analytical platforms ever built for healthcare, designed to deliver all the data and insights healthcare professionals need to pinpoint excessive claims, avoid or recover overpayments, and manage risks associated with high-cost claimants.

### Payment Integrity

AMS offers automated payment integrity processes that empower teams to avoid or recover overpayments, reduce outsourcing costs, and automate payment integrity workflows.

### Risk Management

AMS enhances critical coverage decisions with analytical tools that support more than 90% of all stop-loss and reinsurance premiums written annually, enabling customers to predict and plan for the highest-cost claims.

### Business Intelligence

AMS strengthens contracts and policies with powerful predictive analytics, advanced reporting tools, and consultative support. The company provides data-driven contract and policy insights to help customers make informed decisions.

## Company Culture

At AMS, the clinical experts, technologists, and data scientists strive to make healthcare more affordable, efficient, and effective for everyone. Since their inception, they have been united and inspired by this pursuit. 

## Customers

AMS's payment integrity, risk management, and business intelligence solutions cater to health plans, payers, underwriters, and other healthcare professionals who seek to optimize costs and improve quality.

## Careers/Jobs

There is no information available on the website about AMS's job openings. Interested candidates can visit the "Our Team" page to learn more about their world-class experts and innovators and make an inquiry by clicking on the "Contact Us" button. 

Get started with optimizing healthcare costs and quality with AMS today.

In [26]:
def stream_brochure(company_name, url):
    """
    Generate a brochure for a company and render it incrementally as it streams in.

    Sends the same messages as create_brochure with stream=True and refreshes a
    single Markdown display each time new text arrives. Nothing is returned.

    Args:
        company_name: name of the company.
        url: address of the company's landing page.
    """
    stream = openai.chat.completions.create(
        model=chat_model_id,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Azure OpenAI can send chunks whose `choices` list is empty (for example
        # content-filter annotations); indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if not piece:
            continue
        raw += piece
        # Strip only Markdown code-fence markers. The untouched text is kept in `raw`
        # so a fence split across chunks is still recognised, and the word
        # "markdown" inside the brochure itself is left alone.
        cleaned = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)

In [None]:
# Generate the AMS brochure with streaming output (this cell has not been executed: In [None]).
stream_brochure("AMS", "https://amspredict.com/")