# **Vertex AI Init**

In [1]:
import sys

# Google Colab needs an extra, explicit authentication step. The branch below
# only runs when the google.colab package has already been loaded into this
# process (i.e. it appears in sys.modules); otherwise the cell does nothing.
if "google.colab" in sys.modules:
    # Authenticate the current user to Google Cloud.
    from google.colab import auth

    auth.authenticate_user()

In [2]:
# Project and region handed to vertexai.init() below. The trailing "@param"
# comments are kept verbatim so Colab can keep rendering them as form fields.
PROJECT_ID = "gen-lang-client-0341374211"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

if "google.colab" in sys.modules:
    # Initialize Vertex AI with the values above. This only happens when the
    # google.colab package is loaded; elsewhere the cell just defines the
    # two constants.
    import vertexai

    vertexai.init(project=PROJECT_ID, location=LOCATION)

In [3]:
# Free-text query: a cuisine followed by a postal address, one address line
# per row. Adjacent literals are concatenated into a single string.
search_query = (
    "Sea food near Googleplex\n"
    "1600 Amphitheatre Parkway\n"
    "Mountain View, CA 94043\n"
    "United States"
)
# Alternative query kept for reference:
#   'how to make a great pastrami sandwich'

# **Web Scraping**

In [4]:
!pip install -q langchain playwright beautifulsoup4 html2text

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.4/794.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.2/37.2 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m67.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.4/192.4 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m613.2/613.2 kB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
import html2text
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer

async def do_webscraping(link):
    """Load a single URL and return its text content together with metadata.

    The page is fetched with ``AsyncHtmlLoader`` and passed through
    ``Html2TextTransformer``. Only the first transformed document is used.

    Args:
        link: URL of the page to load.

    Returns:
        A dict with the keys ``'summary'`` (page content of the first
        transformed document), ``'title'`` (``metadata['title']`` or ``''``
        when absent), ``'metadata'`` (the document metadata) and
        ``'clean_content'`` (the same page content run through
        ``html2text.html2text``). ``None`` is returned when nothing was
        produced or when any exception was raised; in the latter case the
        error is printed instead of propagated.
    """
    try:
        # NOTE(review): load() is called without await, so this coroutine
        # never yields while the page is being fetched.
        fetched = AsyncHtmlLoader([link]).load()
        converted = Html2TextTransformer().transform_documents(fetched)

        # Guard clause: a missing or empty result means there is no page.
        if not converted:
            return None

        first_doc = converted[0]
        page_meta = first_doc.metadata
        return {
            'summary': first_doc.page_content,
            'title': page_meta.get('title', ''),
            'metadata': page_meta,
            # NOTE(review): the content was already transformed above; this
            # second html2text pass is kept as in the original - confirm it
            # is intended.
            'clean_content': html2text.html2text(first_doc.page_content),
        }
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

# **User Preferences**

In [6]:
# Per-user prompt configuration. "prompt_template" maps a template name to the
# instruction text that is inserted into the prompt sent to the model.
user_albert_preference = {
    "prompt_template":{
        # Step-by-step instructions plus the JSON shape the answer must follow.
        # The schema is written as valid JSON (comma after every member but
        # the last) so the model is not shown a malformed example.
        "summary_template_1": """
            You are an expert summary generator. Generate a clean and concise summary in less than 100 lines.

            Prompt 1: Identify key sections in the text for summary generation
            Prompt 2: Extract key information from the introduction section
            Prompt 3: Parse out the main objective or purpose of the text
            Prompt 4: Identify any key findings or conclusions discussed in the text
            Prompt 5: Summarize the main arguments or points presented in the text
            Prompt 6: Summarize the overall tone or attitude of the text

            Output the summary as per the below schema.
            {
              "summary": "",
              "highlights": [],
              "keywords": []
            }

        """,
        # Alternative, free-form guidelines without an output schema.
        "summary_template_2": """
          1. Present a brief snapshot of the content to be summarized.
          2. Uncover the essential insights, emphasizing the core elements.
          3. Illuminate the primary theme or objective that underlies the material.
          4. Incorporate pertinent details that enrich the overall context.
          5. Emphasize the necessity for brevity, focusing on the key information.
          6. Stress the importance of a clear and coherent flow in the summary.
          7. Encourage the exclusion of repetitive information for a streamlined summary.
        """
    }
}

In [7]:
text_to_summarize = ''
url = 'https://www.yelp.com/search?cflt=seafood&find_loc=Mountain+View%2C+CA+94043'
response = await do_webscraping(url)
if response != None:
  text_to_summarize = response['summary']
text_to_summarize

Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.77s/it]


'Yelp\n\nYelp for Business\n\nWrite a Review\n\nLog InSign Up\n\nRestaurants\n\nDelivery\n\nBurgers\n\nChinese\n\nItalian\n\nReservations\n\nJapanese\n\nMexican\n\nThai\n\nHome Services\n\nContractors\n\nElectricians\n\nHome Cleaners\n\nHVAC\n\nLandscaping\n\nLocksmiths\n\nMovers\n\nPlumbers\n\nAuto Services\n\nAuto Repair\n\nAuto Detailing\n\nBody Shops\n\nCar Wash\n\nCar Dealers\n\nOil Change\n\nParking\n\nTowing\n\nMore\n\nDry Cleaning\n\nPhone Repair\n\nBars\n\nNightlife\n\nHair Salons\n\nGyms\n\nMassage\n\nShopping\n\nMore\n\nFilters\n\n$$$$$$$$$$\n\nSuggested\n\nOpen Now\n\n\\--:--\n\nOffers Delivery\n\nReservations\n\nFree Wi-Fi\n\nOutdoor Seating\n\nDogs Allowed\n\nFeatures\n\nOffers Takeout\n\nGood for Groups\n\nGood for Dinner\n\nGood for Kids\n\nSee all\n\nDistance\n\nBird\'s-eye View\n\nDriving (5 mi.)\n\nBiking (2 mi.)\n\nWalking (1 mi.)\n\nWithin 4 blocks\n\nYelpRestaurantsSeafood\n\n# The Best 10 Seafood Restaurants near Mountain View, CA 94043\n\nSort:Recommended\n\nAll

In [8]:
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part

def generate_summary(text_to_summarize, max_output_tokens):
  """Stream a model-generated summary of ``text_to_summarize`` to stdout.

  The prompt is made of a fixed instruction line, the ``summary_template_1``
  entry of ``user_albert_preference`` and the text itself. The request is
  sent to the "gemini-pro" model in streaming mode.

  Streamed chunks are written back-to-back without a separator. Printing
  each chunk with a plain ``print()`` would insert a newline at every chunk
  boundary, which splits words and ``\\uXXXX`` escapes in the printed JSON.

  Args:
    text_to_summarize: Text inserted into the prompt after the rules.
    max_output_tokens: Value passed as ``max_output_tokens`` in the
      generation config.

  Returns:
    None. The generated text is only printed.
  """
  model = GenerativeModel("gemini-pro")
  prompt = f"""You are an expert summary generator. Please follow the below rules for the summary generation.
          {user_albert_preference['prompt_template']['summary_template_1']}
          Here's the content:
          {text_to_summarize}
           """
  responses = model.generate_content(
    prompt,
    generation_config={
        "max_output_tokens": max_output_tokens,
        "temperature": 0.9,
        "top_p": 1
    },
    stream=True,
  )

  for response in responses:
      # end="" keeps consecutive chunks contiguous; flush so text shows up
      # as soon as each chunk arrives.
      print(response.candidates[0].content.parts[0].text, end="", flush=True)
  # Terminate the streamed text with a single newline.
  print()

# Summarise the scraped page text, allowing up to 8000 output tokens.
generate_summary(text_to_summarize, max_output_tokens=8000)

{
  "summary": "In Mountain View, California, ten exceptional seafood
 restaurants await your exploration. Lim\u00f3n, a Peruvian
 gem, offers a mouth-watering menu. Pacific Catch, known for its poke and Korean-style seafood pancake, is a must-visit. Cap\u
2019t Loui serves delicious seafood with free sides for larger orders. For a fine dining experience, try The Sea by Alexander\u20
19s Steakhouse, where you can enjoy fresh seafood and a stunning ambiance. King\u2019s Fish House is another popular choice with a wide variety of seafood dishes. If you're looking for a casual atmosphere
, La Marea of the Sea offers fresh oysters and various seafood options. Rustic House Oyster Bar and Grill provides a delightful outdoor seating experience and a diverse seafood menu. Supreme Crab is a local favorite for Cajun-style seafood. For a brunch
 and seafood combination, head to Forthright Oyster Bar & Kitchen. Lastly, Cook\u2019s Seafood offers high-quality seafood and a market for fresh seafood 