In [134]:
import os, requests
from dotenv import load_dotenv
# Load environment variables from the .env file
load_dotenv()
# Credentials come from the process environment; each name is None when its
# variable is unset. `api_key` and `search_en_id` are the values build_params()
# sends as 'key' and 'cx'.
GOOGLE_API_KEY, search_en_id, api_key = (
    os.environ.get(name) for name in ('GOOGLE_API_KEY', 'search_en_id', 'api_key')
)

In [260]:
import google.generativeai as genai
from langchain.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

from langchain_google_genai import (ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings,HarmBlockThreshold,HarmCategory)

from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import faiss

from langchain_community.document_loaders import WebBaseLoader

import nest_asyncio



In [149]:
# Safety settings handed to generate_content(): every listed harm category is
# mapped to the BLOCK_NONE threshold.
safe = {
    category: HarmBlockThreshold.BLOCK_NONE
    for category in (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# Sampling parameters for text generation.
# NOTE(review): no cell in view passes this dict to a model — confirm whether
# it is meant to be supplied to generate_content().
generation_config = dict(
    temperature=0.9,
    top_p=1,
    top_k=1,
    max_output_tokens=10000,
)

In [3]:
# Print the name of every model that advertises the generateContent method.
for m in genai.list_models():
    if 'generateContent' not in m.supported_generation_methods:
        continue
    print(m.name)

# Model id kept as a plain string at this point.
model = "gemini-1.0-pro-vision-latest"

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [133]:
# Embedding model later handed to FAISS.from_texts() to vectorise the search snippets.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [139]:
# Endpoint that google_search() sends its GET request to.
url_ = "https://www.googleapis.com/customsearch/v1"

def build_params(search_query, num = 10, start=1, dateRestrict='d1',**kwargs):
    """Build the query-string parameters for a Custom Search request.

    The API key and search-engine id are read from the module-level
    ``api_key`` and ``search_en_id`` names.

    Args:
        search_query: Text to search for (sent as ``q``).
        num: Number of results to request (sent as ``num``).
        start: Index of the first result to return (sent as ``start``).
        dateRestrict: Recency filter (sent as ``dateRestrict``); the API
            documents values such as ``'d1'`` for the past day.
        **kwargs: Extra request parameters. They are merged last, so they
            override any of the keys above.

    Returns:
        dict: Parameters ready to be passed to ``google_search``.
    """
    params = {
        'q': search_query,
        'key': api_key,
        'cx': search_en_id,
        'num': num,
        # Forward the caller's values; these were previously hard-coded to
        # 1 and 'd1', which silently ignored the arguments.
        'start': start,
        'dateRestrict': dateRestrict,
    }
    params.update(kwargs)
    return params

def google_search(params, timeout=10):
    """Send a GET request to the search endpoint and return the decoded JSON.

    Args:
        params: Query-string parameters, e.g. the dict from ``build_params``.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        Exception: If the response status code is not 200.
    """
    response = requests.get(url_, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception('API response: {}'.format(response.status_code))
    return response.json()

In [6]:
# search = DuckDuckGoSearchResults()
# res = search.run("Obamna", max_results=2)
# wrapper = DuckDuckGoSearchAPIWrapper( max_results=2)
# search = DuckDuckGoSearchResults(api_wrapper=wrapper, source="news")
# res = search.run("Obamna")

# Searching for pdf files
# results = DDGS().text('russia filetype:pdf', region='us-en', safesearch='off', timelimit='y', max_results=10)

# async


In [131]:
query = "obamna"

In [108]:
from duckduckgo_search import DDGS

# Search for the shared `query` term instead of a second hard-coded copy, so the
# snippets indexed afterwards always belong to the term used for the similarity,
# Google and suggestion lookups.
results = DDGS().text(query, region='us-en', safesearch='off', timelimit='y', max_results=20)


In [132]:
results[:2]

[{'title': 'Barack Obama: Biography, 44th U.S. President, Politician',
  'href': 'https://www.biography.com/political-figures/barack-obama',
  'body': 'Learn about the life and achievements of Barack Obama, the first Black American to be elected president of the United States. Find out about his early years, education, family, political career, Nobel Peace Prize, books, hobbies and more.'},
 {'title': 'Barack Obama | Biography, Parents, Education, Presidency, Books ...',
  'href': 'https://www.britannica.com/biography/Barack-Obama',
  'body': 'Barack Obama (born August 4, 1961, Honolulu, Hawaii, U.S.) 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). He was the third African American to be elected to that body since the end of Reconstruction (1877).'}]

In [110]:
# Split every search hit into its metadata (link and title, as strings) and its
# snippet text; both lists follow the order of `results`.
meta = [{field: f"{hit[field]}" for field in ("href", "title")} for hit in results]
txt = [hit['body'] for hit in results]

In [135]:
meta[:2]

[{'href': 'https://www.biography.com/political-figures/barack-obama',
  'title': 'Barack Obama: Biography, 44th U.S. President, Politician'},
 {'href': 'https://www.britannica.com/biography/Barack-Obama',
  'title': 'Barack Obama | Biography, Parents, Education, Presidency, Books ...'}]

In [112]:
# Build a FAISS index over the snippet texts; each entry carries its hit's href/title as metadata.
db_p = faiss.FAISS.from_texts(txt, embeddings, metadatas=meta)

In [114]:
# Retrieve up to 10 indexed snippets for `query`; each result is a (Document, score) pair.
docs_and_scores = db_p.similarity_search_with_score(query, k=10)

In [137]:
docs_and_scores[::-1][:2]

[(Document(page_content='Learn about the life and achievements of Barack Obama, the first Black American to be elected president of the United States. Find out about his early years, education, family, political career, Nobel Peace Prize, books, hobbies and more.', metadata={'href': 'https://www.biography.com/political-figures/barack-obama', 'title': 'Barack Obama: Biography, 44th U.S. President, Politician'}),
  0.5331205),
 (Document(page_content="Former president Barack Obama promises in a White House lunch to do all he can for President Biden's reelection. By Tyler Pager. August 2, 2023 at 12:18 p.m. EDT. Former president Barack Obama ...", metadata={'href': 'https://www.washingtonpost.com/politics/2023/08/02/obama-biden-campaign-reelection/', 'title': 'Obama promises Biden he will do all he can to get him reelected - The ...'}),
  0.5183651)]

In [130]:
# Collect the source URL of every retrieved snippet, in reversed retrieval order.
# NOTE(review): in the recorded output the reversed list shows the larger score
# first (0.533 before 0.518). If the score is a distance (lower = closer), this
# puts the weakest matches first — confirm the reversal is intended.
links = [d[0].metadata['href'] for d in docs_and_scores[::-1]]
links

['https://www.biography.com/political-figures/barack-obama',
 'https://www.washingtonpost.com/politics/2023/08/02/obama-biden-campaign-reelection/',
 'https://www.nytimes.com/2023/05/31/us/politics/obama-oral-history.html',
 'https://www.latimes.com/opinion/story/2024-03-18/great-expectations-vinson-cunningham-obama-campaign',
 'https://www.britannica.com/biography/Michelle-Obama',
 'https://knowyourmeme.com/editorials/guides/where-did-the-nickname-obamna-come-from-memes-about-trumps-mispronounciation-of-obamas-name-explained',
 'https://www.cnn.com/2024/01/20/politics/obama-inauguration-america-blake-cec/index.html',
 'https://www.youtube.com/watch?v=NwLOuwtPAJ4',
 'https://www.forbes.com/advisor/health-insurance/what-is-obamacare/',
 'https://www.britannica.com/biography/Barack-Obama']

### Google

In [140]:
# Run the same `query` through the Google endpoint, requesting 10 results.
response = google_search(build_params(query,num = 10))

In [142]:
# Print the link, title and snippet of every hit.
# A search that matches nothing can come back without an 'items' key, so read it
# with a default instead of indexing (which would raise KeyError).
for i in response.get('items', []):
    url, title = i['link'], i['title']
    print(f"URL : {url}  title : {title}")
    print(f"Snippet : {i['snippet']}")

URL : https://twitter.com/GarakObamna/status/1769384400918335896  title : Garak Obamna on X: "a gentle polemic against @OGRolandRat's ...
Snippet : 23 hours ago ... Garak Obamna · @GarakObamna. a gentle polemic against. @OGRolandRat. 's reading of MAGA. MAGA means Nationalism, not the Kali Yuga · From substackfwd.xyz.
URL : https://www.reddit.com/r/thomastheplankengine/comments/1bhbjy0/i_had_a_dream_that_this_screenshot_went_viral/  title : I had a dream that this screenshot went viral because nobody could ...
Snippet : 16 hours ago ... ... it clearly says Obama. Upvote 1. Downvote Reply reply. Share. u/Terracatlegend avatar · Terracatlegend. • in 23h. Obamna. Upvote 7. Downvote Reply reply. Share.
URL : https://twitter.com/GarakObamna/status/1769526225440264597  title : Garak Obamna on X: "Please God make this happen" / X
Snippet : 14 hours ago ... Spanian is becoming an icon. He has done some great videos on some of the most extreme communities in the world. I think its time he does 

In [336]:
from typing import Any, Dict, Iterator, List, Optional, Sequence, Union
from bs4 import BeautifulSoup
import re

def extract_text(html):
    """Return the text content of a fetched page with whitespace collapsed.

    ``html`` must expose the raw page bytes as ``.content``. The markup is
    parsed with the 'html.parser' backend, every run of whitespace is replaced
    by a single space, and leading/trailing whitespace is stripped.
    """
    page_text = BeautifulSoup(html.content, 'html.parser').get_text()
    collapsed = re.sub(r'\s+', ' ', page_text)
    return collapsed.strip()

class WebLoader(WebBaseLoader):
    """WebBaseLoader variant whose scraped pages hold only whitespace-collapsed text."""

    # self.requests_kwargs.update({"headers": headers})
    def _scrape(self, url: str, parser: Union[str, None] = None, bs_kwargs: Optional[dict] = None,) -> Any:
        """Fetch ``url`` and return a BeautifulSoup built from its extracted text.

        The page is requested through ``self.session`` with
        ``self.requests_kwargs``; when ``self.raise_for_status`` is set, HTTP
        errors are raised. ``parser`` and ``bs_kwargs`` are accepted for
        signature compatibility but are not used here: the text always goes
        through ``extract_text`` and is re-wrapped with 'html.parser'.
        """
        page = self.session.get(url, **self.requests_kwargs)
        if self.raise_for_status:
            page.raise_for_status()

        # Switch the response to its detected encoding before extracting text.
        page.encoding = page.apparent_encoding
        return BeautifulSoup(extract_text(page), 'html.parser')

In [334]:
# NOTE(review): presumably needed so async loader calls can run inside the
# notebook's already-running event loop; the cells in view only call the
# synchronous loader.load() — confirm this is still required.
nest_asyncio.apply()

In [335]:
# Browser-like User-Agent sent with the page requests.
headers = {'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"}

# Load every collected link through the text-only WebLoader (a copy of `links` is passed).
loader = WebLoader(links[:])
# This replaces the session's header mapping outright, so only User-Agent stays set.
loader.session.headers = headers
loader.requests_per_second = 5
docs = loader.load()
docs

[Document(page_content='Barack Obama: Biography, 44th U.S. President, PoliticianSearchWomen’s HistoryHistory & CultureMusiciansMovies & TVAthletesArtistsPower & PoliticsBusinessScholars & EducatorsScientistsActivistsNotorious FiguresBIO BuysNewsletterYour Privacy ChoicesPrivacy NoticeTerms Of UseSkip to ContentWomen’s HistoryMusiciansMovies & TVAthletesNewsletterFamous Political FiguresU.S. PresidentsBarack ObamaBarack ObamaThe 44th president of the United States, Barack Obama is the first Black American who has been elected to the Oval Office. He served from 2009 until 2017.By Tyler Piccotti and Biography.com EditorsUpdated: May 1, 2023Courtesy The White HouseWe may earn commission from links on this page, but we only recommend products we back.Jump to:Who Is Barack Obama?Quick FactsEarly Life and FamilyEducationMarriage to Michelle Obama and DaughtersIllinois Political Career2008 Presidential Election and Inauguration First Term as U.S. PresidentSecond Term as U.S. PresidentNotable S

In [337]:
# Take the text of the page at index 8 and check its length.
# NOTE(review): the index is hard-coded. Assuming `docs` follows the order of
# `links`, the recorded run points at the Forbes Obamacare article — confirm
# after re-running the search, since the ordering can change.
t = docs[8].page_content
len(t)

27258

In [273]:
# Create the model handle in this cell: when the notebook is run top to bottom,
# `model` is still the plain string assigned in the model-listing cell, which
# has no generate_content() method.
model = genai.GenerativeModel('gemini-1.0-pro-latest')

# Ask the model to strip page boilerplate and keep only the article text.
response = model.generate_content("Return only the relevant text from this text from html article:" + t, safety_settings=safe)

print(response.text)

**What is the Affordable Care Act?**

The Affordable Care Act, also known as Obamacare, is a law enacted in 2010 that has expanded access to health insurance for millions of Americans. It helps people get health insurance who may not otherwise get coverage because of lack of workplace health insurance, their finances, or pre-existing conditions.

**How does Obamacare work?**

The ACA helps people get health insurance through a health insurance marketplace (at HealthCare.gov). People who have household incomes between 100% and 400% of the federal poverty level can receive premium tax credits to lower health care costs and make critical procedures and medications available.

**Who qualifies for Obamacare?**

To qualify for Obamacare, you must meet these requirements:

Live in the U.S.
Be a U.S. citizen or national
Not incarcerated

**What does Obamacare cover?**

ACA plans must cover these 10 essential health benefits, at a minimum:

Ambulatory patient/outpatient services
Emergency servi

In [147]:
# Fetch DuckDuckGo's suggestions for `query`.
# NOTE(review): this rebinds `results`, which earlier held the text-search hits;
# re-running the meta/txt cell after this one would fail on the missing keys.
results = DDGS().suggestions(query)

# async
results 

[{'phrase': 'obamna soda'},
 {'phrase': 'obamna meme'},
 {'phrase': 'obama mp3'},
 {'phrase': 'obama trump'},
 {'phrase': 'obama gaming'},
 {'phrase': 'obama sound'},
 {'phrase': 'obamacare'},
 {'phrase': 'obama amnesty'}]

### Suggestions / related queries

In [152]:
# Ask Gemini to turn a bare keyword into more detailed search queries.
# The keyword is embedded in the prompt string rather than taken from `query`.
model = genai.GenerativeModel('gemini-1.0-pro-latest')
response = model.generate_content("Generate a more detailed internet search query based on this key word: zaphod beeblebrox", safety_settings=safe)

print(response.text)

Zaphod Beeblebrox character analysis
Zaphod Beeblebrox quotes
Zaphod Beeblebrox personality type
Zaphod Beeblebrox leadership style
Zaphod Beeblebrox role in The Hitchhiker's Guide to the Galaxy
Zaphod Beeblebrox behind the scenes
Zaphod Beeblebrox actor
Zaphod Beeblebrox voice actor
Zaphod Beeblebrox fan fiction
Zaphod Beeblebrox fan art
