# OPOL Python Client

### Initialization

#### Setup Client

In [22]:
from opol.main import OPOL
import os
from pprint import pprint

# 1. Setup client (using opol.io)
# opol = OPOL(api_key="")

# Alternatively - if you have booted the opol stack on your machine (no api key needed)
# Read opol/opol/stack/Readme.md for more information
opol = OPOL(mode="local")




#### Articles

In [3]:
from opol.main import OPOL
import os
from pprint import pprint

opol = OPOL(mode="local", api_key="")

# Class submethods default to search
print(opol.articles.get_articles(query="apple"))
print(opol.articles("New York", pretty=True))    

# Use date
articles = opol.articles.get_articles("Berlin", limit=100)

entities = [article['title'] for article in articles[:3]]
entity_ids = [article['id'] for article in articles]
print(entities)
print(entity_ids)

geojson = opol.geo.by_id(entity_ids)
print(geojson[0]['features'])


Request error occurred: timed out for URL: http://localhost:5434/contents


ReadTimeout: timed out

In [6]:
from opol.main import OPOL
import os
from pprint import pprint

opol = OPOL(mode="local", api_key="")

articles = opol.articles.by_entity("Trump", limit=100)

for article in articles:
    print(article.keys())
    print(f"{article['title']}:" + article['insertion_date'] + "\n")

dict_keys(['id', 'url', 'title', 'source', 'insertion_date', 'text_content', 'top_image', 'entities', 'tags', 'evaluation'])
Electoral College: How it’s changed this year:2024-12-17T23:01:11.108817

dict_keys(['id', 'url', 'title', 'source', 'insertion_date', 'text_content', 'top_image', 'entities', 'tags', 'evaluation'])
Canada's finance minister quits over Trump tariff dispute with Trudeau:2024-12-17T23:01:11.104342

dict_keys(['id', 'url', 'title', 'source', 'insertion_date', 'text_content', 'top_image', 'entities', 'tags', 'evaluation'])
What does a US government shutdown actually mean? – DW – 12:2024-12-20T15:01:22.091629

dict_keys(['id', 'url', 'title', 'source', 'insertion_date', 'text_content', 'top_image', 'entities', 'tags', 'evaluation'])
Chaotic scramble to avert shutdown highlights challenges ahead for Johnson:2024-12-21T06:01:01.186669

dict_keys(['id', 'url', 'title', 'source', 'insertion_date', 'text_content', 'top_image', 'entities', 'tags', 'evaluation'])
Will US Pre

#### Geojson & Geocoding

In [9]:
from opol.main import OPOL
import os

# set env variable
os.environ['OPOL_MODE'] = "remote"

opol = OPOL(api_key="")

geojson = opol.geo.json_by_event("War", limit=5)
print(geojson)

berlin_coords = opol.geo.code("Berlin")["coordinates"]
print(berlin_coords)




[13.407032, 52.524932]


#### Polls

In [5]:
from opol.main import OPOL
from pprint import pprint

opol = OPOL(mode="local")

# polls = opol.scraping.polls("Germany")

# for pol in polls:
#     if pol['party'] == "GRÜNE" or pol['party'] == "CDU/CSU":
#         print(pol)


# # Latest Polls for each Insittute
# polls = opol.scraping.polls("Germany", latest=True)

# for poll in polls:
#     print(poll)

ranked_polls = opol.scraping.polls("Germany", summarised=True)

for poll in ranked_polls:
    print(poll['party'], poll['percentage'])

CDU/CSU 32.0
AfD 18.75
SPD 16.0
GRÜNE 13.0
BSW 4.75
FDP 3.5
LINKE 2.75
FW 0.0


### Legislation

In [20]:
from opol.main import OPOL

opol = OPOL(mode="local")

events = opol.scraping.legislation("Germany")


for event in events[:3]:
    print(event)

{'law': 'Wahl eines Mitglieds des Parlamentarischen Kontrollgremiums gemäß Artikel 45d des Grundgesetzes', 'status': 'Noch nicht beraten', 'label': 'red', 'date': '2024-12-23', 'initiative': 'Fraktion der AfD', 'href': 'https://dip.bundestag.de/suche?term=318947&f.wahlperiode=20&rows=25'}
{'law': 'Wahl eines Stellvertreters der Präsidentin', 'status': 'Noch nicht beraten', 'label': 'red', 'date': '2024-12-23', 'initiative': 'Fraktion der AfD', 'href': 'https://dip.bundestag.de/suche?term=318946&f.wahlperiode=20&rows=25'}
{'law': 'Erteilung eines Ordnungsrufes an den Abg Karsten Hilse (AfD)', 'status': None, 'label': 'red', 'date': '2024-12-20', 'initiative': None, 'href': 'https://dip.bundestag.de/suche?term=318944&f.wahlperiode=20&rows=25'}


### Economics

In [17]:
from opol.main import OPOL

opol = OPOL(mode="local")

all_events = opol.scraping.economic("Germany")

gdp_events = opol.scraping.economic("Germany", indicators=["GDP"])


for event in all_events[:3]:
    print(event)

print("*"*20)

for event in gdp_events[:3]:
    print(event)

{'year': '2020', 'B1GQ+B1GQ': 3940.14254135403}
{'year': '2023', 'B1GQ+B1GQ': 4525.70390362753}
{'year': '2002', 'B1GQ+B1GQ': 2102.35079830589}
********************
{'year': '2020', 'B1GQ+B1GQ': 3940.14254135403}
{'year': '2023', 'B1GQ+B1GQ': 4525.70390362753}
{'year': '2002', 'B1GQ+B1GQ': 2102.35079830589}


### LLM Classifications (custom llm wrapper)

In [5]:
from opol.api.classification import Classification
from opol.main import OPOL
import os

# Initialize the OPOL client in local mode
opol = OPOL(mode="local")

# Set the API key for accessing the Google Generative AI service
# api_key = ""
api_key = os.environ["GOOGLE_API_KEY"]


# Initialize the classification service with the specified provider, model, and API key
xclass = opol.classification(provider="Google", model_name="models/gemini-1.5-flash-latest", llm_api_key=api_key)

# Example 1: Classify user preference for frozen dairy products on a scale of 1 to 10
user_input = "I love ice cream"
prompt = "On a 1-10 scale how much the user likes frozen dairy products"
int_value = xclass.classify("int", prompt, user_input)
print("User preference rating:", int_value)

# Example 2: Extract keywords from a given text
text = "In the madagascan wilds the biggest wild animal is the elephant in the globe finance trade is offshore jusrisdiction diffusion"
instruction = "The topics relevant to this text. Only semantically relevant to a content system"
keywords = xclass.classify("List[str]", instruction, text)
print("Extracted keywords:", keywords)

# Example 3: Use a Pydantic model to classify request types with keywords and relevance level
from pydantic import BaseModel, Field
from typing import List

class RequestType(BaseModel):
    """
    This is the main classification for incoming request types.
    """
    keywords: List[str] = Field(description="The keywords relevant to this text. Only semantically relevant to a content system")
    relevance_level: int = Field(description="On a 1-10 scale how much the content is relevant")

# Classify the request using the Pydantic model
request = xclass.classify(RequestType, instruction, text)
print("Classified request:", request)

## Results
# User preference rating: 10
# Extracted keywords: ['madagascan', 'wilds', 'biggest', 'animal', 'elephant']
# Classified request: keywords=['madagascar', 'wildlife', 'elephant'] relevance_level=8


KeyError: 'GOOGLE_API_KEY'

In [7]:
from opol.main import OPOL
import os
from pprint import pprint

# 1. Setup client (using opol.io)
# opol = OPOL(api_key="")

# Alternatively - if you have booted the opol stack on your machine (no api key needed)
# Read opol/opol/stack/Readme.md for more information
opol = OPOL(mode="local")


for poll in opol.scraping.polls("Germany", latest=True):
    print(poll)



{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'CDU/CSU', 'percentage': 36.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'SPD', 'percentage': 16.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'GRÜNE', 'percentage': 12.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'FDP', 'percentage': 4.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'LINKE', 'percentage': 0.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'AfD', 'percentage': 18.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'FW', 'percentage': 0.0}
{'date': '2024-12-20', 'institute': 'Allensbach', 'party': 'BSW', 'percentage': 6.0}
{'date': '2024-12-23', 'institute': 'Forsa', 'party': 'CDU/CSU', 'percentage': 31.0}
{'date': '2024-12-23', 'institute': 'Forsa', 'party': 'SPD', 'percentage': 16.0}
{'date': '2024-12-23', 'institute': 'Forsa', 'party': 'GRÜNE', 'percentage': 13.0}
{'date': '2024-12-23', 'institute': 'Forsa', 'party': 'FDP',

### Search Engine (Google, Bing, Wikipedia, Axiv)

In [3]:
from opol.main import OPOL
from pydantic import BaseModel
from typing import List, Optional

opol = OPOL(mode="local")

articles = opol.search.engine("Latest news from Gaza", engine="wikipedia")


for article in articles[:2]:
    print(article)

print("*"*20)
images = opol.search.image("Latest news from Gaza")

for image in images[:6]:
    print(image)



url='https://www.bbc.com/news/topics/c2vdnvdg6xxt' title='Israel Gaza war | Latest News & Updates' content="Children among dozens killed in Israeli strikes, Gaza officials say. A school sheltering displaced families was hit, according to Gaza's civil defence agency." publishedDate=None thumbnail=None engine='google' parsed_url=['https', 'www.bbc.com', '/news/topics/c2vdnvdg6xxt', '', '', ''] template='default.html' engines=['duckduckgo', 'google'] positions=[1, 1] score=4.0 category='general'
url='https://www.aljazeera.com/tag/gaza/' title="Gaza | Today's latest from Al Jazeera" content='Two Gaza Civil Defence workers killed as Israel launches multiple strikes. At least six other people killed and many wounded in an Israeli raid on a house in ...' publishedDate=None thumbnail=None engine='brave' parsed_url=['https', 'www.aljazeera.com', '/tag/gaza/', '', '', ''] template='default.html' engines=['brave', 'google'] positions=[1, 2] score=3.0 category='general'
********************
url='h

### Embeddings

In [3]:
from opol.main import OPOL

opol = OPOL(mode="local")

embeddings = opol.embeddings("What is the capital of Germany?")

print(embeddings)
print(len(embeddings))


[0.0424441359937191, 0.03726011887192726, -0.02247757278382778, -0.012588790617883205, 0.04560314491391182, 0.04935615882277489, 0.017185555770993233, 0.005008515901863575, -0.01972356252372265, -0.05956219136714935, -0.020115064457058907, -0.12333639711141586, 0.00643614586442709, -0.012156789191067219, -0.010624534450471401, 0.008957278914749622, -0.02515058033168316, 0.013209792785346508, -0.031887102872133255, -0.005450642667710781, 0.006557646207511425, 0.007857025600969791, 0.008444277569651604, -0.05348717048764229, -0.000762752431910485, 0.02203207090497017, -0.001440285937860608, -0.02783709019422531, -0.08472627401351929, -0.10643434524536133, -0.004360514227300882, -0.08067625761032104, 0.04741215333342552, 0.01102953590452671, -0.012352540157735348, 0.020263565704226494, 0.04128313437104225, -0.002538008149713278, 0.02840409055352211, -0.05300116911530495, 0.005460767541080713, 0.015741050243377686, 0.040392130613327026, -0.0274185873568058, 0.03407410904765129, -0.02733758

## Full Flow

In [1]:
from opol.main import OPOL
from pydantic import BaseModel, Field
from typing import List
import os
import numpy as np

opol = OPOL(mode="local")
api_key = "AIzaSyDrZ-Ap8XwU-KE9Rz3A8Xii1jY2vNHNT8I"
fastclass = opol.classification(provider="Google", model_name="models/gemini-1.5-flash-latest", llm_api_key=api_key)

class ExpandedQuery(BaseModel):
    """
    As a professional web researcher, your task is to generate a set of three queries that explore the subject matter more deeply, building upon the initial query and the information uncovered in its search results.

    For instance, if the original query was "Starship's third test flight key milestones", your output should follow this format:

    Aim to create queries that progressively delve into more specific aspects, implications, or adjacent topics related to the initial query. The goal is to anticipate the user's potential information needs and guide them towards a more comprehensive understanding of the subject matter.
    Please match the language of the response to the user's language.
    Return three queries.
    """
    similar: List[str] = None

query = "Whats happening in Ghaza?"
expanded_query = fastclass.classify(ExpandedQuery, "", query)

similar_queries = expanded_query.similar

all_articles = []

# Collect articles (max 50)
counter = 0
for query in similar_queries:
    articles = opol.search.engine(query)
    for article in articles:
        if counter > 10:
            break
        all_articles.append(article)
        counter += 1

# Rerank articles based on similarity to the query
query_embedding = opol.embeddings.get_embeddings(similar_queries[0])
ranked_articles = opol.embeddings.rerank_articles(query_embedding, all_articles, text_field="title")

# Print top 2 ranked articles
for article in ranked_articles[:2]:
    for field in article[0]:
        print(field)
    print("*"*20)

  from .autonotebook import tqdm as notebook_tqdm


('url', 'https://concernusa.org/news/gaza-crisis-explained/')
('title', 'The humanitarian crisis in Gaza, explained')
('content', 'May 8, 2024 — A massive humanitarian crisis has been spiraling out of control in Gaza, destroying homes, infrastructure, families, and lives.')
('publishedDate', None)
('thumbnail', None)
('engine', 'google')
('parsed_url', ['https', 'concernusa.org', '/news/gaza-crisis-explained/', '', '', ''])
('template', 'default.html')
('engines', ['google'])
('positions', [5])
('score', 0.2)
('category', 'general')
********************
('url', 'https://www.reuters.com/world/middle-east/details-humanitarian-crisis-gaza-2024-05-01/')
('title', 'Details of the humanitarian crisis in Gaza')
('content', 'June 4, 2024 - The Gaza Strip is suffering an unfolding humanitarian catastrophe nearly eight months since Israel launched a devastating offensive in response to the Oct. 7 Hamas-led attacks that killed 1,200 people in Israel.')
('publishedDate', '2024-06-04T00:00:00')
('t