# Search

> Search for topics using SearXNG - a popular open-source meta-search engine


In [None]:
# | default_exp search

## Installing SearXNG

[Docker Installation Instructions](https://docs.searxng.org/admin/installation-docker.html#installation-docker)

## Running

SearXNG can be started as follows:

```
docker run -it \
    -d -p 8080:8080 \
    -v "${PWD}/searxng:/etc/searxng" \
    -e "BASE_URL=http://localhost:8080/" \
    -e "INSTANCE_NAME=my-instance" \
    searxng/searxng
```


In [None]:
# | export

import requests
import msgspec
import os

In [None]:
# | hide

from rich.pretty import pprint

# Using SearXNG to get results back in json format

The easiest way to search a number of sources and get results back in JSON format is
to use SearXNG's open-source meta-search engine. For this to work, JSON responses need to be
enabled in SearXNG (by adding `json` to `search.formats` in `settings.yml`).


In [None]:
# Define the search query
query = "!news Continental"

# Define the SearXNG search API endpoint
api_endpoint = "http://localhost:8080/search"

# Seconds to wait for SearXNG before giving up. Without a timeout,
# `requests.get` can block indefinitely if the instance is unresponsive.
request_timeout = 30

# Define the request parameters
params = {
    "q": query,
    "format": "json",
    "language": "auto",
    "time_range": "month",
    "safesearch": 0,
}

search_results = []

# Fetch result pages 1 and 2 and collect the raw result dicts in order
for page in range(1, 3):
    params["pageno"] = page
    # Send the GET request to the SearXNG search API
    response = requests.get(api_endpoint, params=params, timeout=request_timeout)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Get the search results
        page_results = response.json()
        # Add search results to the end of the list
        search_results.extend(page_results["results"])
    else:
        # Report which page failed and why, so a disabled JSON format
        # or a wrong endpoint is easy to tell apart.
        print(
            "Error occurred while performing the search "
            f"(page {page}, HTTP {response.status_code})."
        )

# Print an index and the title of every collected result
for i, result in enumerate(search_results):
    print(f"{i}: {result['title']}")

0: Continental Congress, families faced hardship during, after Revolutionary War stay in York
1: Slow sales drags Germany's Continental to a loss in Q1
2: Continental Backs Outlook Despite Net Loss
3: Continental AG Slips To Q1 Loss On Weak Sales; Sees Earnings Growth Ahead, Confirms FY24 View
4: Continental goes from profit to 53 million loss in the 1st quarter
5: Continental Confirms 1Q Preliminary Figures
6: Continental fined 100 million euros for role in VW emissions scandal
7: Why is the Olympic Torch going on a cross continental journey from Olympus to Paris?
8: Continental earnings: here's what to expect
9: “Sufficient potential for improvement” – Continental publishes Q1 2024 results
10: Spring Continental Market to return to grounds of Belfast City Hall this May
11: Auto parts supplier Continental agrees to pay €100m in diesel scandal
12: Continental earnings: here's what to expect
13: “Sufficient potential for improvement” – Continental publishes Q1 2024 results
14: Continent

### Format of data coming back


In [None]:
# Show the raw dict of the first hit to document the shape of the data.
# Guarded so the cell does not raise IndexError when the search above
# returned nothing (e.g. SearXNG is not running).
if search_results:
    pprint(search_results[0])
else:
    print("No search results to display.")

In [None]:
# | export


class SearchResult(msgspec.Struct):
    """A single search hit, reduced to the fields this module exposes.

    Instances are normally built from a raw SearXNG result dict via
    `SearchResult.from_dict`.
    """

    title: str  # taken from the result's "title" key
    content: str  # taken from the result's "content" key
    url: str  # taken from the result's "url" key
    image_url: str  # taken from the result's "img_src" key
    source: str  # the result's "engines" entries joined with ", "

    @classmethod
    def from_dict(cls, d):
        """Build a `SearchResult` from one raw result dict.

        Args:
            d: A mapping with at least a "url" key. "title", "content",
                "img_src" and "engines" are read when present.

        Returns:
            A new `SearchResult`. Optional keys that are missing or `None`
            become an empty string, so a sparse result from one engine
            does not abort the conversion of a whole page.

        Raises:
            KeyError: If "url" is missing.
        """
        return cls(
            title=d.get("title") or "",
            content=d.get("content") or "",
            url=d["url"],
            image_url=d.get("img_src") or "",
            source=", ".join(d.get("engines") or []),
        )


class SearchResults(msgspec.Struct):
    """One page of search hits together with its page number."""

    page: int
    results: list[SearchResult]

    @classmethod
    def from_list(cls, l: list[dict], page: int):
        """Convert raw result dicts into a `SearchResults` for the given page.

        Args:
            l: Raw result dicts, each converted with `SearchResult.from_dict`.
            page: Page number stored on the returned object.

        Returns:
            A `SearchResults` holding the converted hits in input order.
        """
        converted = list(map(SearchResult.from_dict, l))
        return cls(page=page, results=converted)

In [None]:
# | export


def run_query(
    query: str,
    page: int,
    time_range: str = "month",
    timeout: float = 30.0,
) -> list[SearchResult]:
    """Run one search against the configured endpoint and return one page of hits.

    The endpoint URL is read from the `SEARCH_ENDPOINT` environment variable.

    Args:
        query: Search string sent as the `q` parameter.
        page: Page number sent as the `pageno` parameter.
        time_range: Value sent as the `time_range` parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The hits converted to `SearchResult` objects, or an empty list when
        the server answers with a non-200 status or the response carries no
        "results" entry.

    Raises:
        KeyError: If `SEARCH_ENDPOINT` is not set.
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
    """
    api_endpoint = os.environ["SEARCH_ENDPOINT"]
    params = {
        "q": query,
        "format": "json",
        "language": "auto",
        "time_range": time_range,
        "safesearch": 0,
        "pageno": page,
    }
    response = requests.get(api_endpoint, params=params, timeout=timeout)
    # Best-effort: a failed search yields no results rather than an exception.
    if response.status_code != 200:
        return []
    page_results = response.json()
    return [SearchResult.from_dict(d) for d in page_results.get("results", [])]

In [None]:
# Try the exported helper: page 1 for "!news Trimble" with time_range "week"
search_results = run_query(query="!news Trimble", page=1, time_range="week")
pprint(search_results)

In [None]:
# | hide
import nbdev

# Run nbdev's export step so the cells marked `# | export` in this notebook
# are written out to the library module (see `default_exp` at the top).
nbdev.nbdev_export()