# API Trials

## Setup

In [None]:
import json
import urllib
import urllib.request

import arxiv
import pandas as pd
import xmltodict
from rich import print

## Arxiv API with `urllib`

In [None]:
# Query the arXiv export API for papers whose title contains both "deep" and
# "learning", starting at offset 0 and capped at three results.
# Alternative query kept for reference:
# url = 'http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=3'
url = "http://export.arxiv.org/api/query?search_query=ti:deep+AND+ti:learning&start=0&max_results=3"

# Use the response as a context manager so the HTTP connection is closed even
# if reading or decoding fails, and bound the wait so a stalled server cannot
# hang the cell indefinitely.
with urllib.request.urlopen(url, timeout=30) as data:
    xml_res = data.read().decode("utf-8")
print(xml_res)

In [None]:
# Convert the raw XML response into nested dictionaries and show the list of
# entries found under the feed root.
d = xmltodict.parse(xml_res)
feed_entries = d["feed"]["entry"]
print(feed_entries)

In [None]:
# Show the summary text of the entry at index 2 of the parsed feed.
third_entry = d["feed"]["entry"][2]
print(third_entry["summary"])

## Arxiv API with `arxiv`

In [None]:
# Search arXiv for papers whose title contains both "deep" and "learning",
# sorted by submission date and capped at three results.
search = arxiv.Search(
    # Other query strings tried:
    # query = "ti:quantum",
    # query = "abs:graph",
    # query = "ti:quantum+OR+abs:graph",
    query="ti:deep AND ti:learning",
    max_results=3,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# `Search.results()` is deprecated in recent releases of the `arxiv` package;
# running the search through an explicit client is the supported call.
client = arxiv.Client()

# Print a header with title and publication date, then the abstract.
for result in client.results(search):
    print(f"--- {result.title} [{result.published}] ---")
    print(result.summary)

In [None]:
# Load the saved JSON file and keep only the value stored under "results".
# The encoding is pinned because the default used by `open` depends on the
# platform locale, which can mis-decode non-ASCII text in the file.
with open("../data/03_primary/arxiv_dict_2023-09-04_01-09-05.json", "r", encoding="utf-8") as f:
    info_dict = json.load(f)["results"]

In [None]:
# Preview the loaded results as a table: build a frame from `info_dict`, flip
# it so the original columns become rows, and display the first rows.
pd.DataFrame(info_dict).T.head()

## Semantic Scholar API with `urllib`

## Google News with `GoogleNews`
[https://github.com/Iceloof/GoogleNews](https://github.com/Iceloof/GoogleNews)

## Google News with `GNews`
[https://github.com/ranahaani/GNews/](https://github.com/ranahaani/GNews/)

In [None]:
from gnews import GNews

# Ask Google News (through GNews with its default settings) for items matching
# the keyword "France", then show the first five of them.
google_news = GNews()
found_news = google_news.get_news("France")
first_five_news = found_news[:5]
print(first_five_news)

## `newspaper3k`

In [None]:
import newspaper

# Build a newspaper source for CNN; another site that was tried is kept below.
cnn_paper = newspaper.build("http://cnn.com")
# cnn_paper = newspaper.build("https://www.sudouest.fr/")

# Collect and show the URLs of the first ten articles found for the source.
first_articles = cnn_paper.articles[:10]
article_urls = [entry.url for entry in first_articles]
print(article_urls)

In [None]:
# Show the first five category URLs reported for the source built above.
category_urls = cnn_paper.category_urls()
print(category_urls[:5])

In [None]:
# cnn_article = cnn_paper.articles[0]
# cnn_article.download()
# cnn_article.parse()
# cnn_article.nlp()

## `News API`

In [None]:
# Load the local credentials file that holds the API key.
import yaml

# `safe_load` only builds plain Python objects from the YAML document. The
# encoding is pinned because the default used by `open` depends on the
# platform locale.
with open("../conf/local/credentials.yml", "r", encoding="utf-8") as f:
    credentials = yaml.safe_load(f)

In [None]:
from newsapi import NewsApiClient

# Create the client from the key stored under news_api -> key in the
# credentials loaded earlier.
newsapi = NewsApiClient(api_key=credentials["news_api"]["key"])

# /v2/top-headlines: English headlines with an empty keyword.
# Other filters that were tried or can be supplied:
#   q='bitcoin', sources='bbc-news,the-verge', category='business', country='us'
headline_params = {
    "q": "",
    "language": "en",
}
top_headlines = newsapi.get_top_headlines(**headline_params)

# /v2/everything: English articles about "economy" in a fixed date window,
# sorted by relevancy, second page of results.
# Other filters that were tried or can be supplied:
#   q='bitcoin', sources='bbc-news,the-verge', domains='bbc.co.uk,techcrunch.com'
everything_params = {
    "q": "economy",
    "from_param": "2023-09-29",
    "to": "2023-10-02",
    "language": "en",
    "sort_by": "relevancy",
    "page": 2,
}
all_articles = newsapi.get_everything(**everything_params)

# /v2/top-headlines/sources: list of available sources.
sources = newsapi.get_sources()

In [None]:
# Show the first three top-headline articles.
# For the complete response use: print(top_headlines)
headline_articles = top_headlines["articles"]
print(headline_articles[:3])

In [None]:
# Report how many results the "everything" query matched, then show the first
# three articles of the returned page.
# For the complete response use: print(all_articles)
total_results = all_articles["totalResults"]
print("totalResults: ", total_results)
returned_articles = all_articles["articles"]
print(returned_articles[:3])

In [None]:
# print(sources)