# API Trials

## Setup

In [1]:
from rich import print

## Arxiv API with `urllib`

In [59]:
import urllib, urllib.request
# Query the arXiv export API for entries whose title contains both "deep" and
# "learning", starting at offset 0 and capped at 3 results.
# Alternative query kept for reference:
# url = 'http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=3'
url = "http://export.arxiv.org/api/query?search_query=ti:deep+AND+ti:learning&start=0&max_results=3"

# Open the response as a context manager so the connection is released as soon
# as the body has been read; the timeout keeps the cell from hanging forever
# when the server does not answer.
with urllib.request.urlopen(url, timeout=30) as data:
    xml_res = data.read().decode('utf-8')
print(xml_res)


In [60]:
import xmltodict
# Parse the XML response text into nested mappings.
# force_list keeps "entry" a list even when the feed holds exactly one entry;
# without it xmltodict returns a single mapping in that case, which breaks any
# code that indexes the entries by position.
d = xmltodict.parse(xml_res, force_list=("entry",))
print(d["feed"]["entry"])

In [44]:
# Show the "summary" field of the third entry in the parsed feed
third_entry = d["feed"]["entry"][2]
print(third_entry["summary"])

## Arxiv API with `arxiv`

In [5]:
import arxiv
from rich import print

# Describe the search: titles containing both "deep" and "learning", at most
# 3 results, sorted by submission date (sort order left at the library default).
search = arxiv.Search(
    # Other queries tried: "ti:quantum", "abs:graph", "ti:quantum+OR+abs:graph"
    query="ti:deep AND ti:learning",
    max_results=3,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# Fetch through an explicit Client: Search.results() is deprecated in the
# arxiv package's 2.x line, while Client.results(search) is the supported way
# to run a search.
client = arxiv.Client()
for result in client.results(search):
    print(f"--- {result.title} [{result.published}] ---")
    print(result.summary)

In [38]:
# `json` is imported in this cell because no visible earlier cell brings it
# into scope, so the load would otherwise fail with NameError on a fresh kernel.
import json

# Read the saved arXiv dump and keep only the mapping stored under "results".
# The encoding is given explicitly so the read does not depend on the platform default.
with open("../data/03_primary/arxiv_dict_2023-09-04_01-09-05.json", "r", encoding="utf-8") as f:
    info_dict = json.load(f)["results"]

In [40]:
import pandas as pd
# Build a frame from the loaded mapping, flip it so the outer keys become the
# row index, and display the first rows as the cell output.
papers_by_column = pd.DataFrame(info_dict)
papers_by_column.T.head()

Unnamed: 0,title,abstract,date,category
http://arxiv.org/abs/2308.16730v1,"Proof of Deep Learning: Approaches, Challenges...",The rise of computational power has led to unp...,2023-08-31 13:49:04+00:00,cs.CR
http://arxiv.org/abs/2308.16686v1,Exploring the Polarization of Axially Symmetri...,The measurement of non-zero polarization can b...,2023-08-31 12:42:16+00:00,astro-ph.SR
http://arxiv.org/abs/2308.16637v1,Learning Channel Importance for High Content I...,Uncovering novel drug candidates for treating ...,2023-08-31 11:11:38+00:00,cs.CV
http://arxiv.org/abs/2308.16530v1,Privacy-Preserving Medical Image Classificatio...,Deep learning (DL)-based solutions have been e...,2023-08-31 08:21:09+00:00,cs.CR
http://arxiv.org/abs/2308.16083v1,Learned Image Reasoning Prior Penetrates Deep ...,The success of deep neural networks for pan-sh...,2023-08-30 15:15:31+00:00,cs.CV


## Semantic Scholar API with `urllib`

## Google News with `GoogleNews`
[https://github.com/Iceloof/GoogleNews](https://github.com/Iceloof/GoogleNews)

## Google News with `GNews`
[https://github.com/ranahaani/GNews/](https://github.com/ranahaani/GNews/)

In [98]:
from gnews import GNews

# Create a GNews client with its default settings and search for "France"
google_news = GNews()
found_news = google_news.get_news("France")

# Only the first five hits are printed to keep the output short
first_five_hits = found_news[:5]
print(first_five_hits)

## `newspaper3k`

In [90]:
import newspaper
# Build a newspaper source from http://cnn.com
# (alternative source tried: "https://www.sudouest.fr/")
cnn_paper = newspaper.build('http://cnn.com')

# URLs of the first ten articles listed on the built source
first_ten_articles = cnn_paper.articles[:10]
print([item.url for item in first_ten_articles])

In [91]:
# First five category URLs reported by the built source
category_urls = cnn_paper.category_urls()
print(category_urls[:5])

In [96]:
# cnn_article = cnn_paper.articles[0]
# cnn_article.download()
# cnn_article.parse()
# cnn_article.nlp()

## `News API`

In [7]:
# load API key
import yaml
# Read the local credentials file; safe_load is used to parse the YAML content
with open("../conf/local/credentials.yml", mode="r") as credentials_file:
    credentials = yaml.safe_load(credentials_file)

In [46]:
from newsapi import NewsApiClient

# Client authenticated with the key stored under news_api.key in the credentials
newsapi = NewsApiClient(api_key=credentials["news_api"]["key"])

# /v2/top-headlines — English headlines with an empty query string.
# Other filters tried here: q='bitcoin', sources='bbc-news,the-verge',
# category='business', country='us'.
headline_params = {
    "q": "",
    "language": "en",
}
top_headlines = newsapi.get_top_headlines(**headline_params)

# /v2/everything — second page of English articles matching "economy" in the
# given date window, sorted by relevancy.
# Other filters tried here: q='bitcoin', sources='bbc-news,the-verge',
# domains='bbc.co.uk,techcrunch.com'.
everything_params = {
    "q": "economy",
    "from_param": "2023-09-29",
    "to": "2023-10-02",
    "language": "en",
    "sort_by": "relevancy",
    "page": 2,
}
all_articles = newsapi.get_everything(**everything_params)

# /v2/top-headlines/sources
sources = newsapi.get_sources()

In [47]:
# Preview the first three top-headline articles
# (print(top_headlines) would show the full response instead)
headline_preview = top_headlines["articles"][:3]
print(headline_preview)

In [51]:
# Show the reported match count, then the first three articles
# (print(all_articles) would show the full response instead)
total_results = all_articles["totalResults"]
print("totalResults: ", total_results)
article_preview = all_articles["articles"][:3]
print(article_preview)

In [52]:
# print(sources)