In [3]:
import requests
import json
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np

We can connect to public APIs and download data. This one corresponds to the international station.

In [16]:
response = requests.get('http://api.open-notify.org/iss-now.json')
response.text

'{"iss_position": {"latitude": "-44.7272", "longitude": "-128.6375"}, "timestamp": 1653589285, "message": "success"}'

We can convert a json-formatted string such as the one we get in the response into a Python object with the json library.

In [17]:
response = json.loads(response.text)
response

{'iss_position': {'latitude': '-44.7272', 'longitude': '-128.6375'},
 'timestamp': 1653589285,
 'message': 'success'}

In [30]:
response['iss_position']['latitude']

'-44.7272'

In [31]:
response['iss_position']['longitude']

'-128.6375'

We also can go in the other direction and generate json-formatted strings from Python objects:

In [32]:
profs = [{'name': 'Dani'}, {'name': 'Toni'}]
json.dumps(profs)

'[{"name": "Dani"}, {"name": "Toni"}]'

In [34]:
response = requests.get('https://api.agify.io?name=jorge')
response.text

'{"name":"jorge","age":62,"count":114531}'

In [57]:
def age(name='Grandma'):
    response = requests.get('https://api.agify.io?name='+name)
    json_age = json.loads(response.text)['age']
    return f'{name} is {json_age} years old.'

In [58]:
age()

'Grandma is 72 years old.'

In [52]:
parameters = {'name': 'Jorge', 'country_id': 'ES'}

In [59]:
response = requests.get('https://api.agify.io?name=jorge', params=parameters)
response.text

'[{"name":"jorge","age":46,"count":36357,"country_id":"ES"},{"name":"Jorge","age":46,"count":36357,"country_id":"ES"}]'

In order to read web pages (html) we need to use BeautifulSoup library.

## Exercise

Get all the articles on the front page os elpais.com (title and URL)

Store it in a csv file.

In [63]:
response = requests.get('https://elpais.com')
soup = BeautifulSoup(response.text)

In [64]:
articles = soup.findAll('article')

In [65]:
titulares = []
enlaces = []
for a in articles:
    # first we find all links on the front page
    link = a.find('a', href=True)
    enlaces.append(link['href'])
    
    # second we get the text from the respective class
    titulo = a.find('h2', attrs={'class': 'c_t'})
    titulares.append(titulo.text)

    # after appending it to empty lists we just need to store it in a csv.


df = pd.DataFrame({'Titular':titulares, 'URL': enlaces})

In [66]:
df.to_csv('elpais.csv', index=False)