# Adquisición de datos

- ficheros (csv, txt, json, xml, tsv)
- API
- bases de datos (SQL - NoSQL)
- web scraping
- almacenamiento en la nube (cloud Azure - AWS - GCP etc)

## API

- cómo funciona un servidor
- requests
- status (2xx, 3xx, 4xx, 5xx)
- formato (json --> dict) 

### Conversión entre objetos de Python `dict` / `list` <--> JSON (cadena de texto)

- serialización (dict / list --> json)
- deserialización (json --> dict / list)

In [3]:
# Build a sample dictionary that mixes scalars, a nested dict, a tuple and a
# list of dicts, so the JSON round trip below has several value types to show.
dic_ejemplo = dict(
    nombre='Maria',
    apellidos=dict(apellido1="Perez", apellido2="Garcia"),
    edad=45,
    geoloc=(-45.9, 18.9),
    mascotas=[dict(perro='Chuky'), dict(gato='doraemon')],
)

In [4]:
type(dic_ejemplo)

dict

In [5]:
dic_ejemplo

{'nombre': 'Maria',
 'apellidos': {'apellido1': 'Perez', 'apellido2': 'Garcia'},
 'edad': 45,
 'geoloc': (-45.9, 18.9),
 'mascotas': [{'perro': 'Chuky'}, {'gato': 'doraemon'}]}

In [6]:
dic_ejemplo.keys()

dict_keys(['nombre', 'apellidos', 'edad', 'geoloc', 'mascotas'])

In [7]:
dic_ejemplo.items()

dict_items([('nombre', 'Maria'), ('apellidos', {'apellido1': 'Perez', 'apellido2': 'Garcia'}), ('edad', 45), ('geoloc', (-45.9, 18.9)), ('mascotas', [{'perro': 'Chuky'}, {'gato': 'doraemon'}])])

In [8]:
# Build a sample list holding a string, an int, a float, a nested list and None
lista_ejemplo = [
    'string',
    15,
    8.9,
    [4, 5, 6],
    None,
]
type(lista_ejemplo)

list

## Serialización tipo JSON

In [9]:
import json

In [10]:
# Serialize the dictionary into its JSON text representation.
# json.dumps returns a str, which the type() call below confirms.
json_dict = json.dumps(dic_ejemplo)
type(json_dict)

str

In [11]:
json_dict

'{"nombre": "Maria", "apellidos": {"apellido1": "Perez", "apellido2": "Garcia"}, "edad": 45, "geoloc": [-45.9, 18.9], "mascotas": [{"perro": "Chuky"}, {"gato": "doraemon"}]}'

In [12]:
print(json_dict)

{"nombre": "Maria", "apellidos": {"apellido1": "Perez", "apellido2": "Garcia"}, "edad": 45, "geoloc": [-45.9, 18.9], "mascotas": [{"perro": "Chuky"}, {"gato": "doraemon"}]}


In [13]:
# json_dict is a plain str, so dict methods such as .keys() do not exist on it.
# The failure is the point of this cell: catch it and print the message so the
# notebook still runs top to bottom.
try:
    json_dict.keys()
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'str' object has no attribute 'keys'

In [14]:
json_dict[0:5]

'{"nom'

In [15]:
# A str can only be indexed by position, so looking it up by key fails.
# The failure is the point of this cell: catch it and print the message so the
# notebook still runs top to bottom.
try:
    json_dict['nombre']
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: string indices must be integers

In [16]:
# Serialize the list as well; the result is again a str.
json_list = json.dumps(lista_ejemplo)
type(json_list)

str

In [17]:
json_list

'["string", 15, 8.9, [4, 5, 6], null]'

## Deserialización del json a dict / list

In [19]:
# Parse the JSON text back into Python objects: a JSON object becomes a dict.
json_to_dict = json.loads(json_dict)
type(json_to_dict)

dict

In [20]:
json_to_dict['nombre']

'Maria'

In [22]:
json_to_dict

{'nombre': 'Maria',
 'apellidos': {'apellido1': 'Perez', 'apellido2': 'Garcia'},
 'edad': 45,
 'geoloc': [-45.9, 18.9],
 'mascotas': [{'perro': 'Chuky'}, {'gato': 'doraemon'}]}

In [23]:
dic_ejemplo

{'nombre': 'Maria',
 'apellidos': {'apellido1': 'Perez', 'apellido2': 'Garcia'},
 'edad': 45,
 'geoloc': (-45.9, 18.9),
 'mascotas': [{'perro': 'Chuky'}, {'gato': 'doraemon'}]}

In [24]:
# Bind a second name to the same dict object; no copy is made.
dic_nuevo = json_to_dict

In [26]:
# NOTE(review): dic_nuevo was assigned directly from json_to_dict, so both names
# refer to the same object and this comparison is always True. If the intent was
# to check the JSON round trip, compare against dic_ejemplo instead; that shows
# the 'geoloc' tuple coming back as a list.
dic_nuevo == json_to_dict

True

In [27]:
# Parse the JSON array text back into a Python list.
json_to_list = json.loads(json_list)
type(json_to_list)

list

## Conexión con API / DB / Servidores

- requests GET / POST

In [29]:
!pip install requests

Collecting requests
  Downloading requests-2.30.0-py3-none-any.whl (62 kB)
     ---------------------------------------- 62.5/62.5 kB 3.3 MB/s eta 0:00:00
Collecting charset-normalizer<4,>=2
  Downloading charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl (97 kB)
     ---------------------------------------- 97.1/97.1 kB 5.4 MB/s eta 0:00:00
Collecting idna<4,>=2.5
  Using cached idna-3.4-py3-none-any.whl (61 kB)
Collecting certifi>=2017.4.17
  Downloading certifi-2023.5.7-py3-none-any.whl (156 kB)
     ---------------------------------------- 157.0/157.0 kB ? eta 0:00:00
Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.0.2-py3-none-any.whl (123 kB)
     -------------------------------------- 123.2/123.2 kB 7.1 MB/s eta 0:00:00
Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
Successfully installed certifi-2023.5.7 charset-normalizer-3.1.0 idna-3.4 requests-2.30.0 urllib3-2.0.2


In [30]:
import requests

- api_url = http://api.postcodes.io/postcodes/

In [31]:
# Structure of the request: base endpoint plus the postcode to look up
api_url = 'http://api.postcodes.io/postcodes/'
zipcode = 'NE30 1DP'

# Send the GET request and keep the response object.
# requests has no default timeout, so without one an unresponsive server would
# hang this cell indefinitely.
response = requests.get(api_url + zipcode, timeout=10)

In [55]:
response.raw

<urllib3.response.HTTPResponse at 0x243c44fd3d0>

In [33]:
# Report whether the request succeeded: True for any 2xx status (200-299
# inclusive), "Error" otherwise.
# Written as a single trailing expression so the notebook displays the result;
# expressions nested inside an if-block are evaluated but never shown.
True if 200 <= response.status_code <= 299 else "Error"

In [37]:
response.headers['Date']

'Fri, 19 May 2023 18:39:10 GMT'

In [41]:
response.headers.keys()

KeysView({'Date': 'Fri, 19 May 2023 18:39:10 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'x-gnu': 'Michael J Blanchard', 'access-control-allow-origin': '*', 'etag': 'W/"3b6-BrmR8ce2+HDRdMnRSvrjHypVDJE"', 'CF-Cache-Status': 'MISS', 'Report-To': '{"endpoints":[{"url":"https:\\/\\/a.nel.cloudflare.com\\/report\\/v3?s=hiKzionULmnUoHyEAF9DHpaTBeMuq9K2m7NG9ldVDuYCeQnD7XkWXqa0JJqDUsE%2FQdqfGU3l7AZRMjU2o6jm0BGKKPGZ1%2Bov%2FltCV3xVtv5VTu4pM4IydogK2W6b9eiGFW8%3D"}],"group":"cf-nel","max_age":604800}', 'NEL': '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}', 'Vary': 'Accept-Encoding', 'Server': 'cloudflare', 'CF-RAY': '7c9e760be80886c9-MAD', 'Content-Encoding': 'gzip'})

In [43]:
type(response.content)

bytes

In [48]:
# response.content is bytes; json.loads parses it into Python objects.
diccionario = json.loads(response.content)
# List the fields available under the 'result' key of the payload.
diccionario['result'].keys()

dict_keys(['postcode', 'quality', 'eastings', 'northings', 'country', 'nhs_ha', 'longitude', 'latitude', 'european_electoral_region', 'primary_care_trust', 'region', 'lsoa', 'msoa', 'incode', 'outcode', 'parliamentary_constituency', 'admin_district', 'parish', 'admin_county', 'date_of_introduction', 'admin_ward', 'ced', 'ccg', 'nuts', 'pfa', 'codes'])

In [49]:
import pandas as pd

In [62]:
diccionario['result']['region']

'North East'

In [63]:
# Keep only the two fields of interest from the API payload, under our own names.
new_dict = dict(
    zipcode=diccionario['result']['postcode'],
    region=diccionario['result']['region'],
)

In [64]:
new_dict

{'zipcode': 'NE30 1DP', 'region': 'North East'}

In [78]:
# Build a one-row DataFrame from the dict. Every value in new_dict is a scalar,
# so an explicit index is passed to tell pandas there is exactly one row.
df = pd.DataFrame(data=new_dict, index=[0])
df

Unnamed: 0,zipcode,region
0,NE30 1DP,North East


In [None]:
# Stamp the row with the local time at which it was built.
# pd.Timestamp.now() is used because pandas is already imported as pd, while
# the datetime module is not imported in the cells above.
df['timestamp'] = pd.Timestamp.now()

In [79]:
import labs.creds

In [82]:
# Unpack the values of creds_openweath positionally into user, password and path.
# NOTE(review): presumably a dict with exactly three entries in this order.
# Confirm against labs/creds, or look the values up by key so a reordering
# cannot silently swap them.
user, pwd, ruta = labs.creds.creds_openweath.values()

In [83]:
user

'nombre_usuario'