In [None]:
import asyncio
import requests
from concurrent.futures import ThreadPoolExecutor
import json
import pandas as pd
import os
from urllib.parse import urlparse

### Get image from url

In [None]:
def GetImage(fname,url='',path="data/",update=False):
    """Download one image to ``path+fname`` unless it is already on disk.

    The request goes through the module-level ``requests`` session ``s``.

    Args:
        fname: File name appended to ``path`` to form the destination.
        url: Address of the image to download.
        path: Prefix prepended to ``fname`` (normally a directory ending in "/").
        update: When True, download again even if the destination exists.

    Returns:
        1 if the file was already present or was saved successfully;
        0 if the request failed, the response body was empty, or the file
        could not be written.
    """
    target = path + fname
    if not update and os.path.isfile(target):
        print("# Imagen ",fname," lista")
        return 1
    try:
        r=s.get(url,timeout=(600,600))
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        print("Failed to get "+url,fname)
        return 0
    if not r.content:
        # A zero-byte file would be reported as "lista" on every later run
        # and never be downloaded again.
        print("Failed to get "+url,fname)
        return 0

    # Write to a temporary name and rename it afterwards, so an interrupted
    # write never leaves a truncated file under the final name.
    partial = target + ".part"
    try:
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(partial, 'wb') as f:
            f.write(r.content)
        os.replace(partial, target)
    except OSError as e:
        print("Failed to save "+target, e)
        try:
            os.remove(partial)
        except OSError:
            pass
        return 0
    return 1

### Get JSON data

In [None]:
def ObtenerDatosMesa(fname,req,site='https://resultados2019.tse.org.gt/201901/',path="data/",update=False):
    """Return the JSON record for one request, using ``path+fname`` as a cache.

    The request goes through the module-level ``requests`` session ``s``.

    Args:
        fname: Cache file name, appended to ``path``.
        req: Request string appended to ``site`` to form the URL.
        site: Base URL of the API.
        path: Prefix prepended to ``fname`` (normally a directory ending in "/").
        update: When True, ignore any cached file and download again.

    Returns:
        ``(1, data)`` with the decoded JSON on success, ``(0, 0)`` when the
        request fails, the response is empty or is not valid JSON, or the
        cache file cannot be written.
    """
    target = path + fname
    if not update and os.path.isfile(target):
        try:
            d=AbrirDatosMesa(target)
        except (OSError, ValueError) as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # An unreadable cache entry is replaced by a fresh download
            # instead of failing on every run.
            print("# ACTA ",fname," unreadable cache, downloading again:",e)
        else:
            print("# ACTA ",fname," lista")
            return 1,d
    try:
        r=s.get(site+req,timeout=(600,600))
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        print("Failed to get "+site+req,fname)
        return 0,0

    if not len(r.content):
        print(site+req)
        print('EMPTY RESPONSE')
        print(r.content)
        return 0,0

    # Validate BEFORE caching: storing an HTML error page (or any other
    # non-JSON body) would make every later cached read fail.
    try:
        my_json = r.content.decode('utf8')
        d = json.loads(my_json)
    except ValueError as e:
        print(site+req)
        print('INVALID JSON RESPONSE',e)
        return 0,0

    # Write under a temporary name and rename, so an interrupted write
    # never leaves a truncated cache file under the final name.
    partial = target + ".part"
    try:
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)
        # Default text encoding on purpose: AbrirDatosMesa reads the file
        # back with the default encoding as well.
        with open(partial,'w') as outfile:
            outfile.write(my_json)
        os.replace(partial, target)
    except (OSError, ValueError) as e:
        print("Failed to save "+target,e)
        try:
            os.remove(partial)
        except OSError:
            pass
        return 0,0
    return 1,d

### Open JSON file

In [None]:
def AbrirDatosMesa(fname):
    """Read the file ``fname`` and return its contents decoded as JSON."""
    with open(fname) as handle:
        contents = handle.read()
    return json.loads(contents)

### Get data for a given "mesa"

In [None]:
def ProcesarMesa(mesa):
    """Fetch the JSON record of one mesa and download the images it lists.

    Reads the module-level names ``token`` (appended to the API request) and
    ``n_papeletas`` (number of images expected per mesa).

    Args:
        mesa: Integer mesa number.

    Returns:
        ``(datos_completos, archivos_completos)``: whether the JSON record
        was obtained with the expected layout, and how many of the expected
        images are available locally after this call.
    """
    data_name="mesa_"+'{0:06d}'.format(mesa)+'.json'
    # Older versions of the API accepted "api.php?mesa={}&vista=MESA" without the token.
    req="api.php?mesa={}&vista=MESA&token={}".format(mesa,token)

    ok, d = ObtenerDatosMesa(data_name,req)
    if not ok:
        return (False, 0)

    # A payload without a 'TE' list (e.g. an error object) is reported as
    # incomplete data instead of raising: an exception here would propagate
    # out of the worker thread and abort the whole batch.
    actas = d.get('TE') if isinstance(d, dict) else None
    if not isinstance(actas, list):
        print("M",mesa,"unexpected JSON layout: no 'TE' list")
        return (False, 0)

    # Map image file name -> download URL, as announced by the server.
    locations={}
    for acta in actas:
        url = acta.get('IMGSRC') if isinstance(acta, dict) else None
        if not url:
            continue
        locations[os.path.basename(urlparse(url).path)]=url

    archivos_completos=0
    for j in range(1,n_papeletas+1):
        # Expected image name: mesa number followed by the 1-based index j,
        # zero-padded to six digits.
        fname='{0:06d}'.format(mesa*10+j)+'.jpg'
        if fname in locations:
            if GetImage(fname,locations[fname]):
                archivos_completos+=1
        else:
            print("UNK ",fname)
    print("M",mesa,True,archivos_completos)
    return (True, archivos_completos)

# Task scheduler

In [None]:
async def get_data_asynchronous(path="data/",startFromZero=True):
    """Run ``ProcesarMesa`` for many mesas in a thread pool and record the outcome.

    Reads the module-level names ``start``/``end`` (full run) and
    ``n_papeletas`` (retry threshold).

    Args:
        path: Prefix of the ``results.csv`` summary file (read in retry mode,
            written in both modes).
        startFromZero: When True, process every mesa in ``range(start, end)``
            and write a fresh summary. When False, read the existing summary
            and process only the mesas whose data is missing or that have
            fewer than ``n_papeletas`` images; rows of the mesas that are
            not retried are kept in the rewritten summary.

    Returns:
        None. The summary is written to ``path + 'results.csv'`` with the
        columns ``mesa,datos,actas``.
    """
    results_file = path + 'results.csv'
    registro = {}  # mesa -> (datos, actas), in the order rows are written

    if startFromZero:
        mesas = list(range(start, end))
    else:
        # Read the summary from the same location it is written to.
        df = pd.read_csv(results_file)
        registro = {
            int(m): (d, a)
            for m, d, a in zip(df['mesa'].tolist(), df['datos'].tolist(), df['actas'].tolist())
        }
        pendientes = (df['datos'] == False) | (df['actas'] < n_papeletas)
        # dict.fromkeys drops duplicated mesa numbers while keeping order.
        mesas = list(dict.fromkeys(int(m) for m in df.loc[pendientes, 'mesa']))

    with ThreadPoolExecutor(max_workers=10) as executor:
        loop = asyncio.get_event_loop()
        tasks = [
            loop.run_in_executor(executor, ProcesarMesa, mesa)
            for mesa in mesas
        ]
        # return_exceptions=True: one failing mesa must not discard the
        # results of all the others.
        res = await asyncio.gather(*tasks, return_exceptions=True)

    for mesa, response in zip(mesas, res):
        if isinstance(response, BaseException):
            print("Mesa {} failed: {!r}".format(mesa, response))
            response = (False, 0)
        print("Mesa {} response {}".format(mesa, response))
        registro[mesa] = (response[0], response[1])

    with open(results_file, 'w') as f:
        f.write("mesa,datos,actas\n")
        for mesa, (datos, actas) in registro.items():
            f.write("{},{},{}\n".format(mesa, datos, actas))

### Create a session and init parameters

In [None]:
##TODO VERY UGLY global variables, need manual restart on server fault
# The functions in this notebook read these module-level names directly:
# `s` (HTTP session), `token`, `start`, `end` and `n_papeletas`.
s=requests.Session()
# Same generous timeout as the other requests in this notebook; without one
# a stalled connection would block this cell indefinitely.
r=s.get('https://resultados2019.tse.org.gt/201901/',timeout=(600,600))
r.raise_for_status()
my_json = r.content.decode('utf8')

#Extract token from the session
sv='vista=MESA&token='
if sv not in my_json:
    # Fail here rather than later: otherwise `token` would be undefined, or
    # silently keep a stale value from a previous run of this cell.
    raise RuntimeError("Token marker '"+sv+"' not found in the landing page")
idx=my_json.index(sv)+len(sv)
# The token is taken as the 40 characters that follow the marker.
token=my_json[idx:idx+40]


total_mesas=21100
start=1
end=total_mesas
n_papeletas=5
# NOTE(review): `site` is not read by any function visible in this notebook
# (ObtenerDatosMesa has its own `site` parameter with a different default).
site='https://resultados2019.tse.org.gt/img/201901/'

#### Get a single 'mesa'

In [None]:
# Process one mesa on its own; the cell displays the returned
# (datos_completos, archivos_completos) tuple.
ProcesarMesa(21099)

### Asynchronously download all the data between `start` and `end`

In [None]:
# Launch the full download.
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(get_data_asynchronous())
if not loop.is_running():
    # Plain Python process: drive the loop until the download finishes.
    loop.run_until_complete(future)
# Otherwise the loop is already running (as in a Jupyter kernel), where
# run_until_complete() raises "This event loop is already running". The task
# scheduled by ensure_future() then runs in the background on that loop;
# `future.done()` reports when it has finished.