In [12]:
import asyncio
import requests
from concurrent.futures import ThreadPoolExecutor
import json
import pandas as pd
import os
from urllib.parse import urlparse

In [13]:
from ecdsa import SigningKey, SECP256k1, util
import hashlib 
import random, binascii

### Generate Key

In [14]:
def StringToKeyBytes(key):
    """Convert a hexadecimal text key into the raw bytes it encodes.

    The text is first encoded as UTF-8, then its hex digits are decoded with
    ``binascii.unhexlify``.
    """
    hex_digits = bytes(key, 'utf-8')
    return binascii.unhexlify(hex_digits)

In [15]:
def KeyBytesToString(key):
    """Render raw key bytes as a hexadecimal text string."""
    return binascii.hexlify(key).decode("utf-8")

In [16]:
# NOTE: the fixed seed and the `random` module are used only to make this
# example reproducible; choose a properly random private_string for real use.
random.seed(1)
curve = SECP256k1

# 64 hexadecimal digits, drawn one at a time (example only, see the note above).
private_string = ''.join(format(random.randrange(16), 'x') for _ in range(64))
print("My Private key example: ",private_string," **SECRET** ")

# hex text -> private key bytes -> signing key -> public key bytes -> hex text
private_key = StringToKeyBytes(private_string)
priv = SigningKey.from_string(private_key, curve=curve)
pub = priv.get_verifying_key().to_string()
pub_string = KeyBytesToString(pub)

print("-"*10)
print("-- DATA FOR PUBLIC --")
print("My Public key is: ", pub_string)
print("My Curve is ",curve.name)

My Private key example:  4283fefc63f0cd0e873a0000c6d07ef7b77e90d3593ad699fc1f7cd5bb2e35cb  **SECRET** 
----------
-- DATA FOR PUBLIC --
My Public key is:  04e42d50dae16e9c9d1b005cb6886270ecced5067316feff547a2ccc15fb68b49dc9c2ed56209b2de29039ea195adf02afee071ec108a721bdeb992059ba52ac
My Curve is  SECP256k1


### Get image from url

In [17]:
def GetImage(fname,url='',path="dataSig/",update=True):
    """Download an image, print its SHA-256 digest and signature, and save it.

    Relies on the notebook-level globals ``s`` (the HTTP session) and ``priv``
    (the signing key).

    Parameters
    ----------
    fname : str
        File name; appended directly to ``path``.
    url : str
        Address the image is fetched from.
    path : str
        Prefix of the destination file. It is concatenated with ``fname``
        as-is, so it should end with a path separator.
    update : bool
        When false, an already existing ``path + fname`` is kept and nothing
        is downloaded.

    Returns
    -------
    int
        1 when the file is available (already on disk or freshly written),
        0 when the HTTP request failed.
    """
    target = path + fname
    if not update and os.path.isfile(target):
        print("# Imagen ",fname," lista")
        return 1

    try:
        response = s.get(url,timeout=(600,600))
        response.raise_for_status()
    except requests.exceptions.RequestException:
        print("Failed to get "+url,fname)
        return 0

    content = response.content

    # Hash and sign BEFORE opening the destination file. Opening with 'wb'
    # truncates it, so a failure while signing would otherwise leave an empty
    # file behind that a later update=False call would accept as downloaded.
    digest = hashlib.sha256(content).hexdigest()
    print("SHA2 ",digest)
    sig = priv.sign(content, hashfunc=hashlib.sha256, sigencode=util.sigencode_der)
    sig_string = KeyBytesToString(sig)
    print("SIG :", sig_string)

    with open(target, 'wb') as f:
        f.write(content)
    return 1

### Get JSON data

In [19]:
def ObtenerDatosMesa(fname,req,site='https://resultados2019.tse.org.gt/201901/',path="dataSig/",update=False):
    """Return the JSON data of one request, using ``path + fname`` as a cache.

    Relies on the notebook-level global ``s`` (the HTTP session).

    Parameters
    ----------
    fname : str
        Cache file name; appended directly to ``path``.
    req : str
        Request string appended to ``site`` to build the URL.
    site : str
        Base URL of the API.
    path : str
        Prefix of the cache file (concatenated with ``fname`` as-is).
    update : bool
        When false and the cache file exists, the cached copy is loaded and
        no request is made.

    Returns
    -------
    tuple
        ``(1, data)`` on success, where ``data`` is the parsed JSON value;
        ``(0, 0)`` when the request failed, the body was empty, or the body
        was not valid UTF-8 JSON.
    """
    target = path + fname
    if not update and os.path.isfile(target):
        print("# ACTA ",fname," lista")
        return 1, AbrirDatosMesa(target)

    try:
        response = s.get(site+req,timeout=(600,600))
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        print("Failed to get "+site+req,fname)
        return 0,0

    body = response.content
    if not body:
        print(site+req)
        print('EMPTY RESPONSE')
        print(body)
        return 0,0

    # Parse BEFORE writing the cache file: caching an unparsable body would
    # make every later cached read of this file fail.
    try:
        my_json = body.decode('utf8')
        d = json.loads(my_json)
    except ValueError as e:  # covers UnicodeDecodeError and JSONDecodeError
        print(e)
        print("Invalid JSON from "+site+req,fname)
        return 0,0

    with open(target,'w') as outfile:
        outfile.write(my_json)
    return 1,d

### Open JSON file

In [20]:
def AbrirDatosMesa(fname):
    """Read the JSON document stored at ``fname`` and return the parsed value."""
    with open(fname) as handle:
        return json.loads(handle.read())

### Get data for a given "mesa"

In [21]:
def ProcesarMesa(mesa):
    """Fetch the JSON data of one mesa and download its images.

    Uses the notebook-level globals ``token`` and ``n_papeletas``.

    Returns a tuple ``(datos_completos, archivos_completos)``: whether the
    JSON data was obtained, and how many images were downloaded successfully.
    """
    mesa_id = '{0:06d}'.format(mesa)
    data_name = "mesa_" + mesa_id + '.json'
    req = "api.php?mesa={}&vista=MESA&token={}".format(mesa,token)
    # The old API version did not need the token:
    #req="api.php?mesa={}&vista=MESA".format(mesa)

    ok, payload = ObtenerDatosMesa(data_name,req)
    if not ok:
        # Without the JSON data there is nothing to download.
        return (False, 0)

    # Map each image file name (last component of the URL path) to its URL.
    locations = {
        os.path.basename(urlparse(acta['IMGSRC']).path): acta['IMGSRC']
        for acta in payload['TE']
    }

    downloaded = 0
    for j in range(1, n_papeletas+1):
        # Expected image name: the mesa number followed by the index j.
        fname = '{0:06d}'.format(mesa*10+j) + '.jpg'
        if fname not in locations:
            print("UNK ",fname)
            continue
        if GetImage(fname, locations[fname]):
            downloaded += 1

    print("M",mesa,True,downloaded)
    return (True, downloaded)

# Task scheduler

In [22]:
async def get_data_asynchronous(path="dataSig/",startFromZero=True):
    """Run ``ProcesarMesa`` for many mesas on a thread pool and record the results.

    Parameters
    ----------
    path : str
        Prefix of the ``results.csv`` file (concatenated as-is). The file is
        read from here when resuming and always written here at the end.
    startFromZero : bool
        When true, every mesa in ``range(start, end)`` (notebook-level
        globals) is processed. When false, only the mesas that an earlier
        ``results.csv`` marks as incomplete are processed again.

    The resulting ``results.csv`` has the columns ``mesa,datos,actas``. A mesa
    whose worker raised an exception is recorded as ``False,0`` so that it is
    picked up by the next resumed run.
    """
    if startFromZero:
        mesas = range(start,end)
    else:
        # Read the previous results from the same location they are written to.
        df = pd.read_csv(path+'results.csv')
        # NOTE(review): 5 is presumably the full number of images per mesa
        # (compare n_papeletas) - confirm before changing.
        needs_retry = (df['datos'] == False) | (df['actas'] < 5)
        mesas = [int(m) for m in df.loc[needs_retry, 'mesa']]

    with ThreadPoolExecutor(max_workers=10) as executor:
        loop = asyncio.get_event_loop()
        tasks = [
            loop.run_in_executor(executor, ProcesarMesa, mesa)
            for mesa in mesas
        ]
        # return_exceptions=True: one failing mesa must not discard the
        # results of all the others.
        res = await asyncio.gather(*tasks, return_exceptions=True)

        with open(path+'results.csv', 'w') as f:
            f.write("mesa,datos,actas\n")
            for mesa, response in zip(mesas, res):
                if isinstance(response, BaseException):
                    print("Mesa {} failed: {!r}".format(mesa,response))
                    response = (False, 0)
                else:
                    print("Mesa {} response {}".format(mesa,response))
                f.write("{},{},{}\n".format(mesa,response[0],response[1]))

### Create a session and init parameters

In [23]:
# TODO: very ugly - these are notebook-level globals used by the functions of
# this notebook; after a server fault this cell has to be re-run by hand.
s=requests.Session()
# Same timeout and status check as the other requests made through `s`.
r=s.get('https://resultados2019.tse.org.gt/201901/',timeout=(600,600))
r.raise_for_status()
my_json = r.content.decode('utf8')

# Extract the token from the session: it is taken as the 40 characters that
# follow the first occurrence of the marker below in the page.
sv='vista=MESA&token='
if sv not in my_json:
    # Fail here with a clear message instead of leaving `token` undefined,
    # which would only surface later as a NameError inside ProcesarMesa.
    raise RuntimeError("Token marker '"+sv+"' not found in the landing page")
idx=my_json.index(sv)+len(sv)
token=my_json[idx:idx+40]


total_mesas=21100
start=1
# NOTE(review): get_data_asynchronous iterates range(start, end), so the mesa
# numbered `end` itself is not processed - confirm that this is intended.
end=total_mesas
n_papeletas=1 #5
# NOTE(review): the functions visible in this notebook take their own `site`
# default and do not appear to read this global - confirm it is still needed.
site='https://resultados2019.tse.org.gt/img/201901/'

#### Get single 'mesa' data

In [24]:
# Process mesa number 1 only; the cell output is the tuple returned by ProcesarMesa.
ProcesarMesa(1)

# ACTA  mesa_000001.json  lista
SHA2  1792f9d089bfd2e34dba0c5a4d3554a38d948a5a63abef12e2e754eed201f14a
SIG : 30440220318ee4c83edbab3a2b5dfb0a3b707132a3613f468a6e00dcf7fe17072b92bdd7022002bdf92233962735663e2c7ea3e03a9d0dc634fbde86f89aa325d2ce0efc94bc
M 1 True 1


(True, 1)

### Asynchronous download all the data within start and end

In [None]:
# TODO: signatures are only printed out; save them to a file or HTML, and avoid race conditions
#loop = asyncio.get_event_loop()
#future = asyncio.ensure_future(get_data_asynchronous())
#loop.run_until_complete(future)