# 06 - Export Task API

Crea tareas de exportación **asíncronas** que vuelcan datos históricos en un **collector**; después puedes leer el collector como una cola. Cada resultado exportado consume **1 crédito**. El máximo es de 1.000.000 de resultados por tarea.

## 1. Configuración

In [ ]:
# @title Credenciales { display-mode: "form" }
ACCESS_TOKEN = ""  # @param {type:"string"}
PROJECT_ID = ""     # @param {type:"string"}
TOPIC_ID = ""       # @param {type:"string"} (opcional)

# ACCESS_TOKEN and PROJECT_ID must be filled in before running the notebook;
# TOPIC_ID may stay empty, in which case the export request sends no topic filter.
# BASE_URL is the root that every endpoint path below is appended to.
BASE_URL = "https://api.talkwalker.com"
import requests
import json
import time

def tw_put(endpoint, data=None, params=None, timeout=30):
    """Send an authenticated PUT request and return the decoded JSON body.

    Args:
        endpoint: Path appended to ``BASE_URL``.
        data: JSON-serializable request body; an empty object is sent when
            it is falsy.
        params: Optional extra query parameters, merged on top of the
            access token.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    query = {"access_token": ACCESS_TOKEN}
    if params:
        query.update(params)
    response = requests.put(
        f"{BASE_URL}{endpoint}",
        params=query,
        json=data or {},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

def tw_post(endpoint, data, params=None, timeout=30):
    """Send an authenticated POST request and return the decoded JSON body.

    Args:
        endpoint: Path appended to ``BASE_URL``.
        data: JSON-serializable request body.
        params: Optional extra query parameters, merged on top of the
            access token.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    query = {"access_token": ACCESS_TOKEN}
    if params:
        query.update(params)
    response = requests.post(
        f"{BASE_URL}{endpoint}",
        params=query,
        json=data,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

def tw_get(endpoint, params=None, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        endpoint: Path appended to ``BASE_URL``.
        params: Optional extra query parameters, merged on top of the
            access token.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    query = {"access_token": ACCESS_TOKEN}
    if params:
        query.update(params)
    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Fail fast when the credentials were left empty. An explicit raise is used
# instead of `assert` so the check still runs when Python is started with -O;
# the exception type and message are unchanged.
if not (ACCESS_TOKEN and PROJECT_ID):
    raise AssertionError("Configura ACCESS_TOKEN y PROJECT_ID")
print("✅ Listo.")

## 2. Crear collector vacío

In [ ]:
# PUT an empty definition to the collector endpoint; the later cells use
# `collector_id` as the export target and as the stream to read from.
collector_id = "workshop-export-collector"
resp = tw_put("/api/v3/stream/c/" + collector_id, {})
# The API reports success with the string "0" in `status_code`.
if resp.get("status_code") != "0":
    print("Error:", resp)
else:
    print("✅ Collector creado/actualizado:", collector_id)

## 3. Crear export task desde proyecto

In [ ]:
# Export request: the 2024-01-01 .. 2024-01-02 window, a limit of 100, written
# into the collector created above.
body = dict(
    start="2024-01-01",
    stop="2024-01-02",
    target=collector_id,
    limit=100,
)
# The topic filter is only sent when a topic id was configured.
if TOPIC_ID:
    body["topics"] = [TOPIC_ID]
resp = tw_post("/api/v3/stream/p/" + PROJECT_ID + "/export", body)
if resp.get("status_code") == "0":
    tasks = resp.get("result_tasks", {}).get("tasks", [])
    if not tasks:
        # Successful status but no task listed: show the raw response.
        print("Respuesta:", resp)
    else:
        task_id = tasks[0].get("id")
        print("✅ Export task creada. task_id:", task_id)
else:
    print("Error:", resp)

## 4. Consultar estado de la tarea

In [ ]:
# Recover the id of the export task from the last response and print its status.
# `or` (rather than a .get default) also covers a null `result_tasks` and an
# empty `tasks` list, which would otherwise raise AttributeError / IndexError.
task_id = ((resp.get("result_tasks") or {}).get("tasks") or [{}])[0].get("id")
if task_id:
    status_resp = tw_get(f"/api/v3/tasks/export/{task_id}")
    for t in (status_resp.get("result_tasks") or {}).get("tasks") or []:
        print("Estado:", t.get("status"), "| Procesados:", t.get("processed"), "| Progress:", t.get("progress"))
else:
    print("Ejecuta la celda anterior para obtener task_id.")

## 5. Leer resultados del collector (cuando la tarea esté FINISHED)

In [ ]:
# Stream the collector from its earliest offset and print the first few titles.
# The limit is defined once so the server-side cap and the client-side stop agree.
max_hits = 5
params = {"resume_offset": "earliest", "end_behaviour": "stop", "max_hits": max_hits}
count = 0
# The response is used as a context manager so the streamed connection is
# released even when the loop stops early or a line fails to parse.
with requests.get(
    f"{BASE_URL}/api/v3/stream/c/{collector_id}/results",
    params={**params, "access_token": ACCESS_TOKEN},
    stream=True,
    timeout=30,
) as r:
    r.raise_for_status()
    for line in r.iter_lines():
        if not line:
            # Skip blank lines in the stream.
            continue
        chunk = json.loads(line)
        if chunk.get("chunk_type") != "CT_RESULT":
            continue
        count += 1
        data = chunk.get("chunk_result", {}).get("data", {}).get("data", {})
        # `or ""` covers a title that is present but null, which cannot be sliced.
        title = data.get("title") or ""
        print(f"{count}.", title[:60])
        if count >= max_hits:
            break
if count == 0:
    print("Sin resultados aún o tarea no finalizada.")

## 6. Listar tareas recientes y abortar (opcional)

In [ ]:
# Fetch the export task list and print id, status and target for the first
# five entries returned.
all_tasks = tw_get("/api/v3/tasks/export")
for t in all_tasks.get("result_tasks", {}).get("tasks", [])[:5]:
    print(*(t.get(field) for field in ("id", "status", "target")))
# To abort a task: requests.delete(f"{BASE_URL}/api/v3/tasks/export/<task_id>?access_token=...")