# Refresh Data on Mailings

Fetch the latest mail-delivery tracking data from the Click2Mail API and record it in the project database.

In [1]:
import json
import time
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import yaml
from bs4 import BeautifulSoup

from suso import click2mail, database

In [None]:
# Path to the YAML credentials file, three directory levels above the current working directory.
CREDENTIALS_FILE = Path.cwd().joinpath("..", "..", "..", "config.yml")

In [None]:
# Load the Click2Mail and database settings from the YAML config file.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader raises TypeError on PyYAML >= 6 and can construct arbitrary Python
# objects on older versions.
with open(CREDENTIALS_FILE) as f:
    config = yaml.safe_load(f)

In [2]:
# Create a client for the production Click2Mail API and log in with the
# username/password stored under the "click2mail" section of the config.
c2m_credentials = config["click2mail"]
client = click2mail.Click2MailClient(is_production=True)
client.login(c2m_credentials["username"], c2m_credentials["password"])

In [5]:
# POST to the "account"/"authorize" endpoint through the client's private helper; the response is kept in `r`.
# NOTE(review): presumably this verifies that the login above succeeded — confirm against the client implementation.
r = client._post("account", "authorize")

In [7]:
# Collect the id of every document in the account by paging through the
# "documents" endpoint until a bad status or a short (final) page is returned.
PAGE_SIZE = 10  # number of documents requested per API call

document_ids = []

offset = 0

while True:
    r = client._get(
        "documents", query={"offset": offset, "numberOfDocuments": PAGE_SIZE}
    )
    soup = BeautifulSoup(r.content, "xml")
    docs = soup.find("documents")
    status_node = docs.find("status") if docs is not None else None

    # Quit on a malformed response instead of failing with an AttributeError
    if status_node is None:
        print(f"Unexpected response at offset {offset}; stopping")
        break

    # Quit if we have a bad status
    if int(status_node.text) != 0:
        break

    doc_list_node = docs.find("documentList")
    doc_list = doc_list_node.find_all("document") if doc_list_node is not None else []
    document_ids.extend(int(doc.find("id").text) for doc in doc_list)

    # Quit if there are fewer than PAGE_SIZE documents returned (last page)
    if len(doc_list) < PAGE_SIZE:
        break

    offset += PAGE_SIZE
    # Progress indicator: timestamp of the first document on the page just read
    print(doc_list[0].find("lastUpdated").text)
    # Pause between requests to avoid hammering the API
    time.sleep(1)

2018-03-26 15:01:50.0
2018-03-22 15:04:07.0
2018-03-22 15:01:10.0
2018-03-20 15:02:04.0
2018-03-19 15:01:37.0
2018-03-15 18:00:49.0
2018-03-13 15:02:16.0
2018-03-12 15:02:21.0
2018-03-09 14:02:06.0
2018-03-08 15:01:39.0
2018-03-06 15:00:41.0
2018-03-01 14:02:02.0
2018-02-28 14:01:30.0
2018-02-28 09:17:59.0
2018-02-28 09:15:07.0
2018-02-28 09:12:30.0
2018-02-26 16:00:57.0
2018-02-22 18:20:09.0
2018-02-20 14:49:46.0
2018-02-15 16:01:32.0
2018-02-14 15:19:09.0
2018-02-13 12:56:17.0
2018-02-12 16:12:09.0
2018-02-12 10:53:00.0
2018-02-09 12:34:34.0
2018-02-08 14:49:21.0
2018-02-08 13:50:30.0
2018-02-08 13:46:10.0
2018-02-06 14:41:20.0
2018-02-05 16:51:31.0
2018-02-05 14:40:50.0
2018-02-02 15:49:46.0
2018-02-01 16:19:42.0
2018-01-31 16:19:49.0
2018-01-31 10:08:31.0
2018-01-30 16:20:03.0
2018-01-30 10:26:05.0
2018-01-30 10:23:13.0
2018-01-26 16:57:11.0
2018-01-26 16:54:16.0
2018-01-26 16:52:01.0
2018-01-26 16:30:09.0
2018-01-26 16:27:03.0
2018-01-19 09:56:00.0
2018-01-19 09:39:37.0
2018-01-17

In [15]:
# Read the student ids that already have a row in jobs_new.
# try/finally guarantees the cursor and connection are released even if the
# query fails, so a failed run does not leave a connection open in the kernel.
conn = database.get_connection(config["db"])
try:
    curs = conn.cursor()
    try:
        curs.execute("""SELECT student_id FROM jobs_new""")
        known_student_ids = [x[0] for x in curs.fetchall()]
    finally:
        curs.close()
finally:
    conn.close()

In [14]:
# Map each job id to the names of the documents that were mailed in that job,
# using one "jobDocuments" lookup per document id.
job_to_name = defaultdict(list)
document_ids = set(document_ids)  # drop duplicate ids before querying

for i, doc_id in enumerate(document_ids):
    r = client._get("documents", "jobDocuments", query={"documentId": doc_id})
    soup = BeautifulSoup(r.content, "xml")

    # Look both elements up before touching job_to_name: indexing the
    # defaultdict first would leave an empty list behind when <document> is
    # missing, and an empty entry breaks the id extraction further down.
    job_node = soup.find("jobId")
    name_node = soup.find("document")
    if job_node is not None and name_node is not None:
        job_to_name[int(job_node.text)].append(name_node.text)

    # Report progress and pause after every request, including ones whose
    # response had no usable job, so the API is never hit back-to-back.
    if i % 10 == 9:
        print(f"Finished {i + 1} / {len(document_ids)}")
    time.sleep(1)

Finished 10 / 541
Finished 20 / 541
Finished 30 / 541
Finished 40 / 541
Finished 50 / 541
Finished 60 / 541
Finished 70 / 541
Finished 80 / 541
Finished 90 / 541
Finished 100 / 541
Finished 110 / 541
Finished 120 / 541
Finished 130 / 541
Finished 140 / 541
Finished 150 / 541
Finished 160 / 541
Finished 170 / 541
Finished 180 / 541
Finished 190 / 541
Finished 200 / 541
Finished 210 / 541
Finished 220 / 541
Finished 230 / 541
Finished 240 / 541
Finished 250 / 541
Finished 260 / 541
Finished 270 / 541
Finished 280 / 541
Finished 290 / 541
Finished 300 / 541
Finished 310 / 541
Finished 320 / 541
Finished 330 / 541
Finished 340 / 541
Finished 350 / 541
Finished 360 / 541
Finished 370 / 541
Finished 380 / 541
Finished 390 / 541
Finished 400 / 541
Finished 410 / 541
Finished 420 / 541
Finished 430 / 541
Finished 440 / 541
Finished 450 / 541
Finished 460 / 541
Finished 470 / 541
Finished 480 / 541
Finished 490 / 541
Finished 500 / 541
Finished 510 / 541
Finished 520 / 541
Finished 540 / 541


In [19]:
# For every job, reduce its document names to the set of id tokens embedded in
# them: the text after the last space, up to the first underscore.
answers = {
    job_id: {name.rpartition(" ")[2].partition("_")[0] for name in names}
    for job_id, names in job_to_name.items()
}

In [20]:
# Sanity check: the distinct sizes of the per-job id sets ({1} means one id per job).
set(map(len, answers.values()))

{1}

In [21]:
# Collapse each job's single-element id set into one integer id.
# `answers` is deliberately left untouched so this cell can be re-run; rebinding
# it to the converted dict would make a second run fail on list(<int>).
job_id_to_clid = {}
for job_id, clids in answers.items():
    # Refuse to guess: an empty or ambiguous set would otherwise yield an
    # IndexError or an arbitrarily chosen id.
    if len(clids) != 1:
        raise ValueError(
            f"Job {job_id} maps to {len(clids)} ids, expected exactly 1: {clids}"
        )
    job_id_to_clid[job_id] = int(next(iter(clids)))
len(job_id_to_clid)

In [25]:
# Tracking results keyed by job id. Initialised in its own cell: the loop that fills it
# skips job ids already present, so that loop can be re-run without repeating finished requests.
job_id_to_status = {}

In [26]:
# Fetch the IMB tracking status for every job that is not yet recorded,
# storing (status, status_time) per job id in job_id_to_status.
for i, job_id in enumerate(job_id_to_clid):
    # Already fetched in an earlier run of this cell
    if job_id in job_id_to_status:
        continue
    # The student already has a row in jobs_new; no request needed
    if job_id_to_clid[job_id] in known_student_ids:
        continue
    r = client._get("jobs", str(job_id), "tracking", query={"trackingType": "IMB"})
    soup = BeautifulSoup(r.content, "xml")
    try:
        piece = soup.find("tracking").find("mailPiece")
        status = piece.find("status").text
        status_time = piece.find("dateTime").text
    except AttributeError:
        # Response lacks the expected tracking elements; report and move on
        print(f"An attribute error occured for id {job_id}")
    else:
        job_id_to_status[job_id] = (status, status_time)
    # A request was sent either way, so always report progress and pause;
    # skipping the sleep on failures would fire requests back-to-back.
    if i % 10 == 9:
        print(f"Finished {i + 1} / {len(job_id_to_clid)}")
    time.sleep(1)

An attribute error occured for id 1775318
An attribute error occured for id 1798000
Finished 500 / 527
Finished 510 / 527


In [47]:
# Open a database connection and cursor; the following cells query and insert through them.
conn = database.get_connection(config["db"])
curs = conn.cursor()

In [49]:
# Collect the id column of every row in students_new.
curs.execute("""SELECT id FROM students_new;""")
all_student_ids = []
for row in curs.fetchall():
    all_student_ids.append(row[0])

In [50]:
# Insert a jobs_new row for each job whose student is present in students_new
# and does not have a jobs_new row yet.
for job_id, student_id in job_id_to_clid.items():
    already_recorded = student_id in known_student_ids
    unknown_student = student_id not in all_student_ids
    if already_recorded or unknown_student:
        continue
    curs.execute(
        """
        INSERT INTO jobs_new
          (id, student_id)
        VALUES
          (?, ?)
        """,
        (job_id, student_id),
    )

In [51]:
# Commit the pending transaction on this connection, then close the connection.
conn.commit()
conn.close()

In [52]:
# Peek at one (job_id, (status, status_time)) entry to sanity-check the collected tracking data.
list(job_id_to_status.items())[0]

(1905207, ('USPS Indicated Delivered', '2018-01-13 10:25:07.0'))

In [7]:
# Read every job id stored in jobs_new.
# The connection is closed as soon as the ids are fetched: nothing later reuses
# it (the cell that writes mailings_new opens its own), so leaving it open
# would leak a connection for the duration of the long tracking loop.
conn = database.get_connection(config["db"])
try:
    curs = conn.cursor()
    try:
        curs.execute("SELECT id FROM jobs_new")
        all_job_ids = [x[0] for x in curs.fetchall()]
    finally:
        curs.close()
finally:
    conn.close()

In [10]:
# Fetch the current IMB tracking status for every job id stored in jobs_new,
# rebuilding job_id_to_status from scratch as job_id -> (status, status_time).
set_job_ids = set(all_job_ids)
job_id_to_status = {}
for i, job_id in enumerate(set_job_ids):
    r = client._get("jobs", str(job_id), "tracking", query={"trackingType": "IMB"})
    soup = BeautifulSoup(r.content, "xml")
    try:
        piece = soup.find("tracking").find("mailPiece")
        status = piece.find("status").text
        status_time = piece.find("dateTime").text
    except AttributeError:
        # Response lacks the expected tracking elements; report and move on
        print(f"An attribute error occured for id {job_id}")
    else:
        job_id_to_status[job_id] = (status, status_time)
    # A request was sent either way, so always report progress and pause;
    # skipping the sleep on failures would fire requests back-to-back.
    if i % 10 == 9:
        print(f"Finished {i + 1} / {len(set_job_ids)}")
    time.sleep(1)

Finished 10 / 676
Finished 20 / 676
Finished 30 / 676
Finished 40 / 676
Finished 50 / 676
Finished 60 / 676
Finished 70 / 676
Finished 80 / 676
Finished 90 / 676
Finished 100 / 676
Finished 110 / 676
Finished 120 / 676
Finished 130 / 676
Finished 140 / 676
Finished 150 / 676
Finished 160 / 676
Finished 170 / 676
Finished 180 / 676
Finished 190 / 676
Finished 200 / 676
Finished 210 / 676
Finished 220 / 676
Finished 230 / 676
Finished 240 / 676
Finished 250 / 676
Finished 260 / 676
Finished 270 / 676
Finished 280 / 676
Finished 290 / 676
Finished 300 / 676
Finished 310 / 676
Finished 320 / 676
Finished 330 / 676
Finished 340 / 676
Finished 350 / 676
Finished 360 / 676
Finished 370 / 676
Finished 380 / 676
Finished 390 / 676
Finished 400 / 676
Finished 410 / 676
Finished 420 / 676
Finished 430 / 676
Finished 440 / 676
Finished 450 / 676
Finished 460 / 676
Finished 470 / 676
Finished 480 / 676
Finished 490 / 676
Finished 500 / 676
Finished 510 / 676
Finished 520 / 676
Finished 530 / 676
Fi

In [11]:
# Store the fetched tracking status of every known job in mailings_new.

# SQL text kept exactly as before (including its whitespace).
INSERT_MAILING_SQL = """
        INSERT INTO mailings_new
          (job_id, status, status_datetime)
        VALUES
          (?, ?, ?)
        """

# Build all rows before touching the database so a malformed timestamp fails
# before anything is inserted. The fractional-seconds suffix (e.g. ".0") is
# dropped by splitting on "." rather than slicing a fixed two characters, so
# timestamps with a longer fraction or none at all still parse.
known_job_ids = set(all_job_ids)
mailing_rows = [
    (
        job_id,
        status,
        datetime.strptime(status_datetime.split(".")[0], "%Y-%m-%d %H:%M:%S"),
    )
    for job_id, (status, status_datetime) in job_id_to_status.items()
    if job_id in known_job_ids
]

conn = database.get_connection(config["db"])
try:
    curs = conn.cursor()
    try:
        for mailing_row in mailing_rows:
            curs.execute(INSERT_MAILING_SQL, mailing_row)
    finally:
        curs.close()
    conn.commit()
finally:
    # Always release the connection, even when an insert fails mid-way
    conn.close()