In [148]:
from google.cloud import bigquery
from google.cloud import firestore
import google.cloud.exceptions

import logging
import datetime
import time

import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/Users/277594b/Documents/Projects/COKI/certs/coki-214004-36a73c3216c6.json"

db = firestore.Client()

In [149]:
def write_results_to_bigquery(gs_uri, task_id, dataset_id, table_ref):

    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()

    job_config.schema = [
        bigquery.SchemaField('doi', 'STRING'),
            bigquery.SchemaField('grid_id', 'STRING'),
            bigquery.SchemaField('source', 'STRING'),
            bigquery.SchemaField('month', 'STRING'),
            bigquery.SchemaField('first_lookup', 'TIMESTAMP'),
            bigquery.SchemaField('scopus', 'TIMESTAMP'),
            bigquery.SchemaField('wos', 'TIMESTAMP'),
            bigquery.SchemaField('msa', 'TIMESTAMP'),
            bigquery.SchemaField('scopus_task_id', 'STRING'),
            bigquery.SchemaField('wos_task_id', 'STRING'),
            bigquery.SchemaField('msa_task_id', 'STRING'),
            bigquery.SchemaField('task_id', 'STRING'),
            bigquery.SchemaField('scopus_month', 'STRING'),
            bigquery.SchemaField('wos_month', 'STRING'),
            bigquery.SchemaField('msa_month', 'STRING'),

            bigquery.SchemaField('wos_title', 'STRING'),
            bigquery.SchemaField('wos_abstract', 'STRING'),
            bigquery.SchemaField('wos_source', 'STRING'),
            bigquery.SchemaField('wos_conference_id', 'STRING'),
            bigquery.SchemaField('wos_conference', 'STRING'),
            bigquery.SchemaField('wos_sortdate', 'STRING'),
            bigquery.SchemaField('wos_fund_ack', 'STRING'),
            bigquery.SchemaField('wos_keywords', 'STRING'),
            bigquery.SchemaField('wos_doi', 'STRING'),
            bigquery.SchemaField('wos_issn', 'STRING'),
            bigquery.SchemaField('wos_eissn', 'STRING'),
            bigquery.SchemaField('wos_isbn', 'STRING'),
            bigquery.SchemaField('wos_eisbn', 'STRING'),
            bigquery.SchemaField('wos_art_no', 'STRING'),
            bigquery.SchemaField('wos_meeting_abs', 'STRING'),
            bigquery.SchemaField('wos_xref_doi', 'STRING'),
            bigquery.SchemaField('wos_id', 'STRING'),
            bigquery.SchemaField('wos_parent_book_doi', 'STRING'),

            bigquery.SchemaField('scopus_title', 'STRING'),
            bigquery.SchemaField('scopus_publicationName', 'STRING'),
            bigquery.SchemaField('scopus_coverDate', 'STRING'),
            bigquery.SchemaField('scopus_doi', 'STRING'),
            bigquery.SchemaField('scopus_issn', 'STRING'),
            bigquery.SchemaField('scopus_eIssn', 'STRING'),
            bigquery.SchemaField('scopus_subtypeDescription', 'STRING'),
            bigquery.SchemaField('scopus_openaccess', 'STRING'),
            bigquery.SchemaField('scopus_openaccessFlag', 'STRING'),
            bigquery.SchemaField('scopus_citedby_count', 'STRING'),
            bigquery.SchemaField('scopus_identifier', 'STRING'),

            bigquery.SchemaField('msa_title', 'STRING'),
            bigquery.SchemaField('msa_journal', 'STRING'),
            bigquery.SchemaField('msa_publisher', 'STRING'),
            bigquery.SchemaField('msa_doi', 'STRING'),
            bigquery.SchemaField('msa_abstract', 'STRING')
    ]

    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1

    load_job = client.load_table_from_uri(
        gs_uri,
        dataset_ref.table(table_ref),
        job_config=job_config)  # API request

    print(f'Starting job {load_job.job_id}, insert into {table_ref}')
    load_job.result()  # Waits for table load to complete.       

    destination_table = client.get_table(dataset_ref.table(table_ref))
    print(f'Job finished, Loaded {load_job.output_rows} new rows. For {destination_table.num_rows} total')
    
    #log_progress(task_id, load_job.job_id, load_job.output_rows)
    return load_job.output_rows



In [29]:
def log_progress(task_id, bigquery_job_id, num_results):
    uncomplete = True
    backoff = 3
    attempts = 0

    while(uncomplete):
        try:
            db.collection("tasks").document(task_id).update(
                {
                    u"state": u"loaded",
                    u"lastUpdated": str(datetime.datetime.now(datetime.timezone.utc)),
                    u"load": {
                        u"bigquery_job_id": bigquery_job_id,
                        u"results": num_results,
                        u"eventId": "",
                        u"timestamp": str(datetime.datetime.now(datetime.timezone.utc)),
                        u"state": "completed",
                        u"errorMessage": ""
                    }
                }
            )
            uncomplete = False
        except:
            # Only try 3 times before failing
            if attempts == 3:
                logging.error(f"Update to firestore failed 3 times")
                raise Exception

            logging.warning(f"Update to firestore failed, trying again in {backoff} seconds")
            attempts += 1
            # Wait for [backoff] seconds
            time.sleep(backoff)
            backoff += 3

In [38]:
def process_failed_load(task_id, gs_uri):
    dataset_id = "institutions"
    table_ref = "raw_prod"
    print(task_id, gs_uri)
    write_results_to_bigquery(gs_uri, task_id, dataset_id, table_ref)

In [59]:
#task_id = ""
#gs_uri = ["gs://coki-parsed-doi/v1/grid.83440.3b/wos/2010-09/2018-11-29 02:48:30.476778+00:00_41cd67c5-dc65-4757-8df4-bdc8b5e4093f.csv",
#          "gs://coki-parsed-doi/v1/grid.9486.3/scopus/2013-02/2018-11-29 02:20:42.440825+00:00_986c99b7-aecd-4c44-8f06-dbe82ba2ffd1.csv"]

counts = {
    "wos": 0,
    "scopus": 0,
    "msa": 0
}
count = 0

appended = []

task_error_docs = db.collection("tasks").where(u'load.state', u'==', u'failed').get()
for task in task_error_docs:
    task_dict = task.to_dict()
    counts[task_dict["source"]] += 1
    appended.append({
        "task_id": task.id,
        "uri": f'gs://coki-parsed-doi/{task_dict["parse"]["blob_name"]}',
        "results": None
    })
    #process_failed_load(task.id, f'gs://coki-parsed-doi/{task_dict["parse"]["blob_name"]}')
    count += 1
    #time.sleep(1)
    #if count == 20:
    #    break
    
print(counts)

{'wos': 547, 'scopus': 1008, 'msa': 0}


In [73]:
print(appended[0])

total = 0
already_done = 0
completed = 0

for task in appended:
    if task["results"] == None:
        #print(task["task_id"])
        dataset_id = "institutions"
        table_ref = "raw_prod_tmp2"
        results = write_results_to_bigquery(task["uri"], task["task_id"], dataset_id, table_ref)
        task["results"] = results
        completed += 1
    else:
        already_done += 1

print(total, already_done, completed)

{'task_id': '07908d39-60e1-48fa-99ad-498e3bebab59', 'uri': 'gs://coki-parsed-doi/v1/grid.14476.30/scopus/2000-06/2018-11-28 14:11:04.379298+00:00_07908d39-60e1-48fa-99ad-498e3bebab59.csv', 'results': 153}
Starting job ff8d7792-305b-45df-9da4-755d39630d7b, insert into raw_prod_tmp2
Job finished, Loaded 95 new rows. For 95 total
Starting job 20b3e62c-02b2-4119-a522-a8f2ec0260fc, insert into raw_prod_tmp2
Job finished, Loaded 392 new rows. For 487 total
Starting job c3cc3e0d-c911-4767-963f-9941759cdcca, insert into raw_prod_tmp2
Job finished, Loaded 64 new rows. For 551 total
Starting job 85d9ef50-0f84-47c3-8447-db3215002f13, insert into raw_prod_tmp2
Job finished, Loaded 317 new rows. For 868 total
Starting job 5c7c975f-6fcf-4464-a15f-1e4fc3461539, insert into raw_prod_tmp2
Job finished, Loaded 80 new rows. For 948 total
Starting job 4bda10fd-a008-43f1-aa37-22387baf3b05, insert into raw_prod_tmp2
Job finished, Loaded 214 new rows. For 1162 total
Starting job 0883f683-ecb0-4c39-bcf6-cf801

Starting job 47048c62-83f8-42bb-a551-218af9804380, insert into raw_prod_tmp2
Job finished, Loaded 669 new rows. For 19609 total
Starting job 99cbe696-ac2e-4af2-a889-676bc5347743, insert into raw_prod_tmp2
Job finished, Loaded 228 new rows. For 19837 total
Starting job f85aaedb-60b8-4de6-8dff-845879805686, insert into raw_prod_tmp2
Job finished, Loaded 90 new rows. For 19927 total
Starting job d93f6698-8c32-4208-8dcd-defca39b2b10, insert into raw_prod_tmp2
Job finished, Loaded 235 new rows. For 20162 total
Starting job 1aaafd52-df7b-4802-b92d-df8819d519af, insert into raw_prod_tmp2
Job finished, Loaded 282 new rows. For 20444 total
Starting job 5a2f4b3b-d7eb-47cd-afc9-35f5e8c4d5d3, insert into raw_prod_tmp2
Job finished, Loaded 37 new rows. For 20481 total
Starting job dbdf4d4a-858a-4e14-9b2f-34664c4a21ce, insert into raw_prod_tmp2
Job finished, Loaded 20 new rows. For 20501 total
Starting job 46583b6d-1f81-47f4-b616-660d1e53aea4, insert into raw_prod_tmp2
Job finished, Loaded 382 new r

Job finished, Loaded 162 new rows. For 39801 total
Starting job bc5968c7-d11a-47ac-9eb7-21fdde019912, insert into raw_prod_tmp2
Job finished, Loaded 552 new rows. For 40353 total
Starting job f4c6b583-2118-4efd-b454-a46bc4545a35, insert into raw_prod_tmp2
Job finished, Loaded 313 new rows. For 40666 total
Starting job dcf68ba9-1e4d-4cab-bf67-979cd01600f3, insert into raw_prod_tmp2
Job finished, Loaded 146 new rows. For 40812 total
Starting job debe345c-0893-42ff-84d9-e58c4f48528f, insert into raw_prod_tmp2
Job finished, Loaded 75 new rows. For 40887 total
Starting job 74e1a62d-2517-463b-92b9-8dccb52df4c3, insert into raw_prod_tmp2
Job finished, Loaded 150 new rows. For 41037 total
Starting job 6d4ad3ee-a61b-42cb-a321-f50f3b0197f1, insert into raw_prod_tmp2
Job finished, Loaded 306 new rows. For 41343 total
Starting job 5b2c7de0-0921-412e-bbbf-6b32cfd56e4e, insert into raw_prod_tmp2
Job finished, Loaded 404 new rows. For 41747 total
Starting job b7f4cc77-c8db-4673-a75f-dca1ee5f0017, ins

Starting job fa21c49f-cfda-44d8-984e-53b90a8592b8, insert into raw_prod_tmp2
Job finished, Loaded 200 new rows. For 61210 total
Starting job df78aeb6-86b2-47a1-b55c-4681beda5537, insert into raw_prod_tmp2
Job finished, Loaded 570 new rows. For 61780 total
Starting job dcc0697a-3a3a-412d-8cca-24c18946dd1e, insert into raw_prod_tmp2
Job finished, Loaded 48 new rows. For 61828 total
Starting job 6bb03b1f-68ae-433f-a754-0f00e3040a08, insert into raw_prod_tmp2
Job finished, Loaded 473 new rows. For 62301 total
Starting job a6de94dc-ac2d-4f47-ba6a-cd7af49ef2d6, insert into raw_prod_tmp2
Job finished, Loaded 59 new rows. For 62360 total
Starting job e140d349-8bee-40df-a77e-635ba0df4ee1, insert into raw_prod_tmp2
Job finished, Loaded 48 new rows. For 62408 total
Starting job 0097258c-5471-4dd8-8dfc-15959c8bd2a7, insert into raw_prod_tmp2
Job finished, Loaded 182 new rows. For 62590 total
Starting job 91d30a68-644f-479b-9f4a-1013ad07feb8, insert into raw_prod_tmp2
Job finished, Loaded 100 new r

Job finished, Loaded 1864 new rows. For 83838 total
Starting job d7570c3c-7f1e-4df6-bc44-e1c2cbb33018, insert into raw_prod_tmp2
Job finished, Loaded 431 new rows. For 84269 total
Starting job 1e5d437c-4c5e-4cb3-984e-a9a893dc11ff, insert into raw_prod_tmp2
Job finished, Loaded 270 new rows. For 84539 total
Starting job fb36886e-170e-409c-b4fe-8edb7f10d6e9, insert into raw_prod_tmp2
Job finished, Loaded 151 new rows. For 84690 total
Starting job f7f0ebc4-3aa2-46bf-ae18-fcf7ff2d118e, insert into raw_prod_tmp2
Job finished, Loaded 385 new rows. For 85075 total
Starting job 8e159f5a-96fc-42e1-a4a8-a1a7815ccae7, insert into raw_prod_tmp2
Job finished, Loaded 38 new rows. For 85113 total
Starting job 4b84a192-9ac0-4824-9061-d49c92cbb4d8, insert into raw_prod_tmp2
Job finished, Loaded 153 new rows. For 85266 total
Starting job 949be799-0810-4dee-9539-511258592ea0, insert into raw_prod_tmp2
Job finished, Loaded 210 new rows. For 85476 total
Starting job 7a28ff13-e10c-405d-a8db-52e826c9a11d, in

Starting job 97ae9cdc-4418-4f5e-b038-24ac925efb5d, insert into raw_prod_tmp2
Job finished, Loaded 376 new rows. For 105124 total
Starting job 19a80a62-6682-4aa9-b681-0ec8772be563, insert into raw_prod_tmp2
Job finished, Loaded 614 new rows. For 105738 total
Starting job 1536da41-936c-4b45-94e4-dd3d2cf7ca76, insert into raw_prod_tmp2
Job finished, Loaded 335 new rows. For 106073 total
Starting job 4d5336c7-9511-4c47-b926-716d9e802fba, insert into raw_prod_tmp2
Job finished, Loaded 102 new rows. For 106175 total
Starting job 478643fb-f1c5-4079-b5f9-4a1aa887c71e, insert into raw_prod_tmp2
Job finished, Loaded 291 new rows. For 106466 total
Starting job bc02dc25-3b26-4fbe-aa72-cbb4c3db8684, insert into raw_prod_tmp2
Job finished, Loaded 101 new rows. For 106567 total
Starting job eb7fc3cc-7299-4671-a451-c9bff387565f, insert into raw_prod_tmp2
Job finished, Loaded 706 new rows. For 107273 total
Starting job 933d812e-ec75-406b-a711-2b7a090fcc71, insert into raw_prod_tmp2
Job finished, Loaded

Starting job f66df1f1-1e17-48f4-b5de-f3ae80daed2e, insert into raw_prod_tmp2
Job finished, Loaded 176 new rows. For 120459 total
Starting job e5035fc3-7183-4b5e-ae52-5d611eb88a23, insert into raw_prod_tmp2
Job finished, Loaded 242 new rows. For 120701 total
Starting job 3d2c8e1b-d986-429c-93f1-545d0d200643, insert into raw_prod_tmp2
Job finished, Loaded 137 new rows. For 120838 total
Starting job 36b52bc2-999d-46c3-ab55-dede1481ed25, insert into raw_prod_tmp2
Job finished, Loaded 81 new rows. For 120919 total
Starting job 9cc06e11-f0a8-42f0-9928-be4dbdb79acb, insert into raw_prod_tmp2
Job finished, Loaded 200 new rows. For 121119 total
Starting job 0562213d-720c-43ec-bb4c-a437e89c1083, insert into raw_prod_tmp2
Job finished, Loaded 232 new rows. For 121351 total
Starting job a234a54c-f314-4219-8d97-a1c60b2588d2, insert into raw_prod_tmp2
Job finished, Loaded 285 new rows. For 121636 total
Starting job 662c3eb9-d824-4c92-80e0-d29e6e3a810f, insert into raw_prod_tmp2
Job finished, Loaded 

Starting job 4dbbd1b4-5abd-4085-80c6-55648bbb131d, insert into raw_prod_tmp2
Job finished, Loaded 138 new rows. For 137582 total
Starting job 7e45a078-c62d-4089-8997-53bbe44db42e, insert into raw_prod_tmp2
Job finished, Loaded 290 new rows. For 137872 total
Starting job 6a7c7d05-0ba0-4d29-b873-3d222161936e, insert into raw_prod_tmp2
Job finished, Loaded 655 new rows. For 138527 total
Starting job 44c36bf2-eacc-46cf-bd83-3a40f8187ca0, insert into raw_prod_tmp2
Job finished, Loaded 361 new rows. For 138888 total
Starting job dddb0cf7-bcf4-4473-9846-33216db9a58e, insert into raw_prod_tmp2
Job finished, Loaded 266 new rows. For 139154 total
Starting job 8c0c8c74-ff21-4010-9aad-9ffe755dadc9, insert into raw_prod_tmp2
Job finished, Loaded 540 new rows. For 139694 total
Starting job 10da8796-bc6d-4d1c-be98-2a1273f38458, insert into raw_prod_tmp2
Job finished, Loaded 82 new rows. For 139776 total
Starting job 685e9d76-b30c-4e37-9fd0-1b3c9aa5aba1, insert into raw_prod_tmp2
Job finished, Loaded 

In [74]:
total = 0
already_done = 0
completed = 0

for task in appended:
    if task["results"] == None:
        #print(task["task_id"])
        dataset_id = "institutions"
        table_ref = "raw_prod_tmp"
        #results = write_results_to_bigquery(task["uri"], task["task_id"], dataset_id, table_ref)
        #task["results"] = results
        completed += 1
    else:
        already_done += 1

print(total, already_done, completed)

0 1555 0


In [71]:
saved_list = appended.copy()

In [72]:
total = 0
already_done = 0
completed = 0

for task in appended:
    if task["results"] == None:
        #print(task["task_id"])
        dataset_id = "institutions"
        table_ref = "raw_prod_tmp"
        #results = write_results_to_bigquery(task["uri"], task["task_id"], dataset_id, table_ref)
        #task["results"] = results
        completed += 1
    else:
        already_done += 1

print(total, already_done, completed)

0 1050 505


In [75]:
appended[0]

{'task_id': '07908d39-60e1-48fa-99ad-498e3bebab59',
 'uri': 'gs://coki-parsed-doi/v1/grid.14476.30/scopus/2000-06/2018-11-28 14:11:04.379298+00:00_07908d39-60e1-48fa-99ad-498e3bebab59.csv',
 'results': 153}

In [100]:
batch = db.batch()

already_done = 0
completed = 0
count = 0

for task in appended:
    if "updated" not in task:
        task_ref = db.collection("tasks").document(task["task_id"])
        batch.update(task_ref, {
            u"state": u"loaded",
            u"lastUpdated": str(datetime.datetime.now(datetime.timezone.utc)),
            u"load": {
                u"bigquery_job_id": "missing",
                u"eventId": "missing",
                u"results": task["results"],
                u"timestamp": str(datetime.datetime.now(datetime.timezone.utc)),
                u"state": "completed",
                u"errorMessage": ""
            }
        })
        task["updated"] = True
        completed += 1
        count += 1
        if count == 200:
            break
    else:
        already_done += 1

batch.commit()
print(already_done, completed)

1555 0


In [95]:
print(already_done, completed)

742 200


In [30]:
log_progress("00503459-f7be-4ff3-8d84-e6276905f18a", "71f44696-d242-4bab-b749-15d4043bdf22", 320)

In [40]:
process_failed_load("00665d02-66e7-4d45-8a75-49c7ea09d89b", 'gs://coki-parsed-doi/v1/grid.6571.5/scopus/2002-03/2018-11-28 17:23:13.673762+00:00_00665d02-66e7-4d45-8a75-49c7ea09d89b.csv')

00665d02-66e7-4d45-8a75-49c7ea09d89b gs://coki-parsed-doi/v1/grid.6571.5/scopus/2002-03/2018-11-28 17:23:13.673762+00:00_00665d02-66e7-4d45-8a75-49c7ea09d89b.csv
Starting job 3bc167ef-717f-4a19-ade4-2d713a8f52bd, insert into raw_prod
Job finished, Loaded 43 new rows. For 2556731 total


In [44]:
print("hello")
time.sleep(2)
print("hello")

hello
hello


In [87]:
def archiver():
    """
    Mark completed tasks as closed and update the institutions collection
    """
    finished_tasks = db.collection("tasks").where(u'state', u'==', "loaded").get()
    count = 0

    for task in finished_tasks:
        task_dict = task.to_dict()

        transaction = db.transaction()
        institutions_ref = db.collection("institutions").document(task_dict["gridId"])
        archiver_transaction(transaction, institutions_ref, task.id, task_dict)

        print(f'Archieved completed task: {task.id}: {task_dict["source"]}, {task_dict["month"]}, {task_dict["gridId"]}')
        count += 1

    print(f'Archive Completed: {count} tasks archieved')


@firestore.transactional
def archiver_transaction(transaction, institutions_ref, task_id, task_dict):
    institution_snapshot = institutions_ref.get(transaction=transaction)
    institution = institution_snapshot.to_dict()

    month = task_dict["month"]
    source = task_dict["source"]
    tasking_id = task_dict["source"]

    # Update the active flag
    transaction.update(db.collection("tasks").document(task_id),
        {
            u"state": u"completed",
            u"active": False
        }
    )

    # Update the specific month-source
    source = institution["months"][month]["sources"][source]
    source["state"] = "closed"
    source["lastCompleted"] = str(datetime.datetime.now(datetime.timezone.utc))
    source["results"] = task_dict["load"]["results"]

    # Then update the hierarchy of the institution
    month_finished = True 
    for source_key, source in institution["months"][month]["sources"].items():
        if source["state"] != "closed":
            month_finished = False

    if month_finished:
        institution["months"][month]["state"] = "closed"

    institution_finished = True
    for month_key, month in institution["months"].items():
        if month["state"] != "closed":
            institution_finished = False

    if institution_finished:
        institution["state"] = "closed"

    # update the lastUpdated field
    institution["lastCompleted"] = str(datetime.datetime.now(datetime.timezone.utc))

    # Commit the transaction
    transaction.set(institutions_ref, institution)

In [115]:
for i in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]:
    archiver()

Archieved completed task: ed22953d-170d-4cdf-9df5-680cf0cb5459: wos, 2004-09, grid.116068.8
Archieved completed task: ed70e9c9-cc5e-4eec-b2a9-09721ae4510c: scopus, 2006-11, grid.14476.30
Archieved completed task: edb65401-ca18-40c0-a62c-77a193f71c00: scopus, 2002-05, grid.26999.3d
Archieved completed task: edc7bcc3-53c3-4f16-9f8c-44962a885107: wos, 2003-09, grid.14476.30
Archieved completed task: ede03350-e5ce-404e-9ff2-a5219042ff63: wos, 2012-04, grid.14476.30
Archieved completed task: edf17ada-9a21-47ab-ad95-8577ae74622b: scopus, 2007-01, grid.7836.a
Archieved completed task: edf62d13-bcd2-4dd0-8732-dc3016ece85c: scopus, 2005-10, grid.14476.30
Archieved completed task: edfb5848-8a89-4dcb-afa7-265a7bbad059: scopus, 2008-05, grid.9486.3
Archive Completed: 8 tasks archieved
Archieved completed task: ee2f0c56-99cd-4076-b845-4df3f86d0c03: scopus, 2002-11, grid.9486.3
Archieved completed task: ef2476c3-d634-410c-9a61-bd793e07abba: scopus, 2012-11, grid.6571.5
Archieved completed task: ef45

Archieved completed task: f9d68369-379c-4105-93b2-950d6edbefa7: wos, 2011-08, grid.8664.c
Archieved completed task: fa597de3-73a7-4451-9d65-06932c9b7ddb: scopus, 2005-07, grid.83440.3b
Archieved completed task: fa8505e6-fc00-45c7-93fd-53474deb138d: wos, 2013-04, grid.14476.30
Archieved completed task: fab01665-11b7-45fe-b1a5-10b0ba8d3587: scopus, 2012-02, grid.9486.3
Archieved completed task: fb19145a-dced-4340-bb03-51e24988630e: scopus, 2003-06, grid.83440.3b
Archieved completed task: fb46847f-4e09-46fd-8690-2f10df892802: scopus, 2013-07, grid.6571.5
Archive Completed: 8 tasks archieved
Archieved completed task: fb58c6c6-af40-4d2f-a261-4d32ef6af9b6: wos, 2010-12, grid.8664.c
Archieved completed task: fb8ae70e-df47-44b1-b327-a1d538293ecd: scopus, 2002-07, grid.9486.3
Archieved completed task: fbeddbb6-6c5e-4101-b6a6-583f8c91ce11: wos, 2003-01, grid.8664.c
Archieved completed task: fbfadfc5-2ef5-486a-95ae-69dbb80bf42e: scopus, 2009-04, grid.7836.a
Archieved completed task: fc080675-804a

In [None]:
for i in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]:
    archiver()

In [114]:
archiver()

Archieved completed task: ec2cf8ca-8d49-4e67-a325-7f40d6cd855c: scopus, 2001-07, grid.83440.3b
Archieved completed task: ec4aee4c-a0e3-4938-9395-493a09b1c318: wos, 2008-07, grid.83440.3b
Archieved completed task: ec828525-b2b1-4b32-ad61-de9ceace319b: wos, 2009-11, grid.83440.3b
Archieved completed task: ec99c00b-2cff-4eaa-a26e-c95f7ce24303: scopus, 2006-02, grid.7836.a
Archieved completed task: ec99e6de-9f54-43ff-ab38-bccbc2286e29: wos, 2006-07, grid.34980.36
Archieved completed task: ecdf261b-d681-4462-835d-daece66aabd7: scopus, 2004-09, grid.30055.33
Archieved completed task: ecfc7189-20f0-4906-8172-dcb50f1c4019: wos, 2002-03, grid.116068.8
Archieved completed task: ed09047d-6f63-4b36-b174-5f79cb2401c2: scopus, 2001-11, grid.26999.3d
Archive Completed: 8 tasks archieved


In [124]:
write_results_to_bigquery("gs://coki-tmp/catchup_data_2.csv", "123456789", "institutions", "raw_prod")

Starting job 46969124-734f-47d7-8520-d2510fdd574f, insert into raw_prod
Job finished, Loaded 152626 new rows. For 3044209 total


152626

In [144]:
parsed_tasks = db.collection("tasks").where(u'state', u'==', "parsed").get()

In [145]:
batch = db.batch()

for task in parsed_tasks:
    print(task.id)
    task_ref = db.collection("tasks").document(task.id)
    batch.update(task_ref, {
        u"state": u"completed",
        u"active": False,
        u"lastUpdated": str(datetime.datetime.now(datetime.timezone.utc)),
        u"load": {
            u"bigquery_job_id": "04c24730-ea30-4730-b9ac-c2f1da283cd1",
            u"timestamp": str(datetime.datetime.now(datetime.timezone.utc)),
            u"state": "completed",
            u"errorMessage": ""
        }
    })

batch.commit()

85a3f716-1006-44ff-a1ed-ae2ea2c69456
b7889871-6113-4893-9997-ddf8b65dd1ac
d3b809bb-42f5-4023-86cf-970b379f6edd


[update_time {
   seconds: 1543820782
   nanos: 225336000
 }, update_time {
   seconds: 1543820782
   nanos: 225336000
 }, update_time {
   seconds: 1543820782
   nanos: 225336000
 }]

In [136]:
import pprint
pp = pprint.PrettyPrinter(indent=4)

def update_institution(institution, month, source, results, no_doi_in_record):
    # Update the specific month-source
    source = institution["months"][month]["sources"][source]

    source["state"] = "closed"
    source["lastCompleted"] = str(datetime.datetime.now(datetime.timezone.utc))
    source["results"] = results
    source["no_doi_in_record"] = no_doi_in_record

    # Then update the hierarchy of the institution
    is_month_finished = True 
    for source_key, source in institution["months"][month]["sources"].items():
        if source["state"] != "closed":
            is_month_finished = False

    if is_month_finished:
        institution["months"][month]["state"] = "closed"

    is_institution_finished = True
    for month_key, month in institution["months"].items():
        if month["state"] != "closed":
            is_institution_finished = False

    if is_institution_finished:
        institution["state"] = "closed"

    # update the lastUpdated field
    institution["lastCompleted"] = str(datetime.datetime.now(datetime.timezone.utc))

In [137]:
institution = db.collection("institutions").document("grid.11899.38").get().to_dict()
pp.pprint(institution)

month = "2002-07"
source = "scopus"
results = 40
no_doi_in_record = 10

update_institution(institution, month, source, results, no_doi_in_record)

pp.pprint(institution)




{   'createdAt': '2018-11-21 04:01:56.829121+00:00',
    'externalIdentifiers': {   'msa': '17974374',
                               'scopus': '60008088',
                               'wos': 'Universidade de Sao Paulo'},
    'gridId': 'grid.11899.38',
    'institutionName': 'University of Sao Paulo',
    'lastCompleted': '2018-12-03 04:07:17.734403+00:00',
    'lastUpdated': '2018-11-21 04:01:56.829157+00:00',
    'months': {   '2000-01': {   'createdAt': '2018-11-28 '
                                              '06:12:11.557631+00:00',
                                 'sources': {   'msa': {   'lastCompleted': '2018-11-28 '
                                                                            '11:39:03.587660+00:00',
                                                           'results': 3616,
                                                           'state': 'closed',
                                                           'taskId': '6028b6b3-a451-47ee-9d94-e00046181872'

                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2004-04': {   'createdAt': '2018-11-28 '
                                              '06:12:11.624185+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                              'taskId': '5c1b4152-44f3-424c-b50b-d8f0899d927a',
                                                              'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                           

                                                              'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                                           'taskId': 'd0cf7511-02b4-4414-9631-d5700f521b2c',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2008-09': {   'createdAt': '2018-11-28 '
                                              '06:12:11.685955+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                 

                  '2013-01': {   'createdAt': '2018-11-28 '
                                              '06:12:11.754908+00:00',
                                 'sources': {   'msa': {   'lastCompleted': '2018-11-28 '
                                                                            '12:20:06.691730+00:00',
                                                           'results': 14681,
                                                           'state': 'closed',
                                                           'taskId': '4a2f5f26-9c01-4e84-b36e-a95db329ef78',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                       

                                 'sources': {   'scopus': {   'lastCompleted': '2018-11-21 '
                                                                               '15:26:02.304402+00:00',
                                                              'results': 1023,
                                                              'state': 'closed',
                                                              'taskId': '3620c762-d334-43e1-8dc0-a9410c333653',
                                                              'taskingId': 'c862c7b3-593e-46f5-82da-2e47b9a0da88'},
                                                'wos': {   'lastCompleted': '2018-11-22 '
                                                                            '01:23:02.701644+00:00',
                                                           'results': 1063,
                                                           'state': 'closed',
                                                           'taskId': '

                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                              'taskId': '308f94a9-9693-4164-82ec-b0a928722038',
                                                              'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                                           'taskId': '0c93daa1-5eff-40d4-b4b6-cf4db937317a',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
          

                                                           'state': 'inProgress',
                                                           'taskId': '213bd777-5798-4b11-badb-4c56142a30bd',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2006-10': {   'createdAt': '2018-11-28 '
                                              '06:12:11.659858+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                              'taskId': '9175fb15-3184-4e5b-982c-ca9e825df7e7',
                                                              'taskingId': 'dd598f2b-fe22-4

                                                              'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                                           'taskId': '3d5db9bd-093d-4735-8c4d-37924512e5e2',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2011-04': {   'createdAt': '2018-11-28 '
                                              '06:12:11.732223+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                 

                                                           'taskingId': 'c862c7b3-593e-46f5-82da-2e47b9a0da88'}},
                                 'state': 'closed',
                                 'summary': {'venn': {}}},
                  '2016-02': {   'createdAt': '2018-11-21 '
                                              '04:01:57.079539+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': '2018-11-21 '
                                                                               '15:09:02.100750+00:00',
                                                              'results': 823,
                                                              'state': 'closed',
                                                              'taskId': 'f5d818ca-916a-4b33-b303-c4c6bb139d52',
                                                              'taskingId': 'c862c7b3-593e-46f5-82da-2e47b9a0da88'},
                                                'wos': {   '

In [146]:
tasks = ["85a3f716-1006-44ff-a1ed-ae2ea2c69456", "b7889871-6113-4893-9997-ddf8b65dd1ac", 
         "d3b809bb-42f5-4023-86cf-970b379f6edd"]

for task in tasks:
    task_dict = db.collection("tasks").document(task).get().to_dict()
    month = task_dict["month"]
    source = task_dict["source"]
    results = task_dict['parse']['results']
    no_doi_in_record = task_dict['parse']['no_doi_in_record']
    print(task, month, source, results, no_doi_in_record)
    
    institution = db.collection("institutions").document(task_dict["gridId"]).get().to_dict()
    update_institution(institution, month, source, results, no_doi_in_record)
    db.collection("institutions").document(task_dict["gridId"]).set(institution)
    
    

85a3f716-1006-44ff-a1ed-ae2ea2c69456 2002-10 scopus 282 53
b7889871-6113-4893-9997-ddf8b65dd1ac 2002-09 scopus 286 76
d3b809bb-42f5-4023-86cf-970b379f6edd 2011-05 wos 866 199


In [147]:
institution = db.collection("institutions").document("grid.11899.38").get().to_dict()
pp.pprint(institution)

{   'createdAt': '2018-11-21 04:01:56.829121+00:00',
    'externalIdentifiers': {   'msa': '17974374',
                               'scopus': '60008088',
                               'wos': 'Universidade de Sao Paulo'},
    'gridId': 'grid.11899.38',
    'institutionName': 'University of Sao Paulo',
    'lastCompleted': '2018-12-03 07:08:11.545894+00:00',
    'lastUpdated': '2018-11-21 04:01:56.829157+00:00',
    'months': {   '2000-01': {   'createdAt': '2018-11-28 '
                                              '06:12:11.557631+00:00',
                                 'sources': {   'msa': {   'lastCompleted': '2018-11-28 '
                                                                            '11:39:03.587660+00:00',
                                                           'results': 3616,
                                                           'state': 'closed',
                                                           'taskId': '6028b6b3-a451-47ee-9d94-e00046181872'

                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                                           'taskId': '0cb5e397-7699-4b30-a17f-8e2807f5fe25',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2004-09': {   'createdAt': '2018-11-28 '
                                              '06:12:11.629794+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                

                                              '06:12:11.690777+00:00',
                                 'sources': {   'msa': {   'lastCompleted': '2018-11-28 '
                                                                            '12:03:08.481842+00:00',
                                                           'results': 11432,
                                                           'state': 'closed',
                                                           'taskId': 'b4f8e842-ae44-409d-b943-15d5ffe7ac9a',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'scopus': {   'lastCompleted': None,
                                                              'results': 0,
                                                              'state': 'inProgress',
                                                              'taskId': 'de2fd387-09f6-45ef-851b-3388a76b94e5',
   

                                                              'results': 0,
                                                              'state': 'inProgress',
                                                              'taskId': '279f2ddb-0f67-4469-8e26-6a0fb6afc1af',
                                                              'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'},
                                                'wos': {   'lastCompleted': None,
                                                           'results': 0,
                                                           'state': 'inProgress',
                                                           'taskId': 'c737d522-5748-4a7c-b3e2-68038b37335c',
                                                           'taskingId': 'dd598f2b-fe22-4b3b-acea-ec6b2973c83e'}},
                                 'state': 'inProgress',
                                 'summary': {'venn': {}}},
                  '2013-06': {   'cr

                                                'wos': {   'lastCompleted': '2018-11-22 '
                                                                            '01:27:01.295057+00:00',
                                                           'results': 1098,
                                                           'state': 'closed',
                                                           'taskId': '589c6f02-a4f9-45d3-a697-c31c9092d1de',
                                                           'taskingId': 'c862c7b3-593e-46f5-82da-2e47b9a0da88'}},
                                 'state': 'closed',
                                 'summary': {'venn': {}}},
                  '2017-08': {   'createdAt': '2018-11-21 '
                                              '04:01:57.284592+00:00',
                                 'sources': {   'scopus': {   'lastCompleted': '2018-11-21 '
                                                                               '15:30:05.098716+0

In [155]:
still_hanging = db.collection("tasks").where(u'gridId', u'==', "grid.9486.3").where("state", "==", "parsed").get()
count = 0
task_ids_hanging = {}
for task in still_hanging:
    count += 1
    task_dict = task.to_dict()
    task_ids_hanging[task.id] = task_dict['parse']['blob_name']
    
print(count)
print(task_ids_hanging)

470
{'0055c7ca-36c6-4dac-b2ce-c48c6f258704': 'v2/grid.9486.3/wos/2005-09/2018-12-03 22:53:01.615265+00:00_0055c7ca-36c6-4dac-b2ce-c48c6f258704.csv', '00c9934f-ff8f-4050-8789-2a380daee241': 'v2/grid.9486.3/wos/2018-01/2018-11-22 03:20:01.249018+00:00_00c9934f-ff8f-4050-8789-2a380daee241.csv', '00d5b2ab-22b9-4038-a112-f2e8a62573c8': 'v2/grid.9486.3/wos/2011-04/2018-12-04 00:01:01.689147+00:00_00d5b2ab-22b9-4038-a112-f2e8a62573c8.csv', '00f25cfa-ab94-42de-a854-cf7054546348': 'v2/grid.9486.3/wos/2002-11/2018-12-03 22:19:00.804723+00:00_00f25cfa-ab94-42de-a854-cf7054546348.csv', '0133f2d7-4493-4262-86ac-a9a394e7805d': 'v2/grid.9486.3/wos/2016-02/2018-11-22 03:11:27.327981+00:00_0133f2d7-4493-4262-86ac-a9a394e7805d.csv', '013d3d67-1308-4dce-9843-d441f2df55ea': 'v2/grid.9486.3/scopus/2010-08/2018-11-29 01:51:34.456192+00:00_013d3d67-1308-4dce-9843-d441f2df55ea.csv', '02c45a33-8886-4e03-84d5-93b77bf6a051': 'v2/grid.9486.3/wos/2011-02/2018-12-03 23:59:00.905490+00:00_02c45a33-8886-4e03-84d5-93b

In [156]:
for task in task_ids_hanging:
    try:
        write_results_to_bigquery(f"gs://coki-parsed-doi/{task_ids_hanging[task]}", "123456789", "institutions", "raw_prod_testing_one_at_a_time")
    except:
        print("Failed Load", task, task_ids_hanging[task])   
    

Starting job 302fb2c5-ee8e-44ee-aa6f-e4006d16360f, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 362 new rows. For 10594 total
Starting job 855fd574-2fc6-471e-9ae2-784e1786c1f9, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 1206 new rows. For 11800 total
Starting job f046f4c1-46f0-4130-938f-2816c9af34c0, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 393 new rows. For 12193 total
Starting job 00abdd9a-0a47-4e57-a55e-20167d3e84c5, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 254 new rows. For 12447 total
Starting job 75b894b8-0a2d-4216-a3b3-16791472fe72, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 402 new rows. For 12849 total
Starting job f92f3045-1b94-4c00-a6fc-b80ce6b70431, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 171 new rows. For 13020 total
Starting job 99a19415-b482-4a9c-acf2-f24ad6efb4b4, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 299 new rows. 

Job finished, Loaded 424 new rows. For 32831 total
Starting job 37545195-9d00-42e2-81b9-32a27cd0073a, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 84 new rows. For 32915 total
Starting job 2793f989-a24b-422b-88f7-002ec93e53c1, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 294 new rows. For 33209 total
Starting job 9a4d82e1-b07c-4b36-9025-cd95c6af1ee2, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 305 new rows. For 33514 total
Starting job 07e1a399-6c7e-4f9d-a324-7f7cea7e17f6, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 386 new rows. For 33900 total
Starting job 64abf5b9-7415-40f4-aa79-e74c04fbc9f4, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 371 new rows. For 34271 total
Starting job 79627ea8-8e92-44ae-8c80-48bc8ca01d3d, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 345 new rows. For 34616 total
Starting job 22a41b7e-a740-41da-9fcd-1480115dbd1a, insert into raw_prod_testing_

Job finished, Loaded 248 new rows. For 57330 total
Starting job b2e8942a-e8ca-45d0-af86-6dda49a71228, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 285 new rows. For 57615 total
Starting job 3ad1c139-ee3a-49fc-a912-ee7db1ba7734, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 260 new rows. For 57875 total
Starting job 98026ff3-6ae9-41e0-82a5-2ddb9cc2505b, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 277 new rows. For 58152 total
Starting job 53df5e11-75ec-47b9-9b0c-ac36c1882130, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 293 new rows. For 58445 total
Starting job 537faafd-e190-4df1-8bb4-bc72734cc426, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 330 new rows. For 58775 total
Starting job d009cff4-86e7-4807-b9e9-8ce2f9ed0c97, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 376 new rows. For 59151 total
Starting job e6e2462e-5fcd-4444-9aec-53256b15f1d3, insert into raw_prod_testing

Job finished, Loaded 249 new rows. For 88335 total
Starting job 12c299e7-e056-4543-a8d5-69a71ac84612, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 305 new rows. For 88640 total
Starting job 4f9af9a4-2e6a-442b-a2d8-7e7a6138b5a3, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 263 new rows. For 88903 total
Starting job fb5cea35-d906-452c-94f1-2d0e8c009b87, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 240 new rows. For 89143 total
Starting job a160bce5-ad2a-49eb-af57-5ea2f115e000, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 417 new rows. For 89560 total
Starting job d2897dd7-544d-4c4f-b7f9-112e14863d76, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 389 new rows. For 89949 total
Starting job a419bcd3-f03b-46b6-a980-6178a692a529, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 445 new rows. For 90394 total
Starting job 385c3826-861f-4e79-a267-8df10ef70507, insert into raw_prod_testing

Job finished, Loaded 276 new rows. For 105366 total
Starting job f2e47dd4-fd77-488a-b26d-7518c6efd301, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 214 new rows. For 105580 total
Starting job 36f6895d-0b47-4373-bff5-8431b9444b28, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 410 new rows. For 105990 total
Starting job 72327467-15f1-4c76-b615-9d167662c751, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 88 new rows. For 106078 total
Starting job a339e1ce-89f7-4a3c-b3e9-e49e2f94ec52, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 534 new rows. For 106612 total
Starting job d7f09166-d581-4102-acb0-9cfcdea1d1e0, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 432 new rows. For 107044 total
Starting job 3644c900-e6df-4202-9bfa-d5530891578c, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 665 new rows. For 107709 total
Starting job e5766efc-cad1-4593-ad6c-d19478f75e1f, insert into raw_prod_t

Starting job b7fa78c8-17e1-48dc-b687-c61cbfd8e7ac, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 5894 new rows. For 139052 total
Starting job d65d5f6b-8422-498a-9894-b5a83b7b0d8c, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 448 new rows. For 139500 total
Starting job a87a2f80-87ef-40dc-bdf3-2ce0cf5fa88f, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 280 new rows. For 139780 total
Starting job 5cebba67-9741-43af-83b0-21c0a7acb757, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 357 new rows. For 140137 total
Starting job acabe7ce-042d-40fd-8aae-51142c2aefe1, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 586 new rows. For 140723 total
Starting job 1b72d170-6561-4f37-a9c3-0d88d7eaf5dc, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 246 new rows. For 140969 total
Starting job 4d84c74a-77a6-48f5-bbc5-b306b4900cba, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 110 new 

Job finished, Loaded 185 new rows. For 164449 total
Starting job f39dbfcc-bd4b-488b-94d7-c74841df590f, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 360 new rows. For 164809 total
Starting job 36a18f6b-4e2f-42f2-a625-2320577ac2b4, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 281 new rows. For 165090 total
Starting job d5ab5b2a-e108-4fdf-9392-8ed035d70af7, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 250 new rows. For 165340 total
Starting job aaeaeb31-e9be-4008-924f-37e08d0dc3d2, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 193 new rows. For 165533 total
Starting job c97a3157-2744-4ba1-a729-ad720eb7a9c7, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 185 new rows. For 165718 total
Starting job be11763b-03b4-4892-8fa9-077a06f7bde1, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 381 new rows. For 166099 total
Starting job 9d440c21-435e-4200-aa60-e8aa22208c3b, insert into raw_prod_

Job finished, Loaded 278 new rows. For 186214 total
Starting job fcf84792-8dbd-45dd-b6d5-bbc3c903312a, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 251 new rows. For 186465 total
Starting job 34238527-e8f8-47e2-b792-b61883717532, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 358 new rows. For 186823 total
Starting job 1ce6dc6e-06c0-4f36-950f-7a4b38df453b, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 271 new rows. For 187094 total
Starting job fa97bf23-c7cc-4684-95e6-de7df0fb8005, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 1314 new rows. For 188408 total
Starting job 11122bfc-629b-4228-8700-bd95606cb2bf, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 325 new rows. For 188733 total
Starting job e62d154e-dda2-4b00-9c89-835149ff618d, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 474 new rows. For 189207 total
Starting job 8eab1648-023a-4903-be41-d311f6c599e8, insert into raw_prod

Job finished, Loaded 396 new rows. For 217336 total
Starting job e6a1ed0d-c36e-45b0-8bdd-4393f4c35ba1, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 255 new rows. For 217591 total
Starting job 3ba17784-49c0-48c5-a059-21a91b4d6542, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 249 new rows. For 217840 total
Starting job a180c353-caaf-46ac-9b6e-b52311fa83e2, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 391 new rows. For 218231 total
Starting job ef2d41ac-84b9-40f9-8b6c-013f7fe22b84, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 65 new rows. For 218296 total
Starting job b01dee79-2e93-487d-b979-8c2a38fbda46, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 256 new rows. For 218552 total
Starting job b5297d76-9847-4d7a-8be3-a48f20e2ad3f, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 97 new rows. For 218649 total
Starting job bc451768-8d7e-4037-abe7-e852e9716dce, insert into raw_prod_te

In [154]:
write_results_to_bigquery(f"gs://coki-parsed-doi/v2/grid.1032.0/scopus/2004-03/2018-11-28 07:29:19.773716+00:00_bebcb95b-cc27-41ca-a798-f417001924ab.csv", "123456789", "institutions", "raw_prod_testing_one_at_a_time")

Starting job beb1bbc0-4e09-43fc-b894-ce35e2f7dd66, insert into raw_prod_testing_one_at_a_time
Job finished, Loaded 51 new rows. For 10232 total


51