## HOLLIS Harvest

In [None]:
# Run configuration: edit these values, then run the whole notebook.
date = "2306"            # prefix of the output workbook's file name
classification = "QC"    # classification prefix to harvest; also part of the output name
filepath = f"<path>{classification}/"   # directory the results workbook is written to

In [None]:
import requests
import math
import json
import pandas as pd
import sys, os, io
import argparse
import numpy as np
import re
import csv
import unicodedata
from openpyxl import load_workbook

# Endpoint queried for item records, and the page size requested per call.
HOLLIS_API = 'https://api.lib.harvard.edu/v2/items.json'
MAX_RECORDS = 250

# Search filters sent with every request. The classification prefix from the
# configuration cell gets a trailing "*" (presumably a wildcard -- confirm
# against the API documentation).
class_param = f"{classification}*"
query_params = {
    'classification': class_param,
    'originDate': '2012 OR 2013 OR 2014 OR 2015 OR 2016 OR 2017 OR 2018 OR 2019 OR 2020 OR 2021 OR 2022 OR 2023',
    'resourceType': 'text',
    'issuance': 'monographic',
}

def get_batch(api_url, params):
    """
    Send one GET request to the API and return the decoded JSON body.

    :param api_url: URL to request
    :param params: dict of query-string parameters
    :return: the response body parsed from JSON
    :raises requests.exceptions.RequestException: on connection failure,
        timeout, or a non-success HTTP status
    :raises ValueError: if the response body is not valid JSON
    """
    get_header = {'Accept': 'text/plain',
                  'Content-type': 'application/json'}
    # Without a timeout a stalled connection would hang the notebook forever
    response = requests.get(api_url, headers=get_header, params=params, timeout=60)
    # Surface HTTP errors here rather than as a confusing decode/KeyError later
    response.raise_for_status()
    return response.json()

def get_records(url, params):
    """
    Harvest every record matching the query, one page at a time.

    A first request with limit=1 reads the total number of matches; the rest
    are then fetched in pages of MAX_RECORDS starting after the records
    already held.

    :param url: API endpoint passed to get_batch
    :param params: dict of query parameters; it is copied, not modified
    :return: flat list of record objects (the 'mods' entries of each page)
    :raises RuntimeError: if any request fails
    """
    def _page_records(batch):
        # 'mods' is a list for several records but a bare object for exactly
        # one, and 'items' may be missing or null when a page is empty.
        items = batch.get('items') or {}
        mods = items.get('mods') or []
        return mods if isinstance(mods, list) else [mods]

    # Work on a copy so 'limit'/'start' do not leak into the caller's dict
    query = dict(params)
    query['limit'] = 1   # First get 1 record to determine the total amount of records

    try:
        batch = get_batch(url, query)
    except Exception as err:
        raise RuntimeError("Request to Hollis blew up: %s" % err) from err

    totrecs = int(batch['pagination']['numFound'])
    records = _page_records(batch)

    # Continue after whatever the first request returned
    query['limit'] = MAX_RECORDS
    for offset in range(len(records), totrecs, MAX_RECORDS):
        query['start'] = offset
        try:
            batch = get_batch(url, query)
        except Exception as err:
            raise RuntimeError("Request to Hollis blew up: %s" % err) from err
        page = _page_records(batch)
        if not page:
            # The service returned fewer records than it announced; stop paging
            break
        records += page
    return records
        
def get_unique_records(ids, from_hollis):
    """
    Select the records in from_hollis whose source URL has not been seen yet.

    Uniqueness is based on the 'originalDocument' URL found under the
    'librarycloud' key of a record's 'extension' entries. Entries without
    such a URL are ignored, and a record is returned at most once per call.

    :param ids: list of URLs already seen; new URLs are appended in place
    :param from_hollis: iterable of record dicts (non-dict entries are skipped)
    :return: tuple (ids, unique) where unique lists the newly seen records
    """
    seen = set(ids)   # constant-time membership; ids itself keeps the order
    unique = []
    for item in from_hollis:
        if not isinstance(item, dict):
            continue
        extensions = item.get('extension') or []
        if isinstance(extensions, dict):
            # A single extension arrives as a bare object rather than a list
            extensions = [extensions]
        is_new = False
        for ext in extensions:
            if not isinstance(ext, dict):
                continue
            cloud = ext.get('librarycloud')
            if not isinstance(cloud, dict):
                continue
            orig_url = cloud.get('originalDocument')
            if not isinstance(orig_url, str):
                continue
            orig_url = orig_url.strip()
            # A blank URL is not an identifier; registering it would add the
            # record a second time once its real URL is reached
            if orig_url and orig_url not in seen:
                seen.add(orig_url)
                ids.append(orig_url)
                is_new = True
        if is_new:
            unique.append(item)
    return ids, unique

# Accumulators shared by all harvest passes
unique_records = []     # every distinct record collected so far
ids = []                # source URLs already seen

# Query HOLLIS repeatedly, keeping only records that earlier passes did not
# return, and stop at the first pass that contributes nothing new
counter = 0
while True:
    from_hollis = get_records(HOLLIS_API, query_params)
    counter += 1
    ids, unique_records_set = get_unique_records(ids, from_hollis)
    print('Attempt',counter,'got number of unique records:', len(unique_records_set),'\n')
    if len(unique_records_set) == 0:
        print('no new records, quitting \n')
        break
    # keep the records this pass added
    unique_records.extend(unique_records_set)

print("Retrieved",len(unique_records),"records")

## Metadata Extraction & Transformation

In [None]:
# Flatten the harvested records into a table (nested keys become dotted column names)
dt = pd.json_normalize(unique_records)

# HOLLIS ID : taken as text, so a missing identifier becomes the string 'nan'
hollis_ids = dt["recordInfo.recordIdentifier.#text"].astype(str)
hollis_idents = [ident or '' for ident in hollis_ids]

# AUTHORS : Extract authors from dt["name"], else dt["name.namePart"]
# Both columns are read as text, so a missing value is the string 'nan'.
extract_name = re.compile(r"(?:personal\W+namePart\W+([A-Za-z\.,\s\-]+).+?(?=author))+")
extract_name2 = re.compile(r"^\W*([A-Z][^{[]*)")
names = dt["name"].astype(str)
names2 = dt["name.namePart"].astype(str)
author_matches = []
for name, name2 in zip(names, names2):
    # Drop diacritics so the ASCII-only name pattern can match
    ascii_name = unicodedata.normalize('NFD', name).encode('ascii', 'ignore').decode()
    hits = extract_name.findall(ascii_name)
    if hits:
        cleaned = [m.split(" (")[0] for m in hits]
    elif name2 != 'nan':
        # Fall back to the flattened single-name column
        cleaned = [m.rstrip().rstrip(",").rstrip("'").split(" (")[0]
                   for m in extract_name2.findall(name2)]
    else:
        cleaned = []
    # Remove a trailing period that follows a lowercase letter
    cleaned = [re.sub(r'([a-z])\.$',r'\1', m) for m in cleaned]
    # An empty list joins to '', keeping one entry per record
    author_matches.append('; '.join(cleaned))
            
# Reformat author names for publication string
def reformat_author(author_name):
    """
    Turn a "Last, First" name into "First Last".

    :param author_name: name string, split on ", "
    :return: the second and first pieces (each stripped) joined by a space,
        or author_name unchanged when it contains no ", " separator
    """
    pieces = author_name.split(", ")
    if len(pieces) < 2:
        return author_name
    # Only the first two pieces are used; anything after a second ", " is dropped
    return pieces[1].strip() + " " + pieces[0].strip()

# PUBDATE : Extract pubdate from dt["originInfo"]; else dt["originInfo.dateIssued"]
# Both columns are read as text, so a missing value is the string 'nan'.
extract_year = re.compile(r"dateIssued': {'@encoding': 'marc', '#text': '(\d+)")
extract_year2 = re.compile(r"text': '(\d+)")
years = dt["originInfo"].astype(str)
years2 = dt["originInfo.dateIssued"].astype(str)
date_matches = []
for year, year2 in zip(years, years2):
    match = extract_year.findall(year)
    if not match and year2 != 'nan':
        # Nothing in the nested form; try the flattened column
        match = extract_year2.findall(year2)
    # First run of digits found, or '' so the list keeps one entry per record
    date_matches.append(match[0] if match else '')
            
# TITLE : Extract title from dt["titleInfo"] --> {nonSort, title, subTitle, partNumber, partName};
#      else, extract from dt["titleInfo.X"]
#
# Patterns applied to the text form of the titleInfo value. Each one expects
# the listed keys in this order, with one capture group per key. The title
# loop tries them from t1 to t12 and uses the first that matches, so the
# order of this list is significant.
extract_titles = [
    # t1 : nonsort + title + subtitle
    re.compile(r"nonSort\W+(\S*)\W+title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+subTitle\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t2 : nonsort + title + partNumber + partName
    re.compile(r"nonSort\W+(\S*)\W+title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\.\w\s\-\'\&\,\:\(\)]+)\W+partName\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t3 : nonsort + title + partNumber
    re.compile(r"nonSort\W+(\S*)\W+title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\.\w\s\-\'\&\,\:\(\)]+)\W+"),         
    # t4 : nonsort + title
    re.compile(r"nonSort\W+([\.\w\s\-\']+)\W+title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t5 : title + subtitle + partNumber + partName
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+subTitle\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\/\.\w\s\-\'\&\,\:\(\)]+)\W+partName\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t6: title + subtitle + partnumber
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+subTitle\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\/\.\w\s\-\'\&\,\:\(\)]+)\W+"),
    # t7 : title + subtitle + partName
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+subTitle\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partName\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t8 : title + subtitle
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+subTitle\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t9 : title + partNumber + partName
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\/\.\w\s\-\'\&\,\:\(\)]+)\W+partName\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t10 : title + partNumber
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partNumber\W+([\/\.\w\s\-\'\&\,\:\(\)]+)"),
    # t11 : title + partName
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)\W+partName\W+([\%\.\w\s\-\'\&\,\:\(\)]+)"),
    # t12 : title
    re.compile(r"title\W+([\%\.\w\s\-\'\&\,\:\(\)]+)")
    ]

# Title-related columns as text; a missing value reads as the string 'nan'
titles = dt["titleInfo"].astype(str)
titleInfo_nonsorts = dt["titleInfo.nonSort"].astype(str)
titleInfo_titles = dt["titleInfo.title"].astype(str)
titleInfo_subtitles = dt["titleInfo.subTitle"].astype(str)
titleInfo_partNames = dt["titleInfo.partName"].astype(str)

# The partNumber column may be absent from the harvest; create it empty so
# the loop below can always zip over it
if "titleInfo.partNumber" not in dt.columns:
    dt["titleInfo.partNumber"] = np.nan
titleInfo_partNumbers = dt["titleInfo.partNumber"].astype(str)

# Build one display title per record; every path appends exactly one entry so
# title_matches stays aligned with the other per-record lists.
title_matches = []
for title, nonsort, title_2, subtitle, partno, partnm in zip(titles, titleInfo_nonsorts, titleInfo_titles, titleInfo_subtitles, titleInfo_partNumbers, titleInfo_partNames):
    if title != 'nan':
        # Nested titleInfo present: parse its text form with the patterns.
        # Diacritics are dropped and double quotes become single quotes first.
        title = unicodedata.normalize('NFD', title).encode('ascii', 'ignore').decode()
        title = title.replace('"',"'")
        # All twelve patterns are evaluated; the elif chain below uses the
        # first one (t1 .. t12) that matched.
        t1 = extract_titles[0].search(title)
        t2 = extract_titles[1].search(title)
        t3 = extract_titles[2].search(title)
        t4 = extract_titles[3].search(title)
        t5 = extract_titles[4].search(title)
        t6 = extract_titles[5].search(title)
        t7 = extract_titles[6].search(title)
        t8 = extract_titles[7].search(title)
        t9 = extract_titles[8].search(title)
        t10 = extract_titles[9].search(title)
        t11 = extract_titles[10].search(title)
        t12 = extract_titles[11].search(title)
        
        # .groups(0) is the tuple of captured groups. The rstrip chains trim
        # trailing spaces, commas and quotes left over from the text form;
        # rstrip("''") strips the same character set as rstrip("'").
        # Output separators: ": " before a subtitle, ", " before a part
        # number, "; " before a part name (except t7 and t11, see below).
        if t1:
            title_matches.append("%s %s: %s"%(t1.groups(0)[0].rstrip(" "),t1.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'"),t1.groups(0)[2].rstrip("'").rstrip(" ")))
        elif t2:
            title_matches.append("%s %s, %s; %s"%(t2.groups(0)[0].rstrip(" "),t2.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'"),t2.groups(0)[2].rstrip(" ").rstrip(",").rstrip("''"),t2.groups(0)[3].rstrip(",").rstrip("'").rstrip(" ")))
        elif t3:
            title_matches.append("%s %s, %s"%(t3.groups(0)[0].rstrip(" "),t3.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'"),t3.groups(0)[2].rstrip(" ").rstrip(",").rstrip("''")))
        elif t4:
            title_matches.append("%s %s"%(t4.groups(0)[0].rstrip(",").rstrip("'").rstrip(" "),t4.groups(0)[1].rstrip(",").rstrip("'").rstrip(" ")))
        elif t5:
            title_matches.append("%s: %s, %s; %s"%(t5.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t5.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'"),t5.groups(0)[2].rstrip(" ").rstrip(",").rstrip("'"),t5.groups(0)[3].rstrip(" ").rstrip(",").rstrip("'")))
        elif t6:
            title_matches.append("%s: %s, %s"%(t6.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t6.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'"),t6.groups(0)[2].rstrip(" ").rstrip(",").rstrip("'")))
        elif t7:
            # Here the part name follows the subtitle after ", "
            title_matches.append("%s: %s, %s"%(t7.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t7.groups(0)[1].rstrip(",").rstrip("'").rstrip(" "),t7.groups(0)[2].rstrip(",").rstrip("'").rstrip(" ")))
        elif t8:
            title_matches.append("%s: %s"%(t8.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t8.groups(0)[1].rstrip(",").rstrip("'").rstrip(" ")))
        elif t9:
            title_matches.append("%s, %s; %s"%(t9.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t9.groups(0)[1].rstrip(" ").rstrip(",").rstrip("''"),t9.groups(0)[2].rstrip(",").rstrip("'").rstrip(" ")))
        elif t10:
            title_matches.append("%s, %s"%(t10.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t10.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'")))
        elif t11:
            title_matches.append("%s; %s"%(t11.groups(0)[0].rstrip(" ").rstrip(",").rstrip("'"),t11.groups(0)[1].rstrip(" ").rstrip(",").rstrip("'")))
        elif t12:
            title_matches.append("%s"%(t12.groups(0)[0].rstrip(",").rstrip("'").rstrip(" ")))
        else:
            title_matches.append('')
    else:
        # No nested titleInfo: assemble the title from the flattened columns
        # in the order nonSort, title, subTitle, partNumber, partName,
        # skipping any that are missing ('nan').
        sec_title_str = ''
        for sec_title in [nonsort, title_2, subtitle, partno, partnm]:
            if sec_title != "nan":
                # `sec_title == title_2` compares by value, so another field
                # holding the same text as the title takes these branches too.
                # The title is followed by ': ' when a subtitle exists and by
                # ', ' when a part number and/or part name exists.
                if sec_title == title_2 and subtitle != 'nan':
                    sec_title_str += (sec_title.strip().replace('"',"'") + ': ')
                elif sec_title == title_2 and partno != 'nan' and partnm != 'nan':
                    sec_title_str += (sec_title.strip().replace('"',"'") + ', ' )
                elif sec_title == title_2 and partno != 'nan':
                    sec_title_str += (sec_title.strip().replace('"',"'") + ', ')
                elif sec_title == title_2 and partnm != 'nan':
                    sec_title_str += (sec_title.strip().replace('"',"'") + ', ')
                else:
                    sec_title_str += (sec_title.strip().replace('"',"'") + ' ')
        if sec_title_str:
            title_matches.append(sec_title_str.rstrip().replace('"',"'"))
        else:
            title_matches.append('')

# IDENTIFIERS : Extract identifiers from dt["identifier"]; else dt["identifier.#text"]
# Both columns are read as text, so a missing value is the string 'nan'.
extract_doi = re.compile(r"doi\W+#text\W+(\S*)\'")
extract_oclc = re.compile(r"oclc\W+#text\W+(\S*)\'")
# NOTE(review): "[@invalid\W+yes\W+]*" is a character class, not an optional
# group; it is left unchanged here -- confirm that this is what was intended.
extract_isbn = re.compile(r"isbn\W+[@invalid\W+yes\W+]*#text\W+([\w\-]*)")
idents = dt["identifier"].astype(str)
idents2 = dt["identifier.#text"].astype(str)


def _extract_identifier(pattern, primary, fallback):
    """Return the first value pattern captures in primary, else in fallback.

    Returns None when neither string is present ('nan' means missing) or
    neither matches. The captured group is returned for both sources; the
    fallback used to store the entire raw column text instead.
    """
    for text in (primary, fallback):
        if text != 'nan':
            hits = pattern.findall(text)
            if hits:
                return hits[0]
    return None


# DOIs are only looked up in the nested identifier column
doi_matches = [_extract_identifier(extract_doi, doi, 'nan') or '' for doi in idents]

oclc_matches = []
oclc_count = 0   # number of records in which the OCLC pattern matched
for oclc_id, oclc_id2 in zip(idents, idents2):
    value = _extract_identifier(extract_oclc, oclc_id, oclc_id2)
    if value is not None:
        oclc_count += 1
    # Exactly one entry per record keeps the list aligned with the others
    oclc_matches.append(value or '')

isbn_matches = []
isbn_count = 0   # number of records in which the ISBN pattern matched
for isbn_id, isbn_id2 in zip(idents, idents2):
    value = _extract_identifier(extract_isbn, isbn_id, isbn_id2)
    if value is not None:
        isbn_count += 1
    isbn_matches.append(value or '')

# PUBLICATION : Extract publisher & city from dt["originInfo"], else dt["originInfo.publisher"]
# Both columns are read as text, so a missing value is the string 'nan'.
extract_pubs = [re.compile(r"placeTerm\W+type\W+\w+\W+text\W+([\w\s\-\']+)"),
                re.compile(r"publisher\W+([\.\w\s\-\'\&]+)")]
pubs = dt["originInfo"].astype(str)
publishers = dt["originInfo.publisher"].astype(str)


def _tidy_pub(text):
    """Trim the trailing spaces and ':' ',' ';' punctuation left on a captured value."""
    return text.rstrip(" ").rstrip(":").rstrip(" ").rstrip(",").rstrip(";")


pub_matches = []
for pub, publisher in zip(pubs, publishers):
    # Start from '' so that exactly one entry is appended per record, even
    # when originInfo is present but neither pattern matches; otherwise
    # pub_matches falls out of step with the other per-record lists.
    entry = ''
    if pub != 'nan':
        pub = unicodedata.normalize('NFD', pub).encode('ascii', 'ignore').decode()
        match_pub = extract_pubs[0].search(pub)      # place of publication
        match_pub2 = extract_pubs[1].search(pub)     # publisher name
        # group(1) is the captured text; group(0)[0] would be the first
        # character of the whole match
        if match_pub and match_pub2:
            entry = "%s: %s" % (_tidy_pub(match_pub.group(1)), _tidy_pub(match_pub2.group(1)))
        elif match_pub:
            entry = _tidy_pub(match_pub.group(1))
        elif match_pub2:
            entry = _tidy_pub(match_pub2.group(1))
    elif publisher != 'nan':
        entry = unicodedata.normalize('NFD', publisher).encode('ascii', 'ignore').decode()
    pub_matches.append(entry)

# String together metadata, including OCLC and ISBNs, for publication field
publications = []
for title, author, pub, year, oclc, isbn in zip(title_matches, author_matches, pub_matches, date_matches, oclc_matches, isbn_matches):

    # "Last, First; Last, First" -> ["First Last", "First Last"]
    authors = list(map(reformat_author, author.split("; ")))
    authors_str = ", ".join(authors)  # Concatenate author names with a delimiter
    authors_count = len(authors)

    # More than nine authors are abbreviated to the first one plus "et al."
    if authors_count > 9:
        info = f"{title}, by {authors[0]} et al., {year}."
    else:
        info = f"{title}, by {authors_str}, {year}."

    # NOTE(review): when pub is empty the title/author/year text is dropped
    # as well, leaving only the identifiers -- confirm this is intended.
    if pub:
        p = f"{info} {pub}."
    else:
        p = ''
    if oclc:
        o = f"OCLC: {oclc}."
    else:
        o = ''
    if isbn:
        b = f"ISBN: {isbn}."
    else:
        b = ''
    publications.append(f"{p} {o} {b}".strip())

    
# ABSTRACT: Extract abstracts from dt["abstract.#text"]
# The column is read as text, so a missing abstract is the string 'nan'.
extract_abs = re.compile(r".*")
abstracts = dt["abstract.#text"].astype(str)
abs_matches = []
for abstract in abstracts:
    if abstract == 'nan':
        abs_matches.append('')
        continue
    # Trailing '-' characters are removed and non-breaking spaces become spaces
    match = [m.rstrip("--").replace(u'\xa0', u' ') for m in extract_abs.findall(abstract)]
    # Only the first match is kept, i.e. the text up to the first line break
    abs_matches.append(match[0])
        
# PROPERTIES: if doi exists, use the doi; else worldcat+OCLC, else worldcat+ISBN, else no link ('')
links = []
for doi, oclc, isbn, title in zip(doi_matches, oclc_matches, isbn_matches, title_matches):
    if doi:
        link = {"DOI":doi}
    elif oclc:
        link = {"ELECTR": str("http://www.worldcat.org/oclc/"+str(oclc))}
    elif isbn:
        link = {"ELECTR": str("http://www.worldcat.org/isbn/"+str(isbn))}
    else:
        link = ''
    links.append(link)

# Exclude items already reviewed previously
# The exclusions file is read as one HOLLIS id per line; blank lines are ignored.
excludes_file = "/Users/sao/Documents/Python-Projects/hollis_harvest/hollis_exclusions.txt"
with open(excludes_file, 'r') as excludes:
    exclusions = [e.strip() for e in excludes if e.strip()]
excluded_ids = set(exclusions)   # constant-time membership tests

# Compile metadata into records
# The exclusions list must not be part of the zip: zip stops at its shortest
# input, so including it would truncate the harvest to the number of lines
# in the exclusions file.
records = []
for ident, author, year, title, pub, link, abstract in zip(hollis_idents, author_matches, date_matches, title_matches, publications, links, abs_matches):
    if ident in excluded_ids:
        continue
    # Only records with an author, a year and a title are kept
    if not (author and year and title):
        continue
    authors = unicodedata.normalize('NFD', author).encode('ascii', 'ignore').decode()
    title = unicodedata.normalize('NFD', title).encode('ascii', 'ignore').decode()
    records.append({"hollis_id":ident,
                    "authors":authors,
                    "pubdate":year,
                    "title":title,
                    "publication":pub,
                    "properties":link,
                    "abstract":abstract})
    
# Drop duplicates
df1 = pd.json_normalize(records)
# With no records the frame has no columns, and de-duplicating or sorting on
# a named column would raise KeyError, so those steps are skipped.
if not df1.empty:
    df1 = df1.drop_duplicates(subset='hollis_id')
    if "properties.DOI" in df1.columns:
        df1 = df1.drop_duplicates(subset=['title','properties.DOI'], keep='last')
    df1 = df1.drop_duplicates(subset=['title','pubdate'], keep='last')

    # Sort by title
    df1 = df1.sort_values(by=['title'])

# After properties.ELECTR & properties.DOI have been created, I can drop "properties" column where no link exists
if "properties" in df1.columns:
    df1 = df1.drop(columns=["properties"])


## ADS Reference Service: Bibcode Matching

In [None]:
# Create reference strings from new records
# Exactly one reference is produced per record, in the same order, so that
# reference i always describes records[i].
list_for_REFS = []
for r in records:

    author = r['authors']
    year = r['pubdate']
    title = r['title']

    # Read the DOI from the record itself. doi_matches has one entry per
    # harvested item while records has been filtered, so zipping the two
    # would pair records with other items' DOIs.
    props = r.get('properties')
    doi = props.get('DOI', '') if isinstance(props, dict) else ''

    ref = {
        "refstr":"%s, %s, %s"%(author, year, title),
        "authors":"%s"%author,
        "year":"%s"%year,
        "title": "%s"%title
    }
    if doi:
        ref["refstr"] = "%s, %s, %s, %s"%(author, year, title, doi)
        ref["DOI"] = "%s"%doi
    list_for_REFS.append(json.dumps(ref))

# -- REFERENCE SERVICE -- #

# ADS Prod API Token
# Placeholder value: replace with a real token before running. The token is
# sent as a Bearer credential by resolve().
token = '<my token here>'
# Base URL that resolve() appends the service path to
domain = 'https://api.adsabs.harvard.edu/v1/'

# Read my reference strings file and make a list called 'references'
def read_file(filename):
    """
    Read a text file into a list of its lines.

    :param filename: path of the file to read
    :return: list of lines in file order, each keeping its line ending
    """
    with open(filename, "r") as handle:
        return list(handle)

# Reference Service API request, querying my 'references' list
def resolve(references):
    """
    Post a batch of parsed references to the Reference Service.

    :param references: list of reference dicts, sent under 'parsed_reference'
    :return: tuple (resolved, status_code); resolved is the 'resolved' entry
        of the JSON response on HTTP 200 and None on any other status
    """
    request_headers = {'Authorization': 'Bearer ' + token,
                       'Content-Type': 'application/json',
                       'Accept':'application/json'}
    body = json.dumps({'parsed_reference': references})
    response = requests.post(url=domain + 'reference/xml',
                             headers=request_headers,
                             data=body)
    status = response.status_code
    if status != 200:
        print('From reference status_code is ', status)
        return None, status
    return json.loads(response.content)['resolved'], 200
        
# Read my reference strings file
# (alternative input: references = read_file("/Users/sao/Documents/Python-Projects/hollis_harvest/ref_list.txt"))
references = list_for_REFS
references = [ref.replace("\n","") for ref in references]
references = [json.loads(ref) for ref in references]

NO_MATCH_BIBCODE = '...................'   # bibcode value the code treats as "not matched"
REF_BATCH_SIZE = 32                        # references sent per request

# Resolve my references, results in 'total results' list
# Each result is paired with the record it was resolved for. references[i]
# was built from records[i]; a batch that fails is skipped for both, so the
# pairing cannot drift.
print("Querying %d references with the Reference Service ..."%len(list_for_REFS))
total_results = []
resolved_pairs = []     # (record, result) tuples for successfully resolved batches
for i in range(0, len(references), REF_BATCH_SIZE):
    chunk = references[i:i+REF_BATCH_SIZE]
    results, status = resolve(chunk)
    if not results:
        print("No results for references %d-%d (status %s); skipping this batch"
              % (i, i + len(chunk) - 1, status))
        continue
    if len(results) != len(chunk):
        # NOTE(review): assumes the service returns one result per reference,
        # in order -- confirm against the API documentation
        print("Warning: sent %d references but received %d results (batch starting at %d)"
              % (len(chunk), len(results), i))
    total_results += results
    resolved_pairs.extend(zip(records[i:i+len(chunk)], results))

# Retrieve ref results
bibcodes = []           # tab-separated text form of each result
ref_rows = []           # the same data as 4-field rows for the results sheet
bibcode_counter = 0
no_match_counter = 0
for r in total_results:
    if r['bibcode'] != NO_MATCH_BIBCODE:
        score = str(r['score'])   # str() so a numeric score cannot break the join
        bibcodes.append(r['refstring'] + "\t" + r['bibcode'] + "\t" + score)
        ref_rows.append((r['refstring'], r['bibcode'], score, ''))
        bibcode_counter += 1
    else:
        comment = r.get('comment') or ''
        bibcodes.append(r['refstring'] + "\t\t\t" + comment)
        ref_rows.append((r['refstring'], '', '', comment))
        no_match_counter += 1

# Drop duplicates, and sort by score, then comment
# Rows always carry four fields, so the column names fit even when every
# reference matched or when there are no results at all.
df2 = pd.DataFrame(ref_rows, columns=['refstring','bibcode','score','comment'])
df2 = df2.drop_duplicates(subset='refstring')
df2 = df2.sort_values(by=['score','comment'],ascending=False)

# Append empty results to new list for ingest
to_ingest = [record for record, result in resolved_pairs
             if result['bibcode'] == NO_MATCH_BIBCODE]

# Drop duplicates, and sort by title
df3 = pd.json_normalize(to_ingest)
if not df3.empty:
    # An empty frame has no 'hollis_id'/'title' columns to operate on
    df3 = df3.drop_duplicates(subset='hollis_id')
    df3 = df3.sort_values(by=['title'])

## Results Summary

In [None]:
# Save results to an excel file with multiple sheets
outfile = f"{date}{classification}_review.xlsx"
sheets = {
    'hollis_results': df1,   # de-duplicated harvested records
    'ref_results': df2,      # Reference Service results
    'ingest_new': df3,       # records without a matching bibcode
}
with pd.ExcelWriter(filepath + outfile) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Print summary
print(
    date, classification, 'RESULTS SUMMARY\n\n'
    'HOLLIS API Query Parameters:\n',
    query_params,'\n\n',
    '--> HOLLIS records generated:',len(references),'saved to',outfile,'\n',
    '--> Records matched (ADS):',bibcode_counter,'saved to',outfile,'\n',
    '--> Records for ingest:',len(to_ingest),'saved to',outfile,
     )