### Import modules

In [3]:
import datetime
import json
import hashlib
import requests
import pandas as pd

from lxml.etree import ElementTree, tostring, iterwalk, fromstring
from lxml.builder import E

### Define functions for XML creation

In [4]:
def create_person_metadata(row):
    """Build a ``person_name`` element for the contributor described by *row*.

    A non-empty ``ORCID`` value is prefixed with ``https://orcid.org/``;
    an empty one yields an empty ``ORCID`` child. The ``sequence`` and
    ``contributor_role`` values are set as attributes on the element.
    """
    orcid_url = 'https://orcid.org/' + row['ORCID'] if row['ORCID'] else ''
    children = (
        E.given_name(row['given_name']),
        E.surname(row['surname']),
        E.ORCID(orcid_url),
    )
    attributes = {
        'sequence': row['sequence'],
        'contributor_role': row['contributor_role'],
    }
    return E.person_name(*children, **attributes)

def create_journal_metadata(row):
    """Build the ``journal_metadata`` element from the journal columns of *row*.

    Children, in order: full title, abbreviated title, ISSN (with
    ``media_type='print'``) and a ``doi_data`` block holding the journal
    DOI and its resource URL.
    """
    full_title = E.full_title(row['full_title'])
    abbrev_title = E.abbrev_title(row['abbrev_title'])
    print_issn = E.issn(row['issn'], media_type='print')
    doi_block = E.doi_data(
        E.doi(row['doi']),
        E.resource(row['resource']),
    )
    return E.journal_metadata(full_title, abbrev_title, print_issn, doi_block)

def create_issue_metadata(row):
    """Build the ``journal_issue`` element from the issue columns of *row*.

    The same year is emitted twice, once as the print and once as the
    online publication date, followed by the volume and a ``doi_data``
    block built from the ``doi.1`` / ``resource.1`` columns.
    """
    publication_dates = [
        E.publication_date(E.year(row['year']), media_type=medium)
        for medium in ('print', 'online')
    ]
    volume = E.journal_volume(E.volume(row['volume']))
    doi_block = E.doi_data(
        E.doi(row['doi.1']),
        E.resource(row['resource.1']),
    )
    return E.journal_issue(*publication_dates, volume, doi_block)

def create_article_metadata(row):
    """Build a ``journal_article`` element (``publication_type='full_text'``).

    Children, in order: title, a ``contributors`` wrapper holding the
    person described by *row*, print and online publication dates (same
    year), page range, and a ``doi_data`` block built from the
    ``doi.2`` / ``resource.2`` columns.
    """
    parts = [
        E.titles(E.title(row['title'])),
        E.contributors(create_person_metadata(row)),
    ]
    for medium in ('print', 'online'):
        parts.append(E.publication_date(E.year(row['year']), media_type=medium))
    parts.append(
        E.pages(
            E.first_page(row['first_page']),
            E.last_page(row['last_page']),
        )
    )
    parts.append(
        E.doi_data(
            E.doi(row['doi.2']),
            E.resource(row['resource.2']),
        )
    )
    return E.journal_article(*parts, publication_type='full_text')

def remove_empty_tags(xml_file):
    """Recursively remove empty tags from *xml_file*, in place.

    An element is considered empty when neither it nor any of its
    descendants has truthy ``text``; attributes are not taken into
    account. Each such element is detached from its parent.

    An element without a parent (the root of the tree) is never removed:
    there is nothing to detach it from.
    """

    def recursively_empty(e):
        if e.text:
            return False
        return all(recursively_empty(c) for c in e.iterchildren())

    for _action, elem in iterwalk(xml_file):
        parent = elem.getparent()
        # Guard against the parentless root: without this check an entirely
        # empty tree crashed with AttributeError on ``None.remove``.
        if parent is not None and recursively_empty(elem):
            parent.remove(elem)
            
def create_timestamp():
    """Return the current UTC time as a 17-digit string.

    Layout: ``YYYYMMDDHHMMSS`` followed by three digits of milliseconds
    (year, month, day, hours, minutes, seconds, milliseconds).
    """
    # ``datetime.utcnow()`` is deprecated; ask for an aware UTC time instead.
    now = datetime.datetime.now(datetime.timezone.utc)
    # Integer division avoids the float round-trip of ``int(x / 1000)``.
    return '%s%03d' % (now.strftime("%Y%m%d%H%M%S"), now.microsecond // 1000)

### Load input data

In [6]:
# Read the input spreadsheet, then blank out missing cells and cast every
# column to text so the values can be used directly as XML text.
df = pd.read_excel('input_data/napis_rcin_oai.xlsx')
df = df.fillna('').astype(str)

### Prepare journal issues dictionary

In [None]:
# Group the rows into one ``journal`` element per issue (year + volume).
# A row whose article DOI is already present in the issue only contributes
# an additional person to that article; otherwise it adds a new article.
journal_xmls = {}
for _, row in df.iterrows():
    issue_key = '-'.join((row['year'], row['volume']))
    issue = journal_xmls.get(issue_key)

    if issue is None:
        # First row of this issue: create the journal with its first article.
        journal_xmls[issue_key] = E.journal(
            create_journal_metadata(row),
            create_issue_metadata(row),
            create_article_metadata(row),
        )
        continue

    known_article = next(
        (
            article
            for article in issue.findall('journal_article')
            if row['doi.2'] == article.find('doi_data').find('doi').text
        ),
        None,
    )
    if known_article is None:
        issue.append(create_article_metadata(row))
    else:
        known_article.find('contributors').append(create_person_metadata(row))

### Create issue XML files and send them to the Crossref deposit page

In [None]:
# For every issue: wrap it in a Crossref ``doi_batch`` document, save it under
# ``output_xmls/`` and upload it to the Crossref deposit servlet.

# Load the credentials once; they are identical for every deposit.
with open('credentials.json', "r") as f:
    creds = json.load(f)

#!!!!!!! check url !!!!!!!
test_url = 'https://test.crossref.org/servlet/deposit'
main_url = 'https://doi.crossref.org/servlet/deposit'

for key, value in journal_xmls.items():

    output_body = E.body()
    output_body.append(value)

    # Empty tags are stripped from the body only; the head is built afterwards.
    remove_empty_tags(output_body)
    timestamp = create_timestamp()
    # The batch id is derived from the body content plus the timestamp.
    batch_id = hashlib.md5(tostring(output_body) + bytes(timestamp, encoding='ascii')).hexdigest()

    output_head = E.head(
        # fill empty strings
        E.doi_batch_id(batch_id),
        E.timestamp(timestamp),
        E.depositor(
            E.depositor_name(''),
            E.email_address('')
        ),
        E.registrant('')
    )

    # schema declaration
    doi_batch_string = '<doi_batch version="4.4.2" xmlns="http://www.crossref.org/schema/4.4.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1" xsi:schemaLocation="http://www.crossref.org/schema/4.4.2 http://www.crossref.org/schema/deposit/crossref4.4.2.xsd"></doi_batch>'
    output_xml = fromstring(doi_batch_string)

    output_xml.append(output_head)
    output_xml.append(output_body)
    to_save = ElementTree(output_xml)

    file_name = f'{batch_id}.xml'
    # Forward slashes work on Windows and POSIX alike; the previous
    # backslash-separated path was only valid on Windows.
    path = 'output_xmls/' + file_name
    # Serialize as UTF-8 directly instead of writing ASCII and then patching
    # the text with ``replace('ASCII', 'UTF-8')``, which would also rewrite
    # any occurrence of "ASCII" inside the metadata itself.
    to_save.write(path, xml_declaration=True, encoding='UTF-8', pretty_print=True)

    # sending files
    params = {'operation': 'doMDUpload',
              'login_id': creds['login_id'],
              'login_passwd': creds['login_passwd']}

    with open(path, 'rb') as file:
        # The file part must be named ``fname``. Previously a misspelled
        # 'fanme' text field carried only the file name, and the file itself
        # was uploaded under a field named after the batch id.
        files = {'fname': (file_name, file)}
        # check url
        r = requests.post(test_url, files=files, data=params)

    # Surface transport-level failures instead of ignoring the response.
    # NOTE(review): a 2xx status only means the upload was received; confirm
    # how the final processing result of the deposit is reported.
    if not r.ok:
        print(f'Deposit of {file_name} failed: HTTP {r.status_code}')
        