In [None]:
import pandas as pd
from lxml import etree
from datetime import datetime

# MANUAL VARIABLES

Check every value in the cell below before each run.

In [None]:
# --- Input spreadsheet / batch identity -------------------------------------
# Base name (no extension): the notebook reads "<name>.xls" and writes "<name>.xml".
DATA_FILE_NAME = "eksig25"
# Prefix of the <doi_batch_id> written into the XML head.
BATCH_ID_NAME = DATA_FILE_NAME

# --- Conference dates, as "YYYY-MM-DD" strings -------------------------------
START_DATE = "2025-05-12"
END_DATE = "2025-05-13"

# --- Conference / proceedings naming -----------------------------------------
CONF_NAME = "EKSIG 2025: DATA AS EXPERIENTIAL KNOWLEDGE AND EMBODIED PROCESSES"
CONF_ACRONYM = "EKSIG2025"
PROCEEDINGS_TITLE = CONF_NAME

# --- ISBN ----------------------------------------------------------------------
# Leave as "" when the volume has no ISBN; a <noisbn> element is written instead.
# Example of a filled-in value: ISBN = "9781912294008"
ISBN = ""

# --- DOI and landing page of the conference volume as a whole ----------------
CONF_VOLUME_DOI = "10.21606/eksig2025.cv"
CONF_VOLUME_URL = "https://dl.designresearchsociety.org/conference-volumes/67/"

# Largest number of authors on any single paper. Set it by hand after checking
# the spreadsheet; it decides how many author{N}_* columns are read.
MAX_AUTHORS = 9


# DO NOT TOUCH THESE
# Captured once so the batch id and the timestamp describe the same moment.
today = datetime.now()

# 14-digit date-time (YYYYMMDDHHMMSS) followed by four literal zeros.
SUBMISSION_TIMESTAMP = f"{today:%Y%m%d%H%M%S}0000"

# UTILITY FUNCTIONS

Helper functions for building the deposit XML and for inspecting the result.

In [None]:
def view_xml_result():
    """Print the current ``doi_batch`` tree as pretty-printed XML.

    Reads the notebook-level ``doi_batch`` element, so it can be called at any
    point after the root has been created to inspect work in progress.
    """
    serialized = etree.tostring(
        doi_batch,
        pretty_print=True,
        xml_declaration=True,
        encoding="utf-8",
    )
    # tostring() returns bytes when an encoding is given; decode for display.
    print(serialized.decode("utf-8"))


def extract_date_parts(datetime_str):
    """Split a ``YYYY-MM-DD`` string into ``[day, month, year]``.

    The pieces are returned as the substrings found in the input (nothing is
    converted or re-padded). Unpacking raises ``ValueError`` when the string
    does not split into exactly three parts on ``-``.
    """
    year, month, day = datetime_str.split("-")
    reordered = (day, month, year)
    return list(reordered)

def make_body_structure(_root):
    """Add event and proceedings metadata to ``_root`` (the ``<conference>`` element).

    Two children are appended to ``_root``:

    * ``<event_metadata>``: conference name, acronym and dates.
    * ``<proceedings_metadata>``: proceedings title, publisher, publication
      date, ISBN (or ``<noisbn>``) and the volume-level DOI data.

    Reads the notebook-level settings ``CONF_NAME``, ``CONF_ACRONYM``,
    ``PROCEEDINGS_TITLE``, ``ISBN``, ``CONF_VOLUME_DOI`` and ``CONF_VOLUME_URL``,
    plus the derived ``conf_dates_str``, ``start_date_parts`` and
    ``end_date_parts``; all of them must exist before this is called.

    Args:
        _root: lxml element that receives the metadata children.

    Returns:
        None. ``_root`` is modified in place.
    """
    # event_metadata holds information about this particular conference event.
    event_metadata = etree.SubElement(_root, "event_metadata")
    etree.SubElement(event_metadata, "conference_name").text = CONF_NAME
    etree.SubElement(event_metadata, "conference_acronym").text = CONF_ACRONYM

    # The element text is the free-form date string; the structured dates go
    # into start_*/end_* attributes.
    conference_date = etree.SubElement(event_metadata, "conference_date")
    conference_date.text = conf_dates_str
    # The *_date_parts lists are ordered [day, month, year], matching this tuple.
    for attr, start_value, end_value in zip(("day", "month", "year"), start_date_parts, end_date_parts):
        conference_date.set(f"start_{attr}", start_value)
        conference_date.set(f"end_{attr}", end_value)

    # Attach to the element that was passed in, not to a notebook global, so
    # the function works on whichever element the caller supplies.
    proceedings_metadata = etree.SubElement(_root, "proceedings_metadata")
    proceedings_metadata.set("language", "en")

    etree.SubElement(proceedings_metadata, "proceedings_title").text = PROCEEDINGS_TITLE

    publisher = etree.SubElement(proceedings_metadata, "publisher")
    etree.SubElement(publisher, "publisher_name").text = "Design Research Society"

    # publication_date children are written in the order month, day, year,
    # using the conference start date.
    day, month, year = start_date_parts
    publication_date = etree.SubElement(proceedings_metadata, "publication_date")
    publication_date.set("media_type", "online")
    etree.SubElement(publication_date, "month").text = month
    etree.SubElement(publication_date, "day").text = day
    etree.SubElement(publication_date, "year").text = year

    # With no ISBN configured, write <noisbn reason="simple_series"/> instead.
    if ISBN:
        etree.SubElement(proceedings_metadata, "isbn").text = ISBN
    else:
        noisbn = etree.SubElement(proceedings_metadata, "noisbn")
        noisbn.set("reason", "simple_series")

    # DOI and landing page of the conference volume as a whole.
    doi_data = etree.SubElement(proceedings_metadata, "doi_data")
    etree.SubElement(doi_data, "doi").text = CONF_VOLUME_DOI
    etree.SubElement(doi_data, "resource").text = CONF_VOLUME_URL




def _append_author(contributors, sequence, given_name, surname, institution_name):
    """Append one author ``<person_name>`` (with optional affiliation) to ``contributors``."""
    person = etree.SubElement(contributors, "person_name")
    person.set("sequence", sequence)
    person.set("contributor_role", "author")
    etree.SubElement(person, "given_name").text = given_name
    etree.SubElement(person, "surname").text = surname

    # Only write an affiliation when there is one: an empty
    # <institution_name/> carries no information.
    if institution_name:
        affiliations = etree.SubElement(person, "affiliations")
        institution = etree.SubElement(affiliations, "institution")
        etree.SubElement(institution, "institution_name").text = institution_name

    return person


def generate_papers(detailed_info=False):
    """Append one ``<conference_paper>`` per row of ``df_core`` to ``conference``.

    Each paper receives, in this order: ``<contributors>``, ``<titles>``,
    ``<publication_date>`` (the conference start date) and ``<doi_data>``.

    Reads the notebook-level ``df_core``, ``conference``, ``start_date_parts``
    and ``MAX_AUTHORS``; all of them must exist before this is called.

    Author handling:

    * Author 1 is always written, with ``sequence="first"``.
    * Authors 2..``MAX_AUTHORS`` are written with ``sequence="additional"``.
      Scanning stops at the first slot whose first name is blank, so any
      author listed after a blank slot is not written.
    * Names and institutions are whitespace-stripped; a blank institution
      produces no ``<affiliations>`` element.

    Args:
        detailed_info: When true, also print every column of each row.

    Returns:
        None. ``conference`` is modified in place and progress is printed.
    """
    day, month, year = start_date_parts

    for index, row in df_core.iterrows():
        print("-----\n")
        print(f"Working on row {index}: {row['title']}")

        if detailed_info:
            print("Row details:")
            for col in row.index:
                print(f"  {col}: {row[col]}")

        conference_paper = etree.SubElement(conference, "conference_paper")
        conference_paper.set("language", "en")
        conference_paper.set("publication_type", "full_text")

        # Created first so it precedes <titles>; filled in further down.
        contributors = etree.SubElement(conference_paper, "contributors")

        titles = etree.SubElement(conference_paper, "titles")
        etree.SubElement(titles, "title").text = row["title"]

        publication_date = etree.SubElement(conference_paper, "publication_date")
        publication_date.set("media_type", "online")
        etree.SubElement(publication_date, "month").text = month
        etree.SubElement(publication_date, "day").text = day
        etree.SubElement(publication_date, "year").text = year

        doi_data = etree.SubElement(conference_paper, "doi_data")
        etree.SubElement(doi_data, "doi").text = row["doi"]
        etree.SubElement(doi_data, "resource").text = row["calc_url"]

        # First author.
        _append_author(
            contributors,
            "first",
            row["author1_fname"].strip(),
            row["author1_lname"].strip(),
            row["author1_institution"].strip(),
        )

        # Additional authors, until the first blank first-name slot.
        for position in range(2, MAX_AUTHORS + 1):
            fname = row[f"author{position}_fname"].strip()
            if not fname:
                break

            _append_author(
                contributors,
                "additional",
                fname,
                row[f"author{position}_lname"].strip(),
                row[f"author{position}_institution"].strip(),
            )

    print("====\n ALL DONE")


# GENERATE ROOT

In [None]:
# Namespace URIs that are needed more than once below.
XSI_NS = "http://www.w3.org/2001/XMLSchema-instance"
CROSSREF_NS = "http://www.crossref.org/schema/5.3.0"

# Prefix -> URI map handed to lxml. Keys are bare prefixes (no "xmlns:");
# the None key declares the default namespace.
namespaces = {
    "xsi": XSI_NS,
    None: CROSSREF_NS,
    "jats": "http://www.ncbi.nlm.nih.gov/JATS1",
    "fr": "http://www.crossref.org/fundref.xsd",
    "mml": "http://www.w3.org/1998/Math/MathML",
}

# Root element of the deposit, carrying all namespace declarations.
doi_batch = etree.Element("doi_batch", nsmap=namespaces)
doi_batch.set("version", "5.3.0")

# xsi:schemaLocation is a "<namespace> <schema URL>" pair; the attribute name
# uses lxml's "{uri}local" notation.
doi_batch.set(
    "{" + XSI_NS + "}schemaLocation",
    CROSSREF_NS + " https://www.crossref.org/schemas/crossref5.3.0.xsd",
)

# The two top-level sections, filled in by the cells that follow.
head = etree.SubElement(doi_batch, "head")
body = etree.SubElement(doi_batch, "body")

# POPULATE HEAD


In [None]:
# Reset <head> first so that re-running this cell does not append a second
# copy of every child element (the body cells guard themselves the same way
# with body.clear()).
head.clear()

today_short = today.strftime("%y%m%d")

# Batch id has the form "<BATCH_ID_NAME>__<yymmdd>".
etree.SubElement(head, "doi_batch_id").text = BATCH_ID_NAME + "__" + today_short
etree.SubElement(head, "timestamp").text = SUBMISSION_TIMESTAMP

# Depositor name and contact address.
depositor = etree.SubElement(head, "depositor")
etree.SubElement(depositor, "depositor_name").text = "desres:desres"
etree.SubElement(depositor, "email_address").text = "dl@designresearchsociety.org"

etree.SubElement(head, "registrant").text = "Digital Library"

In [None]:
# The free-form conference date text comes from the first data row of the
# "conference_dates" column.
conference_dates_column = pd.read_excel(DATA_FILE_NAME + ".xls", usecols=["conference_dates"])
raw_conf_dates = conference_dates_column.iloc[0, 0] if not conference_dates_column.empty else None

# A blank spreadsheet cell is read as NaN, and NaN is truthy, so a plain
# `value or ''` fallback would turn it into the literal text "nan".
# Test for missing values explicitly instead.
conf_dates_str = "" if (pd.isna(raw_conf_dates) or not raw_conf_dates) else str(raw_conf_dates)

# [day, month, year] string lists used for the structured date fields.
start_date_parts = extract_date_parts(START_DATE)
end_date_parts = extract_date_parts(END_DATE)



In [None]:
# Preview cell: rebuild <body> with only the conference-level metadata and
# print the resulting XML.
# Clear the body first so that re-running this cell does not create duplicates.
body.clear()


# <conference> is the single container inside <body>. The name is bound at
# notebook level because generate_papers() appends to `conference` directly.
conference = etree.SubElement(body, "conference")
make_body_structure(conference)
view_xml_result()

<?xml version='1.0' encoding='utf-8'?>
<doi_batch xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.crossref.org/schema/5.3.0" xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1" xmlns:fr="http://www.crossref.org/fundref.xsd" xmlns:mml="http://www.w3.org/1998/Math/MathML" version="5.3.0" xsi:schemaLocation="http://www.crossref.org/schema/5.3.0 https://www.crossref.org/schemas/crossref5.3.0.xsd">
  <head>
    <doi_batch_id>eksig25__250510</doi_batch_id>
    <timestamp>202505102131290000</timestamp>
    <depositor>
      <depositor_name>desres:desres</depositor_name>
      <email_address>dl@designresearchsociety.org</email_address>
    </depositor>
    <registrant>Digital Library</registrant>
  </head>
  <body>
    <conference>
      <event_metadata>
        <conference_name>EKSIG 2025: DATA AS EXPERIENTIAL KNOWLEDGE AND EMBODIED PROCESSES</conference_name>
        <conference_acronym>EKSIG2025</conference_acronym>
        <conference_date start_day="12" end_day="13" sta

# LOAD COLUMNS

In [None]:
# Read the full spreadsheet.
df = pd.read_excel(DATA_FILE_NAME+".xls")

# Paper-level columns first, then fname / lname / institution for every author
# slot from 1 to MAX_AUTHORS, in slot order.
content_cols = ["title", "calc_url", "doi"] + [
    f"author{slot}_{field}"
    for slot in range(1, MAX_AUTHORS + 1)
    for field in ("fname", "lname", "institution")
]

# Keep only those columns; missing cells become "" and every value becomes a
# string, so later code can call .strip() on any field.
df_core = df[content_cols].fillna('').astype(str)

# For a quick look while debugging: df_core.head(10) or df_core.columns

# CODE FOR GENERATING CONFERENCE PAPERS

In [None]:
# Full build: start from an empty <body> so re-running this cell does not
# duplicate the conference or its papers.
body.clear()
# conference is the container for everything in body
conference = etree.SubElement(body, "conference")
# Conference-level metadata goes in first, then one <conference_paper> per
# row of df_core (so the LOAD COLUMNS cell must have been run already).
make_body_structure(conference)
generate_papers()
# Uncomment to print the complete XML after generation:
# view_xml_result()

-----

Working on row 0: Which Interaction? Mapping Feedback and Feedforward in Data Physicalization(s)
-----

Working on row 1: An Intuitive Approach to Enstoriment during Demonstrator Design Process: A Pilot Case Study
-----

Working on row 2: Collaborative Theorizing with Energy Data
-----

Working on row 3: How Do Design Students Use Data in Physicalization Courses?
-----

Working on row 4: Craftsman Gaze Data Study to Enhance Craft Material Selection and Facilitate Knowledge Transfer
-----

Working on row 5: Materializing More-than-Human Data through Mapping Wools
-----

Working on row 6: Materializing Data: A Macramé-Inspired Framework for Evaluating the Effectiveness of Creative Participatory Research
-----

Working on row 7: Memories as Materialized Data for Designing Emotionally Durable Handbags
-----

Working on row 8: Embodied Data: Gleaning Narratives of Living with Chronic Gut Diseases to Guide the Design Process
-----

Working on row 9: Body Maps as A Source of Women’s Em

# SAVE XML

In [None]:
# The output file sits next to the input spreadsheet and shares its base name.
output_file = f"{DATA_FILE_NAME}.xml"

# Serialise the whole batch, XML declaration included, to UTF-8 text.
xml_string = etree.tostring(
    doi_batch,
    pretty_print=True,
    xml_declaration=True,
    encoding="utf-8",
).decode("utf-8")

# Write the text out, replacing any earlier file of the same name.
with open(output_file, "w", encoding="utf-8") as xml_out:
    xml_out.write(xml_string)

print(f"XML has been saved to {output_file}")

XML has been saved to eksig25.xml
