In [1]:
import os
import re
from pandas.api.types import CategoricalDtype
import numpy as np
import pandas as pd
import math
import csv
import json
from unidecode import unidecode

# Read Proceedings Information
Update the `proceedingsInfo.csv` file according to your conference and the files downloaded from PCS.

In [2]:
# Venue/track configuration: one row per venue; the later cells iterate over it.
venuesCsv = "proceedingsInfo.csv"
dfVenues = pd.read_csv(venuesCsv)
dfVenues.head()

Unnamed: 0,Name,Code,File,NameCommittee,NameReviewers,Prefix,Order,UseQOALASessions
0,Paper,paper,tp23a,Associated Chairs,Reviewers,pn,1,True
1,Late-Breaking Work,lbw,tp23b,Associated Chairs,Reviewers,lbw,2,False
2,Demonstration,demo,tp23c,Jury Member,Reviewers,demo,3,False
3,Doctoral Consortium,dc,tp23d,Jury Member,Reviewers,dc,4,False
4,Workshop,ws,tp23e,Jury Member,Reviewers,ws,5,False


# Generate Committee Files

In [3]:
def getCommittee(venueCode):
    """Build the LaTeX lines that list the committee members of one venue.

    Reads ``./data-PCS/{venueCode}_committee.csv``, drops the rows whose
    "Reviews assigned" value is 0, title-cases first and family names, and
    sorts by family name, first name and middle initial. Each member becomes
    one line with the name followed by the first available affiliation
    (institution and, when present, country) in an emph group.

    Parameters:
        venueCode: file prefix of the venue (the "File" column of the venue
            table), used to locate the CSV.

    Returns:
        A list of LaTeX source lines ending with an empty string, or an empty
        list when the file is missing or no member has assigned reviews.
        The list is wrapped in a two-column ``multicols`` environment when it
        has more than 50 members; otherwise the same environment lines are
        emitted commented out.
    """
    path = f"./data-PCS/{venueCode}_committee.csv"
    if not os.path.isfile(path):
        print(f"{venueCode} has no committee file")
        return []

    df = pd.read_csv(path)
    # .copy() detaches the filtered rows so the column assignments below work
    # on an independent frame (avoids pandas' SettingWithCopyWarning).
    df = df[df["Reviews assigned"] != 0].copy()

    if len(df) == 0:
        print(f"{venueCode} has no reviewers")
        return []

    df["Family name"] = df["Family name"].str.title()
    df["First name"] = df["First name"].str.title()
    df = df.sort_values(["Family name", "First name", "Middle initial"])

    lstText = []
    for _, member in df.iterrows():
        # Use the first of the six affiliation slots that holds an institution.
        # .get() tolerates exports that contain fewer than six slots.
        aff = ""
        for slot in range(1, 7):
            institution = member.get(f"Affil {slot} Institution")
            if isinstance(institution, str):
                country = member.get(f"Affil {slot} Country")
                # An empty country cell is read as NaN; leave it out instead
                # of printing "nan".
                if isinstance(country, str):
                    aff = f"{institution}, {country}"
                else:
                    aff = institution
                break
        aff = aff.replace("&", "\\&")

        # Empty middle initials are NaN (float), so only strings are printed.
        if isinstance(member["Middle initial"], str):
            name = f'{member["First name"]} {member["Middle initial"]} {member["Family name"]}'
        else:
            name = f'{member["First name"]} {member["Family name"]}'
        lstText.append(f'{name}, \\emph{{{aff}}}\\\\')

    # Long lists are set in two columns; for short lists the environment is
    # emitted commented out so it can be enabled by hand.
    marker = "" if len(df) > 50 else "%"
    lstText.insert(0, f"{marker}\\begin{{multicols}}{{2}}")
    lstText.append(f"{marker}\\end{{multicols}}")
    lstText.append("")
    return lstText

def getReviews(venueCode):
    """Build the LaTeX lines that list the external reviewers of one venue.

    Reads ``./data-PCS/{venueCode}_reviewers.csv``, drops the rows whose
    "Reviews assigned" value is 0, capitalises the first letter of each name
    column, and sorts by family name, first name and middle initial.

    Parameters:
        venueCode: file prefix of the venue (the "File" column of the venue
            table), used to locate the CSV.

    Returns:
        A list of LaTeX source lines: a three-column ``multicols``
        environment with one reviewer per line, followed by an empty string.
        An empty list is returned when the file is missing or no reviewer has
        assigned reviews.
    """
    path = f"./data-PCS/{venueCode}_reviewers.csv"
    if not os.path.isfile(path):
        print(f"{venueCode} has no reviwer file")
        return []

    df = pd.read_csv(path)
    # .copy() detaches the filtered rows so the column assignments below work
    # on an independent frame (avoids pandas' SettingWithCopyWarning).
    df = df[df["Reviews assigned"] != 0].copy()

    if len(df) == 0:
        print(f"{venueCode} has no reviewers")
        return []

    def capitalizeFirst(value):
        # Empty cells are read as NaN (a float), which has no len(); only
        # non-empty strings are touched. Unlike str.title(), the rest of the
        # name keeps its casing (e.g. "mcDonald" -> "McDonald").
        if isinstance(value, str) and value:
            return value[0].title() + value[1:]
        return value

    # Ensure that all names start with a capital first letter.
    for column in ("Family name", "First name", "Middle name"):
        if column in df.columns:
            df[column] = df[column].apply(capitalizeFirst)

    df = df.sort_values(["Family name", "First name", "Middle initial"])

    lstText = ["\\begin{multicols}{3}"]
    for _, reviewer in df.iterrows():
        # Empty middle initials are NaN (float), so only strings are printed.
        if isinstance(reviewer["Middle initial"], str):
            lstText.append(f'{reviewer["First name"]} {reviewer["Middle initial"]} {reviewer["Family name"]}\\\\')
        else:
            lstText.append(f'{reviewer["First name"]} {reviewer["Family name"]}\\\\')
    lstText.append("\\end{multicols}")
    lstText.append("")
    return lstText

In [4]:
# Write one LaTeX file per venue: placeholder chair entries, followed by the
# committee and reviewer lists when the corresponding PCS exports exist.
# The output folder is created on demand so a fresh checkout can run this cell.
os.makedirs("committee", exist_ok=True)

for _, venue in dfVenues.iterrows():
    # All backslashes are written as "\\" so the strings contain no invalid
    # escape sequences; the emitted text is unchanged.
    lstExport = [
        "% If this venue/track has subcommittees, you might want to split the \\subsection{Committee Member} into different \\subsubsections for the different committees.",
        "",
        f"\\subsection{{{venue.Name} Chairs}}",
        "First Chair Full Name, \\emph{Affiliation, Country}\\\\",
        "Second Chair Full Name, \\emph{Affiliation, Country}",
        "",
        f"%\\subsection*{{Assistants to {venue.Name} Chairs}}",
        "%First Assistants Full Name, \\emph{Affiliation}\\\\",
        "%Second Assistants Full Name, \\emph{Affiliation}",
        "",
        "",
    ]

    committee = getCommittee(venue.File)
    if len(committee) > 0:
        lstExport.append(f"\\subsection{{{venue.NameCommittee}}}")
        lstExport.extend(committee)

    reviewers = getReviews(venue.File)
    if len(reviewers) > 0:
        lstExport.append(f"\\subsection{{{venue.NameReviewers}}}")
        lstExport.extend(reviewers)

    # The names come from pd.read_csv (UTF-8 by default); write UTF-8 as well
    # so non-ASCII names do not depend on the platform's locale encoding.
    with open(f'committee/committee-{venue.Code}.tex', 'w', encoding="utf-8") as fp:
        fp.write('\n'.join(lstExport))
            

tp23c has no reviwer file
tp23d has no reviwer file
tp23e has no reviwer file


# Loading ACM E-Rights CSV File
To get the CSV, navigate to https://cms.acm.org/cms_proceeding_papers_public.cfm?proceedingID=YOURPROCEEDINGSID&confID=YOURCONFERENCEID, press the `Create CSV` button at the top left, and copy and paste the content of the new window into the `export.csv` file in the `data-erights` folder.

In [5]:
# Read the raw e-rights export. The context manager closes the file handle as
# soon as all lines are loaded.
with open("./data-erights/export.csv", 'r') as file1:
    lines = file1.readlines()

ids = []
lstLines = []
# lines[0] is skipped as the header row. Each remaining row is split on the
# '","' separator after dropping its first character (presumably the opening
# quote of the first field).
for x in lines[1:]:
    if (not x.strip()):
        # Blank lines (e.g. left over from copy-and-paste) would produce a row
        # of missing values that breaks the string operations below.
        continue

    e = x[1:].split('","')

    if (not("WITHDRAWN" in x)):
        lstLines.append(e)
        ids.append(x.split(",")[0].replace('"', ""))
    else:
        print("WITHDRAWN", x.split(",")[0].replace('"', ""))

# Passing the column names to the constructor keeps an export without any
# usable row from failing on a column-count mismatch.
dfACM = pd.DataFrame(
    lstLines,
    columns=["ID","Title","Author","Email","DL Paper Type","Rights Granted","Third Party","Aux. Material","Video Recording","Artistic Images","Govt. Employees","Open Access","DOI","Authorizer","Statement","CC License","Non-ACM Copyright"],
)
# NOTE(review): Email, Signed and TitleRaw are derived here but are not part of
# the column selection at the end of this cell.
dfACM['Email'] = dfACM.Email.apply(lambda x: x.split(" ")[0])
dfACM['Signed'] = dfACM['Non-ACM Copyright'].apply(lambda x: x[:-3]) != "load For"
# Keep only the part of the title before an embedded " \setcopyright{" command.
dfACM.Title = dfACM.Title.apply(lambda x: x.split(" \\setcopyright{")[0])
# The prefix is the ID with all digits removed; it is matched against the
# Prefix column of the venue table.
dfACM["Prefix"] = dfACM.ID.apply(lambda x: re.sub(r'[0-9]', '', x))

dfACM["TitleRaw"] = dfACM.Title.str.replace('"', '')
dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace('[', '', regex=False)

## Might needs to be applied to fix LaTex issues.
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace('`', '', regex=False)
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace('“', '', regex=False)
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace("'", '', regex=False)
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace('(', '', regex=False)
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.replace('#', '', regex=False)
#dfACM["TitleRaw"] = dfACM.TitleRaw.str.lower()

# Remove duplicates, this is possible when the contact authors can not sign the copyright for all authors.
dfACM = dfACM.drop_duplicates("ID") 

dfACM = dfACM[["ID", "Prefix", "Title", "Author", "DOI"]]

#dfACM.head()

In [6]:
# Attach the venue name and code to every paper via its ID prefix.
df = pd.merge(dfACM, dfVenues[["Prefix", "Name", "Code"]], on="Prefix")

# The inner merge drops papers whose prefix matches no venue; report them so
# they do not vanish from the proceedings unnoticed.
dfUnmatched = dfACM[~dfACM.Prefix.isin(dfVenues.Prefix)]
if len(dfUnmatched) > 0:
    print(f'WARNING: The following papers have a prefix that is not listed in proceedingsInfo.csv and are skipped: {dfUnmatched.ID.to_list()}')

# Order the prefixes by the venue table's Order column instead of
# alphabetically, then sort the papers by venue and title.
myOrder = CategoricalDtype(
    dfVenues.sort_values("Order").Prefix.to_list(), 
    ordered=True
)

df["Prefix"] = df.Prefix.astype(myOrder)
df = df.sort_values(["Prefix", "Title"])

#df.head()

# Load Session Data From QOALA
QOALA is the SIGCHI tool to schedule conferences, see https://services.sigchi.org/qoala. Export the session data from QOALA as a `.json` file and save it as `export.json` in the `data-QOALA` folder.

In [7]:
with open("./data-QOALA/export.json", 'r') as f:
    qoala = json.load(f)
    
if (qoala["schemeVersion"] != 7):
    print("WARNING: This script was not tested with the QOALA expert scheme version. It might not fully working.")
    
dfQPapers = pd.DataFrame(qoala["contents"])

if len(dfQPapers) > 0:

    # Only the first session of a paper is used below, so report papers that
    # are scheduled more than once.
    dfX = dfQPapers[dfQPapers.sessionIds.apply(lambda x: len(x) > 1)]
    if len (dfX) > 0:
        print(f'WARNING: The following papers are in more than one session: {dfX.importedId.to_list()}')

    # A paper without any session has an empty list; taking element 0 of it
    # would raise IndexError, so such papers get a missing session id instead.
    dfNoSession = dfQPapers[dfQPapers.sessionIds.apply(lambda x: len(x) == 0)]
    if len(dfNoSession) > 0:
        print(f'WARNING: The following papers are not assigned to any session: {dfNoSession.importedId.to_list()}')

    dfQPapers.sessionIds = dfQPapers.sessionIds.apply(lambda x: x[0] if len(x) > 0 else np.nan)

    dfQSessions = pd.DataFrame(qoala["sessions"])
    dfQSessions = dfQSessions.rename(columns={"id":"sessionId", "name":"SessionName"})

    dfQoala = pd.merge(dfQPapers, dfQSessions[["sessionId", "SessionName"]], left_on="sessionIds", right_on="sessionId", how="left")[["importedId", "title", "SessionName"]]
    # importedId is split at "-" into the venue file code and the raw
    # submission number.
    dfQoala["File"] = dfQoala.importedId.apply(lambda x: x.split("-")[0])
    dfQoala["IdRaw"] = dfQoala.importedId.apply(lambda x: x.split("-")[1])

    # Translate the file code into the venue prefix so that the ID has the
    # same form as the ID column of the e-rights data.
    myMap = dfVenues.set_index("File")["Prefix"].to_dict()
    dfQoala["Prefix"] = dfQoala.File.map(myMap)

    dfQoala["ID"] = dfQoala["Prefix"] + dfQoala["IdRaw"]
else:
    print("WARNING: QOALA data for the papers is not available.")
    # Define an empty frame with the same columns so the cells that merge on
    # dfQoala still run; every paper then simply has no QOALA session.
    dfQoala = pd.DataFrame(columns=["importedId", "title", "SessionName", "File", "IdRaw", "Prefix", "ID"])

In [8]:
# Remove a session column left over from an earlier run of this cell so the
# merge below does not collide with it.
if "SessionName" in df.columns:
    df.pop("SessionName")
df = df.merge(dfQoala[["ID", "SessionName"]], how="outer", on="ID")

# Venues that do not take their sessions from QOALA use the venue name as the
# session name for all of their papers.
for _, venue in dfVenues.iterrows():
    if venue.UseQOALASessions:
        continue
    df.loc[df.Prefix == venue.Prefix, "SessionName"] = venue.Name
#df.head()

# Generate the TOC

In [9]:
lstExport = []
lstExport.append("%% This file lists all items that are in the proceedings in the ACM DL. This again varies depending on if you have a companion proceedings or not.")
lstExport.append("%% Note: For conferences that will publish the full papers in PACMHCI, then the full papers are not to be listed here.")
lstExport.append("")
lastPrefix = ""
counterTOC = 1

tapsTOC=[]
lstAuthorIndex = []   # [author name, TOC_ID] pairs, one per author of each paper
lstDetails = []       # one dict per paper: ID, TOC_ID and running Order
for j, f in dfVenues.sort_values("Order").iterrows():
    dfTrack = df[df.Prefix == f.Prefix]

    # A venue without papers gets no heading. Closing its (never opened)
    # enumerate list would produce invalid LaTeX.
    if len(dfTrack) == 0:
        print(f"{f.Name} has no entries for the TOC")
        continue

    counter = 1
    lastSessionKey = ""
    useSessions = bool(f.UseQOALASessions)

    lstExport.append(f'\\subsection{{{f.Name}}}')

    if useSessions:
        # NOTE(review): "Name" is the venue name and therefore constant within
        # a track; confirm whether "Title" was intended as the second key.
        dfTrack = dfTrack.sort_values(["SessionName", "Name"])
        lstUnscheduled = dfTrack[dfTrack.SessionName.isna()].ID.to_list()
        if len(lstUnscheduled) > 0:
            print(f'WARNING: The following papers have no QOALA session: {lstUnscheduled}')
    else:
        dfTrack = dfTrack.sort_values(["Name"])

    for i, e in dfTrack.iterrows():
        # A missing session name is NaN and NaN != NaN, so comparing the raw
        # values would open a new heading for every unscheduled paper. Map all
        # missing names to one key so these papers share a single heading.
        sessionKey = None if pd.isna(e.SessionName) else e.SessionName

        if useSessions and (sessionKey != lastSessionKey):
            lastSessionKey = sessionKey
            if (counter != 1):
                # Close the previous session's list: the blank line appended
                # after its last entry is replaced by the closing tag.
                lstExport.pop()
                lstExport.append('\\end{enumerate}')
                lstExport.append("")
            lstExport.append(f'\\subsubsection{{{e.SessionName}}}')
            lstExport.append('\\begin{enumerate}')
        elif (counter == 1):
            lstExport.append('\\begin{enumerate}')

        lstExport.append(f'%PCS ID: {e.ID}')
        # The TOC id is the upper-cased venue code plus a three-digit counter
        # that runs through the whole venue (it is not reset per session).
        TOC_ID = f"{e.Code.upper()}{counter:03}"
        lstExport.append(f'\\item[\\href{{{e.DOI}}}{{\\textbf{{{TOC_ID}}}}}]')

        xx = f'\\href{{{e.DOI}}}{{\\textbf{{{e.Title}}}}}\\\\'
        lstExport.append(xx.replace("&", "\\&").replace("#", "\\#"))
        # Authors are separated by ";", name and affiliation by ":".
        for x in e.Author.split(";"):
            x = x.split(":")
            if len(x) == 1 and not x[0].strip():
                # Empty segment, e.g. caused by a trailing ";".
                continue
            if len(x) > 1:
                xx = f"{x[0]}, \\emph{{({x[1]})}}\\\\"
            else:
                # No affiliation given: list the name only instead of failing.
                xx = f"{x[0]}\\\\"
            lstAuthorIndex.append([x[0], TOC_ID])
            lstExport.append(xx.replace("&", "\\&").replace("#", "\\#").replace(",", ", ").replace("  ", " "))

        lstExport.append("")

        # Record the position before advancing the counters so that Order
        # starts at 1, in line with the TOC id numbering.
        lstDetails.append({"ID": e.ID, "TOC_ID":TOC_ID, "Order":counterTOC})

        counter = counter + 1
        counterTOC = counterTOC + 1

    # Close the last list of the venue (the track is known to be non-empty).
    lstExport.pop()
    lstExport.append('\\end{enumerate}')
    lstExport.append("")
    lstExport.append("")

# Create the output folder on demand so a fresh checkout can run this cell.
os.makedirs("./content", exist_ok=True)
with open('./content/content.tex', 'w') as fp:
    fp.write('\n'.join(lstExport))
        

# Generate the TOC for APTARA
This puts the TOC entries into "sessions". Sessions are the structure ACM uses in the ACM DL; for an example, see the list on the left side of https://dl.acm.org/doi/proceedings/10.1145/3544548
The resulting list is to be sent via email to APTARA so that they can sort the entries accordingly and prepare the ACM upload.

In [10]:
# Combine the paper data with the TOC ids and running order recorded while the
# TOC was generated.
dfAptaraExport = pd.merge(df, pd.DataFrame(lstDetails, columns=["ID", "TOC_ID", "Order"]), on="ID")
# df already has a "SessionName" column, so renaming "Name" to "SessionName"
# would create two columns with the same header. The venue name is exported
# under its own header instead.
# NOTE(review): confirm with APTARA which of the two columns they expect.
dfAptaraExport = dfAptaraExport.rename(columns={"Name": "Venue"})
dfAptaraExport = dfAptaraExport[["Order", "ID", "TOC_ID", "Title", "Venue", "SessionName"]]
# Write the rows in TOC order so the file can be read top to bottom.
dfAptaraExport = dfAptaraExport.sort_values("Order")

# Create the output folder on demand so a fresh checkout can run this cell.
os.makedirs("./export", exist_ok=True)
dfAptaraExport.to_csv("./export/TOC-for-APTARA.csv", index=False)
dfAptaraExport.head()

# Generate Author Index for the Back Matter

In [11]:
# One row per (author, TOC id) pair. Passing the column names to the
# constructor keeps an empty author list from failing on a column-count
# mismatch.
dfAI = pd.DataFrame(lstAuthorIndex, columns=["Name", "Submission"])
# Strip surrounding whitespace so the same author is not split into several
# index entries and leading blanks do not disturb the sort order.
dfAI["Name"] = dfAI.Name.str.strip()
# Collapse to one row per author with all TOC ids joined by ", ".
dfAI = dfAI.groupby("Name").Submission.apply(lambda x: ", ".join(x))
dfAI = dfAI.reset_index()
# NameRaw is only the sort key: quotes and "[" removed, transliterated with
# unidecode, and lower-cased.
dfAI["NameRaw"] = dfAI.Name.str.replace('"', '')
dfAI["NameRaw"] = dfAI.NameRaw.apply(lambda x: unidecode(x))
dfAI["NameRaw"] = dfAI.NameRaw.str.replace('[', '', regex=False)
dfAI["NameRaw"] = dfAI.NameRaw.str.lower()
dfAI = dfAI.sort_values("NameRaw")

# Create the output folder on demand so a fresh checkout can run this cell.
os.makedirs("./content", exist_ok=True)
with open('./content/index.tex', 'w') as fp:
    fp.write('\\begin{multicols}{2}\n')
    for _, author in dfAI.iterrows():
        # "\\dotfill" is written with an escaped backslash; the emitted text
        # is unchanged, but the string no longer has an invalid escape.
        fp.write(f'{author.Name} \\dotfill {author.Submission}\\\\\n')

    fp.write('\\end{multicols}')