# Process LhARA author and institute data

Read the author and institute files.  

If you have problems with the characters ï»¿ appearing at the beginning of files, make sure the files are saved as `CSV (Comma delimited) (*.csv)`, not `CSV UTF-8 (Comma delimited) (*.csv)`. This removes the Byte Order Mark (BOM), which is what the weird characters are!

In [3]:
import numpy as np
import pandas as pd
#
# Author table exported from the spreadsheet as CSV.
author_file = "LhARA-authors-Dev.csv"

# Open the file explicitly (default text encoding) and let pandas parse the handle;
# index_col=None keeps every CSV column as data and gives a default 0..N-1 row index.
with open(author_file, mode='r') as author_csv:
    author_df = pd.read_csv(author_csv, index_col=None)

# Order the rows alphabetically by surname. The row labels are NOT renumbered,
# so each row keeps the label of its original position in the CSV file.
author_df = author_df.sort_values(by='Surname')

# Spot check: display the row(s) of a single author.
author_df.loc[author_df['Surname'] == "Hardiman"]

Unnamed: 0,First name,Surname,Initials,Institute A,Institute B,Institute C,Institute D,Notes
31,Claire,Hardiman,C.,IC-RadPhys,,,,


In [5]:
# Institute table exported from the spreadsheet as CSV.
institute_file = "LhARA-institutes-Dev.csv"

with open(institute_file, mode='r') as institute_csv:
    # index_col=0: the first CSV column (the institute key) becomes the row index,
    # so rows can be looked up with institute_df.loc[<institute key>, ...].
    institute_df = pd.read_csv(institute_csv, index_col=0)

# Display the table as the cell output.
institute_df

Unnamed: 0_level_0,Address,Notes
Institute key,Unnamed: 1_level_1,Unnamed: 2_level_1
Berk,"Lawrence Berkeley National Laboratory, 1~Cyclo...",
Birm-Cancer,"Department of Cancer and Genomic Sciences, Col...",
Birm-MedPhys,"Department of Medical Physics, University Hosp...",
Birm-Phys,"School of Physics and Astronomy, University of...",
CERN-DG,"DG Unit, CERN, CH-1211 Geneva~23, Switzerland.",
CI,"Cockcroft Institute, Daresbury Laboratory, Sci...",
CLF,"Central Laser Facility, STFC Rutherford Applet...",
Corerain,"Corerain Technologies, 14F,~Changfu Jinmao Bui...",
Curie-Orsay,"Institut Curie-Orsay Research Center, Bat a Ca...",
Curie-Paris,"Institut Curie, Universit\'e PSL, CNRS UMR3347...",


Make author list in tex format.

In [8]:
Debug = False
#
# Build, for every author, the LaTeX name and the list of institute numbers.
#
# Results (used by the following cells):
#   author_list[na]             "Initials\,Surname" of the na-th author
#   institute_arr[na, 0:k]      institute numbers of that author, k = n_inst_arr[na]
#   institute_dict[key]         institute key    -> institute number (1, 2, 3, ...)
#   inv_institute_dict[number]  institute number -> institute key
#
# Institute numbers are handed out in order of first appearance while walking
# through the authors, so the numbering follows the author order.
#
n_authors = len(author_df)
#
author_list = []
#
# Columns holding the (up to four) institute keys of an author
institute_cols = ["Institute A", "Institute B", "Institute C", "Institute D"]
#
# Array containing numbers of first, second, third etc. institutes for each author
# (0 = unused slot; real institute numbers start at 1)
institute_arr = np.zeros((n_authors, len(institute_cols))).astype(int)
n_inst_arr = np.zeros(n_authors).astype(int)
#
# Dictionary linking institute keys to institute numbers
institute_dict = {}
#
inst_number = 1
#
# Walk through the rows by POSITION, not by label. author_df has been sorted by
# surname without renumbering its index, so label-based access such as
# author_df.loc[na, ...] with na = 0, 1, 2, ... would return the rows in the
# original CSV order and silently ignore the sort.
for na, (_, author) in enumerate(author_df.iterrows()):
    author_list.append(author["Initials"] + "\\," + author["Surname"])
    #
    n_inst_arr[na] = 0
    # Empty institute cells are read as NaN; dropna() skips them
    for inst in author[institute_cols].dropna():
        #
        # If no institute number exists yet for this key, make a new one
        if inst not in institute_dict:
            institute_dict[inst] = inst_number
            inst_number += 1
        #
        # Store the institute number in the next free slot of this author
        institute_arr[na, n_inst_arr[na]] = institute_dict[inst]
        n_inst_arr[na] += 1
#
# Create dictionary linking institute number to institute key
inv_institute_dict = {val: key for key, val in institute_dict.items()}
#
if Debug:
    for na in range(0, n_authors):
        print(" ")
        print("Author",author_list[na])
        print("Institute numbers",institute_arr[na, 0:n_inst_arr[na]])
        for ni in range(0, n_inst_arr[na]):
            print("Institute key(s)",inv_institute_dict[institute_arr[na, ni]])
        for ni in range(0, n_inst_arr[na]):
            # The institute table has the columns "Address" and "Notes" (no "Name")
            print("Institute names(s)",
                  institute_df.loc[inv_institute_dict[institute_arr[na, ni]], "Address"])

In [10]:
Debug = False
#
# Create LaTeX string for authors: \noindent, then "Initials\,Surname$^{i, j}$"
# for every author separated by ", ", closed by ". \newline".
author_entries = []
for na in range(0, n_authors):
    # Institute numbers actually assigned to this author (unused slots hold 0)
    inst_numbers = [str(institute_arr[na, ni]) for ni in range(0, n_inst_arr[na])]
    if inst_numbers:
        author_entries.append(author_list[na] + "$^{" + ", ".join(inst_numbers) + "}$")
    else:
        # No institute given: leave out the superscript instead of printing the
        # placeholder 0, which does not correspond to any entry of the institute list.
        print("Warning: no institute listed for author", author_list[na])
        author_entries.append(author_list[na])
#
# Joining the entries (rather than appending ", " and slicing off the last two
# characters) keeps the leading \noindent intact even when there are no authors.
author_string = '\\noindent ' + ", ".join(author_entries) + ". \\newline" # Make a space after the author section
if Debug:
    print(author_string)

In [12]:
# Save the author fragment; the skeleton document includes it via \input{LhARA-Authors}.
with open("LhARA-Authors.tex", mode="w") as authors_tex:
    authors_tex.write(author_string)

Make institute list in tex format

In [15]:
Debug = False
#
n_insts = len(institute_dict)
#
# Institute keys in numbering order: institute number ni is at position ni - 1
institute_keys = [inv_institute_dict[ni] for ni in range(1, n_insts + 1)]
#
# Report every key used in the author table that has no row in the institute
# table (e.g. a typo in one of the two files) in a single, readable error.
missing_keys = [key for key in institute_keys if key not in institute_df.index]
if missing_keys:
    raise KeyError("Institute key(s) used by authors but not found in the institute table: "
                   + ", ".join(str(key) for key in missing_keys))
#
institute_list = [institute_df.loc[key, "Address"] for key in institute_keys]
#
# Make LaTeX code for institutes: \noindent, then index and address of each
# institute, with " \newline" between (not after) the entries. Joining avoids
# slicing off a trailing separator, which would eat into \noindent when the
# institute list is empty.
institute_entries = ['$^{' + str(ni) + '}$' + address
                     for ni, address in enumerate(institute_list, start=1)]
institute_string = '\\noindent ' + ' \\newline'.join(institute_entries)
#
if Debug:
    print(institute_string)

In [17]:
# Save the institute fragment; the skeleton document includes it via \input{LhARA-Institutes}.
with open("LhARA-Institutes.tex", mode="w") as institutes_tex:
    institutes_tex.write(institute_string)

Make skeleton TeX file

In [20]:
%%writefile LhARA-Authors-Institutes.tex

% Skeleton document: its body only pulls in the two generated fragments,
% LhARA-Authors.tex and LhARA-Institutes.tex, so that they can be typeset and checked.
\documentclass[a4paper,11pt]{scrartcl}

\usepackage{graphicx}
\usepackage[utf8]{inputenc} %-- to use French accented characters
\usepackage{amsmath,amssymb,amsthm} 
\usepackage[round]{natbib}
\usepackage{url}
\usepackage{xspace}
\usepackage[left=20mm,top=20mm]{geometry}
\usepackage{algorithmic}
\usepackage{subcaption}
\usepackage{mathpazo}
\usepackage{booktabs}
\usepackage{hyperref}
% \usepackage{draftwatermark}

% Shorthand macros for abbreviations and cross-references
\newcommand{\ie}{ie}
\newcommand{\eg}{eg}
\newcommand{\reffig}[1]{Figure~\ref{#1}}
\newcommand{\refsec}[1]{Section~\ref{#1}}

\setcapindent{1em} % for captions of Figures

% Label the algorithmic package's require/ensure lines as Input:/Output:
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}

%\title{My title}
%\author{Hugo Ledoux\\ \url{h.ledoux@tudelft.nl}}

% The date is only printed by \maketitle, which is commented out below
\date{\today}

\begin{document}

%\maketitle

% Author list fragment (file LhARA-Authors.tex)
\input{LhARA-Authors}

% Institute list fragment (file LhARA-Institutes.tex)
\input{LhARA-Institutes}

%
%\section{Introduction}

%Text

%\bibliographystyle{plainnat}
%\bibliography{/Users/hugo/references/references}

\end{document}

Overwriting LhARA-Authors-Institutes.tex


Run TeX

In [23]:
Debug = False
#
# Typeset the skeleton document. IPython replaces $AUTHORS in the command by the
# value of the notebook variable AUTHORS before handing the command to the shell.
AUTHORS = 'LhARA-Authors-Institutes.tex'
# nonstopmode: pdflatex reports errors and carries on instead of stopping to
# wait for keyboard input, which a notebook cell cannot provide.
comString = 'pdflatex -interaction=nonstopmode $AUTHORS'
outString = get_ipython().getoutput(comString)
#
# getoutput() returns a list of output lines, so `"error" in outString` would only
# match a line that is exactly "error". Search inside every line instead; LaTeX
# error messages start with "!" and usually contain the word "Error".
latex_failed = any(line.startswith("!") or "error" in line.lower() for line in outString)
if latex_failed or Debug:
    print(" ")
    print("Output from pdflatex:")
    print("\n".join(outString))

Look at PDF

In [25]:
# Open the typeset PDF in the PDF24 reader (Windows installation path).
# IPython replaces $AUTHORS in the command by the value of the variable AUTHORS.
AUTHORS = 'LhARA-Authors-Institutes.pdf'
# Raw string: the backslashes of the Windows path are meant literally. In a normal
# string "\P" and "\p" are invalid escape sequences, which Python warns about.
comString = r'"C:\Program Files\PDF24\pdf24-Reader.exe" $AUTHORS'
outString = get_ipython().getoutput(comString)
print(" ")
print("Output from pdf24-Reader:")
print(outString)

 
Output from pdf24-Reader:
[]
