In [1]:
import html
import urllib.parse

import pandas as pd
# this is to avoid getting warnings
pd.options.mode.chained_assignment = None

In [2]:
# Load the candidate spreadsheet into a DataFrame.
# NOTE(review): this is an absolute path on one user's machine; the notebook
# cannot be re-run elsewhere until the path is adjusted.
df = pd.read_excel('/home/macbuse/RECRUTEMENT/MCF_153_LAST.xls')

In [3]:
# Show the column labels that were read from the spreadsheet.
df.columns

Index(['Campagne', 'Session', 'Corps', 'Section', 'Autre section', 'Article',
       'N° emploi', 'Emplois liés', 'Profil', 'Job profile',
       'Research fields EURAXESS', 'Implant', 'Numéro SI local',
       'Numéro Gesup', 'Localisation', 'Vacance', 'Type de candidature',
       'Chgt de section', 'Civilité', 'Nom', 'Nom d'usage ou marital',
       'Prénom', 'Né(e) le', 'Unnamed: 23', 'Unnamed: 24', 'Rapporteur1',
       'Rapporteur2', 'Avis local', 'Lieu de naissance', 'N° candidat',
       'Référence qualif', 'Numen', 'Etat dossier', 'Détail suivi',
       'Situation professionnelle', 'Lieu d'exercice',
       'Ville ou pays d'exercice', 'Nationalité', 'Adresse1', 'Adresse2',
       'Adresse3', 'Code postal', 'Code étranger', 'Ville', 'Pays',
       'Téléphone', 'Autre tél', 'Fax', 'Email', 'Candidat local', 'Chercheur',
       'Titre thèse', 'Date soutenance', 'Lieu soutenance', 'Directeur Thèse',
       'Jury', 'N° de qualif', 'Activités enseignement', 'Theme', 'motif',
       

---

# Define string constants that we'll use later

- All strings are Unicode in Python 3, which simplifies things a lot.

https://docs.python.org/3/howto/unicode.html

In [4]:
# Title shown in the browser tab of the generated page.
title = "TEST"

# Opening part of the generated web page.
# The DOCTYPE is kept on one line (a line break inside the quoted public
# identifier makes it unrecognisable), and the header now closes <head> and
# opens <body>, so the candidate blocks written after it land in the body.
_header_html = f'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="fr" xml:lang="fr">
<head>
<title>{title}</title>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
</head>
<body>
'''


# LaTeX preamble written at the top of every generated .tex file.
# It is a raw string, so the backslashes reach LaTeX untouched.
# The preamble declares utf8 input encoding, so the .tex files built from it
# have to be written as UTF-8.
# It stops right after \begin{document} and \pagestyle{empty}: the code that
# writes a file supplies the body, and nothing here emits \end{document}.
_header_tex = r'''\documentclass[a4paper, 12pt, titlepage]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amssymb}
\usepackage[french]{babel}
\selectlanguage{french}
\usepackage{enumerate}
\usepackage{graphicx}


\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\def\no{\noindent}
\def\vs{\vskip 1cm}
\def\ben{\begin{enumerate}}
\def\een{\end{enumerate}}

\begin{document}
\pagestyle{empty}
'''




# Groups of spreadsheet columns; each list is used to slice the candidate
# table into one section of the generated page.

# Columns describing the thesis.
_thesis_fields = [
    'Titre thèse',
    'Date soutenance',
    'Lieu soutenance',
    'Directeur Thèse',
    'Jury',
]

# Columns describing the current position and activities.
_activity_fields = [
    'Situation professionnelle',
    'Activités enseignement',
    'Activités administratives',
]

# Columns identifying the candidate.
_name_fields = [
    'Nom',
    'Prénom',
    'Né(e) le',
]

# Columns added by this notebook, holding the generated search links.
_web_fields = [
    'google',
    'arxiv',
]


In [7]:
# Preview the identity columns of the first ten rows.
df[_name_fields].head(10)

Unnamed: 0,Nom,Prénom,Né(e) le
0,ADICEAM,LAKI MICHEL FAUSTIN,20/09/1988
1,ALDANA DOMINGUEZ,CLARA LUCIA,04/12/1976
2,ANNI,SAMUELE,20/06/1985
3,ARANCIBIA ROBERT,NICOLAS JOSE,08/07/1985
4,BALKANOVA,OLGA,27/06/1988
5,BEI,FRANCESCO,27/08/1985
6,BELOTTO DA SILVA,ANDRE RICARDO,04/03/1987
7,BENGOECHEA,PALOMA,11/01/1984
8,BENZERGA,MOHAMED,18/02/1990
9,BETEA,DAN DUMITRU,30/05/1984


---

## Helper functions to generate requests

In [8]:
def fix_name(row):
    '''Return "first-name last-name" as a '+'-separated search token.

    row is a mapping with at least the keys 'Prénom' and 'Nom'.
    Hyphens are treated like spaces, and every space becomes a '+'.
    '''
    spaced = "{Prénom} {Nom}".format(**row).replace('-', ' ')
    # splitting on single spaces and re-joining with '+' swaps each space
    return '+'.join(spaced.split(' '))

def arxiv(row):
    '''Return an HTML link that searches arXiv for the candidate.

    row is a mapping with at least the keys 'Prénom' and 'Nom'.
    The returned string is an <a> element whose text is the candidate's
    name and whose target is the arXiv search page for that name.
    '''
    # fix_name gives a '+'-separated name; keep the '+' separators and
    # percent-encode everything else (accents, apostrophes, '&', ...).
    name = urllib.parse.quote(fix_name(row), safe='+')
    url = f'https://arxiv.org/search/?query={name}'
    url += '&searchtype=all&abstracts=show&order=-announced_date_first&size=50'
    # The name comes from the spreadsheet: escape it before placing it in HTML.
    label = html.escape('{Prénom} {Nom}'.format(**row))
    # html.escape turns the '&' separators of the query string into '&amp;',
    # as required inside an attribute value.
    # '_blank' (with the underscore) is the keyword for "open a new tab";
    # plain 'blank' is just a window name that every link would share.
    return f'<a href="{html.escape(url)}" target="_blank">  {label} </a>'

def google(row):
    '''Return an HTML link that searches Google for the candidate.

    row is a mapping with at least the keys 'Prénom' and 'Nom'.
    The word "math" is appended to the query to help disambiguate people
    sharing the same name.
    '''
    # fix_name gives a '+'-separated name; keep the '+' separators and
    # percent-encode everything else (accents, apostrophes, '&', ...).
    name = urllib.parse.quote(fix_name(row), safe='+')
    url = f'https://www.google.fr/search?q={name}+math'
    # The name comes from the spreadsheet: escape it before placing it in HTML.
    label = html.escape('{Prénom} {Nom}'.format(**row))
    # '_blank' (with the underscore) is the keyword for "open a new tab";
    # plain 'blank' is just a window name that every link would share.
    return f'<a href="{html.escape(url)}" target="_blank">{label} </a>'

---

# Make a web page for all the candidates

In [9]:
# Keep only the rows whose first column is filled in.
# .copy() makes `candidates` an independent frame, so the columns added
# below are not written into a slice of `df`.
candidates = df[df.iloc[:, 0].notna()].copy()

names = candidates[_name_fields]
theses = candidates[_thesis_fields]
activity = candidates[_activity_fields]

# Derived columns: the '+'-separated name and the two search links.
dico_names = names.to_dict(orient='records')
candidates['web_name'] = [fix_name(record) for record in dico_names]
candidates['google'] = [google(record) for record in dico_names]
candidates['arxiv'] = [arxiv(record) for record in dico_names]

web = candidates[_web_fields]

def table2blocks(dff):
    '''Render every row of a DataFrame as a block of HTML lines.

    Parameters
    ----------
    dff : pandas.DataFrame
        Table to render; column labels become the bold captions.

    Returns
    -------
    list of str
        One string per row, in row order. Each cell becomes a line
        "<b>column : </b> value <br>". Values are inserted as they are
        (not HTML-escaped), so columns that already hold HTML, such as
        links, keep working.
    '''
    def shown(value):
        # An empty spreadsheet cell arrives as NaN/None/NaT; print nothing
        # for it instead of the literal text "nan".
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return value

    blocks = []
    for row in dff.to_dict(orient='records'):
        lines = [f'<b>{key} : </b> {shown(value)} <br>\n'
                 for key, value in row.items()]
        blocks.append('\n'.join(lines))
    return blocks


# One list of HTML blocks per column group; zip(*...) below regroups them
# so that each iteration holds the four blocks of a single candidate.
text_blocks = [table2blocks(group) for group in [names, web, theses, activity]]

# The page header announces charset=UTF-8, so write the file as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('web.htm', 'w', encoding='utf-8') as fp:
    fp.write(_header_html)
    for candidate in zip(*text_blocks):
        fp.write('\n<br>\n'.join(candidate))
        fp.write('<br><hr><br>')
            

In [10]:
# Check the generated '+'-separated names.
candidates['web_name']

0        LAKI+MICHEL+FAUSTIN+ADICEAM
1       CLARA+LUCIA+ALDANA+DOMINGUEZ
2                       SAMUELE+ANNI
3      NICOLAS+JOSE+ARANCIBIA+ROBERT
4                     OLGA+BALKANOVA
                   ...              
103              FRANCESCO+VENEZIANO
104                     JUAN+VIU+SOS
105                 YANNICK+VOGLAIRE
106               GABRIEL+ZALAMANSKY
107                    HUAFENG+ZHANG
Name: web_name, Length: 108, dtype: object

---

# Converting templates using regular expressions 

Convert the templates in `gregTexData.py` to Python 3 using the **re** module.

The converted templates use `{name}` placeholders, so we need to use ```string.format(**dictionary)```
to fill them in before printing to a file.

In [89]:
# One dict per spreadsheet row, keyed by column label.
dd = df.to_dict(orient='records')
# Inspect the keys of the first record: these are the names that can be
# passed to a template through str.format(**record).
dd[0].keys()

dict_keys(['Campagne', 'Session', 'Corps', 'Section', 'Autre section', 'Article', 'N° emploi', 'Emplois liés', 'Profil', 'Job profile', 'Research fields EURAXESS', 'Implant', 'Numéro SI local', 'Numéro Gesup', 'Localisation', 'Vacance', 'Type de candidature', 'Chgt de section', 'Civilité', 'Nom', "Nom d'usage ou marital", 'Prénom', 'Né(e) le', 'Unnamed: 23', 'Unnamed: 24', 'Rapporteur1', 'Rapporteur2', 'Avis local', 'Lieu de naissance', 'N° candidat', 'Référence qualif', 'Numen', 'Etat dossier', 'Détail suivi', 'Situation professionnelle', "Lieu d'exercice", "Ville ou pays d'exercice", 'Nationalité', 'Adresse1', 'Adresse2', 'Adresse3', 'Code postal', 'Code étranger', 'Ville', 'Pays', 'Téléphone', 'Autre tél', 'Fax', 'Email', 'Candidat local', 'Chercheur', 'Titre thèse', 'Date soutenance', 'Lieu soutenance', 'Directeur Thèse', 'Jury', 'N° de qualif', 'Activités enseignement', 'Theme', 'motif', 'Activités recherche', 'Activités administratives', 'Autres diplômes', 'Travaux', 'Titres', 'N

# the original template 

Load the original file as text

In [None]:
# Read the original template module as plain text.
# The encoding is given explicitly so the result does not depend on the
# platform default.
# NOTE(review): assumes the file is UTF-8, like the module generated from it - confirm.
with open('./gregTexData.py', 'r', encoding='utf-8') as fp:
    txt = fp.read()

---

# regexp magic



You don't need to run the next cell unless you want to regenerate `p3Templates.py`.

In [158]:
import re

def callback(m):
    '''Turn a matched %(name)s placeholder into its str.format form {name}.

    m is a match object whose first group is the placeholder name.
    '''
    placeholder = m.group(1)
    return f'{{{placeholder}}}'

def fix_tex(m):
    '''Double every brace in the matched text.

    m is a match object; its first group is returned with '{' replaced by
    '{{' and '}' by '}}', so that str.format later treats the braces as
    literal characters.
    '''
    return m.group(1).replace('{', '{{').replace('}', '}}')
    
ss = txt

# Python 3 has no ur'' string prefix: turn it into r''.
# The \b keeps ordinary words that merely end in "ur" before an apostrophe
# from being altered.
ss = re.sub(r"\bur'", "r'", ss)

# Regexps to change the formatting to Python 3.
# The order matters: braces must be doubled before new {name} placeholders
# are introduced, otherwise the placeholders would be escaped too.

# escape { and } in the TeX inside each triple-quoted template
ss = re.sub(r"('''.*?''')", fix_tex, ss, flags=re.DOTALL)
# replace the %(name)s formatting with {name}
# (raw string: '\(' is not a valid escape in a normal string literal)
ss = re.sub(r'%\((.*?)\)s', callback, ss)

# Python 3 reads module source as UTF-8 by default, so write the generated
# module as UTF-8 explicitly rather than with the platform default.
with open('p3Templates.py', 'w', encoding='utf-8') as fp:
    fp.write(ss)

---

# Testing

You do need to run this.
- If you are testing and don't want to restart the kernel, then use ```importlib.reload```.

In [11]:
#import importlib
#importlib.reload(p3Templates)

import p3Templates

In [12]:
# Print the first 200 characters of the converted template as a sanity check.
print(p3Templates.texTemplateMCF[:200])



\begin{{center}}
{{\Large {Corps} {N° emploi:.0f}-{Numéro Gesup}: Rapport sur la candidature de }}
\end{{center}}
\vs

\no
{{\bf {{\large Nom}} :}} {Nom}\\
{{\bf Prénom :}} {Prénom}\\
{{\bf Date de 


---

# tests

---

---

# with some random candidate


So:
- I dumped the report to a file.
- I compiled it, and it compiled OK except for **babel**.
- Normally you should run this in a loop:

```
for candidate in dd:
    pass
```

multiple .tex files  for the model reports

In [13]:
dd = candidates.to_dict(orient='records')
# Arbitrary candidate, picked only to try the template out.
candidate = dd[3]

latex_report = p3Templates.texTemplateMCF.format(**candidate)

# web_name is '+'-separated; use '_' instead for the file name.
name = candidate['web_name'].replace('+', '_')

# The LaTeX preamble declares utf8 input encoding, so write the file as
# UTF-8 explicitly instead of relying on the platform's default encoding.
with open(f"{name}.tex", 'w', encoding='utf-8') as fp:
    fp.write(_header_tex)
    fp.write(latex_report)

---

# non auditionnés

single .tex file 

You should change the values in `p3Templates.committee`:
- either in the file, then reload the module,
- or dynamically, as below.

In [14]:
# Show the committee settings currently stored in p3Templates.
p3Templates.committee

{'PRESIDENT': 'Erwan Lanneau',
 'VOTANTS': 14,
 'NULS': 0,
 'OUI': 14,
 'NON': 0,
 'SCALE': 0.2,
 'DATE': 'now'}

In [15]:

# You should have this column in the xls;
# it is filled with a dummy value here just for a test.
candidates['motif'] = ['ab']*len(candidates)

dd = candidates.to_dict(orient='records')
# Blank one motif to test that auditioned candidates are skipped.
dd[2]['motif'] = ''


# This is the module's own dict, so the changes below also apply to
# p3Templates.committee itself.
committee = p3Templates.committee
committee['DATE'] = 'today'
# signature resizing
committee['SCALE'] = .5


# The LaTeX preamble declares utf8 input encoding, so write the file as
# UTF-8 explicitly instead of relying on the platform's default encoding.
with open("non_audition.tex", 'w', encoding='utf-8') as fp:
    # making a single .tex file
    # one page per candidate
    fp.write(_header_tex)
    for candidate in dd:
        motif = candidate['motif']
        # No motif means the candidate is auditioned. An empty spreadsheet
        # cell is read as NaN, which is truthy, so test for it explicitly.
        if pd.isna(motif) or not motif:
            # print the name to check
            print("Audition : {Nom}".format(**candidate))
            continue
        # the template also needs the committee fields (date, votes, ...)
        candidate.update(committee)
        latex_report = p3Templates.nonAudition.format(**candidate)
        fp.write(latex_report)
        fp.write(p3Templates.motifs[motif])
        fp.write('\n\\newpage\n')
    # don't forget this
    fp.write('\\end{document}')

Audition : ANNI
