In [1]:
import os
import glob
from pathlib import Path
import pandas as pd
import numpy as np
from PyPDF2 import PdfReader, PdfWriter
import shutil
from shutil import copyfile
import subprocess
from concurrent.futures import ThreadPoolExecutor
import re
from datetime import datetime

# Function to compile and clean up LaTeX files in parallel
def compile_latex(tex_file):
    """Compile one .tex file with pdflatex, then delete its by-products.

    pdflatex is run once, in non-stop mode, from the directory that contains
    the .tex file. The executable is taken from the notebook-level
    ``pdflatex_path`` variable, which must be defined before this is called.

    Parameters
    ----------
    tex_file : str or pathlib.Path
        Path of the .tex file to compile.

    Returns
    -------
    None
        The outcome is reported on stdout only.
    """
    tex_path = Path(tex_file)

    # Compile the .tex file to PDF. The exit status is inspected instead of
    # being ignored so a failed run is not reported as "Compiled".
    result = subprocess.run(
        [pdflatex_path, '-interaction=nonstopmode', str(tex_path)],
        cwd=tex_path.parent,
    )
    if result.returncode == 0:
        print(f"Compiled: {tex_path}")
    else:
        print(f"Compiled with errors (exit code {result.returncode}): {tex_path}")

    # Remove intermediate files that share the .tex file's stem
    for ext in ['aux', 'log', 'out', 'txt']:
        temp_file = tex_path.with_suffix(f'.{ext}')
        try:
            temp_file.unlink()
        except FileNotFoundError:
            # Nothing to clean up for this extension.
            pass
        except OSError as e:
            # Best effort: a locked file should not abort the whole run.
            print(f"Could not delete {temp_file}: {e}")

# Function to clean up intermediate LaTeX files
def cleanup_intermediate_files(directory):
    """Delete LaTeX by-product files found directly inside ``directory``.

    Parameters
    ----------
    directory : pathlib.Path
        Folder to clean. Only its direct children are considered.

    Deletion is best effort: a file that cannot be removed is reported on
    stdout and the remaining files are still processed.
    """
    intermediate_extensions = ['aux', 'log', 'out', 'toc', 'nav', 'snm', 'lof', 'lot',
                               'fls', 'fdb_latexmk', 'blg', 'bbl', 'synctex.gz']
    for ext in intermediate_extensions:
        for file in directory.glob(f'*.{ext}'):
            try:
                file.unlink()  # Delete the intermediate file
                print(f"Deleted: {file}")
            except Exception as e:
                print(f"Error deleting {file}: {e}")


def _aux_requests_bibtex(aux_file):
    """Return True when the .aux file contains a ``\\bibdata`` command."""
    try:
        return '\\bibdata' in aux_file.read_text(encoding='utf-8', errors='ignore')
    except OSError:
        return False


def compile_cv_jmp(tex_file, output_pdf, output_dir=None):
    """Compile a .tex file (pdflatex, BibTeX, pdflatex x2) and move the PDF.

    This single definition replaces two earlier ones with the same name, of
    which the later, two-argument version silently shadowed the first.
    ``output_dir`` is kept as an optional parameter so both call styles work.

    Parameters
    ----------
    tex_file : str or pathlib.Path
        The .tex file to compile. pdflatex and bibtex run in its folder.
    output_pdf : str or pathlib.Path
        Where the resulting PDF is moved to.
    output_dir : optional
        Accepted for backward compatibility and ignored, as it was in the
        original three-argument definition.

    Notes
    -----
    Uses the notebook-level ``pdflatex_path``. A failing pdflatex pass is
    reported on stdout and nothing is moved. A failing BibTeX run is reported
    but does not stop the remaining pdflatex passes.
    """
    tex_path = Path(tex_file)
    pdflatex_cmd = [pdflatex_path, '-interaction=nonstopmode', str(tex_path)]

    try:
        # First run pdflatex to generate auxiliary files
        subprocess.run(pdflatex_cmd, cwd=tex_path.parent, check=True)
        print(f"Compiled (1st pass): {tex_file}")

        # Run BibTeX only when the .aux file asks for a bibliography. BibTeX
        # exits with an error on documents without one, which the previous
        # code hid behind a bare `except: pass`.
        aux_file = tex_path.with_suffix('.aux')
        if aux_file.exists() and _aux_requests_bibtex(aux_file):
            try:
                subprocess.run(['bibtex', tex_path.stem], cwd=tex_path.parent, check=True)
                print(f"Ran BibTeX for: {tex_file}")
            except (subprocess.CalledProcessError, FileNotFoundError) as e:
                print(f"BibTeX failed for {tex_file} (continuing): {e}")

        # Run pdflatex twice more to ensure references are resolved
        subprocess.run(pdflatex_cmd, cwd=tex_path.parent, check=True)
        subprocess.run(pdflatex_cmd, cwd=tex_path.parent, check=True)
        print(f"Compiled (final pass): {tex_file}")

        # Move the PDF to the desired output location (Misc folder)
        pdf_file = tex_path.with_suffix('.pdf')
        if pdf_file.exists():
            # Make sure the destination folder exists before moving into it.
            Path(output_pdf).parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(pdf_file), str(output_pdf))
            print(f"Moved: {pdf_file} to {output_pdf}")
        else:
            print(f"No PDF was produced for {tex_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error compiling {tex_file}: {e}")

def git_commit_and_push(repo_dir, message):
    """Stage everything in a repository, commit if needed, and push.

    Parameters
    ----------
    repo_dir : str or pathlib.Path
        Working tree of the repository.
    message : str
        Commit message.

    Raises
    ------
    ValueError
        If ``repo_dir`` is not an existing directory.

    Notes
    -----
    Uses the notebook-level ``git_executable``. A failing git command is
    reported on stdout rather than raised.
    """
    repo_dir = Path(repo_dir)

    # Ensure the repository path is valid
    if not repo_dir.is_dir():
        raise ValueError(f"{repo_dir} is not a valid directory")

    git = [git_executable, "-C", str(repo_dir)]

    try:
        # Stage changes (add all modified files)
        subprocess.run(git + ["add", "."], check=True)

        # `git commit` exits non-zero when nothing is staged, which used to
        # be reported as an error and skipped the push. `git diff --cached
        # --quiet` exits 0 only when the index matches HEAD.
        staged = subprocess.run(git + ["diff", "--cached", "--quiet"])
        has_changes = staged.returncode != 0
        if has_changes:
            # Commit changes with the provided message
            subprocess.run(git + ["commit", "-m", message], check=True)

        # Push changes to the remote repository (also publishes any earlier
        # commits that have not been pushed yet)
        subprocess.run(git + ["push"], check=True)

        if has_changes:
            print("Changes committed and pushed successfully.")
        else:
            print("Nothing new to commit; push completed.")

    except subprocess.CalledProcessError as e:
        print("An error occurred while committing or pushing changes:", e)


In [None]:
# ---------------------------------------------------------------------------
# Configuration: every path and switch the rest of this cell reads.
# ---------------------------------------------------------------------------

# Set the root directory for the Job Market Hacker python file and tex files.
# root_dir holds the batch .tex files (Cover, Research, Teaching, Diversity)
# and receives Jobs_export.csv; save_dir is where the per-application folders
# are created (under save_dir/Applications).
root_dir = Path("C:/Users/Thomas/Dropbox/Apps/Overleaf/Others")
save_dir = Path("C:/Users/Thomas/Dropbox/Drive/Job Market")
# Prefix used for every file written into an application folder
# (e.g. <last_name>_CV.pdf).
last_name = 'Thomas_Lee'

# Define the path to the Git executable from GitHub Desktop
# (read by git_commit_and_push).
git_executable = r"C:\Users\Thomas\AppData\Local\GitHubDesktop\app-3.4.8\resources\app\git\cmd\git.exe"

# Define the path to your Git repository
git_dir = Path("C:/Users/Thomas/Documents/GitHub/lshyun92.github.io")

# Commit message for the changes
commit_message = "Update"

# Set the location of your CV, Job Market Paper, Teaching evaluation.
# These files are copied unchanged into each application folder.
cv_file = root_dir / 'Misc/CV.pdf'
jmp_file = root_dir / 'Misc/JMP.pdf'
div_file = root_dir / 'Misc/Diversity.pdf'
eval_file = root_dir / 'Teaching/Results.pdf'


# Delete and recreate existing folders?
# When False, an application folder that already exists is left alone.
delete_existing_folders = True
# Whether to (re)compile the batch .tex files before splitting them.
# NOTE(review): this name shadows Python's builtin `compile` for the rest of
# the kernel session; renaming it requires updating the `if compile == 1`
# check further down in this cell.
compile = True

# Path to pdflatex executable (read by compile_latex and compile_cv_jmp)
pdflatex_path = r"C:\Users\Thomas\AppData\Local\Programs\MiKTeX\miktex\bin\x64\pdflatex.exe"


# Define the URL directly (instead of reading from a file).
# This is the CSV export of the jobs spreadsheet, read with pd.read_csv below.
url = "https://docs.google.com/spreadsheets/d/1evlTQYFrQgVEXmRB8dvV9UASXXf5mvTdq_-W_6KhQaY/export?format=csv&gid=0"

# Print the URL to ensure it's correct
print(f"Using URL: {url}")

# Load the data directly from the URL into a pandas DataFrame.
# A failed download is reported and then re-raised: everything below needs
# `jobs_web`, so swallowing the error only turned it into a confusing
# NameError on the next statement.
try:
    jobs_web = pd.read_csv(url)
except FileNotFoundError as e:
    print(f"File not found error: {e}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print("Data loaded successfully!")

# Drop unnecessary columns (missing ones are ignored), blank out literal
# 'nan' strings, and discard rows that have no university name.
columns_to_drop = ['Industry', 'Link', 'Note', 'UTD Ranking']
jobs_web = (
    jobs_web
    .drop(columns=columns_to_drop, errors='ignore')
    .replace('nan', '')
    .dropna(subset=['Univ'])
)
# An empty 'Submitted' cell means "not submitted"; normalise it to '' so the
# comparisons against 'Yes' below never see NaN.
jobs_web['Submitted'] = jobs_web['Submitted'].fillna('')
# Submitted schools are intentionally kept at this point: the next step needs
# them to move their folders, and they are filtered out after that.

# Create directories if they don't exist ('Applications' and its 'Submitted'
# subfolder; parents=True also covers a missing save_dir).
(save_dir / 'Applications' / 'Submitted').mkdir(parents=True, exist_ok=True)

# The spreadsheet uses this placeholder deadline for rolling applications;
# their folders are named '[0Rolling] [...]'.
rolling_sentinel = datetime.strptime("1/1/2024", "%m/%d/%Y").date()

# Move submitted applications to 'Submitted' folder
for _, job in jobs_web.iterrows():
    if job['Submitted'] != 'Yes':
        continue
    uni_name = job['Univ']

    # Rebuild the folder label exactly as it was built when the application
    # folder was created.
    if pd.isna(job['Deadline']):
        due = 'InvalidDate'
    else:
        parsed_deadline = pd.to_datetime(job['Deadline'], errors='coerce')
        if pd.isna(parsed_deadline):
            print(f"Error processing job deadline for {uni_name}: "
                  f"cannot parse {job['Deadline']!r}")
            continue
        if parsed_deadline.date() == rolling_sentinel:
            due = '0Rolling'
        else:
            due = parsed_deadline.strftime("%Y-%m-%d")

    # Define paths and move the folder
    old_path = save_dir / f'Applications/[{due}] [{uni_name}]'
    new_path = save_dir / f'Applications/Submitted/[{due}] [{uni_name}]'

    if not old_path.exists():
        # Nothing pending for this school (typically moved on an earlier run).
        continue

    try:
        if new_path.exists():
            # shutil.move would nest the folder inside the existing
            # destination; treat the pending copy as a leftover instead.
            raise FileExistsError(f"{new_path} already exists")
        shutil.move(str(old_path), str(new_path))
        print(f'Moved [{due}_{uni_name}] to Submitted!')
    except Exception as move_error:
        # Same fallback as before: if the folder cannot be moved, remove the
        # pending copy so it does not linger among the open applications.
        # `old_path` always belongs to the current row here; the previous
        # `'old_path' in locals()` test could pick up a stale path left over
        # from an earlier iteration.
        try:
            shutil.rmtree(str(old_path))  # Use shutil.rmtree for directories
            print(f"Removed {uni_name}.")
        except Exception as remove_error:
            print(f"Failed to remove folder {old_path}: {remove_error} "
                  f"(move error was: {move_error})")


# Keep only the applications that are still open: not yet submitted and with
# a deadline filled in.
still_open = jobs_web['Submitted'] != 'Yes'
jobs_web = jobs_web.loc[still_open].dropna(subset=['Deadline'])
jobs_web1 = jobs_web

# Write the filtered list next to the .tex files. Floats are written without
# decimals.
export_csv = '{}/Jobs_export.csv'.format(root_dir)
jobs_web.to_csv(export_csv, index=False, encoding='utf-8', float_format="%.0f")

print("Jobs_export.csv Updated!")

# Two blank lines to separate this step's output from the next one.
for _ in range(2):
    print('')

# Read Job details from the Jobs spreadsheet (the file exported just above)
Job_details = pd.read_csv(export_csv)

# Number of applications to build
jobs_n = len(Job_details)

if compile == 1:
    # List of LaTeX files to compile
    tex_files = [root_dir / f'{doc}.tex' for doc in ['Cover', 'Research', 'Teaching', 'Diversity']]

    # Compile all LaTeX files in parallel. Each task is submitted explicitly
    # and its outcome inspected: the results of executor.map() were never
    # consumed, so an exception raised inside a worker (for example a missing
    # pdflatex executable) disappeared without a trace.
    with ThreadPoolExecutor() as executor:
        compile_jobs = [(tex, executor.submit(compile_latex, tex)) for tex in tex_files]
    for tex, job in compile_jobs:
        error = job.exception()
        if error is not None:
            print(f"Failed to compile {tex}: {error}")

    # Remove any log, aux, or out files keeping only pdf files
    for ext in ('log', 'aux', 'out'):
        for item in glob.glob('{}/*.{}'.format(root_dir, ext)):
            try:
                os.remove(item)
            except OSError as e:
                # Best effort: report and carry on with the other files.
                print(f"Could not remove {item}: {e}")

#print("Non-pdf files removed!")

# Path for batch pdf of application documents
RS_path = '{}/Research.pdf'.format(root_dir)
TS_path = '{}/Teaching.pdf'.format(root_dir)
DS_path = '{}/Diversity.pdf'.format(root_dir)
CL_path = '{}/Cover.pdf'.format(root_dir)


# Read in each PDF
CL_pdf = PdfReader(CL_path)
TS_pdf = PdfReader(TS_path)
RS_pdf = PdfReader(RS_path)
DS_pdf = PdfReader(DS_path)

# The next section of code will find the end of each sub-document within the
# batch PDF so that the documents can be spliced.
# NOTE: Cover letters are designed to only be one page. If you have cover letters
# longer than one page, this will fail. You can repeat the same logic used for
# the other documents if you need.


def _subdocument_end_pages(reader):
    """Return the page indexes at which a new sub-document starts.

    The indexes are read from the PDF's /PageLabels number tree: the label
    dictionaries and the leading 0 are dropped, leaving one index per
    sub-document boundary. An empty list is returned when the PDF has no
    usable /PageLabels entry, so the caller always gets a list. Previously
    the variable was simply left undefined in that case.
    """
    try:
        nums = reader.trailer["/Root"]["/PageLabels"]["/Nums"]
        return [entry for entry in nums if isinstance(entry, int) and entry != 0]
    except Exception:
        # Best effort, as before: callers fall back to the total page count.
        return []


TS_page_end = _subdocument_end_pages(TS_pdf)
RS_page_end = _subdocument_end_pages(RS_pdf)
DS_page_end = _subdocument_end_pages(DS_pdf)

# First page of the next sub-document to extract from each batch PDF
TS_start = 0
RS_start = 0
DS_start = 0

# Number of sub-documents already extracted from each batch PDF
TS_count = 0
RS_count = 0
DS_count = 0


### Begin the loop over each job application ###
print("Begin the loop over each job application")

# The page-end lists may be undefined when a batch PDF carries no page
# labels; treat that as "no recorded boundaries" instead of relying on a
# NameError being swallowed inside the loop.
try:
    TS_page_end
except NameError:
    TS_page_end = []
try:
    RS_page_end
except NameError:
    RS_page_end = []
try:
    DS_page_end
except NameError:
    DS_page_end = []


def _extract_next_statement(reader, page_ends, start, count, out_path=None):
    """Cut the next sub-document out of a batch PDF.

    Pages ``start`` up to (not including) the next boundary are written to
    ``out_path``. The boundary is ``page_ends[count]``, or the total page
    count when no such entry exists. With ``out_path=None`` nothing is
    written and only the bookkeeping advances.

    Returns the pair ``(next_start, next_count)``.
    """
    try:
        end = page_ends[count]
    except IndexError:
        # Last (or only) sub-document: it runs to the end of the batch PDF
        end = len(reader.pages)
    if out_path is not None:
        writer = PdfWriter()
        for page_index in range(start, end):
            writer.add_page(reader.pages[page_index])
        with open(out_path, 'wb') as out:
            writer.write(out)
    return end, count + 1


# Placeholder deadline that, together with Rolling == 'Yes', marks a rolling
# application
rolling_sentinel = datetime.strptime("1/1/2024", "%m/%d/%Y").date()

# Loop over each entry in the job details CSV
for page in range(0, jobs_n):
    # Store the name of the job from header "Univ" from Job Details CSV and
    # process it for use as a folder name (undo LaTeX escaping of '&', turn
    # a LaTeX line break into a bracket separator)
    name = Job_details.Univ[page]
    name = name.replace(r"\&", "&")
    name = name.replace(" \\\\ ", "] [")
    print('[{}]'.format(name))

    # Store the "Deadline" header from Job Details CSV (due date of
    # application) as YYYY-MM-DD so that folders sort by due date
    deadline = datetime.strptime(Job_details.Deadline[page], "%m/%d/%Y").date()
    due = deadline.strftime("%Y-%m-%d")

    # The naming convention allows for sort by due date. All applications
    # with a rolling deadline begin with 0 so as to be placed at the top
    if Job_details.Rolling[page] == 'Yes' and deadline == rolling_sentinel:
        due = '0Rolling'

    # Where to save each application. The naming format puts due date first
    # (for sort purposes), then Name from Job Details
    savepath = '{}/Applications/[{}] [{}]'.format(save_dir, due, name)

    # Create directory for Application
    skip_existing = False
    if os.path.exists(savepath):
        if delete_existing_folders == 1:
            print('   Previous application for [{}] exists. Deleting!'.format(name))
            shutil.rmtree(savepath)
            os.mkdir(savepath)
        else:
            print('   Application for [{}] already exists'.format(name))
            skip_existing = True
    else:
        os.mkdir(savepath)

    if not skip_existing:
        # Split out this application's cover letter.
        # ONLY WORKS WITH SINGLE PAGE Cover Letter: page i of Cover.pdf is
        # taken to belong to row i of Job_details.
        pdf_writer_CL = PdfWriter()
        pdf_writer_CL.add_page(CL_pdf.pages[page])
        output_filename = '{}/{}_Cover_Letter.pdf'.format(savepath, last_name)
        with open(output_filename, 'wb') as out:
            pdf_writer_CL.write(out)

    ### Begin processing statement documents. In order for the loop to process
    # a document, there must be something in the XXXXStatementCat column of the
    # Job_details csv.
    # The position counters advance even for a skipped application; otherwise
    # every later application would receive the statement that belongs to an
    # earlier row.

    # Teaching Statement
    if pd.notna(Job_details.TeachingStatementCat[page]):
        TS_start, TS_count = _extract_next_statement(
            TS_pdf, TS_page_end, TS_start, TS_count,
            None if skip_existing else '{}/{}_Teaching.pdf'.format(savepath, last_name),
        )

    # Research Statement
    if pd.notna(Job_details.ResearchStatementCat[page]):
        RS_start, RS_count = _extract_next_statement(
            RS_pdf, RS_page_end, RS_start, RS_count,
            None if skip_existing else '{}/{}_Research.pdf'.format(savepath, last_name),
        )

    # Diversity Statement
    if pd.notna(Job_details.DiversityStatementCat[page]):
        DS_start, DS_count = _extract_next_statement(
            DS_pdf, DS_page_end, DS_start, DS_count,
            None if skip_existing else '{}/{}_Diversity.pdf'.format(savepath, last_name),
        )

    if skip_existing:
        # If application folder exists, continue
        continue

    # Copy teaching evaluation into Application folder
    if Job_details.TeachingStatementCat[page] == "full":
        dst = '{}/{}_Teaching_Eval.pdf'.format(savepath, last_name)
        copyfile(eval_file, dst)

    # Copy the stand-alone diversity statement into Application folder. This
    # overwrites any diversity statement split out above.
    # NOTE(review): the condition tests TeachingStatementCat, same as the
    # block above; confirm it is not meant to be DiversityStatementCat.
    if Job_details.TeachingStatementCat[page] == "full":
        dst = '{}/{}_Diversity.pdf'.format(savepath, last_name)
        copyfile(div_file, dst)

    # Copy CV into Application folder
    dst = '{}/{}_CV.pdf'.format(savepath, last_name)
    copyfile(cv_file, dst)

    # Copy Job Market Paper
    dst = '{}/{}_JMP.pdf'.format(savepath, last_name)
    copyfile(jmp_file, dst)

    # Additional papers/documents can be copied to the application folder
    # using the SecondPaper and ThirdPaper columns in the Job_details csv.
    # Each cell holds a file name inside <root_dir>/Misc. Empty cells and
    # missing columns are skipped. The previous code built the source path
    # with a format string that was missing an argument, so ThirdPaper was
    # never copied.
    for paper_column in ('SecondPaper', 'ThirdPaper'):
        if paper_column not in Job_details.columns:
            continue
        paper_file = Job_details[paper_column][page]
        if pd.isna(paper_file) or str(paper_file).strip() == "":
            continue
        src = '{}/Misc/{}'.format(root_dir, paper_file)
        dst = '{}/{}'.format(savepath, paper_file)
        try:
            copyfile(src, dst)
        except OSError as e:
            print('   Could not copy {} for [{}]: {}'.format(paper_file, name, e))

    print('   [{}] Application created!'.format(name))

# Remove batch pdf and aux files from root_dir now that every application
# has been assembled. Deletion stays best effort, but a failure is reported
# instead of being hidden by a bare `except: pass`.
for pattern in ('*.pdf', '*.aux'):
    for item in glob.glob('{}/{}'.format(root_dir, pattern)):
        try:
            os.remove(item)
        except OSError as e:
            print(f"Could not remove {item}: {e}")
        

Using URL: https://docs.google.com/spreadsheets/d/1evlTQYFrQgVEXmRB8dvV9UASXXf5mvTdq_-W_6KhQaY/export?format=csv&gid=0
Data loaded successfully!
Jobs_export.csv Updated!


Compiled: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Research.tex
Compiled: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Diversity.tex
Compiled: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Teaching.tex
Compiled: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Cover.tex
Begin the loop over each job application
[Northeastern University]
   Previous application for [Northeastern University] exists. Deleting!
   [Northeastern University] Application created!
[Tulane University]
   Previous application for [Tulane University] exists. Deleting!
   [Tulane University] Application created!
[University of Texas at San Antonio]
   Previous application for [University of Texas at San Antonio] exists. Deleting!
   [University of Texas at San Antonio] Application created!
[Binghamton University]
   Previous application for [Bingh

Updating CV and JMP

In [2]:
# Path to pdflatex executable
pdflatex_path = r"C:\Users\Thomas\AppData\Local\Programs\MiKTeX\miktex\bin\x64\pdflatex.exe"

# Git settings. They are repeated here, like pdflatex_path and the folders
# below, so that this cell also runs on a fresh kernel: git_commit_and_push
# reads `git_executable`, and `commit_message` was previously only defined in
# the application-building cell.
git_executable = r"C:\Users\Thomas\AppData\Local\GitHubDesktop\app-3.4.8\resources\app\git\cmd\git.exe"
commit_message = "Update"

# Set the root directory for LaTeX files
root_dir = Path("C:/Users/Thomas/Dropbox/Apps/Overleaf/Others")
misc_dir = root_dir / "Misc"
applications_dir = root_dir / "Applications"
git_dir = Path("C:/Users/Thomas/Documents/GitHub/lshyun92.github.io")

# Paths for .tex files
cv_tex_file = root_dir / 'CV.tex'  # CV.tex directly in root_dir
main_draft_tex_file = Path("C:/Users/Thomas/Dropbox/Apps/Overleaf/School Bonds/main_draft.tex")  # main_draft.tex in School Bonds folder

# Paths to compiled PDFs that will be placed in the Misc folder
compiled_cv_pdf = misc_dir / 'CV.pdf'
compiled_main_draft_pdf = misc_dir / 'JMP.pdf'


def _modified_ns(path):
    """Return the file's modification time in ns, or None if it is missing."""
    return path.stat().st_mtime_ns if path.exists() else None


# Remember what the outputs looked like before compiling: compile_cv_jmp
# reports failures on stdout only, so a changed timestamp is how this cell
# tells a fresh PDF from a stale or missing one.
outputs = (compiled_cv_pdf, compiled_main_draft_pdf)
modified_before = {pdf: _modified_ns(pdf) for pdf in outputs}

# Compile CV.tex and main_draft.tex, and move to Misc folder
compile_cv_jmp(cv_tex_file, compiled_cv_pdf)
compile_cv_jmp(main_draft_tex_file, compiled_main_draft_pdf)

not_refreshed = [
    pdf for pdf in outputs
    if _modified_ns(pdf) is None or _modified_ns(pdf) == modified_before[pdf]
]

if not_refreshed:
    # Do not publish missing or stale documents.
    for pdf in not_refreshed:
        print(f"Not updated by this run, skipping GitHub update: {pdf}")
else:
    # Update to github
    shutil.copy(compiled_cv_pdf, git_dir / 'CV.pdf')
    shutil.copy(compiled_main_draft_pdf, git_dir / 'JMP.pdf')
    # Run the function to commit and push changes
    git_commit_and_push(git_dir, commit_message)
# Example: Copy compiled PDFs to each job application folder
#copy_pdfs_to_applications(applications_dir, compiled_cv_pdf, compiled_main_draft_pdf, 'Thomas_Lee')

# Clean up intermediate LaTeX files in the root_dir
cleanup_intermediate_files(root_dir)

Compiled (1st pass): C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.tex
Compiled (final pass): C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.tex
Moved: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.pdf to C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Misc\CV.pdf
Compiled (1st pass): C:\Users\Thomas\Dropbox\Apps\Overleaf\School Bonds\main_draft.tex
Ran BibTeX for: C:\Users\Thomas\Dropbox\Apps\Overleaf\School Bonds\main_draft.tex
Compiled (final pass): C:\Users\Thomas\Dropbox\Apps\Overleaf\School Bonds\main_draft.tex
Moved: C:\Users\Thomas\Dropbox\Apps\Overleaf\School Bonds\main_draft.pdf to C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\Misc\JMP.pdf
Changes committed and pushed successfully.
Deleted: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.aux
Deleted: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.log
Deleted: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.out
Deleted: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\CV.blg
Deleted: C:\Users\Thomas\Dropbox\Apps\Overleaf\Others\