Skip to content

Commit

Permalink
Merge pull request #7 from temken/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Timon Emken committed Apr 24, 2020
2 parents c8e2c4d + c670572 commit 11670ba
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 40 deletions.
10 changes: 7 additions & 3 deletions comparxiv/command_line.py
Expand Up @@ -6,10 +6,12 @@ def main():
parser = argparse.ArgumentParser(description="Comparxiv v" + comparxiv.version + ", developed by " + comparxiv.author + " ("+ comparxiv.year + ") - Compare two versions of an arXiv preprint.")
parser.add_argument("-T","--keep_temp_files", help="Do not delete temporary files in the end.",
action="store_true")
parser.add_argument("-L","--show_pdflatex_output", help="Show the terminal output of pdflatex.",
parser.add_argument("-L","--show_latex_output", help="Show the terminal output of pdflatex.",
action="store_true")
parser.add_argument("-P","--dont_open_pdf", help="Do not automatically open the generated pdf in the end.",
action="store_true")
parser.add_argument("-E","--dont_compare_equations", help="Run latexdiff with the flag --math-markup=0.",
action="store_true")
parser.add_argument("arxiv_ID", help = "The arXiv ID of the paper to be compared, e.g. \'1905.06348\'.",
type = check_arxiv_ID)
parser.add_argument("version_A", help = "The reference version of the preprint to be compared. (Default: 1)",
Expand All @@ -28,9 +30,11 @@ def main():
elif user_given_version > 1:
args.version_A = user_given_version-1
args.version_B = user_given_version

elif args.version_A == args.version_B:
raise argparse.ArgumentTypeError("Versions to compare are identical.")

comparxiv.print_title(args.arxiv_ID,args.version_A,args.version_B)
comparxiv.compare_preprints(args.arxiv_ID,args.version_A,args.version_B,args.keep_temp_files,args.show_pdflatex_output,args.dont_open_pdf)
comparxiv.compare_preprints(args.arxiv_ID,args.version_A,args.version_B,args.keep_temp_files,args.show_latex_output,args.dont_open_pdf,args.dont_compare_equations)

def check_version_input(value):
try:
Expand Down
93 changes: 60 additions & 33 deletions comparxiv/comparxiv.py
Expand Up @@ -2,17 +2,18 @@

import os
import sys
import arxiv
import requests

from sys import platform
from tqdm import tqdm

version = '0.1.3'
version = '0.1.4'
author = 'Timon Emken'
year = '2020'

temp_folder = ".temp_comparxiv/"
def compare_preprints(arxiv_ID, version_a, version_b,keep_temp,show_latex_output,dont_open_pdf):
def compare_preprints(arxiv_ID, version_a, version_b,keep_temp,show_latex_output,dont_open_pdf,dont_compare_equations):

#Check if old or new arxiv ID
if "/" in arxiv_ID:
Expand All @@ -30,48 +31,56 @@ def compare_preprints(arxiv_ID, version_a, version_b,keep_temp,show_latex_output
temp_folder_b = './' + temp_folder + 'temp_' + ID_b+'/'
diff_file = os.path.split(arxiv_ID)[-1]+"_v"+str(version_a)+"v"+str(version_b)

# #1. Download and unpack files
print_paper_information(arxiv_ID,version_a,version_b)

#1. Download and unpack files
print("1.) Download and unpack source files:")
download_from_arxiv(arxiv_ID,version_a)
download_from_arxiv(arxiv_ID,version_b)

unpack_source_files(arxiv_ID,version_a,temp_folder_a)
unpack_source_files(arxiv_ID,version_b,temp_folder_b)

#2. Identify the .tex and .bbl files.
#2.1 tex files
print("\n2.1) Identify master tex files:")
master_file_a = identify_master_tex_file(temp_folder_a,arxiv_ID)
master_file_b = identify_master_tex_file(temp_folder_b,arxiv_ID)
#2.2 bbl files
print("\n2.2) Identify bbl files:")
bbl_file_a = identify_bbl_file(temp_folder_a,arxiv_ID)
bbl_file_b = identify_bbl_file(temp_folder_b,arxiv_ID)

#3. Latexdiff
#3.1 tex files
print("Run latexdiff on .tex files:")
print("\t",temp_folder_a+master_file_a)
print("\t",temp_folder_b+master_file_b)
print("\n3.1) Run latexdiff on the tex files.")

latexdiff_command_tex = "latexdiff "
if show_latex_output == False:
latexdiff_command_tex = "latexdiff --ignore-warnings "+temp_folder_a+master_file_a+" "+temp_folder_b+master_file_b+">"+temp_folder_b+diff_file+".tex"
else:
latexdiff_command_tex = "latexdiff "+temp_folder_a+master_file_a+" "+temp_folder_b+master_file_b+">"+temp_folder_b+diff_file+".tex"
latexdiff_command_tex += "--ignore-warnings "
if dont_compare_equations:
latexdiff_command_tex += "--math-markup=0 "

latexdiff_command_tex += temp_folder_a+master_file_a+" "+temp_folder_b+master_file_b+">"+temp_folder_b+diff_file+".tex"

os.system(latexdiff_command_tex)

#3.2 Try to run latexdiff on bbl.
if bbl_file_a != None and bbl_file_b != None:
print("\nRun latexdiff on .bbl files:")
print("\t",temp_folder_a+bbl_file_a)
print("\t",temp_folder_b+bbl_file_b)
print("\n3.2) Run latexdiff on the bbl files.")
if show_latex_output == False:
latexdiff_command_bbl = "latexdiff --ignore-warnings "+temp_folder_a+bbl_file_a+" "+temp_folder_b+bbl_file_b+">"+temp_folder_b+diff_file+".bbl"
else:
latexdiff_command_bbl = "latexdiff "+temp_folder_a+bbl_file_a+" "+temp_folder_b+bbl_file_b+">"+temp_folder_b+diff_file+".bbl"
os.system(latexdiff_command_bbl)

#4. Run pdflatex
print("\n4.) Generate a pdf with pdflatex.")
Generate_PDF(diff_file,temp_folder_b,show_latex_output)

#5. If unsuccessful, try again with a copy of the version b .bbl file.
if bbl_file_b != None and os.path.isfile(temp_folder_b+diff_file+".pdf") == False:
print("\nCopy the .bbl file of version b.")
print("\tWarning: No pdf could be generated. Copy the .bbl file of version b and try again.")
os.system("cp "+ temp_folder_b + bbl_file_b + " " + temp_folder_b + diff_file+".bbl")
Generate_PDF(diff_file,temp_folder_b,show_latex_output)

Expand All @@ -84,28 +93,44 @@ def compare_preprints(arxiv_ID, version_a, version_b,keep_temp,show_latex_output
#7. If successful copy the .pdf.
if success:
os.system("mv " +temp_folder_b+diff_file+".pdf" + " ./" + diff_file+".pdf")
print("\nFinished: success.")
if dont_open_pdf == False:
if platform == "linux" or platform == "linux2":
os.system("xdg-open "+diff_file+".pdf")
elif platform == "darwin":
os.system("open "+diff_file+".pdf")
print("\nSuccess!")

else:
print("\nFinished: failure. No pdf file could be generated.")
print("\nFail! No pdf file could be generated.\nTroubleshooting:")
print("\t1.) To see more terminal output run:\n\t\t'comparxiv --show_latex_output "+arxiv_ID+" "+str(version_a)+" " + str(version_b) +"'")
print("\t2.) In some cases latex math environments cause problems with latexdiff. Try running:\n\t\t'comparxiv --dont_compare_equations "+arxiv_ID+" "+str(version_a)+" " + str(version_b) +"'")

#8. Delete temporary files
if keep_temp == False:
remove_temporary_files(ID_a)

return success

def print_paper_information(arxiv_ID,vA,vB):
    """Print the title and author list of the preprint being compared.

    Queries the arXiv API through ``arxiv.query`` for the two versioned
    IDs ``<arxiv_ID>v<vA>`` and ``<arxiv_ID>v<vB>`` and prints:

    * the title of the second returned entry,
    * the title of the first returned entry as "Old title" if it differs,
    * the author(s) of the second returned entry (a single name, the full
      comma-separated list, or "<first author> et al." for more than six).

    The output is purely informational. If the query does not return both
    entries, a warning is printed and the function returns instead of
    raising ``IndexError``, so the caller can still go on to download and
    compare the sources.

    Args:
        arxiv_ID: The arXiv identifier without a version suffix.
        vA: Version number of the reference version.
        vB: Version number of the version compared against the reference.
    """
    papers = arxiv.query(query="",
                         id_list=[arxiv_ID + "v" + str(vA), arxiv_ID + "v" + str(vB)],
                         max_results=2)

    # Both entries are indexed below; bail out gracefully if one is missing.
    if len(papers) < 2:
        print("Warning: Could not retrieve the paper information from arXiv.\n")
        return

    # NOTE(review): presumably the results come back in id_list order
    # (version A first, version B second) -- confirm against the arxiv package.
    paper_a, paper_b = papers[0], papers[1]

    print("Title:\t\t",paper_b.title)
    if paper_a.title != paper_b.title:
        print("Old title:\t",paper_a.title)

    authors = paper_b.authors
    if len(authors) == 1:
        print("Author:\t",authors[0],"\n")
    elif len(authors) > 6:
        # Long author lists are abbreviated to the first author.
        print("Authors:\t",authors[0],"et al.","\n")
    else:
        print("Authors:\t",", ".join(authors),"\n")

def Generate_PDF(file, folder, show_latex_output):
os.chdir(folder)
pdflatex_command = "pdflatex -interaction=nonstopmode "+file+".tex"
if show_latex_output == False:
pdflatex_command += " 2>&1 > /dev/null"
print("Compile .tex file via")
print("\t",pdflatex_command,"\n")
os.system(pdflatex_command)
os.system(pdflatex_command)
os.chdir("../..")
Expand Down Expand Up @@ -143,7 +168,7 @@ def download_from_arxiv(arxiv_ID,version):
filepath = "./"+temp_folder+arxiv_ID+"v"+str(version)

if os.path.isfile(filepath) == False:
url="https://arxiv.org/e-print/"+arxiv_ID+"v"+str(version)
url="https://arxiv.org/src/"+arxiv_ID+"v"+str(version)
download_from_url(url,filepath)
else:
print("Download of source files for "+arxiv_ID+"v"+str(version)+" not necessary.")
Expand All @@ -156,49 +181,51 @@ def unpack_source_files(arxiv_ID,version,path_destination):
path_source = "./"+temp_folder+os.path.split(version_ID)[-1]
else:
path_source = "./"+temp_folder+version_ID

print(path_destination,path_source)

# Create folder for temporary files
print("Unpack source files of",version_ID,"to",path_destination,".")
if os.path.isfile(path_source) and os.path.exists(path_destination) == False:
os.makedirs(path_destination)
# Unpack files
os.system('tar -xzf '+path_source +' -C '+ path_destination)

def identify_master_tex_file(path,arxiv_ID):
master_file = None
tex_files = []
for file in os.listdir(path):
files = os.listdir(path)
for file in files:
if file.endswith(".tex") and (file.startswith(arxiv_ID) or file.startswith(os.path.split(arxiv_ID)[-1]))== False:
tex_files.append(file)
if len(tex_files) == 1:
master_file = tex_files[0]
else:
if len(tex_files) > 1:
for file in tex_files:
with open(path+file) as f:
if 'begin{document}' in f.read():
master_file = file
break
else:
print("Error in identify_master_tex_file(): Among the ",len(tex_files)," tex files, no master file could be identified.")
os.abort()
return master_file
elif len(tex_files) == 1:
master_file = tex_files[0]
elif len(tex_files) == 0 and len(files)==1:
os.rename(path + file, path + file + ".tex")
master_file = file + ".tex"
if master_file == None:
print("Error in identify_master_tex_file(): Among the ",len(tex_files)," tex files, no master file could be identified.")
os.abort()
else:
print("\t",arxiv_ID+path[-4:-1],": ",master_file)
return master_file

def identify_bbl_file(path, arxiv_ID):
# Possibility a: A .bbl file exists.
for file in os.listdir(path):
if file.endswith('.bbl') and not file.startswith(arxiv_ID):
bbl_file = file
print("Bibliography (.bbl) file in",path,":\t",bbl_file)
break
# Possibility b: No .bbl file exists.
else:
bbl_file = None
print("No .bbl file found in\t",path)

print("\t",arxiv_ID+path[-4:-1],": ",bbl_file)
return bbl_file

def remove_temporary_files(arxiv_ID):
print("Delete temporary files.")
os.system("rm -r "+ temp_folder)

def print_title(ID,v1,v2):
Expand Down
8 changes: 6 additions & 2 deletions comparxiv/tests/test_comparxiv.py
Expand Up @@ -4,6 +4,10 @@
from ..comparxiv import *

def test_comparxiv():
test_preprints = ["hep-ph/0612065","1709.06573","1905.05776"]
test_preprints = ["hep-ph/0612065","1709.06573","1901.04503","1905.05776"]
keep_temp_files = False
show_latex_output = False
dont_open_pdf = True
dont_compare_equations = False
for ID in test_preprints:
assert compare_preprints(ID, 1, 2, False, False, True)
assert compare_preprints(ID, 1, 2, keep_temp_files, show_latex_output, dont_open_pdf, dont_compare_equations)
5 changes: 3 additions & 2 deletions setup.py
Expand Up @@ -5,7 +5,7 @@ def readme():
return f.read()

setup(name='comparxiv',
version='0.1.3',
version='0.1.4',
description='Compare two versions of an arXiv preprint with latexdiff.',
long_description = readme(),
long_description_content_type='text/markdown',
Expand All @@ -18,7 +18,8 @@ def readme():
install_requires=[
'tqdm',
'argparse',
'requests'
'requests',
'arxiv'
],
entry_points = {
'console_scripts': ['comparxiv=comparxiv.command_line:main'],
Expand Down

0 comments on commit 11670ba

Please sign in to comment.