In [1]:
import sys
!{sys.executable} -m pip install python-hebrew-numbers
!{sys.executable} -m pip install pdfrw



In [74]:
#imports various packages
import json, urllib.request
from urllib.parse import urlparse, parse_qs
import os, platform, subprocess, csv
import math
from hebrew_numbers import int_to_gematria
import pdfrw
from pdfrw import PdfReader, PdfWriter

In [75]:
#this function pulls a text from Sefaria's github repo, given a string with the name of the text in the repo's format
#this will presumably be replaced with pulling a text from a downloaded copy of the sefaria repo
def pull_text(string_for_link):
    """Download one text from the Sefaria-Export GitHub repo and parse it as JSON.

    Args:
        string_for_link: path of the text below the export's ``json/`` folder,
            without the ``.json`` suffix.

    Returns:
        The decoded JSON document.

    Raises:
        urllib.error.URLError: if the file cannot be fetched.
    """
    from urllib.parse import quote

    # Percent-encode the whole path rather than only spaces, so non-ASCII or
    # otherwise URL-unsafe characters in a title still give a valid URL.
    # "/" stays a separator and "%" is kept so pre-escaped input passes through.
    encoded_path = quote(string_for_link, safe="/%")
    link = "https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/json/"+encoded_path+".json"
    print(link)
    with urllib.request.urlopen(link) as url:
        text_json = json.loads(url.read().decode())
    return text_json

In [76]:
#this generates a list of links between texts in sefaria
#this is used to link comments in the gemara to gemara they're on
def pull_links():
    """Collect the commentary links from the local link CSV dumps.

    Reads ``links/links0.csv`` through ``links/links8.csv`` below the current
    working directory, skips each file's header row, and keeps the rows whose
    third column equals "commentary".

    Returns:
        A list of two-item lists ``[row[0], row[1]]`` in file order.
    """
    commentary_pairs = []
    links_dir = os.path.join(os.getcwd(), "links")
    for file_index in range(9):
        csv_path = os.path.join(links_dir, "links" + str(file_index) + ".csv")
        with open(csv_path, encoding="utf-8") as handle:
            rows = csv.reader(handle, delimiter=',')
            next(rows)  # header row
            commentary_pairs.extend(
                [row[0], row[1]] for row in rows if row[2] == "commentary"
            )
    return commentary_pairs

In [77]:
link_list = pull_links()  # module-level list of commentary link pairs; make_body passes it to match_chapters

In [78]:
def match_comment(comment_str, links):#matches a comment with the gemara it's on
    for link in links:#for every link in the list
        #if the text is in the list, return the text it's linked to
        if comment_str in link[0]:
            return link[1]
        elif comment_str in link[1]:
            return link[0]
    return

In [79]:
def get_index_json(masekhet):
    """Return the chapter (perek) nodes from a masekhet's Sefaria-Export schema.

    The schema is downloaded from GitHub the first time a masekhet is requested
    and then served from an in-memory cache. find_perek calls this once per
    comment, so without the cache the same file is fetched over and over.

    Args:
        masekhet: masekhet name; spaces are converted to underscores for the URL.

    Returns:
        The list stored under ``["alts"]["Chapters"]["nodes"]`` in the schema.
        The same list object is returned on repeated calls, so treat it as
        read-only.
    """
    masekhet = masekhet.replace(" ","_")
    # The cache lives on the function object, so re-running the defining cell resets it.
    cache = getattr(get_index_json, "_cache", None)
    if cache is None:
        cache = get_index_json._cache = {}
    if masekhet not in cache:
        link = "https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/schemas/"+masekhet+".json"
        with urllib.request.urlopen(link) as url:
            index_json = json.loads(url.read().decode())
        cache[masekhet] = index_json["alts"]["Chapters"]["nodes"]
    return cache[masekhet]

In [80]:
def find_perek(name):
    """Look up which perek a reference such as "Berakhot 2a:3" belongs to.

    Anything up to and including the first " on " is dropped, so commentary
    references resolve through the text they comment on. The masekhet is the
    first word (two-word reference) or the first two words (otherwise).

    Returns:
        ``(title, heTitle)`` of the matching chapter node, or the plain string
        "<masekhet> <daf>" when no chapter matches. Callers must cope with
        both shapes.
    """
    marker = " on "
    if marker in name:
        name = name[name.find(marker) + len(marker):]
    words = name.split(' ')
    masekhet = words[0] if len(words) == 2 else words[0] + " " + words[1]
    daf, line = words[-1].split(":")
    chapters = get_index_json(masekhet)
    ref = masekhet + " " + daf
    for chapter in chapters:
        for page in chapter["refs"]:
            if ":" not in page:
                # Whole-amud ref: must equal the reference exactly.
                if page == ref:
                    return chapter["title"], chapter['heTitle']
                continue
            if ref not in page:
                continue
            # Ref with a line part, e.g. "2a:1-24" or "2a:5".
            bounds = page.split(":")[1].split("-")
            if len(bounds) > 1:
                if int(bounds[0]) <= int(line) <= int(bounds[1]):
                    return chapter["title"], chapter['heTitle']
            elif len(bounds) == 1:
                if int(bounds[0]) == int(line):
                    return chapter["title"], chapter['heTitle']
    return ref

In [81]:
def match_chapters(text_json, links):
    """Insert perek-break markers into a daf-structured text, in place.

    Walks text_json["text"] (one entry per amud) and, whenever the perek
    returned by find_perek changes, inserts a dict
    {"name_en": ..., "name_he": ...} into that amud's list at the position of
    the current comment. For texts whose "categories" contain "commentary"
    the perek is looked up through the first entry of `links` whose link[0]
    equals the comment's reference.

    Args:
        text_json: text document; reads "title", "text" and "categories".
        links: list of [ref, ref] pairs, as produced by pull_links.

    Returns:
        The same text_json object, mutated.
    """
    #adds perek breaks to text json
    refs = []  # never used in this function
    # The next two keys are initialised here but never filled in by this function.
    text_json["text_perakim"] = []#original text, but with perakim breaks noted
    text_json["chap_list"] = []#list of chapter titles
    daf_i = 1#initializing counters.
    #Sefaria puts a blank spot for daf 1, so it starts with 1 not 2.
    title = text_json["title"]
    current_perek = ""
    j=0  # index of the current amud inside text_json["text"]
    for daf in text_json["text"]:
        if daf != []:#if daf isn't empty
            comment_i = 1#comment counter
            for comment in daf:
                daf_num = math.floor(daf_i)#rounds daf down from 0.5 to get real number
                if daf_num == daf_i:
                    amud = "a"
                else:
                    amud = "b"#for daf with 0.5, it's daf X amud b
                daf_ref = str(daf_num)+amud #makes the daf reference, like 4b
                new_ref = title +" "+ daf_ref + ":"+str(comment_i)
                #adds masekhet name to daf reference
                if "commentary" in text_json["categories"]:
                    gemara_ref = ""
                    for link in links:#for every link in link list
                        if link[0] == new_ref:#if the link is to the relevant daf
                            gemara_ref = link[1]
                            break
                    if "-" in gemara_ref:#if a reference spans a daf
                        gemara_ref = gemara_ref.split("-")[0]#returns the first part
                    if gemara_ref != "":
                        ref_perek = find_perek(gemara_ref)#looks up the perek of the daf of gemara
                        if ref_perek != current_perek:#if the reference is a new perek
                            current_perek = ref_perek#set current perek
                            perek_info = {"name_en":ref_perek[0],"name_he":ref_perek[1]}
                            # NOTE(review): text_json["text"][j] is the list `daf` being iterated,
                            # so this insert shifts the remaining elements mid-loop - confirm intended.
                            text_json["text"][j].insert(comment_i-1,perek_info)
                            #the above adds a dict with info on the perek into the text json
                else:
                    ref_perek = find_perek(new_ref)
                    if ref_perek != current_perek:#if the reference is a new perek
                        current_perek = ref_perek#set current perek
                        perek_info = {"name_en":ref_perek[0],"name_he":ref_perek[1]}
                        # NOTE(review): the membership test is against the top-level list of amudim,
                        # not the current amud. The {'N','i'} literal presumably filters the plain
                        # string find_perek returns when no chapter matches - verify.
                        if perek_info not in text_json["text"] and perek_info != {'name_en': 'N', 'name_he': 'i'}:
                            # Same in-place insert into the list being iterated as above.
                            text_json["text"][j].insert(comment_i-1,perek_info)
                        #the above adds a dict with info on the perek into the text json
                comment_i += 1
        daf_i += 0.5  # each entry of text_json["text"] is one amud, i.e. half a daf
        j += 1
    return text_json

In [82]:
def make_body_json(hebrew_text, english_text, settings, commentaries):
    """Build the LaTeX body for a text whose "text" is a dict keyed by chapter title.

    Iterates hebrew_text["text"].items(); each chapter value may be a list of
    paragraphs, or a dict of sections whose values are again lists or dicts of
    subsections. Hebrew headings are looked up in hebrew_text["schema"]["nodes"]
    by matching "enTitle". Each paragraph is rendered via make_section_json.

    Args:
        hebrew_text: text document; reads "heTitle", "schema" and "text".
        english_text: parallel English document, or None for Hebrew only.
        settings: reads "newpage" and "layout".
        commentaries: passed through to make_section_json.

    Returns:
        (title_command, output, title): the texttitle newcommand string, the list
        of LaTeX lines, and the Hebrew title.
    """
    output = []
    parbreak = "\n\n"  # unused
    title = hebrew_text["heTitle"]
    title_command = r"\newcommand{\texttitle}{"+title+"}"#sets title
    chap_num = 0
    structure = hebrew_text["schema"]["nodes"]
    for chapter in hebrew_text["text"].items():
        chap_title_eng = chapter[0]
        chap_content = chapter[1]
        chap_title_he = ""
        # Find the Hebrew chapter title; stays "" when no schema node matches.
        for chap_name in structure:
            if chap_title_eng == chap_name["enTitle"]:
                chap_title_he = chap_name["heTitle"]
                break
        if settings["newpage"] == 1:
            output.append(r"\clearpage")
        new_chap_text = r"\newchap{"+chap_title_he+"}"
        output.append(new_chap_text)
        if type(chap_content) == list:# if the chapter has text, rather than subsections:
            i=0
            for par in chap_content:
                #prints next block of text
                textblock = ""
                comments = []
                if type(par)==list:
                    j=0
                    for chunk in par:
                        if english_text != None and english_text["text"][chap_title_eng] != []:
                            new_text = make_section_json(chunk,english_text["text"][chap_title_eng][i][j], settings, commentaries,chap_title_eng,i,j)
                        else:
                            new_text = make_section_json(chunk,None, settings, commentaries,chap_title_eng,i,j)
                        if new_text != "":
                            output.append(new_text)
                        j+=1
                elif type(par)==str:
                    if english_text != None and english_text["text"][chap_title_eng] != []:
                        new_text = make_section_json(par,english_text["text"][chap_title_eng][i], settings, commentaries,chap_title_eng,i,None)
                    else:
                        new_text = make_section_json(par,None, settings, commentaries,chap_title_eng,i,None)
                    if new_text != "":
                        output.append(new_text)
                i += 1
        elif type(chap_content)==dict:#if chapter contains sections
            # NOTE(review): indexes the schema by position; assumes schema nodes are in the
            # same order as the keys of hebrew_text["text"] - confirm.
            chap_structure = structure[chap_num]["nodes"]
            sect_num = 0
            for section in chap_content.items():
                sect_title_eng = section[0]
                sect_content = section[1]
                sect_title_he = ""
                for sect_name in chap_structure:#[chap_title_eng]:
                    if sect_title_eng == sect_name["enTitle"]:
                        sect_title_he = sect_name["heTitle"]
                        break
                new_section_text = r"\newsection{"+sect_title_he+"}"
                output.append(new_section_text)
                if type(section[1]) == list:
                    j = 0
                    for par in section[1]:
                        #prints next block of text
                        textblock = ""
                        comments = []
                        # Collapse nested lists into one string by concatenation.
                        while type(par)==list:
                            new_par = ""
                            for item in par:
                                new_par += item
                            par = new_par
                        if type(par) != dict:
                            textblock += par
                        if english_text != None and english_text["text"][chap_title_eng][sect_title_eng] != []:
                            textblock_eng = ""
                            # A one-element English section is reused for every Hebrew paragraph.
                            if type(english_text["text"][chap_title_eng][sect_title_eng])==list and len(english_text["text"][chap_title_eng][sect_title_eng]) == 1:
                                par_eng = english_text["text"][chap_title_eng][sect_title_eng][0]
                            else:
                                par_eng = english_text["text"][chap_title_eng][sect_title_eng][j]
                           # except:
                            #    print(j,english_text["text"][chap_title_eng][sect_title_eng])
                            while type(par_eng) == list:
                                new_par_eng = ""
                                for item in par_eng:
                                    new_par_eng += item
                                par_eng = new_par_eng
                            if type(par_eng) != dict:
                                textblock_eng += par_eng
                            new_text = make_section_json(textblock,textblock_eng, settings, commentaries,chap_title_eng,sect_title_eng,j)
                        else:
                            new_text = make_section_json(textblock,None, settings, commentaries,chap_title_eng,sect_title_eng,j)
                        if new_text != "":
                            output.append(new_text)
                        j += 1
                elif type(section[1]) == dict:#if section contains subsections
                    sect_structure = chap_structure[sect_num]["nodes"]
                    for subsection in section[1].items():
                        subsect_title_eng = subsection[0]
                        subsect_content = subsection[1]
                        subsect_title_he = ""
                        for subsect_name in sect_structure:
                            if subsect_title_eng == subsect_name["enTitle"]:
                                subsect_title_he = subsect_name["heTitle"]
                                break
                        new_subsection_text = r"\newsubsection{"+subsect_title_he+"}"
                        output.append(new_subsection_text)
                        subsect_i = 0
                        for par in subsection[1]:
                            #prints next block of text
                            textblock = ""
                            comments = []
                            while type(par)==list:
                                new_par = ""
                                for item in par:
                                    new_par += item
                                par = new_par
                            if type(par) != dict:
                                textblock += par
                            # NOTE(review): the English is looked up at section level, not at
                            # [subsect_title_eng][subsect_i]; in this branch that value is
                            # presumably a dict, which leaves textblock_eng empty - confirm.
                            if english_text != None and english_text["text"][chap_title_eng][sect_title_eng] != []:#[subsect_i]
                                #try:
                                eng_subsect = english_text["text"][chap_title_eng][sect_title_eng]
                                textblock_eng = ""
                                while type(eng_subsect) == list:
                                    new_par_eng = ""
                                    for item in eng_subsect:
                                        # NOTE(review): the result of this expression is discarded
                                        # ("+" rather than "+="), so a list collapses to "".
                                        new_par_eng + item
                                    eng_subsect = new_par_eng
                                if type(eng_subsect) != dict:
                                    textblock_eng += eng_subsect
                                    #new_par_eng = english_text["text"][chap_title_eng][sect_title_eng][subsect_i]
                             #   except:
                              #      print(english_text["text"][chap_title_eng][sect_title_eng])
#                                 for item in english_text["text"][chap_title_eng][sect_title_eng]:
#                                     new_par_eng += item + parbreak
                                new_text = make_section_json(textblock,textblock_eng, settings, commentaries,chap_title_eng,sect_title_eng,subsect_title_eng)
                            else:
                                new_text = make_section_json(textblock,None, settings, commentaries,chap_title_eng,sect_title_eng,subsect_title_eng)
                            if new_text != "":
                                output.append(new_text)
                            subsect_i += 1

                sect_num += 1
        chap_num += 1
    # Hebrew-only two-column layout: wrap each run of body lines between headings
    # in a multicols environment.
    if settings["layout"] == "twocol" and english_text == None:
        line_i = 0
        newsection_bool = True  # True until the first body line after a heading is seen
        in_cols = False  # True while a multicols environment is open
        lastline = ""  # assigned below but never read
        for line in output:
            if "newsection" in line or "newchap" in line or "newsubsection" in line:
                newsection_bool = True
                if in_cols == True:
                    #print("END TWOCOLS")
                    if settings["newpage"] < 2:
                        end_multicols = "\n"+r"\end{multicols}\newpage"+"\n"
                    else:
                        end_multicols = "\n"+r"\end{multicols}"+"\n"
                    # Close the environment at the end of the previous line.
                    output[line_i-1] = output[line_i-1]+end_multicols
                    in_cols = False
            elif "newsection" not in line and "newchap" not in line and "renewcommand" not in line and "fancy" not in line:
                if newsection_bool == True:
                    newsection_bool = False
                    #output[line_i] = r"\twocol{"+line
                    output[line_i] = r"\begin{multicols}{2}"+"\n"+line
                    in_cols = True
                    #print("BEGIN TWOCOLS")

            lastline = line
            line_i += 1
        #output.append(r"}")
        # Appended unconditionally, whether or not an environment is still open.
        output.append(r"\end{multicols}")
        output.append(r"\newpage")

    return title_command, output, title

In [83]:
def make_section_json(hebrew_text,english,settings,commentaries,chap,sect,subsect):
    """Render one paragraph (and its commentaries) of a dict-structured text as LaTeX.

    Footnote markers are stripped, the CSV replacement tables are applied, the
    text is wrapped in hebeng (any non-None English, including ""), left bare
    (two-column layout) or wrapped in textblock, each commentary's comment is
    appended, and remaining angle-bracket tags are removed.

    Args:
        hebrew_text: Hebrew paragraph.
        english: English paragraph, or None.
        settings: reads "newpage" and "layout".
        commentaries: commentary documents, passed to get_comments_json.
        chap, sect, subsect: location keys forwarded to get_comments_json.

    Returns:
        The LaTeX string for this paragraph.
    """
    def apply_replacements(text, csv_path):
        # Each CSV row after the header is (needle, replacement).
        with open(csv_path, encoding='utf-8') as handle:
            rows = csv.reader(handle, delimiter=',')
            next(rows)
            for row in rows:
                if row[0] in text:
                    text = text.replace(row[0], row[1])
        return text

    hebrew_text = footnoteremove(hebrew_text)
    english = footnoteremove(english)
    hebrew_text = apply_replacements(hebrew_text, 'resources/text_replacements.csv')
    if english is not None and english != "":
        english = apply_replacements(english, 'resources/english_replacements.csv')

    if english is not None:
        # Brace-protect square brackets so LaTeX does not read them as optional arguments.
        english = english.replace("[", "{[").replace("]", "]}")
        output = r"\hebeng{" + hebrew_text + "}{" + english + "}"
        if settings["newpage"] == 1:
            output += r"\newpage"
    elif settings["layout"] == "twocol":
        output = hebrew_text
    elif hebrew_text == "":
        output = ""
    else:
        output = r"\textblock{" + hebrew_text + "}"

    for commentary_i, commentary in enumerate(commentaries, start=1):
        output += get_comments_json(commentary, chap, sect, subsect, commentary_i)
    return removeformatting(output)

In [130]:
def get_comments_json(commentary,chap,sect,subsect,comment_i):
    r"""Build the LaTeX comment command for one location of one commentary.

    Looks up commentary['text'][chap][sect] (and [subsect] when it is not None)
    and wraps the result in \comment<letter>{...}, where <letter> is
    chr(comment_i + 96), so 1 gives "a" and 2 gives "b". A list of comments is
    concatenated, skipping empty strings and dropping one trailing space from
    each. "\par" is removed and <b>...</b> becomes \textrm{\textbf{...}}.

    Returns:
        The command string, or "" when the location does not exist in the
        commentary or holds no non-empty comment.
    """
    try:
        comments = commentary['text'][chap][sect]
        if subsect != None:
            comments = comments[subsect]
    except (IndexError, KeyError):
        # A commentary that lacks this chapter/section key or index has nothing to add.
        return ""
    comment_text = "%\n"+r"\comment"+chr(comment_i+96)+"{"
    blank_comment = True
    if type(comments)==str:
        if comments != "":
            blank_comment = False
            # Append to the command prefix; replacing it would leave an unbalanced "}".
            comment_text += comments
    else:
        for comment in comments:
            if comment == "":
                continue
            if comment[-1] == " ":
                comment = comment[:-1]
            blank_comment = False
            comment_text += comment
    if blank_comment:
        return ""
    comment_text += "}%endcomment"
    comment_text = comment_text.replace(r"\par","")
    comment_text = comment_text.replace("<b>",r"\textrm{\textbf{")
    comment_text = comment_text.replace("</b>",r"}}")
    return comment_text

In [85]:
def make_body(hebrew_text, english_text, settings, commentaries):
    """Build the LaTeX body for a text whose "text" is (or can be flattened to) nested lists.

    Dict-shaped "text" values are flattened with structure_fixer (the original
    is kept under "originalText"). Texts with a "Daf" section name get perek
    markers from match_chapters, using the module-level link_list. Each
    paragraph is rendered via make_section.

    Args:
        hebrew_text: text document; reads "heTitle", "sectionNames" and "text".
            Mutated in place.
        english_text: parallel English document, or None. Mutated in place.
        settings: reads "newpage" and "layout".
        commentaries: passed through to make_section.

    Returns:
        (title_command, output, title): the texttitle newcommand string, the list
        of LaTeX lines, and the Hebrew title.
    """
    output = []
    chap_num = 1
    mishna_num = 1# not necessarily a mishna, just the smaller divisions of the text
    title = hebrew_text["heTitle"]
    title_command = r"\newcommand{\texttitle}{"+title+"}"#sets title
    try:
        divisions_en = hebrew_text["sectionNames"] #gets names of the sections for the specific text
    # NOTE(review): bare except; presumably only a missing "sectionNames" key is expected here.
    except:
        divisions_en = ["Chapter","Paragraph"]
        hebrew_text["sectionNames"] = divisions_en
    divisions_he = []
    if type(hebrew_text["text"])==dict:
        hebrew_text["originalText"] = hebrew_text["text"]
        hebrew_text["text"] = structure_fixer(hebrew_text["originalText"])
    if english_text != None:
        if type(english_text["text"])==dict:
            english_text["originalText"] = english_text["text"]
            english_text["text"] = structure_fixer(english_text["originalText"])
    #print(english_text["text"])
    #the following uses the CSV of section names to get the Hebrew sections names
    # divisions_he is filled in CSV row order, not in divisions_en order.
    with open('resources/section_names.csv', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader)
        for row in csv_reader:
            for division in divisions_en:
                if row[0] == division:
                    divisions_he.append(row[1])
    if "Daf" in divisions_en:
        #if the text is based on dappim, run the script to add perakim notations
        hebrew_text = match_chapters(hebrew_text,link_list)       
    for perek in hebrew_text["text"]:
        # Entries with no truthy element are skipped, but chap_num still advances below.
        if any(perek):
            if type(perek[0]) == dict and "name_he" in perek[0].keys():
                #if there's a new perek dict note, add the LaTeX code for the new perek
                new_chap_text = r"\newchap{"+parse_perek_title(perek[0])+"}"
                if new_chap_text not in output:
                    output.append(new_chap_text)
            if "Daf" in divisions_en:
                #this adds daf numbers for each new daf, ignoring the amud break
                # chap_num counts amudim from 1, so (chap_num+1)/2 is whole only on an amud a.
                daf = ((chap_num+1)/2)
                if daf == round(daf):
                    daftitle = int_to_gematria(round(daf), gershayim=False)
                    if settings["newpage"] == 1:
                        output.append(r"\clearpage")
                    output.append(r"\newsection{דף "+daftitle+"}")              
            else:
                if settings["newpage"] == 1:
                    output.append(r"\clearpage")
                output.append(r"\newchap{"+divisions_he[0]+" "+int_to_gematria(chap_num, gershayim=False)+"}")
            for par in perek:
                #prints next block of text
                textblock = ""
                comments = []  # unused
                if type(par) == dict and "name_he" in par.keys() and par["name_en"] != "Chapter 1":
                    # NOTE(review): textblock was reset to "" just above, so the body of this
                    # `if` can never run; a mid-amud perek marker emits nothing here.
                    if textblock != "":
                        if english_text != None and english_text["text"][chap_num-1] != []:
                            new_text = make_section(textblock,english_text["text"][chap_num-1][mishna_num-1], settings, chap_num, mishna_num, commentaries)
                        else:
                            new_text = make_section(textblock,None, settings, chap_num, mishna_num, commentaries)
                        output.append(new_text)
                else:
                    # Collapse nested lists into one string by concatenation.
                    while type(par)==list:
                        new_par = ""
                        for item in par:
                            new_par += item
                        par = new_par
                    if type(par) != dict:
                        textblock += par
                if hebrew_text["sectionNames"][1] == "Verse" or hebrew_text["sectionNames"][1] == "Mishnah":
                    textblock = r"\vsnum{"+str(mishna_num)+"}"+textblock
                    if english_text != None:
                        # NOTE(review): writes the numbered string back into english_text, so a
                        # second call on the same object would add the prefix again.
                        english_text["text"][chap_num-1][mishna_num-1] = r"\vsnumeng{"+str(mishna_num)+"}"+english_text["text"][chap_num-1][mishna_num-1]
                if english_text != None and english_text["text"][chap_num-1] != []:
                    new_text = make_section(textblock,english_text["text"][chap_num-1][mishna_num-1], settings, chap_num, mishna_num, commentaries)
                else:
                    new_text = make_section(textblock,None, settings, chap_num, mishna_num, commentaries)
                if new_text != "":
                    output.append(new_text)
                mishna_num += 1
        chap_num += 1
        mishna_num = 1
    # Hebrew-only two-column layout: wrap each run of body lines between headings
    # in a multicols environment.
    if settings["layout"] == "twocol" and english_text == None:
        line_i = 0
        newsection_bool = True  # True until the first body line after a heading is seen
        in_cols = False  # True while a multicols environment is open
        lastline = ""  # assigned below but never read
        for line in output:
            if "newsection" in line or "newchap" in line:
                newsection_bool = True
                if in_cols == True:
                    #print("END TWOCOLS")
                    if settings["newpage"] < 2:
                        end_multicols = "\n"+r"\end{multicols}\newpage"+"\n"
                    else:
                        end_multicols = "\n"+r"\end{multicols}"+"\n"
                    # Close the environment at the end of the previous line.
                    output[line_i-1] = output[line_i-1]+end_multicols
                    in_cols = False
            elif "newsection" not in line and "newchap" not in line and "renewcommand" not in line and "fancy" not in line:
                if newsection_bool == True:
                    newsection_bool = False
                    #output[line_i] = r"\twocol{"+line
                    output[line_i] = r"\begin{multicols}{2}"+"\n"+line
                    in_cols = True
                    #print("BEGIN TWOCOLS")

            lastline = line
            line_i += 1
        #output.append(r"}")
        # Appended unconditionally, whether or not an environment is still open.
        output.append(r"\end{multicols}")
        output.append(r"\newpage")
    return title_command, output, title

In [86]:
def parse_perek_title(perekDict):
    r"""Format a perek marker dict as a Hebrew LaTeX heading.

    The "Chapter " prefix is stripped from perekDict["name_en"] and the
    remainder is placed inside \hebrewnumeral{...}, followed by
    perekDict["name_he"].
    """
    chapter_number = perekDict["name_en"].replace("Chapter ","")
    pieces = [r"פרק \hebrewnumeral{", chapter_number, r"} ", perekDict["name_he"]]
    return "".join(pieces)

In [87]:
def removeformatting(text):
    """Strip angle-bracket tags from ``text``.

    Repeatedly takes the first "<", finds the next ">" after it, and removes
    every occurrence of that tag string. Stops when no "<" is left or the
    first "<" has no closing ">".

    Returns:
        The text without tags; an unterminated "<" is left in place.
    """
    while True:
        tag_start = text.find("<")
        if tag_start == -1:
            break
        tag_end = text.find(">", tag_start)
        if tag_end == -1:
            # A "<" with no closing ">" (e.g. "a > b < c") used to select an
            # empty slice and loop forever.
            break
        text = text.replace(text[tag_start:tag_end+1],"")
    return text

In [88]:
def footnoteremove(text):
    """Normalise Sefaria footnote markup in ``text``.

    Each ``<sup class="footnote-marker">...</sup>`` span is replaced by one
    space, ``<i class="footnote">`` becomes ``<i>``, and every ``<i data-...>``
    opening tag becomes ``<i>``. The footnote text itself is left in place.

    Args:
        text: the string to clean, or None.

    Returns:
        The cleaned string, or None when ``text`` is None. Markup that is never
        closed is left untouched.
    """
    if text is None:
        return None
    if type(text) == list:
        # Diagnostic only: a list is not supported and fails on .replace below.
        print(text)
    markbegin = "<sup class=\"footnote-marker\">"
    markend = "</sup>"
    while markbegin in text:
        marker_start = text.find(markbegin)
        marker_close = text.find(markend, marker_start)
        if marker_close == -1:
            # Unterminated marker: slicing from find()'s -1 used to grow the
            # string and never terminate.
            break
        text = text[:marker_start]+" "+text[marker_close+len(markend):]
    text = text.replace("<i class=\"footnote\">","<i>")
    while "<i data-" in text:
        tag_start = text.find("<i data-")
        tag_close = text.find(">",tag_start+5)
        if tag_close == -1:
            # Unterminated tag: same runaway growth as above.
            break
        text = text[:tag_start]+"<i>"+text[tag_close+1:]
    return text

In [90]:
def make_section(hebrew_text, english, settings, chap_num, mishna_num, commentaries):
    """Render one paragraph (and its commentaries) of a list-structured text as LaTeX.

    Footnote markers are stripped, the CSV replacement tables are applied, the
    text is wrapped in hebeng (non-empty English), left bare (two-column
    layout) or wrapped in textblock, each commentary's comment is appended, and
    remaining angle-bracket tags are removed.

    Args:
        hebrew_text: Hebrew paragraph.
        english: English paragraph, "" or None.
        settings: reads "newpage" and "layout".
        chap_num, mishna_num: 1-based position, forwarded to get_comments.
        commentaries: commentary documents, passed to get_comments.

    Returns:
        The LaTeX string for this paragraph.
    """
    def apply_replacements(text, csv_path):
        # Each CSV row after the header is (needle, replacement).
        with open(csv_path, encoding='utf-8') as handle:
            rows = csv.reader(handle, delimiter=',')
            next(rows)
            for row in rows:
                if row[0] in text:
                    text = text.replace(row[0], row[1])
        return text

    hebrew_text = footnoteremove(hebrew_text)
    english = footnoteremove(english)
    hebrew_text = apply_replacements(hebrew_text, 'resources/text_replacements.csv')
    has_english = english is not None and english != ""
    if has_english:
        english = apply_replacements(english, 'resources/english_replacements.csv')

    if has_english:
        # Brace-protect square brackets so LaTeX does not read them as optional arguments.
        english = english.replace("[", "{[").replace("]", "]}")
        output = r"\hebeng{" + hebrew_text + "}{" + english + "}"
        if settings["newpage"] == 1:
            output += r"\newpage"
    elif settings["layout"] == "twocol":
        output = hebrew_text
    elif hebrew_text == "":
        output = ""
    else:
        output = r"\textblock{" + hebrew_text + "}"

    for comment_i, commentary in enumerate(commentaries, start=1):
        output += get_comments(chap_num, mishna_num, commentary, comment_i)
    return removeformatting(output)

In [91]:
#this adds commands for part-specific formatting
#it outputs a list of lines to be written to the beginning of that part
#basically duplicates the formatting commands for the overall document
def add_part_format(part_i, settings):
    r"""Build the LaTeX lines that apply one part's formatting overrides.

    Values in settings["texts"][part_i]["format"] take precedence over the
    document-level entries of ``settings``.

    Returns:
        A four-item list: the odd-page \fancyhead line, the even-page
        \fancyhead line, the \sethebfont redefinition and the \setengfont
        redefinition.
    """
    part_settings = settings["texts"][part_i]["format"]

    def resolve(key):
        # Part-level value wins; otherwise fall back to the document setting.
        if key in part_settings.keys():
            return part_settings[key]
        return settings[key]

    # NOTE(review): the font commands are assembled but never returned - the
    # original appended them and then reset its output list. Confirm whether
    # dropping them is deliberate.
    main_font_command = r"\setmainfont{" + resolve("hebfont") + r"}"
    if "commentfont" in part_settings.keys():
        comment_font = part_settings["commentfont"]
    elif settings["commentfont"] != "":
        comment_font = settings["commentfont"]
    else:
        comment_font = settings["hebfont"]
    comment_font_command = r"\setsansfont{" + comment_font + "}"

    head_position = settings["headpos"]
    if head_position == "center":
        odd_header, even_header = r"\fancyhead[CO]{", r"\fancyhead[CE]{"
    elif head_position == "inner":
        odd_header, even_header = r"\fancyhead[RO]{", r"\fancyhead[LE]{"

    evenhead = resolve("evenhead")
    oddhead = resolve("oddhead")
    even_contents = {
        "title": r"\partname",
        "chapter": r"\chapname",
        "titlechapter": r"\partname\space\textendash\space \chapname",
        "daf": r"\sectname",
        "chapdaf": r"\chapname \space\textendash\space \sectname",
    }
    # Same table for odd pages, except "titlechapter" starts with a space.
    odd_contents = dict(even_contents, titlechapter=r" \partname\space\textendash\space \chapname")
    even_header += even_contents.get(evenhead, "")
    odd_header += odd_contents.get(oddhead, "")
    odd_header += "}"
    even_header += "}"

    fontsize = resolve("fontsize")
    skip = fontsize * resolve("spacing")
    if "engfontsize" in part_settings.keys():
        engsize = part_settings["engfontsize"]
    elif "engfontsize" in settings.keys():
        engsize = settings["engfontsize"]
    else:
        engsize = settings["fontsize"]
    leading = str(round(skip, 1))
    fontsizestr = r"\renewcommand{\sethebfont}{\fontsize{" + str(fontsize) + r"pt}{" + leading + r"pt} \selectfont}\sethebfont"
    engsizestr = r"\renewcommand{\setengfont}{\fontsize{" + str(engsize) + r"pt}{" + leading + r"pt} \selectfont}\setengfont"
    return [odd_header, even_header, fontsizestr, engsizestr]

In [92]:
def get_comments(i,j,commentary,comment_i):
    r"""Build the LaTeX comment command for paragraph ``j`` of chapter ``i``.

    Both indices are 1-based. When the stored comment is a list only its first
    item is used. "\par" is removed and <b>...</b> becomes
    \textrm{\textbf{...}}. The command name is \comment followed by
    chr(comment_i + 96), so 1 gives "a" and 2 gives "b".

    Returns:
        The command string, or "" when the chapter or paragraph is missing,
        the comment list is empty, or the result contains "{}".
    """
    try:
        chap = commentary["text"][i-1]
    except IndexError:
        return ""
    if j > len(chap):
        return ""
    opening = "%\n" + r"\comment" + chr(comment_i + 96) + "{"
    content = chap[j-1]
    if type(content) == list:
        if len(content) == 0:
            return ""
        content = content[0]
    content = (
        content
        .replace(r"\par", "")
        .replace("<b>", r"\textrm{\textbf{")
        .replace("</b>", r"}}")
    )
    comment_text = opening + content + "}%endcomment"
    return "" if "{}" in comment_text else comment_text

In [93]:
def structure_fixer(json):
    """Convert a dict-structured text into nested lists.

    A list is returned unchanged.  For a dict, the values are walked in
    order: list values are kept as they are, dict values are converted the
    same way, and any other value is skipped.  Dicts are followed for three
    levels in total (the argument and two levels below it); dicts nested
    deeper than that are ignored.
    """
    if type(json) is list:
        return json

    def collect(node, levels_left):
        # gather the list values of ``node``, descending into dict values
        # while ``levels_left`` allows it
        gathered = []
        for child in node.values():
            if type(child) is list:
                gathered.append(child)
            elif type(child) is dict and levels_left > 0:
                gathered.append(collect(child, levels_left - 1))
        return gathered

    return collect(json, 2)

In [94]:
#test = pull_text("Halakhah/Acharonim/Chofetz Chaim/Hebrew/Chofetz Chaim")

In [95]:
#structure_fixer(test["text"])

In [96]:
def _fontsize_command(fontsize, spacing):
    r"""Return ``\fontsize{<size>pt}{<skip>pt} \selectfont``; skip is size * spacing rounded to 0.1."""
    skip = fontsize * spacing
    return r"\fontsize{"+str(fontsize)+r"pt}{"+str(round(skip,1))+r"pt} \selectfont"


def _header_text(kind, titlehead):
    r"""Return the running-head content for a header setting; unknown kinds give ``""``."""
    if kind == "title":
        return titlehead
    if kind == "chapter":
        return r"\chapname"
    if kind == "titlechapter":
        return titlehead + r" \space\textendash\space \chapname"
    if kind == "daf":
        return r"\sectname"
    if kind == "chapdaf":
        return r"\chapname \space\textendash\space \sectname"
    return ""


def set_format(template_lines,settings):
    r"""Fill in the formatting placeholders of the LaTeX template.

    Each template line (without its trailing newline) is compared with the
    keys of ``settings`` and with a fixed set of ``%placeholder`` comments.
    A line equal to a settings key becomes ``key=value,``; a recognised
    placeholder becomes the LaTeX command built from the settings; every
    other line is passed through unchanged.

    Parameters:
        template_lines: lines of the template, normally each ending in "\n".
        settings: the book settings dict.  ``settings["texts"]`` decides
            whether running heads use ``\partname`` (more than one text) or
            ``\texttitle``.

    Returns:
        list of output lines.  Generated lines carry no trailing newline
        (except ``key=value,\n``); pass-through lines keep theirs.

    Raises:
        KeyError: a placeholder needs a setting that is missing.
        ValueError: ``pagenumloc`` or ``headpos`` has an unsupported value.
    """
    output = []
    if len(settings["texts"])>1:
        titlehead = r"\partname"
    else:
        titlehead = r"\texttitle"
    for line in template_lines:
        # compare without the line break; a final line that has no newline is
        # used whole instead of losing its last character
        key = line[:-1] if line.endswith("\n") else line
        if key in settings.keys():
            output.append(key + "=" + settings[key] + ",\n")
        elif key == "%setfontsize":
            output.append(_fontsize_command(settings["fontsize"], settings["spacing"]))
        elif key == "%engfontsize":
            # English text falls back to the main font size when no own size is set
            if "engfontsize" in settings.keys():
                fontsize = settings["engfontsize"]
            else:
                fontsize = settings["fontsize"]
            output.append(_fontsize_command(fontsize, settings["spacing"]))
        elif key == "%sethebfont":
            if settings["hebboldfont"] == "":
                font = r"\setmainfont{"+settings["hebfont"]+r"}"
            else:
                font = r"\setmainfont[BoldFont = {"+settings["hebboldfont"]+r'}]{'+settings["hebfont"]+r"}"
            output.append(font)
        elif key == "%setcommentfont":
            # without a comment font the placeholder is dropped from the output
            if "commentfont" in settings.keys():
                output.append(r"\setsansfont{"+settings["commentfont"]+"}")
        elif key == "%setengfont" and settings["engfont"] != 0:
            output.append(r'\newfontfamily\englishfont{'+settings["engfont"]+r'}')
        elif key == "%setparskip" and settings["parskip"] != 0:
            output.append(r'\setlength{\parskip}{'+settings["parskip"]+'}')
        elif key == "%pagenumber":
            location = settings["pagenumloc"]
            if location == "topouter":
                pagenum = r"\fancyhead[LO,RE]{num}"
            elif location == "bottommiddle":
                pagenum = r"\fancyfoot[C]{num}"
            else:
                raise ValueError("unsupported pagenumloc setting: "+repr(location))
            if settings["pagenumheb"] == True:
                pagenum = pagenum.replace("num",r"\hebrewnumeral{\thepage}")
            else:
                pagenum = pagenum.replace("num",r"\thepage")
            output.append(pagenum)
        elif key == "%header":
            position = settings["headpos"]
            if position == "center":
                odd_header = r"\fancyhead[CO]{"
                even_header = r"\fancyhead[CE]{"
            elif position == "inner":
                odd_header = r"\fancyhead[RO]{"
                even_header = r"\fancyhead[LE]{"
            else:
                raise ValueError("unsupported headpos setting: "+repr(position))
            even_header += _header_text(settings["evenhead"], titlehead)
            # the "daf"/"chapdaf" odd-page choices belong in the odd header
            odd_header += _header_text(settings["oddhead"], titlehead)
            odd_header += "}"
            even_header += "}"
            output.append(odd_header)
            output.append(even_header)
        elif key == "%chapfontsize":
            if "chapfontsize" in settings.keys():
                headerfontcommand = r"\fontsize{"+settings["chapfontsize"]+"}{"+settings["chapfontsize"]+r"}\selectfont"
            else:
                headerfontcommand = r"\LARGE"
            output.append(headerfontcommand)
        elif key == "%setcolumnsep":
            output.append(r"\setlength{\columnsep}{"+settings["colsep"]+"}")
        elif key == "%twocolfootnote" and settings["twocolfootnotes"] == 1:
            output.append(r"\usepackage{dblfnote}\DFNalwaysdouble")
        else:
            output.append(line)
    return output

In [97]:
def get_bib_info(json):
    """Collect the bibliographic fields of a text's JSON into a dict.

    The result has the keys ``source`` (from ``versionSource``), ``license``
    (``"CC-BY"`` when the JSON has no ``license`` entry), ``version`` (from
    ``versionTitle``) and ``heTitle``.
    """
    return {
        "source": json["versionSource"],
        "license": json["license"] if "license" in json.keys() else "CC-BY",
        "version": json["versionTitle"],
        "heTitle": json["heTitle"],
    }

def print_source_data(source_list):
    r"""Build the LaTeX ``itemize`` lines of the copyright notice.

    Every source contributes a bullet with its version title (hyphens become
    ``\textendash ``) followed by its license and source URL; with more than
    one source those two details are wrapped in a nested ``itemize``.

    If a source's license contains "Copyright", the function stops and returns
    ``["NC", <version>]`` instead of the notice.  A license containing "NC"
    only prints a warning.
    """
    nested = len(source_list) > 1
    notice = [r"\begin{itemize}"]
    for entry in source_list:
        license_text = entry["license"]
        if "Copyright" in license_text:
            return ["NC", entry["version"]]
        if "NC" in license_text:
            print("A source selected has a non-commercial license.\nDo not use the PDF version of this text commercially.")
        notice.append(r"\item[$\bullet$] " + entry["version"].replace("-", r"\textendash "))
        details = [
            r"\item[$\bullet$] License: " + entry["license"],
            r"\item[$\bullet$] Source: \url{" + entry["source"] + "}",
        ]
        if nested:
            details = [r"\begin{itemize}"] + details + [r"\end{itemize}"]
        notice.extend(details)
    notice.append(r"\end{itemize}")
    return notice

In [98]:
#reads template file
# template locations, relative to the working directory
coverinpath = os.path.join("resources", "input_cover.tex")
inputpath = os.path.join("resources", "input.tex")


def pullinput(inputpath):
    """Read the UTF-8 file at ``inputpath`` and return its lines, line breaks included."""
    with open(inputpath, mode='r', encoding='utf-8') as template_file:
        return [row for row in template_file]

In [99]:
#this limits the input to the specific range specified in the text settings
def limit_output(lines,textsettings):
    r"""Restrict the generated LaTeX ``lines`` to the portion chosen in ``textsettings``.

    * ``textsettings["dafrange"]`` (``"<start>-<end>"``, numeric): keep the
      lines from the ``\newsection{דף <start>`` line up to, but not including,
      the ``\newsection{דף <end>`` line.  If the end marker never appears,
      everything from the start marker onwards is kept; if the start marker
      never appears the result is empty.  In the kept lines the daf section
      headings are turned into ``\newchap`` headings.
    * otherwise, a missing ``range`` or ``range == "all"`` returns ``lines``
      unchanged.
    * otherwise ``range`` is parsed with ``range_str`` and the listed chapters
      are concatenated.  Chapter ``k`` is the ``k``-th run of lines beginning
      at a line that contains ``newchap``; index 0 is whatever precedes the
      first such line.

    Raises:
        ValueError: a ``dafrange`` bound is not an integer.
        IndexError: ``dafrange`` has no "-" or ``range`` names a chapter that
            does not exist.
    """
    section_prefix = r"\newsection{דף "
    if "dafrange" in textsettings.keys():
        start_end = textsettings["dafrange"].split("-")
        # int_to_gematria is documented for integers, so the bounds are converted.
        # NOTE(review): with string input the library appears to skip its
        # special-case numerals (e.g. 15/16) - confirm against the library.
        start = int_to_gematria(int(start_end[0]), gershayim=False)
        end = int_to_gematria(int(start_end[1]), gershayim=False)
        limited_lines = []
        content = False
        for line in lines:
            if not content:
                if section_prefix+start in line:
                    content = True
                    limited_lines.append(line)
            elif section_prefix+end in line:
                break
            else:
                limited_lines.append(line)
        # build a new list: rebinding the loop variable would leave the lines untouched
        return [kept.replace(section_prefix, r"\newchap{דף ") for kept in limited_lines]
    # test for the key before reading it, so a missing "range" means "all"
    if "range" not in textsettings.keys() or textsettings["range"] == "all":
        return lines
    chapters = [[]]
    for line in lines:
        if "newchap" in line:
            chapters.append([line])
        else:
            chapters[-1].append(line)
    # the final chapter is part of ``chapters`` too, so it can be selected
    limited_lines = []
    for chap in range_str(textsettings["range"]):
        limited_lines.extend(chapters[chap])
    return limited_lines

In [100]:
#this parses the string describing the range
def range_str(string):
    """Expand a range description such as ``"1,3-5"`` into ``[1, 3, 4, 5]``.

    The string is split on commas.  A piece without "-" is a single number; a
    piece with "-" is an inclusive ``start-end`` span (only the first two
    dash-separated fields are read).  Numbers appear in the order written.
    """
    selected = []
    for piece in string.split(","):
        if "-" in piece:
            bounds = piece.split("-")
            first, last = int(bounds[0]), int(bounds[1])
            selected.extend(range(first, last + 1))
        else:
            selected.append(int(piece))
    return selected

In [101]:
#when "blockcomment" is selected, moves comments to the end of the perek
def _comment_block(title, comments, newpage):
    r"""Join collected comments into one ``\blockcomment{title}{...}%endcomment`` string."""
    block = r"\blockcomment{" + title + "}{"
    block += "".join(comment + r"\\" for comment in comments)
    # a real line break before the closing brace (a raw "\n" would reach LaTeX
    # as the two characters backslash + n)
    block += "\n}%endcomment"
    if newpage == 1:
        block += r"\newpage"
    return block


def move_comments(lines, title, newpage):
    r"""Move inline comments into a comment block at the end of each unit.

    A line containing ``\comment`` is expected to hold
    ``\comment<letter>{<text>}%endcomment``; the command is cut out of the
    line and ``<text>`` is collected.  When a line containing ``\newchap``,
    ``\newsection`` or ``\addpart`` is reached, and again at the end of the
    input, the collected comments are emitted as a single
    ``\blockcomment{title}{...}%endcomment`` entry placed before that line.

    Parameters:
        lines: LaTeX body lines.
        title: heading passed as the first argument of ``\blockcomment``.
        newpage: when equal to 1, ``\newpage`` is appended to every block.

    Returns:
        a new list of lines; ``lines`` itself is not modified.

    Raises:
        ValueError: a line contains ``\comment`` but no ``}%endcomment``.
    """
    opener_length = 10  # len(r"\comment") + one letter + "{"
    closer = "}%endcomment"
    collected_comments = []
    output = []
    for line in lines:
        if r"\comment" in line:
            command_at = line.index(r"\comment")
            text_from = command_at + opener_length
            text_to = line.index(closer)
            collected_comments.append(line[text_from:text_to])
            output.append(line[:command_at] + line[text_to+len(closer):])
            continue
        starts_unit = r"\newchap" in line or r"\newsection" in line or r"\addpart" in line
        if starts_unit and collected_comments != []:
            output.append(_comment_block(title, collected_comments, newpage))
            collected_comments = []
        output.append(line)
    if collected_comments != []:
        output.append(_comment_block(title, collected_comments, newpage))
    return output

In [102]:
def block_fix(text):
    r"""Adjust page breaks around comment blocks, editing ``text`` in place.

    For every line after the first:

    * if the line contains ``\blockcomment{`` and the previous line, minus its
      last character, is exactly ``\clearpage}``, the previous line is
      replaced by ``"}\n"``;
    * every ``}%endcomment`` in the line becomes ``}\clearpage %endcomment``.

    Returns the same list object that was passed in.
    """
    for i in range(1,len(text)):
        if r"\blockcomment{" in text[i] and text[i-1][0:-1] == r"\clearpage}":
            text[i-1] = "}\n"
        if "}%endcomment" in text[i]:
            # raw string: "\c" is not a valid escape in a normal string literal
            text[i] = text[i].replace("}%endcomment", r"}\clearpage %endcomment")
    return text

In [103]:
#converts input into output
def writeoutput(outputpath, template, formatting):
    r"""Generate the book's LaTeX file from the template and the settings.

    For every entry of ``formatting["texts"]`` the text, its commentaries and
    its optional translation are fetched with ``pull_text``, turned into LaTeX
    by ``make_body`` / ``make_body_json`` (the latter when the JSON ``text``
    is a dict), limited with ``limit_output`` and, for the "blocks" comment
    style, rearranged with ``move_comments`` / ``block_fix``.  The template
    (already processed by ``set_format``) is then written to ``outputpath``
    with the ``%title_here``, ``%license info`` and ``%body_here``
    placeholders filled in.

    Side effects: each text entry is given default ``translation``, ``range``
    and ``commentary`` values, and its ``format`` dict is filled in place with
    every top-level setting it does not override.

    Parameters:
        outputpath: path of the .tex file to write.
        template: template lines as returned by ``pullinput``.
        formatting: the book settings dict.

    Returns:
        ``formatting["title"]`` when present, otherwise the Hebrew title
        produced by ``title_to_cover``.  ``None`` is returned, and nothing is
        written, when ``print_source_data`` reports a license that forbids
        the build.
    """
    sources = []
    parts = []
    source_listing = []
    template_with_settings = set_format(template,formatting)#reads settings
    for text in formatting["texts"]:
        if "translation" not in text.keys():
            text["translation"] = ""
        if "range" not in text.keys():
            text["range"] = "all"
        if "commentary" not in text.keys():
            text["commentary"] = []
        part_format = text["format"]
        # per-text settings win; everything else is inherited from the book
        for setting in formatting.items():
            if setting[0] not in part_format.keys() and setting[0] != "texts":
                part_format[setting[0]] = setting[1]
        if part_format["layout"]=="twocol":
            part_format["newpage"] = 0
        sefaria_json = pull_text(text["link"])#pulls json from Sefaria
        sources.append(get_bib_info(sefaria_json))#puts bibliographic info in sources list
        commentaries = []
        for commentary in text["commentary"]:
            comments_json = pull_text(commentary)
            bib_data = get_bib_info(comments_json)
            # the title of the last commentary is the one used for comment blocks
            commentary_title = bib_data["heTitle"]
            sources.append(bib_data)
            commentaries.append(comments_json)
        if text["translation"] != "":
            #pulls translation, if any, and adds to bibliographic list
            english_json = pull_text(text["translation"])
            sources.append(get_bib_info(english_json))
        else:
            english_json = None
        if type(sefaria_json["text"]) == dict:
            sefaria_result = make_body_json(sefaria_json,english_json,part_format,commentaries)
        else:
            sefaria_result = make_body(sefaria_json,english_json,part_format,commentaries)
        content_limited = limit_output(sefaria_result[1],text)

        if part_format["commentstyle"] == "blocks" and text["commentary"] != []:
            # use the effective per-text value (0 for two-column layouts), the
            # same one that decides about block_fix below
            content_limited = move_comments(content_limited, commentary_title, part_format["newpage"])
            if part_format["newpage"] == 1:
                content_limited = block_fix(content_limited)
        parts.append({"title":sefaria_result[2],"content":content_limited})
        source_listing = print_source_data(sources)
        if source_listing[0] == "NC":#stops the script if the license doesn't allow the text to run
            print(source_listing[1] + " has a license which does not allow creation of this text.")
            return
    title_command = r"\newcommand{\texttitle}{"+formatting["titleheb"]+"}"
    with open(outputpath, 'w', encoding='utf-8') as outfile:
        for line in template_with_settings:
            if line == "%title_here\n":
                outfile.write(title_command)
            elif line == "%license info\n":
                for item in source_listing:
                    outfile.write(item)
                    outfile.write("\n")
            elif line == "%body_here\n":
                if len(parts) == 1:
                    for newline in parts[0]["content"]:
                        # endswith also copes with empty lines
                        if newline.endswith(" "):
                            newline = newline[0:-1]
                        outfile.write(newline+"\n")
                elif len(parts)>1:
                    for part_num, part in enumerate(parts):
                        outfile.write(r"\addpart{"+part["title"]+r"}\renewcommand{\partname}[1]{"+part["title"]+"}\n")
                        for format_line in add_part_format(part_num,formatting):
                            outfile.write(format_line+"\n")
                        for newline in part["content"]:
                            outfile.write(newline+"\n")
            else:
                outfile.write(line)
                if "\n" not in line:
                    outfile.write("\n")
    cover_title_heb = title_to_cover(title_command)
    if "title" in formatting.keys():
        return formatting["title"]
    else:
        return cover_title_heb

In [104]:
def make_cover(outputpath,cover_template,title,settings,pages):
    r"""Write the cover's LaTeX file next to the book's .tex file.

    The cover file name is ``outputpath`` with ``.tex`` replaced by
    ``_cover.tex``.  Every line of ``cover_template`` is copied, except that
    the placeholders ``%hebtitle``, ``%backgroundcolor``, ``%textcolor``,
    ``%height``, ``%width``, ``%spinewidth``, ``%bleedwidth``,
    ``%spinetextheight``, ``%backtext``, ``%sethebfont`` and ``%setengfont``
    are replaced with values derived from ``settings`` and from the spine
    width that ``calc_spine_width(pages, settings)`` returns.

    Parameters:
        outputpath: path of the book's .tex file.
        cover_template: lines of the cover template.
        title: text written in place of ``%hebtitle``.
        settings: the book settings dict.
        pages: page count of the compiled book.

    Raises:
        ValueError: ``settings["covertype"]`` is neither "hardcover" nor
            "softcover" when a height or width is needed.
    """
    coverOutPath = outputpath.replace(".tex","_cover.tex")
    inches = calc_spine_width(pages,settings)

    def cover_dimension(paper_key, hardcover_extra):
        # paper size in inches; hardcovers get ``hardcover_extra`` added
        covertype = settings["covertype"]
        if covertype == "hardcover":
            return float(settings[paper_key].replace("in",""))+hardcover_extra
        if covertype == "softcover":
            return float(settings[paper_key].replace("in",""))
        raise ValueError("unsupported covertype setting: "+repr(covertype))

    with open(coverOutPath,'w',encoding='utf-8') as outfile:
        for line in cover_template:
            if line == "%hebtitle\n":
                outfile.write(title)
            elif line == "%backgroundcolor\n":
                background_command = r"\definecolor{background}{HTML}{"+settings["covercolor"]+"}"
                outfile.write(background_command+"\n")
            elif line == "%textcolor\n":
                text_command = r"\definecolor{text}{HTML}{"+settings["covertextcolor"]+"}"
                outfile.write(text_command+"\n")
            elif line == "%height\n":
                outfile.write("coverheight="+str(cover_dimension("paperheight", 1.5))+"in,\n")
            elif line == "%width\n":
                outfile.write("coverwidth="+str(cover_dimension("paperwidth", .75))+"in,\n")
            elif line == "%spinewidth\n":
                outfile.write("spinewidth="+str(inches)+"in,\n")
            elif line == "%bleedwidth\n":
                outfile.write("bleedwidth=.125in,\n")
            elif "%spinetextheight" in line:
                # spine text is 85% of the spine width, capped at 0.375in
                spine_txt_ht = min(float(inches),0.375)
                inches_spine_text = str(0.85 * spine_txt_ht)
                # raw string: "\s" is not a valid escape in a normal literal
                spine_ht_command = r"\fontsize{"+inches_spine_text+"in}{"+inches_spine_text+r"in}\selectfont"
                outfile.write(spine_ht_command)
            elif "%backtext" in line:
                if "backtext" in settings.keys():
                    outfile.write(settings["backtext"])
            elif "%sethebfont" in line:
                font = r"\setmainfont{"+settings["hebfont"]+r"}"
                outfile.write(font)
            elif "%setengfont" in line and settings["engfont"] != 0:
                # an engfont of 0 means "no English font", as in set_format;
                # the placeholder line is then copied through unchanged
                engfont = r'\newfontfamily\englishfont{'+settings["engfont"]+r'}'
                outfile.write(engfont)
            else:
                outfile.write(line)
                if "\n" not in line:
                    outfile.write("\n")

In [105]:
def calc_spine_width(pages,settings):
    """Return the spine width, in inches, for a book of ``pages`` pages.

    * softcover: ``pages / 444 + 0.06`` as a float.
    * hardcover: looked up in ``resources/spine_width.csv``, whose rows are
      ``first_page,last_page,width``; the width of the first row whose page
      interval contains ``pages`` is returned as the string read from the
      file.

    Raises:
        ValueError: ``settings["covertype"]`` is neither "softcover" nor
            "hardcover", or no CSV row covers ``pages``.
    """
    covertype = settings["covertype"]
    if covertype == "softcover":
        return (pages / 444) + 0.06
    if covertype == "hardcover":
        with open('resources/spine_width.csv', encoding='utf-8') as csv_file:
            for row in csv.reader(csv_file, delimiter=','):
                if not row:
                    continue  # tolerate blank lines in the table
                if int(row[0])<=pages<=int(row[1]):
                    return row[2]
        # fail here with a clear message rather than handing None to the caller
        raise ValueError("no hardcover spine width listed for "+str(pages)+" pages")
    raise ValueError("unsupported covertype setting: "+repr(covertype))

In [106]:
def title_to_cover(heb_title):
    r"""Return the title's words in reverse order, separated by single spaces.

    ``heb_title`` is the ``\newcommand{\texttitle}{<title>}`` string: the
    first 24 characters (the command up to the opening brace) and the final
    character (the closing brace) are discarded before the words are reversed.
    """
    bare_title = heb_title[24:-1]
    return " ".join(reversed(bare_title.split()))

In [107]:
#flips PDF for print on demand
def flip_PDF(inpfn):
    """Write a copy of the PDF ``inpfn`` with every page rotated by 180 degrees.

    The copy is named by splitting ``inpfn`` at ".pdf" and inserting
    ".rotated." (``book.pdf`` -> ``book.rotated.pdf``).  The rotation is added
    to whatever rotation a page already has, modulo 360.  Returns ``None``.
    """
    name_pieces = inpfn.split(".pdf")
    rotated_path = name_pieces[0] + ".rotated." + name_pieces[1] + "pdf"
    document = PdfReader(inpfn)
    half_turn = 180
    for page in document.pages:
        current = int(page.inheritable.Rotate or 0)
        page.Rotate = (current + half_turn) % 360

    writer = PdfWriter(rotated_path)
    writer.trailer = document
    writer.write()
    return

In [108]:
def compile_latex(outputname, settings=None):
    """Compile ``outputname`` with XeLaTeX, write the rotated PDF and return the page count.

    XeLaTeX is run twice in nonstop mode (presumably so that references and
    page numbers settle - TODO confirm).  The resulting PDF is passed to
    ``flip_PDF`` and its pages are counted.  A warning is printed when the
    book is too short for a hardcover (< 24 pages) or has 800 pages or more.

    Parameters:
        outputname: path of the .tex file; the PDF is expected at the same
            path with ``.pdf`` in place of ``.tex``.
        settings: book settings dict used for the ``covertype`` check.  When
            omitted, the notebook-level ``book_settings`` is used.

    Returns:
        the number of pages in the compiled PDF.
    """
    # Argument list without shell=True: combined with shell=True, POSIX would
    # run bare "xelatex" and hand the remaining items to the shell instead.
    command = ['xelatex', '-interaction=nonstopmode', outputname]
    for _ in range(2):
        subprocess.run(command)
    pdf_name = outputname.replace(".tex",".pdf")
    flip_PDF(pdf_name)
    pages = len(PdfReader(pdf_name).pages)
    if pages < 24 and (book_settings if settings is None else settings)["covertype"] == "hardcover":
        print("Hardcover books shorter than 24 pages cannot be printed on demand")
    elif pages >= 800:
        print("Warning: PDF output is over 800 pages, which is too long for print-on-demand.")
    return pages

In [131]:
# Build the book: read the LaTeX template and the settings, then write output.tex.
template_lines = pullinput(inputpath)
outputname = "output.tex"
# 'utf-8' is the canonical codec name (the previous spelling 'utf=8' was a typo)
with open('book_settings(4).json',encoding='utf-8') as json_file:
    book_settings = json.load(json_file)
# writeoutput returns the title that the cover cell passes to make_cover
title_heb = writeoutput(outputname,template_lines,book_settings)

https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/json/Jewish%20Thought/Acharonim/Maharal/Gevurot%20Hashem/Hebrew/Gevurot%20Hashem,%20with%20footnotes%20and%20annotations%20by%20Rabbi%20Yehoshua%20D.%20Hartman,%20Machon%20Yerushalyim,%202015-2020.json
https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/json/Jewish%20Thought/Acharonim/Maharal/Notes%20by%20Rabbi%20Yehoshua%20Hartman%20on%20Gevurot%20Hashem/Hebrew/Gevurot%20Hashem,%20with%20footnotes%20and%20annotations%20by%20Rabbi%20Yehoshua%20D.%20Hartman,%20Machon%20Yerushalyim,%202015-2020.json
A source selected has a non-commercial license.
Do not use the PDF version of this text commercially.


In [125]:
# Same file name as in the build cell; assigned again here before compiling.
outputname = "output.tex"
# compile_latex returns the page count of the compiled PDF; the cover cell
# passes it to make_cover.
pages = compile_latex(outputname)

In [79]:
# Cover: read the cover template and write <outputname>_cover.tex.
# Uses title_heb, book_settings and pages from the build and compile cells.
covertemplate = pullinput(coverinpath)
make_cover(outputname,covertemplate,title_heb,book_settings,pages)
# make_cover derives the same file name from outputname
coveroutputname = outputname.replace(".tex","_cover.tex")
# the cover is compiled with a single XeLaTeX run
subprocess.run(['xelatex', '-interaction=nonstopmode', coveroutputname])
# write the 180-degree rotated copy of the cover PDF
flip_PDF(coveroutputname.replace(".tex",".pdf"))