In [1]:
import sys
!{sys.executable} -m pip install pylatex

Collecting pylatex
  Downloading https://files.pythonhosted.org/packages/8a/76/015a1d785221d9b0d2ad80759d892a6d9d0a8a05daffc52202311ea3d652/PyLaTeX-1.4.1.tar.gz (84kB)
Collecting ordered-set (from pylatex)
  Downloading https://files.pythonhosted.org/packages/f5/ab/8252360bfe965bba31ec05112b3067bd129ce4800d89e0b85613bc6044f6/ordered-set-4.0.2.tar.gz
Building wheels for collected packages: pylatex, ordered-set
  Building wheel for pylatex (setup.py): started
  Building wheel for pylatex (setup.py): finished with status 'done'
  Created wheel for pylatex: filename=PyLaTeX-1.4.1-cp37-none-any.whl size=42829 sha256=c5f9b218136444d7f78848b537d394b47930d4f9495c22f1c67e2e5e3b290c8c
  Stored in directory: C:\Users\nkasimer\AppData\Local\pip\Cache\wheels\a2\25\3a\2cc0a6219d95ce34f1f7439a6427c62ca262ebaeb5969db89f
  Building wheel for ordered-set (setup.py): started
  Building wheel for ordered-set (setup.py): finished with status 'done'
  Created wheel for ordered-set: filename=ordered_set-4.0.

In [4]:
import json, urllib.request
from urllib.parse import urlparse, parse_qs
import os, platform, subprocess, csv

In [5]:
def pull_text(string_for_link):
    """Fetch one text from the Sefaria-Export repository and parse it as JSON.

    Args:
        string_for_link: Path of the text below the repository's ``json/``
            directory, without the ``.json`` suffix. It is inserted into the
            URL verbatim, so it must already be URL-safe.

    Returns:
        The decoded JSON document.

    The full URL is printed before the request is made.
    """
    base_url = "https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/json/"
    link = base_url + string_for_link + ".json"
    print(link)
    with urllib.request.urlopen(link) as response:
        raw_bytes = response.read()
    return json.loads(raw_bytes.decode())

In [6]:
def make_body(hebrew_text, english_text, settings):
    """Turn a Sefaria text into the LaTeX lines of the document body.

    Args:
        hebrew_text: Decoded Sefaria JSON of the Hebrew text. Reads
            ``heTitle``, ``sectionNames[0]`` and ``text`` (a list of chapters,
            each a list of string segments).
        english_text: Decoded JSON of the translation, indexed in parallel
            with ``hebrew_text["text"]``, or None when there is no translation.
        settings: Output settings; ``settings["levels"]`` selects the
            granularity. A value above 1 gives one section per segment, 1 gives
            one section per chapter, anything else gives headings only.

    Returns:
        A tuple ``(title_command, output)``: the ``\\newcommand`` defining
        ``\\texttitle``, and the list of LaTeX lines for the body.

    Chapters with no content are skipped entirely: they get neither a heading
    nor an empty section. Chapter numbering still counts them.
    """
    output = []
    title_command = r"\newcommand{\texttitle}{" + hebrew_text["heTitle"] + "}"
    maj_div = hebrew_text["sectionNames"][0]
    levels = settings["levels"]
    has_english = english_text is not None

    for chap_num, perek in enumerate(hebrew_text["text"], start=1):
        if not any(perek):
            # An empty chapter must not produce an empty \textblock/\twocol.
            continue

        if maj_div == "Daf":
            # Two consecutive entries share one daf number; only the entry that
            # lands on a whole number gets a heading.
            daf = (chap_num + 1) / 2
            if daf == round(daf):
                output.append(r"\newchap{דף \hebrewnumeral{" + str(round(daf)) + "}}")
        else:
            output.append(r"\addchap{פרק \hebrewnumeral{" + str(chap_num) + "}}")

        english_perek = english_text["text"][chap_num - 1] if has_english else None

        if levels > 1:
            for mishna_num, mishna in enumerate(perek, start=1):
                output.append(r"\addsec{משנה \hebrewnumeral{" + str(mishna_num) + "}}")
                english_mishna = english_perek[mishna_num - 1] if has_english else None
                output.append(make_section(mishna, english_mishna, settings, chap_num, mishna_num))
        elif levels == 1:
            # The newline after \par is required: without it the control word
            # runs straight into the next segment ("\par" + letters), which
            # TeX reads as one undefined control sequence.
            perek_he = "".join(item + "\n" + r"\par" + "\n" for item in perek)
            if has_english:
                perek_en = "".join(english_perek[index] for index in range(len(perek)))
            else:
                perek_en = None
            # The segment number handed on is one past the last segment, as before.
            output.append(make_section(perek_he, perek_en, settings, chap_num, len(perek) + 1))

    return title_command, output

In [7]:
def make_section(hebrew_text, english, settings, chap_num, mishna_num):
    """Wrap one unit of text in its LaTeX macro and convert HTML tags to TeX.

    Args:
        hebrew_text: Hebrew source text of the unit.
        english: Translation of the unit, or None when there is none. Square
            brackets in it are wrapped in braces (``[`` -> ``{[``,
            ``]`` -> ``]}``).
        settings: Output settings; ``settings["layout"]`` is consulted only
            when ``english`` is None.
        chap_num: Chapter number of the unit (accepted but not used here).
        mishna_num: Segment number of the unit (accepted but not used here).

    Returns:
        ``\\textblock{he}{en}`` when a translation is given, otherwise
        ``\\twocol{he}`` for the "twocol" layout or ``\\textblock{he}``. Every
        pattern from ``resources/html_tags_to_tex.csv`` is replaced, in file
        order.

    The CSV is expected to have a header row followed by ``pattern,replacement``
    rows. Blank rows, rows with fewer than two cells and rows with an empty
    pattern are ignored.
    """
    if english is not None:
        english = english.replace("[", "{[").replace("]", "]}")
        output = r"\textblock{" + hebrew_text + "}{" + english + "}"
    elif settings["layout"] == "twocol":
        output = r"\twocol{" + hebrew_text + "}"
    else:
        output = r"\textblock{" + hebrew_text + "}"

    # Explicit UTF-8 keeps the mapping independent of the platform's default
    # encoding; newline='' is what the csv module asks for.
    with open('resources/html_tags_to_tex.csv', encoding='utf-8', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row; tolerate an empty file
        for row in csv_reader:
            if len(row) < 2 or not row[0]:
                # An empty pattern would make str.replace insert the
                # replacement between every character.
                continue
            output = output.replace(row[0], row[1])
    return output

In [8]:
def set_format(template_lines,settings):
    """Expand the setting and marker lines of the LaTeX template.

    Every template line is compared, without its trailing newline, against
    the setting names and against a fixed set of ``%marker`` comments:

    * a line equal to a key of ``settings`` becomes ``key=value,``
      (the value must be a string);
    * ``%setfontsize``, ``%sethebfont``, ``%setengfont``, ``%setparskip``,
      ``%pagenumber``, ``%header`` and ``%chapfontsize`` are replaced by the
      LaTeX commands built from the matching settings;
    * any other line is passed through unchanged. This includes
      ``%setengfont`` / ``%setparskip`` when their setting is None.

    Args:
        template_lines: Lines of the template, normally still carrying their
            trailing newline. A last line without one is handled as well.
        settings: Output settings dictionary.

    Returns:
        The list of output lines. Pass-through lines and ``key=value,`` lines
        keep or carry a trailing newline; generated commands have none.

    Raises:
        ValueError: if ``settings["pagenumloc"]`` or ``settings["headpos"]``
            is needed by a marker and is not one of the supported values.
        KeyError: if a marker needs a setting that is missing.
    """
    page_number_openers = {
        "topouter": r"\fancyhead[LO,RE]{",
        "bottommiddle": r"\fancyfoot[C]{",
    }
    # headpos -> (opening of the odd-page header, opening of the even-page header)
    header_openers = {
        "center": (r"\fancyhead[CO]{", r"\fancyhead[CE]{"),
        "inner": (r"\fancyhead[RO]{", r"\fancyhead[LE]{"),
    }
    header_contents = {
        "title": r"\texttitle",
        "chapter": r"\chapname",
        "titlechapter": r"\texttitle \space\textendash\space \chapname",
    }

    output = []
    for line in template_lines:
        # rstrip instead of line[0:-1]: a final line without a newline must not
        # lose its last real character.
        key = line.rstrip("\n")
        if key in settings:
            output.append(key + "=" + settings[key] + ",\n")
        elif key == "%setfontsize":
            fontsize = settings["fontsize"]
            skip = fontsize * settings["spacing"]
            output.append(r"\fontsize{" + str(fontsize) + r"pt}{" + str(round(skip, 1)) + r"pt} \selectfont")
        elif key == "%sethebfont":
            if settings["hebboldfont"] is None:
                font = r"\setmainfont{" + settings["hebfont"] + r"}"
            else:
                font = r"\setmainfont[BoldFont = {" + settings["hebboldfont"] + r'}]{' + settings["hebfont"] + r"}"
            output.append(font)
        elif key == "%setengfont" and settings["engfont"] is not None:
            output.append(r'\newfontfamily\englishfont{' + settings["engfont"] + r'}')
        elif key == "%setparskip" and settings["parskip"] is not None:
            output.append(r'\setlength{\parskip}{' + settings["parskip"] + '}')
        elif key == "%pagenumber":
            location = settings["pagenumloc"]
            if location not in page_number_openers:
                raise ValueError(
                    "unsupported pagenumloc %r (expected one of %s)"
                    % (location, ", ".join(sorted(page_number_openers)))
                )
            # Kept as an equality test so that only True (or 1) enables Hebrew numerals.
            if settings["pagenumheb"] == True:
                number = r"\hebrewnumeral{\thepage}"
            else:
                number = r"\thepage"
            output.append(page_number_openers[location] + number + "}")
        elif key == "%header":
            position = settings["headpos"]
            if position not in header_openers:
                raise ValueError(
                    "unsupported headpos %r (expected one of %s)"
                    % (position, ", ".join(sorted(header_openers)))
                )
            odd_opener, even_opener = header_openers[position]
            # An unrecognised evenhead/oddhead value yields an empty header.
            even_header = even_opener + header_contents.get(settings["evenhead"], "") + "}"
            odd_header = odd_opener + header_contents.get(settings["oddhead"], "") + "}"
            output.append(odd_header)
            output.append(even_header)
        elif key == "%chapfontsize":
            if "chapfontsize" in settings:
                size = settings["chapfontsize"]
                output.append(r"\fontsize{" + size + "}{" + size + r"}\selectfont")
            else:
                output.append(r"\LARGE")
        else:
            output.append(line)
    return output

In [9]:
# Settings for one rendering run. The dictionary is passed to writeoutput(),
# which hands it on to set_format(), make_body() and make_section().
output_settings = {
    # Path of the text below the Sefaria-Export "json/" directory, without the
    # ".json" suffix (see pull_text). It is used in the URL as-is, hence the
    # percent-encoding.
    "text": "Talmud/Bavli/Commentary/Ramban/Seder%20Tahorot/Chiddushei%20Ramban%20on%20Niddah/Hebrew/Chiddushei%20HaRamban%2C%20Jerusalem%201928-29",
    # Same kind of path for a translation; None means no translation is fetched.
    "translation":None,
    # set_format() rewrites any template line that is exactly a key of this
    # dictionary as "key=value,". Presumably that is how the page-size and
    # margin values below reach the template -- confirm against resources/input.tex.
    "paperheight" : "11in",
    "paperwidth" : "8.5in",
    # Main font (\setmainfont); hebboldfont, when not None, is passed as its BoldFont.
    "hebfont":"Frank Ruehl CLM",
    "hebboldfont":None,
    # Font for \englishfont; None leaves the %setengfont template line untouched.
    "engfont":"EB Garamond",
    "top" : "0.5in",
    "bottom" :"0.5in",
    "inner" : "0.7in",
    "outer" : "0.5in",
    # Font size in pt; the baseline skip is fontsize * spacing.
    "fontsize":10.5,
    "spacing":2,
    # NOTE(review): "english" and "newpage" are not read by any code visible in
    # this notebook -- confirm whether they are still needed.
    "english":False,
    "newpage":False,
    # >1: one section per segment; 1: one block per chapter (see make_body).
    "levels":1,
    # "twocol" selects \twocol for text without a translation; anything else
    # selects \textblock (see make_section).
    "layout":"twocol",
    # Value for \setlength{\parskip}; None leaves the %setparskip line untouched.
    "parskip":"8pt",
    # Page number position: "topouter" or "bottommiddle".
    "pagenumloc":"topouter",
    # True prints the page number through \hebrewnumeral.
    "pagenumheb":False,
    # Running header position: "center" or "inner".
    "headpos":"center",
    # Header content per page parity: "title", "chapter" or "titlechapter".
    "evenhead":"title",
    "oddhead":"chapter",
    # Used as both the size and the baseline skip of the %chapfontsize command;
    # when the key is absent, \LARGE is used instead.
    "chapfontsize":"16pt"
}

In [24]:
def get_bib_info(json):
    """Extract the bibliographic fields of a Sefaria text version.

    Args:
        json: Decoded Sefaria JSON document. It must contain the keys
            ``versionSource``, ``license`` and ``versionTitle``.

    Returns:
        A dict with the keys ``source``, ``license`` and ``version``, in that
        order.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # (key in the result, key in the Sefaria document)
    field_map = (
        ("source", "versionSource"),
        ("license", "license"),
        ("version", "versionTitle"),
    )
    return {target: json[origin] for target, origin in field_map}

def print_source_data(source_list):
    """Build the LaTeX ``itemize`` listing for the sources that were used.

    Args:
        source_list: List of dicts as produced by get_bib_info (keys
            ``source``, ``license`` and ``version``).

    Returns:
        The lines of an ``itemize`` environment with one item per source,
        followed by its license and source URL. With more than one source the
        details of each are wrapped in a nested ``itemize``. Hyphens in the
        version title are replaced by ``\\textendash``.

        If a license contains "NC" or "Copyright", the two-element list
        ``["NC", version_title]`` of the first such source is returned instead.
    """
    lines = [r"\begin{itemize}"]
    for entry in source_list:
        license_text = entry["license"]
        if "NC" in license_text or "Copyright" in license_text:
            # Sentinel result: the caller checks element 0 for "NC".
            return ["NC", entry["version"]]
        lines.append(r"\item " + entry["version"].replace("-", r"\textendash "))
        details = [
            r"\item License: " + license_text,
            r"\item Source: \url{" + entry["source"] + "}",
        ]
        if len(source_list) > 1:
            details = [r"\begin{itemize}", *details, r"\end{itemize}"]
        lines.extend(details)
    lines.append(r"\end{itemize}")
    return lines

In [25]:
# Location of the template file, relative to the working directory; it is
# read by pullinput() in the driver cell.
inputpath = os.path.join("resources","input.tex")
def pullinput(inputpath):
    """Read the template file and return its lines.

    Args:
        inputpath: Path of the UTF-8 encoded template file.

    Returns:
        A list with one string per line; line endings are kept.
    """
    with open(inputpath, mode='r', encoding='utf-8') as template_file:
        return list(template_file)

def writeoutput(outputpath, template, formatting):
    """Render the template with the chosen text and write the .tex file.

    The template is first expanded with set_format(). The Hebrew text and, if
    configured, its translation are downloaded with pull_text() and converted
    with make_body(). The result is then written line by line, with three
    marker lines replaced:

    * ``%title_here``   -> the ``\\texttitle`` definition,
    * ``%license info`` -> the source/license listing,
    * ``%body_here``    -> the body of the text.

    Every written line ends with a newline.

    Args:
        outputpath: Path of the .tex file to create (UTF-8).
        template: Template lines as returned by pullinput().
        formatting: Output settings; ``formatting["text"]`` and
            ``formatting["translation"]`` name the texts to download.

    Returns:
        None. If one of the sources has a license that print_source_data()
        rejects, a message is printed and no file is written.
    """
    template_with_settings = set_format(template, formatting)

    sefaria_json = pull_text(formatting["text"])
    sources = [get_bib_info(sefaria_json)]
    english_json = None
    if formatting["translation"] is not None:
        english_json = pull_text(formatting["translation"])
        sources.append(get_bib_info(english_json))

    title_command, body = make_body(sefaria_json, english_json, formatting)

    source_listing = print_source_data(sources)
    if source_listing[0] == "NC":
        print(source_listing[1] + " has a license which does not allow creation of this text.")
        return

    with open(outputpath, 'w', encoding='utf-8') as outfile:
        for line in template_with_settings:
            # Compare without the line ending so that a marker on a final line
            # lacking a newline is still recognised.
            marker = line.rstrip("\n")
            if marker == "%title_here":
                # Terminate the line; otherwise the next template line is glued
                # onto the \newcommand.
                outfile.write(title_command)
                if not title_command.endswith("\n"):
                    outfile.write("\n")
            elif marker == "%license info":
                for item in source_listing:
                    outfile.write(item)
                    outfile.write("\n")
            elif marker == "%body_here":
                for newline in body:
                    outfile.write(newline)
                    outfile.write("\n")
            else:
                outfile.write(line)
                # Generated commands from set_format() carry no line ending.
                if not line.endswith("\n"):
                    outfile.write("\n")

In [26]:
# Read the template from inputpath and render it with output_settings.
template_lines = pullinput(inputpath)
# Name of the generated .tex file (relative to the working directory); the
# same name is handed to xelatex in the following cell.
outputname = "output.tex"
writeoutput(outputname,template_lines,output_settings)

https://raw.githubusercontent.com/Sefaria/Sefaria-Export/master/json/Talmud/Bavli/Commentary/Ramban/Seder%20Tahorot/Chiddushei%20Ramban%20on%20Niddah/Hebrew/Chiddushei%20HaRamban%2C%20Jerusalem%201928-29.json


In [27]:
# Compile the generated file with XeLaTeX. nonstopmode keeps the run from
# waiting for keyboard input when TeX reports an error. check=True is not
# passed, so a failed compilation does not raise: inspect the returncode of
# the CompletedProcess shown as the cell result (non-zero means errors).
subprocess.run(['xelatex', '-interaction=nonstopmode', outputname])

CompletedProcess(args=['xelatex', '-interaction=nonstopmode', 'output.tex'], returncode=1)