Skip to content

Commit

Permalink
Merge pull request #223 from t-bltg/accents
Browse files Browse the repository at this point in the history
Support diacritics (accents, ...)
  • Loading branch information
korsbo authored Jul 22, 2022
2 parents 8b860d3 + bf1f534 commit 0541bbf
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 26 deletions.
24 changes: 24 additions & 0 deletions src/latexify_function.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
@doc doc"""
latexify(args...; kwargs...)
Latexify a string, an expression, an array or other complex types.
```julia-repl
julia> latexify("x+y/(b-2)^2")
L"$x + \frac{y}{\left( b - 2 \right)^{2}}$"
julia> latexify(:(x/(y+x)^2))
L"$\frac{x}{\left( y + x \right)^{2}}$"
julia> latexify(["x/y" 3//7 2+3im; 1 :P_x :(gamma(3))])
L"\begin{equation}
\left[
\begin{array}{ccc}
\frac{x}{y} & \frac{3}{7} & 2+3\mathit{i} \\
1 & P_{x} & \Gamma\left( 3 \right) \\
\end{array}
\right]
\end{equation}
"
```
"""
function latexify(args...; kwargs...)
kwargs = merge(default_kwargs, kwargs)
result = process_latexify(args...; kwargs...)
Expand Down
19 changes: 13 additions & 6 deletions src/latexraw.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ latexraw(args...; kwargs...) = process_latexify(args...; kwargs..., env=:raw)

function _latexraw(inputex::Expr; convert_unicode=true, kwargs...)
## Pass all arrays or matrices in the expr to latexarray
inputex = postwalk(x -> x isa Expr && x.head in [:hcat, :vcat, :vect, :typed_vcat, :typed_hcat] ?
inputex = postwalk(x -> Meta.isexpr(x, [:hcat, :vcat, :vect, :typed_vcat, :typed_hcat]) ?
latexarray(expr_to_array(x); kwargs...)
: x,
inputex)
Expand All @@ -68,12 +68,12 @@ function _latexraw(inputex::Expr; convert_unicode=true, kwargs...)
function recurseexp!(ex)
prevOp = Vector{Symbol}(undef, length(ex.args))
fill!(prevOp, :none)
if ex.head==:call && ex.args[1] in (:sum, :prod) && ex.args[2] isa Expr && ex.args[2].head == :generator
if Meta.isexpr(ex, :call) && ex.args[1] in (:sum, :prod) && Meta.isexpr(ex.args[2], :generator)
op = ex.args[1]
term = latexraw(ex.args[2].args[1])
gen = ex.args[2].args[2]
itervar = latexraw(gen.args[1])
if gen.args[2] isa Expr && gen.args[2].head == :call && gen.args[2].args[1] == :(:)
if Meta.isexpr(gen.args[2], :call) && gen.args[2].args[1] == :(:)
# sum(x_n for n in n_0:N) => \sum_{n=n_0}^{N} x_n
lower = latexraw(gen.args[2].args[2])
upper = latexraw(gen.args[2].args[end])
Expand Down Expand Up @@ -111,9 +111,11 @@ function _latexraw(args...; kwargs...)
end
# Arrays and tuples are delegated to the array renderer.
_latexraw(arr::Union{AbstractArray, Tuple}; kwargs...) = _latexarray(arr; kwargs...)

# `nothing` renders as an empty string.
_latexraw(i::Nothing; kwargs...) = ""

# Substrings are parsed into an expression unless `parse=false` is given.
# (Only this definition is kept: an older `_latexraw(i::SubString; kwargs...)` method
# has the same dispatch signature and would merely be overwritten by this one.)
# NOTE(review): with `parse=false` the substring is handed back to `latexraw` without
# the `parse` flag -- confirm that it does not come back here with the default
# `parse=true`.
_latexraw(i::SubString; parse=true, kwargs...) = latexraw(parse ? Meta.parse(i) : i; kwargs...)

# A substring of a LaTeXString is returned unchanged.
_latexraw(i::SubString{LaTeXStrings.LaTeXString}; kwargs...) = i

# Rationals with denominator 1 render as their numerator, all others as the
# division expression `num / den`.
_latexraw(i::Rational; kwargs...) = i.den == 1 ? latexraw(i.num; kwargs...) : latexraw(:($(i.num)/$(i.den)); kwargs...)

# Quoted values are unwrapped; the keyword arguments are forwarded so that the
# caller's options also apply to the wrapped value, as in the sibling methods.
_latexraw(i::QuoteNode; kwargs...) = _latexraw(i.value; kwargs...)

function _latexraw(z::Complex; kwargs...)
if iszero(z.re)
isone(z.im) && return LaTeXString(get(kwargs, :imaginary_unit, "\\mathit{i}"))
Expand Down Expand Up @@ -142,15 +144,20 @@ function _latexraw(i::Symbol; convert_unicode=true, snakecase=false, safescripts
return LaTeXString(str)
end

function _latexraw(i::String; kwargs...)
# Entry point for plain strings: the `parse` flag is lifted into a `Val` so that the
# parsing and the non-parsing code paths are separate methods.
function _latexraw(i::String; parse=true, kwargs...)
    return _latexraw(Val(parse), i; kwargs...)
end

# `parse=false`: the string is not parsed as an expression. Its unicode characters
# are translated with `unicode2latex` unless `convert_unicode=false`, and the text is
# wrapped in a `LaTeXString`.
function _latexraw(::Val{false}, i::String; convert_unicode=true, kwargs...)
    text = convert_unicode ? unicode2latex(i) : i
    return LaTeXString(text)
end

function _latexraw(::Val{true}, i::String; kwargs...)
try
ex = Meta.parse(i)
return latexraw(ex; kwargs...)
catch ParseError
error("""
in Latexify.jl:
You are trying to create latex-maths from a `String` that cannot be parsed as
an expression.
an expression: `$i`.
`latexify` will, by default, try to parse any string inputs into expressions
and this parsing has just failed.
Expand Down
2 changes: 1 addition & 1 deletion src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ end

function _executable(expr)
return postwalk(expr) do ex
if ex isa Expr && ex.head == :$
if Meta.isexpr(ex, :$)
return ex.args[1]
end
return ex
Expand Down
96 changes: 83 additions & 13 deletions src/unicode2latex.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,61 @@
const unicodedict = Dict{Char, String}(
import Base.Unicode

"""
    latex_diacritics(c::Char)

Return a vector of `unicode => latex` pairs covering the accented forms of the letter
`c`, in both lower and upper case (the case of `c` itself does not matter).

For every supported combining mark, the decomposed sequence (letter followed by the
combining mark) is mapped to `\\textrm{\\<escape>{<letter>}}`. When normalization
yields a different representation of that sequence (typically a single precomposed
character), that representation is mapped to the same escape as well.
See https://en.wikibooks.org/wiki/LaTeX/Special_Characters#Escaped_codes

Supported marks, shown for the letter `a`:

    U+0300  grave                              \\`{a}
    U+0301  acute                              \\'{a}
    U+0302  circumflex                         \\^{a}
    U+0303  tilde                              \\~{a}
    U+0304  macron (bar above)                 \\={a}
    U+0306  breve                              \\u{a}
    U+0307  dot above                          \\.{a}
    U+0308  umlaut (trema, dieresis)           \\"{a}
    U+030A  ring                               \\r{a}
    U+030B  hungarian umlaut (double acute)    \\H{a}
    U+030C  caron                              \\v{a}
    U+0323  dot under                          \\d{a}
    U+0327  cedilla                            \\c{a}
    U+0328  ogonek                             \\k{a}
    U+0331  bar under                          \\b{a}
"""
function latex_diacritics(c::Char)
    lower = lowercase(c)
    upper = uppercase(lower)
    # Keys are either a single (precomposed) Char or a String (letter + combining mark).
    out = Pair{Union{Char,String},String}[]
    for (escape, codepoint) in (
        '`' => 0x300,  # latex sequence \`{c} maps to 'c' * Char(0x300)
        '\'' => 0x301,
        '^' => 0x302,
        '~' => 0x303,
        '=' => 0x304,
        'u' => 0x306,
        '.' => 0x307,
        '"' => 0x308,
        'r' => 0x30a,
        'H' => 0x30b,
        'v' => 0x30c,
        'd' => 0x323,
        'c' => 0x327,
        'k' => 0x328,
        'b' => 0x331,
    )
        mark = Char(codepoint)
        for letter in (lower, upper)
            decomposed = letter * mark
            latex = "\\textrm{\\$(escape){$(letter)}}"
            push!(out, decomposed => latex)
            # The decomposed and the normalized forms look the same but compare
            # unequal, so both have to be registered as keys.
            composed = Unicode.normalize(decomposed)
            if composed != decomposed
                key = length(composed) == 1 ? first(composed) : composed
                push!(out, key => latex)
            end
        end
    end
    return out
end

const unicodedict = Dict{Union{Char,String}, String}(
'𝑅' => raw"\mathit{R}",
'' => raw"\blockrighthalf",
'' => raw"\varheartsuit",
Expand Down Expand Up @@ -2450,26 +2507,39 @@ const unicodedict = Dict{Char, String}(
'Χ' => raw"\Chi",
'' => raw"\neovsearrow",
'' => raw"\bullet",
)
(latex_diacritics.('a':'z')...)...,
)

"""
    unicode2latex(str::String; safescripts=false)

Translate the non-ASCII characters of `str` using `unicodedict`.

Pure ASCII input is returned unchanged. Otherwise a character directly followed by a
combining mark is looked up together with that mark; everything else is looked up
character by character, and entries missing from `unicodedict` are kept as they are.
A multi-character piece that is directly followed by a letter or digit is wrapped in
braces. Finally the result is passed through `merge_subscripts` and
`merge_superscripts`, both of which receive `safescripts`.
"""
function unicode2latex(str::String; safescripts=false)
    isascii(str) && return str

    # Pair every character with one directly following nonspacing combining mark,
    # see en.wikipedia.org/wiki/Combining_character
    tokens = sizehint!(Union{Char,String}[], length(str))
    it = Iterators.Stateful(str)
    while !isempty(it)
        c = popfirst!(it)
        nxt = peek(it)
        if nxt !== nothing && Unicode.category_code(nxt) == Unicode.UTF8PROC_CATEGORY_MN
            push!(tokens, c * popfirst!(it))
        else
            push!(tokens, c)
        end
    end
    str_array = Union{Char,String}[get(unicodedict, k, k) for k in tokens]

    # Brace a multi-character piece that is directly followed by a letter or digit.
    # A plain index loop is used here: it does not depend on the internal fields of
    # `Iterators.Stateful`, and only entries at or before the current index are
    # rewritten, so the look-ahead always sees an untouched element.
    for i in firstindex(str_array):(lastindex(str_array) - 1)
        x = str_array[i]
        nxt = str_array[i + 1]
        if x isa String && nxt isa Char && (isletter(nxt) || isdigit(nxt))
            str_array[i] = "{$x}"
        end
    end

    str = merge_subscripts(join(str_array); safescripts=safescripts)
    return merge_superscripts(str; safescripts=safescripts)
end

"""
Expand Down
8 changes: 4 additions & 4 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,11 @@ end
safereduce(f, args) = length(args) == 1 ? f(args[1]) : reduce(f, args)

function expr_to_array(ex)
ex.head == :typed_vcat && (ex = Expr(:vcat, ex.args[2:end]...))
ex.head == :typed_hcat && (ex = Expr(:hcat, ex.args[2:end]...))
ex.head == :ref && (ex = Expr(:vect, ex.args[2:end]...))
ex.head === :typed_vcat && (ex = Expr(:vcat, ex.args[2:end]...))
ex.head === :typed_hcat && (ex = Expr(:hcat, ex.args[2:end]...))
ex.head === :ref && (ex = Expr(:vect, ex.args[2:end]...))
## If it is a matrix
if ex.args[1] isa Expr && ex.args[1].head == :row
if Meta.isexpr(ex.args[1], :row)
return eval(ex.head)(map(y -> permutedims(y.args), ex.args)...)
else
if ex.head == :hcat
Expand Down
2 changes: 1 addition & 1 deletion test/latexify_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ reset_default()
@test latexify("x * y") ==
raw"$x \cdot y$"


@test latexify("Plots.jl") isa LaTeXString
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ using Test
@testset "latextabular tests" begin include("latextabular_test.jl") end
@testset "mdtable tests" begin include("mdtable_test.jl") end
@testset "DataFrame Plugin" begin include("plugins/DataFrames.jl") end
@testset "unocode2latex" begin include("unicode2latex.jl") end
@testset "unicode2latex" begin include("unicode2latex.jl") end
@testset "cdot test" begin include("cdot_test.jl") end
@testset "numberformatters" begin include("numberformatters_test.jl") end
@testset "utils test" begin include("utils_test.jl") end
24 changes: 24 additions & 0 deletions test/unicode2latex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,27 @@ raw"\begin{equation}
", "\r\n"=>"\n")

@test latexify("αaβ") == raw"${\alpha}a\beta$"

@test latexify("αaβ").s == raw"${\alpha}a\beta$"

@test latexify("ÀéÜ"; parse=false).s == raw"$\textrm{\`{A}}\textrm{\'{e}}\textrm{\\\"{U}}$"

@test latexify("w̋Ṽî"; parse=false).s == raw"$\textrm{\H{w}}\textrm{\~{V}}\textrm{\^{i}}$"

@test latexify("çĘf̄"; parse=false).s == raw"$\textrm{\c{c}}\textrm{\k{E}}\textrm{\={f}}$"

@test latexify("ṞȯX̣"; parse=false).s == raw"$\textrm{\b{R}}\textrm{\.{o}}\textrm{\d{X}}$"

@test latexify("ẙĞž"; parse=false).s == raw"$\textrm{\r{y}}\textrm{\u{G}}\textrm{\v{z}}$"

# Decomposed input: every letter is followed by an explicit combining mark
# (U+030A, U+0302, U+0331) instead of being written as one precomposed character.
s = 'y' * Char(0x30a) * 'x' * Char(0x302) * 'a' * Char(0x331)
@test latexify(s; parse=false).s == raw"$\textrm{\r{y}}\textrm{\^{x}}\textrm{\b{a}}$"

# Same combining marks on upper case letters.
s = 'Y' * Char(0x30a) * 'X' * Char(0x302) * 'A' * Char(0x331)
@test latexify(s; parse=false).s == raw"$\textrm{\r{Y}}\textrm{\^{X}}\textrm{\b{A}}$"

# A second set of combining marks (U+0308, U+0304, U+0306), lower case.
s = 'i' * Char(0x308) * 'z' * Char(0x304) * 'e' * Char(0x306)
@test latexify(s; parse=false).s == raw"$\textrm{\\\"{i}}\textrm{\={z}}\textrm{\u{e}}$"

# The same second set on upper case letters.
s = 'I' * Char(0x308) * 'Z' * Char(0x304) * 'E' * Char(0x306)
@test latexify(s; parse=false).s == raw"$\textrm{\\\"{I}}\textrm{\={Z}}\textrm{\u{E}}$"

0 comments on commit 0541bbf

Please sign in to comment.