In [1]:
import requests
import pandas as pd

URI = "https://github.com/ViktorQvarfordt/unicode-latex/blob/master/latex-unicode.json?raw=true"

response = requests.get(URI)
if response.status_code == 200:
    data = response.json()
else:
    raise Exception(f"Failed to fetch data: {response.status_code}")
data

{'\\mathexclam': '!',
 '\\mathoctothorpe': '#',
 '\\mathdollar': '$',
 '\\mathpercent': '%',
 '\\mathampersand': '&',
 '\\lparen': '(',
 '\\rparen': ')',
 '\\mathplus': '+',
 '\\mathcomma': ',',
 '\\mathperiod': '.',
 '\\mathslash': '/',
 '\\mathcolon': ':',
 '\\mathsemicolon': ';',
 '\\less': '<',
 '\\equal': '=',
 '\\greater': '>',
 '\\mathquestion': '?',
 '\\mathatsign': '@',
 '\\lbrack': '[',
 '\\backslash': '\\',
 '\\rbrack': ']',
 '\\lbrace': '{',
 '\\vert': '|',
 '\\rbrace': '}',
 '\\mathsterling': '£',
 '\\mathyen': '¥',
 '\\mathsection': '§',
 '\\neg': '¬',
 '\\pm': '±',
 '\\mathparagraph': '¶',
 '\\cdotp': '·',
 '\\times': '×',
 '\\matheth': 'ð',
 '\\div': '÷',
 '\\Zbar': 'Ƶ',
 '\\Alpha': 'Α',
 '\\Beta': 'Β',
 '\\Gamma': 'Γ',
 '\\Delta': 'Δ',
 '\\Epsilon': 'Ε',
 '\\Zeta': 'Ζ',
 '\\Eta': 'Η',
 '\\Theta': 'Θ',
 '\\Iota': 'Ι',
 '\\Kappa': 'Κ',
 '\\Lambda': 'Λ',
 '\\Mu': 'Μ',
 '\\Nu': 'Ν',
 '\\Xi': 'Ξ',
 '\\Omicron': 'Ο',
 '\\Pi': 'Π',
 '\\Rho': 'Ρ',
 '\\Sigma': 'Σ',
 '\\Tau': 'Τ

In [10]:
# Convert the JSON data to a pandas DataFrame
df = pd.DataFrame({"latex": data.keys(), "unicode": data.values()})
df

Unnamed: 0,latex,unicode
0,\mathexclam,!
1,\mathoctothorpe,#
2,\mathdollar,$
3,\mathpercent,%
4,\mathampersand,&
...,...,...
2512,^R,ᴿ
2513,^T,ᵀ
2514,^U,ᵁ
2515,^V,ⱽ


In [11]:
#first, remove all backslashes
df["latex"] = df["latex"].str.replace("\\", "", regex=False)
df.head(20)

Unnamed: 0,latex,unicode
0,mathexclam,!
1,mathoctothorpe,#
2,mathdollar,$
3,mathpercent,%
4,mathampersand,&
5,lparen,(
6,rparen,)
7,mathplus,+
8,mathcomma,","
9,mathperiod,.


In [12]:
#leave only non-ascii characters
df = df.loc[df["unicode"].str.contains(r"[^\x00-\x7F]")]
df

Unnamed: 0,latex,unicode
24,mathsterling,£
25,mathyen,¥
26,mathsection,§
27,neg,¬
28,pm,±
...,...,...
2512,^R,ᴿ
2513,^T,ᵀ
2514,^U,ᵁ
2515,^V,ⱽ


In [13]:
#converts braces to underscores
df["latex"] = df["latex"].str.replace("{", "_", regex=False)
df["latex"] = df["latex"].str.replace("}", "", regex=False)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["latex"] = df["latex"].str.replace("{", "_", regex=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["latex"] = df["latex"].str.replace("}", "", regex=False)


Unnamed: 0,latex,unicode
24,mathsterling,£
25,mathyen,¥
26,mathsection,§
27,neg,¬
28,pm,±
...,...,...
2512,^R,ᴿ
2513,^T,ᵀ
2514,^U,ᵁ
2515,^V,ⱽ


In [14]:
df[df["latex"].str.contains("_")].head(20)

Unnamed: 0,latex,unicode
140,mathbb_C,ℂ
142,mathcal_g,ℊ
143,mathcal_H,ℋ
144,mathfrak_H,ℌ
145,mathbb_H,ℍ
148,mathcal_I,ℐ
150,mathcal_L,ℒ
152,mathbb_N,ℕ
154,mathbb_P,ℙ
155,mathbb_Q,ℚ


In [16]:
df["latex"] = df["latex"].str.replace("math", "", regex=False)

24      sterling
25           yen
26       section
27           neg
28            pm
          ...   
2512          ^R
2513          ^T
2514          ^U
2515          ^V
2516          ^W
Name: latex, Length: 2493, dtype: object

In [19]:
# Are there redundant entries?
df["latex"].is_unique

True

In [20]:
df.to_csv("math_unicode.csv", index=False, header=False)