From f002a65791cc54bade6d473a94fe03edc3e55745 Mon Sep 17 00:00:00 2001
From: Lukas Blecher <luk.blecher@gmail.com>
Date: Wed, 20 Apr 2022 18:10:07 +0200
Subject: [PATCH 1/3] add api

---
 pix2tex/api/app.py       | 44 ++++++++++++++++++++++++++++++++++++++++
 pix2tex/api/run.py       | 21 +++++++++++++++++++
 pix2tex/api/streamlit.py | 33 ++++++++++++++++++++++++++++++
 setup.py                 | 21 +++++++++++++------
 4 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100644 pix2tex/api/app.py
 create mode 100644 pix2tex/api/run.py
 create mode 100644 pix2tex/api/streamlit.py

diff --git a/pix2tex/api/app.py b/pix2tex/api/app.py
new file mode 100644
index 0000000..b79941f
--- /dev/null
+++ b/pix2tex/api/app.py
@@ -0,0 +1,44 @@
+from http import HTTPStatus
+from fastapi import FastAPI, File, UploadFile
+from PIL import Image
+from io import BytesIO
+from pix2tex.cli import initialize, call_model
+
+model = None
+app = FastAPI(title='pix2tex API')
+
+
+def read_imagefile(file) -> Image.Image:
+    image = Image.open(BytesIO(file))
+    return image
+
+
+@app.on_event('startup')
+async def load_model():
+    global model
+    if model is None:
+        model = initialize()
+
+
+@app.get('/')
+def root():
+    '''Health check.'''
+    response = {
+        'message': HTTPStatus.OK.phrase,
+        'status-code': HTTPStatus.OK,
+        'data': {},
+    }
+    return response
+
+
+@app.post('/predict/')
+async def predict(file: UploadFile = File(...)):
+    global model
+    image = Image.open(file.file)
+    pred = call_model(*model, img=image)
+    response = {
+        'message': HTTPStatus.OK.phrase,
+        'status-code': HTTPStatus.OK,
+        'data': pred,
+    }
+    return response
diff --git a/pix2tex/api/run.py b/pix2tex/api/run.py
new file mode 100644
index 0000000..e265b2f
--- /dev/null
+++ b/pix2tex/api/run.py
@@ -0,0 +1,21 @@
+from multiprocessing import Process
+import subprocess
+import os
+
+
+def start_api(path='.'):
+    subprocess.call(['uvicorn', 'app:app'], cwd=path)
+
+
+def start_frontend(path='.'):
+    subprocess.call(['streamlit', 'run', 'streamlit.py'], cwd=path)
+
+
+if __name__ == '__main__':
+    path = os.path.realpath(os.path.dirname(__file__))
+    api = Process(target=start_api, kwargs={'path': path})
+    api.start()
+    frontend = Process(target=start_frontend, kwargs={'path': path})
+    frontend.start()
+    api.join()
+    frontend.join()
diff --git a/pix2tex/api/streamlit.py b/pix2tex/api/streamlit.py
new file mode 100644
index 0000000..44735d6
--- /dev/null
+++ b/pix2tex/api/streamlit.py
@@ -0,0 +1,33 @@
+# Streamlit front end: posts the uploaded image to the pix2tex API at http://127.0.0.1:8000.
+import requests
+from PIL import Image
+import streamlit
+
+if __name__ == '__main__':
+    streamlit.set_page_config(page_title='LaTeX-OCR')
+    streamlit.title('LaTeX OCR')
+    streamlit.markdown('Convert images of equations to corresponding LaTeX code.\n\nThis is based on the `pix2tex` module. Check it out [![github](https://img.shields.io/badge/LaTeX--OCR-visit-a?style=social&logo=github)](https://github.com/lukas-blecher/LaTeX-OCR)')
+
+    uploaded_file = streamlit.file_uploader(  # checked against None below before use
+        'Upload an image of an equation',
+        type=['png', 'jpg'],
+    )
+
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file)
+        streamlit.image(image)
+    else:
+        streamlit.text('\n')
+
+    if streamlit.button('Convert'):
+        if uploaded_file is not None and image is not None:
+            with streamlit.spinner('Computing'):
+                response = requests.post('http://127.0.0.1:8000/predict/', files={'file': uploaded_file.getvalue()})
+            if response.ok:
+                latex_code = response.json()['data']
+                streamlit.code(latex_code, language='latex')
+                streamlit.markdown(f'$\\displaystyle {latex_code}$')
+            else:
+                streamlit.error(response.text)
+        else:
+            streamlit.error('Please upload an image.')
diff --git a/setup.py b/setup.py
index 689f857..4fb6213 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,18 @@
 this_directory = Path(__file__).parent
 long_description = (this_directory / "README.md").read_text()
 
+gui = [
+    "PyQt5",
+    "PyQtWebEngine",
+    "pynput",
+    "screeninfo",
+]
+api = [
+    "streamlit>=1.8.1",
+    "fastapi>=0.75.2",
+    "uvicorn[standard]"
+]
+
 setuptools.setup(
     name='pix2tex',
     version='0.0.12',
@@ -53,12 +65,9 @@
         "python-Levenshtein>=0.12.2",
     ],
     extras_require={
-        "gui":  [
-            "PyQt5",
-            "PyQtWebEngine",
-            "pynput",
-            "screeninfo",
-        ]
+        "all": gui+api,
+        "gui": gui,
+        "api": api
     },
     entry_points={
         'console_scripts': [

From 5c63c3b68b0b1d560d70a76b013b5817cafe1cb1 Mon Sep 17 00:00:00 2001
From: Lukas Blecher <luk.blecher@gmail.com>
Date: Thu, 21 Apr 2022 11:15:21 +0200
Subject: [PATCH 2/3] setup: add python-multipart to api extras, bump version to 0.0.15

---
 setup.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/setup.py b/setup.py
index 8f37a8a..41a5d79 100644
--- a/setup.py
+++ b/setup.py
@@ -8,20 +8,21 @@
 long_description = (this_directory / 'README.md').read_text()
 
 gui = [
-    "PyQt5",
-    "PyQtWebEngine",
-    "pynput",
-    "screeninfo",
+    'PyQt5',
+    'PyQtWebEngine',
+    'pynput',
+    'screeninfo',
 ]
 api = [
-    "streamlit>=1.8.1",
-    "fastapi>=0.75.2",
-    "uvicorn[standard]"
+    'streamlit>=1.8.1',
+    'fastapi>=0.75.2',
+    'uvicorn[standard]',
+    'python-multipart'
 ]
 
 setuptools.setup(
     name='pix2tex',
-    version='0.0.14',
+    version='0.0.15',
     description='pix2tex: Using a ViT to convert images of equations into LaTeX code.',
     long_description=long_description,
     long_description_content_type='text/markdown',
@@ -64,9 +65,9 @@
         'imagesize>=1.2.0',
     ],
     extras_require={
-        "all": gui+api,
-        "gui": gui,
-        "api": api
+        'all': gui+api,
+        'gui': gui,
+        'api': api
     },
     entry_points={
         'console_scripts': [

From c160ea0a126f44b53e009d2bc9db7c6b4c48ce1b Mon Sep 17 00:00:00 2001
From: Lukas Blecher <luk.blecher@gmail.com>
Date: Fri, 22 Apr 2022 17:07:50 +0200
Subject: [PATCH 3/3] better inference with model class

---
 notebooks/LaTeX_OCR_test.ipynb |   4 +-
 pix2tex/api/app.py             |  25 +++--
 pix2tex/api/streamlit.py       |   2 +-
 pix2tex/cli.py                 | 165 ++++++++++++++++-----------------
 pix2tex/gui.py                 |  26 ++----
 5 files changed, 109 insertions(+), 113 deletions(-)

diff --git a/notebooks/LaTeX_OCR_test.ipynb b/notebooks/LaTeX_OCR_test.ipynb
index 52846e6..a1c800f 100644
--- a/notebooks/LaTeX_OCR_test.ipynb
+++ b/notebooks/LaTeX_OCR_test.ipynb
@@ -61,7 +61,7 @@
         "\n",
         "from pix2tex import cli as pix2tex\n",
         "from PIL import Image\n",
-        "args = pix2tex.initialize()\n",
+        "model = pix2tex.LatexOCR()\n",
         "\n",
         "from IPython.display import HTML, Math\n",
         "display(HTML(\"<script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/\"\n",
@@ -76,7 +76,7 @@
         "predictions = []\n",
         "for name, f in imgs:\n",
         "    img = Image.open(f)\n",
-        "    math = pix2tex.call_model(*args, img)\n",
+        "    math = model(img)\n",
         "    print(math)\n",
         "    predictions.append('\\\\mathrm{%s} & \\\\displaystyle{%s}'%(name, math))\n",
         "Math(table%'\\\\\\\\'.join(predictions))"
diff --git a/pix2tex/api/app.py b/pix2tex/api/app.py
index b79941f..89b4f62 100644
--- a/pix2tex/api/app.py
+++ b/pix2tex/api/app.py
@@ -1,8 +1,11 @@
+# Adapted from https://github.com/kingyiusuen/image-to-latex/blob/main/api/app.py
+# FastAPI service exposing the LatexOCR model over HTTP.
+
 from http import HTTPStatus
-from fastapi import FastAPI, File, UploadFile
+from fastapi import FastAPI, File, UploadFile, Form
 from PIL import Image
 from io import BytesIO
-from pix2tex.cli import initialize, call_model
+from pix2tex.cli import LatexOCR
 
 model = None
 app = FastAPI(title='pix2tex API')
@@ -17,7 +20,7 @@ def read_imagefile(file) -> Image.Image:
 async def load_model():
     global model
     if model is None:
-        model = initialize()
+        model = LatexOCR()
 
 
 @app.get('/')
@@ -35,10 +38,12 @@ def root():
 async def predict(file: UploadFile = File(...)):
     global model
     image = Image.open(file.file)
-    pred = call_model(*model, img=image)
-    response = {
-        'message': HTTPStatus.OK.phrase,
-        'status-code': HTTPStatus.OK,
-        'data': pred,
-    }
-    return response
+    return model(image)
+
+
+@app.post('/bytes/')
+async def predict_from_bytes(file: bytes = File(...)):
+    '''Predict LaTeX code from raw image bytes; the model is called with resize=False.'''
+    global model
+    image = Image.open(BytesIO(file))
+    return model(image, resize=False)
diff --git a/pix2tex/api/streamlit.py b/pix2tex/api/streamlit.py
index 44735d6..41131e7 100644
--- a/pix2tex/api/streamlit.py
+++ b/pix2tex/api/streamlit.py
@@ -24,7 +24,7 @@
             with streamlit.spinner('Computing'):
                 response = requests.post('http://127.0.0.1:8000/predict/', files={'file': uploaded_file.getvalue()})
             if response.ok:
-                latex_code = response.json()['data']
+                latex_code = response.json()
                 streamlit.code(latex_code, language='latex')
                 streamlit.markdown(f'$\\displaystyle {latex_code}$')
             else:
diff --git a/pix2tex/cli.py b/pix2tex/cli.py
index c89e0bb..e43e76a 100644
--- a/pix2tex/cli.py
+++ b/pix2tex/cli.py
@@ -21,8 +21,6 @@
 from pix2tex.utils import *
 from pix2tex.model.checkpoints.get_latest_checkpoint import download_checkpoints
 
-last_pic = None
-
 
 def minmax_size(img, max_dimensions=None, min_dimensions=None):
     if max_dimensions is not None:
@@ -40,79 +38,77 @@ def minmax_size(img, max_dimensions=None, min_dimensions=None):
     return img
 
 
-@in_model_path()
-def initialize(arguments=None):
-    if arguments is None:
-        arguments = Munch({'config': 'settings/config.yaml', 'checkpoint': 'checkpoints/weights.pth', 'no_cuda': True, 'no_resize': False})
-    logging.getLogger().setLevel(logging.FATAL)
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-    with open(arguments.config, 'r') as f:
-        params = yaml.load(f, Loader=yaml.FullLoader)
-    args = parse_args(Munch(params))
-    args.update(**vars(arguments))
-    args.wandb = False
-    args.device = 'cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu'
-    if not os.path.exists(args.checkpoint):
-        download_checkpoints()
-    model = get_model(args)
-    model.load_state_dict(torch.load(args.checkpoint, map_location=args.device))
-
-    if 'image_resizer.pth' in os.listdir(os.path.dirname(args.checkpoint)) and not arguments.no_resize:
-        image_resizer = ResNetV2(layers=[2, 3, 3], num_classes=max(args.max_dimensions)//32, global_pool='avg', in_chans=1, drop_rate=.05,
-                                 preact=True, stem_type='same', conv_layer=StdConv2dSame).to(args.device)
-        image_resizer.load_state_dict(torch.load(os.path.join(os.path.dirname(args.checkpoint), 'image_resizer.pth'), map_location=args.device))
-        image_resizer.eval()
-    else:
-        image_resizer = None
-    tokenizer = PreTrainedTokenizerFast(tokenizer_file=args.tokenizer)
-    return args, model, image_resizer, tokenizer
-
-
-@in_model_path()
-def call_model(args, model, image_resizer, tokenizer, img=None):
-    global last_pic
-    encoder, decoder = model.encoder, model.decoder
-    if type(img) is bool:
-        img = None
-    if img is None:
-        if last_pic is None:
-            print('Provide an image.')
-            return ''
+class LatexOCR:
+    image_resizer = None
+    last_pic = None
+
+    @in_model_path()
+    def __init__(self, arguments=None):
+        if arguments is None:
+            arguments = Munch({'config': 'settings/config.yaml', 'checkpoint': 'checkpoints/weights.pth', 'no_cuda': True, 'no_resize': False})
+        logging.getLogger().setLevel(logging.FATAL)
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+        with open(arguments.config, 'r') as f:
+            params = yaml.load(f, Loader=yaml.FullLoader)
+        self.args = parse_args(Munch(params))
+        self.args.update(**vars(arguments))
+        self.args.wandb = False
+        self.args.device = 'cuda' if torch.cuda.is_available() and not self.args.no_cuda else 'cpu'
+        if not os.path.exists(self.args.checkpoint):
+            download_checkpoints()
+        self.model = get_model(self.args)
+        self.model.load_state_dict(torch.load(self.args.checkpoint, map_location=self.args.device))
+
+        if 'image_resizer.pth' in os.listdir(os.path.dirname(self.args.checkpoint)) and not arguments.no_resize:
+            self.image_resizer = ResNetV2(layers=[2, 3, 3], num_classes=max(self.args.max_dimensions)//32, global_pool='avg', in_chans=1, drop_rate=.05,
+                                          preact=True, stem_type='same', conv_layer=StdConv2dSame).to(self.args.device)
+            self.image_resizer.load_state_dict(torch.load(os.path.join(os.path.dirname(self.args.checkpoint), 'image_resizer.pth'), map_location=self.args.device))
+            self.image_resizer.eval()
+        self.tokenizer = PreTrainedTokenizerFast(tokenizer_file=self.args.tokenizer)
+
+    @in_model_path()
+    def __call__(self, img=None, resize=True):
+        if type(img) is bool:
+            img = None
+        if img is None:
+            if self.last_pic is None:
+                print('Provide an image.')
+                return ''
+            else:
+                img = self.last_pic.copy()
+        else:
+            self.last_pic = img.copy()
+        img = minmax_size(pad(img), self.args.max_dimensions, self.args.min_dimensions)
+        if (self.image_resizer is not None and not self.args.no_resize) and resize:
+            with torch.no_grad():
+                input_image = img.convert('RGB').copy()
+                r, w, h = 1, input_image.size[0], input_image.size[1]
+                for _ in range(10):
+                    h = int(h * r)  # height to resize
+                    img = pad(minmax_size(input_image.resize((w, h), Image.BILINEAR if r > 1 else Image.LANCZOS), self.args.max_dimensions, self.args.min_dimensions))
+                    t = test_transform(image=np.array(img.convert('RGB')))['image'][:1].unsqueeze(0)
+                    w = (self.image_resizer(t.to(self.args.device)).argmax(-1).item()+1)*32
+                    logging.info('%s %s %s', r, img.size, (w, int(input_image.size[1]*r)))  # first arg must be the format string
+                    if (w == img.size[0]):
+                        break
+                    r = w/img.size[0]
         else:
-            img = last_pic.copy()
-    else:
-        last_pic = img.copy()
-    img = minmax_size(pad(img), args.max_dimensions, args.min_dimensions)
-    if image_resizer is not None and not args.no_resize:
+            img = np.array(pad(img).convert('RGB'))
+            t = test_transform(image=img)['image'][:1].unsqueeze(0)
+        im = t.to(self.args.device)
+
         with torch.no_grad():
-            input_image = img.convert('RGB').copy()
-            r, w, h = 1, input_image.size[0], input_image.size[1]
-            for _ in range(10):
-                h = int(h * r)  # height to resize
-                img = pad(minmax_size(input_image.resize((w, h), Image.BILINEAR if r > 1 else Image.LANCZOS), args.max_dimensions, args.min_dimensions))
-                t = test_transform(image=np.array(img.convert('RGB')))['image'][:1].unsqueeze(0)
-                w = (image_resizer(t.to(args.device)).argmax(-1).item()+1)*32
-                logging.info(r, img.size, (w, int(input_image.size[1]*r)))
-                if (w == img.size[0]):
-                    break
-                r = w/img.size[0]
-    else:
-        img = np.array(pad(img).convert('RGB'))
-        t = test_transform(image=img)['image'][:1].unsqueeze(0)
-    im = t.to(args.device)
-
-    with torch.no_grad():
-        model.eval()
-        device = args.device
-        encoded = encoder(im.to(device))
-        dec = decoder.generate(torch.LongTensor([args.bos_token])[:, None].to(device), args.max_seq_len,
-                               eos_token=args.eos_token, context=encoded.detach(), temperature=args.get('temperature', .25))
-        pred = post_process(token2str(dec, tokenizer)[0])
-    try:
-        clipboard.copy(pred)
-    except:
-        pass
-    return pred
+            self.model.eval()
+            device = self.args.device
+            encoded = self.model.encoder(im.to(device))
+            dec = self.model.decoder.generate(torch.LongTensor([self.args.bos_token])[:, None].to(device), self.args.max_seq_len,
+                                              eos_token=self.args.eos_token, context=encoded.detach(), temperature=self.args.get('temperature', .25))
+            pred = post_process(token2str(dec, self.tokenizer)[0])
+        try:
+            clipboard.copy(pred)  # best effort: a clipboard failure must not discard the prediction
+        except Exception:  # not bare, so KeyboardInterrupt/SystemExit still propagate
+            pass
+        return pred
 
 
 def output_prediction(pred, args):
@@ -144,7 +140,8 @@ def main():
     parser.add_argument('--no-resize', action='store_true', help='Resize the image beforehand')
     arguments = parser.parse_args()
     with in_model_path():
-        args, *objs = initialize(arguments)
+        model = LatexOCR(arguments)
+        file = None
         while True:
             instructions = input('Predict LaTeX code for image ("?"/"h" for help). ')
             possible_file = instructions.strip()
@@ -176,32 +173,32 @@ def main():
                     ''')
                 continue
             elif ins in ['show', 'katex', 'no_resize']:
-                setattr(args, ins, not getattr(args, ins, False))
-                print('set %s to %s' % (ins, getattr(args, ins)))
+                setattr(model.args, ins, not getattr(model.args, ins, False))
+                print('set %s to %s' % (ins, getattr(model.args, ins)))
                 continue
             elif os.path.isfile(os.path.realpath(possible_file)):
-                args.file = possible_file
+                file = possible_file
             else:
                 t = re.match(r't=([\.\d]+)', ins)
                 if t is not None:
                     t = t.groups()[0]
-                    args.temperature = float(t)+1e-8
-                    print('new temperature: T=%.3f' % args.temperature)
+                    model.args.temperature = float(t)+1e-8
+                    print('new temperature: T=%.3f' % model.args.temperature)
                     continue
             try:
                 img = None
-                if args.file:
-                    img = Image.open(args.file)
+                if file:
+                    img = Image.open(file)
                 else:
                     try:
                         img = ImageGrab.grabclipboard()
                     except:
                         pass
-                pred = call_model(args, *objs, img=img)
-                output_prediction(pred, args)
+                pred = model(img)
+                output_prediction(pred, model.args)
             except KeyboardInterrupt:
                 pass
-            args.file = None
+            file = None
 
 
 if __name__ == "__main__":
diff --git a/pix2tex/gui.py b/pix2tex/gui.py
index b274fca..7ed32aa 100644
--- a/pix2tex/gui.py
+++ b/pix2tex/gui.py
@@ -27,17 +27,11 @@ class App(QMainWindow):
     def __init__(self, args=None):
         super().__init__()
         self.args = args
-        self.initModel()
+        self.model = cli.LatexOCR(self.args)
         self.initUI()
         self.snipWidget = SnipWidget(self)
-
         self.show()
 
-    def initModel(self):
-        args, *objs = cli.initialize(self.args)
-        self.args = args
-        self.objs = objs
-
     def initUI(self):
         self.setWindowTitle("LaTeX OCR")
         QApplication.setWindowIcon(QtGui.QIcon(':/icons/icon.svg'))
@@ -59,7 +53,7 @@ def initUI(self):
 
         # Create temperature text input
         self.tempField = QDoubleSpinBox(self)
-        self.tempField.setValue(self.args.get('temperature', 0.25))
+        self.tempField.setValue(self.model.args.get('temperature', 0.25))  # self.args may be None; model.args holds the merged settings
         self.tempField.setRange(0, 1)
         self.tempField.setSingleStep(0.1)
 
@@ -102,6 +96,7 @@ def toggleProcessing(self, value=None):
         else:
             text = 'Snip [Alt+S]'
             func = self.onClick
+            self.retryButton.setEnabled(True)
         self.shortcut.setEnabled(not self.isProcessing)
         self.snipButton.setText(text)
         self.snipButton.clicked.disconnect()
@@ -140,13 +135,13 @@ def returnSnip(self, img=None):
 
         self.show()
         try:
-            self.args.temperature = self.tempField.value()
-            if self.args.temperature == 0:
-                self.args.temperature = 1e-8
+            self.model.args.temperature = self.tempField.value()
+            if self.model.args.temperature == 0:
+                self.model.args.temperature = 1e-8
         except:
             pass
         # Run the model in a separate thread
-        self.thread = ModelThread(img=img, args=self.args, objs=self.objs)
+        self.thread = ModelThread(img=img, model=self.model)
         self.thread.finished.connect(self.returnPrediction)
         self.thread.finished.connect(self.thread.deleteLater)
         self.thread.start()
@@ -198,15 +193,14 @@ def displayPrediction(self, prediction=None):
 class ModelThread(QThread):
     finished = pyqtSignal(dict)
 
-    def __init__(self, img, args, objs):
+    def __init__(self, img, model):
         super().__init__()
         self.img = img
-        self.args = args
-        self.objs = objs
+        self.model = model
 
     def run(self):
         try:
-            prediction = cli.call_model(self.args, *self.objs, img=self.img)
+            prediction = self.model(self.img)
             # replace <, > with \lt, \gt so it won't be interpreted as html code
             prediction = prediction.replace('<', '\\lt ').replace('>', '\\gt ')
             self.finished.emit({"success": True, "prediction": prediction})