### Import necessary packages

In [None]:
import sys, os
sys.path.insert(0, os.path.join('ScanSSD'))
sys.path.insert(0, os.path.join('ScanSSD', 'layers'))
sys.path.insert(0, os.path.join('ScanSSD', 'gtdb'))

from pdf2image import convert_from_path

from collections import OrderedDict
import cv2
import math
import numpy as np
import argparse
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.ops import nms

from pix2tex import cli as pix2tex
from PIL import Image

from ScanSSD.ssd import build_ssd
from ScanSSD.data import config

from p2l_utils import get_rolling_crops, postprocess

from tqdm import tqdm

### Load a paper and convert it to a list of images

In [None]:
# Path of the PDF to process; a later cell also derives its output directory
# from this path.
test_path = './demo/paper.pdf'
# Render the PDF with pdf2image. The result is used below as a sequence with
# one entry per page (each entry is later converted with np.array).
images_list = convert_from_path(test_path)
print(f"The number of pages in the pdf is {len(images_list)}")

### Load LaTeX-OCR model

In [None]:
# Instantiate the pix2tex LaTeX-OCR recogniser with its default arguments.
# It is called further down with one PIL image per detected equation.
model = pix2tex.LatexOCR()

### Load ScanSSD model

In [None]:
from scanssd_wrapper import *

class ArgStub:
    """Plain attribute container passed to ``MathDetector`` as its arguments.

    Every option is stored as an instance attribute with a fixed value; only
    ``cuda`` is computed at construction time.
    NOTE(review): the meaning of each option is defined by ScanSSD /
    ``scanssd_wrapper``, which is not visible here - confirm against that
    code before changing any value.
    """

    def __init__(self):
        settings = {
            # Use the GPU only when torch reports one as available.
            'cuda': bool(torch.cuda.is_available()),
            'kernel': (1, 5),
            'padding': (0, 2),
            'phase': 'test',
            'visual_threshold': 0.8,
            'verbose': False,
            'exp_name': 'SSD',
            'model_type': 512,
            'use_char_info': False,
            'limit': -1,
            'cfg': 'hboxes512',
            'batch_size': 32,
            'num_workers': 4,
            'neg_mining': True,
            'log_dir': 'logs',
            'stride': 0.1,
            'window': 1200,
        }
        # Expose each setting as a regular instance attribute, in table order.
        for option, value in settings.items():
            setattr(self, option, value)


# Build the equation detector from a checkpoint path and the ArgStub options.
# NOTE(review): MathDetector is presumably provided by the
# `from scanssd_wrapper import *` star import above - confirm.
md = MathDetector('./saved_models/AMATH512_e1GTDB.pth', ArgStub())

### Resize images

In [None]:
# Every page is rescaled to this width (pixels); the height follows the
# page's own aspect ratio.
img_size = 1280

new_images = []

for page in images_list:
    # Work on a numpy array so OpenCV can resize it.
    page_array = np.array(page)
    src_height, src_width = page_array.shape[0], page_array.shape[1]
    # cv2.resize takes the target size as (width, height).
    scaled_height = int(img_size * src_height / src_width)
    new_images.append(cv2.resize(page_array, (img_size, scaled_height)))

### Perform equation detection and recognition

In [None]:
# Per-page detection results: one (boxes, scores) tuple per entry of
# new_images, where boxes is a list of 4-int lists and scores a list of floats.
res = []

batch_size = 32   # number of crops sent to the detector per call
threshold = 0.9   # threshold value handed to md.DetectAny
iou = 0.1         # IoU threshold for NMS: overlapping boxes above it are dropped

for temp_image in new_images:
    # Split the page into crops; only the padded crops and the per-crop info
    # are used below (the info is what postprocess needs to map boxes back).
    crops_list, padded_crops_list, crops_info_list = get_rolling_crops(temp_image, stride=[128, 128])

    scores_list = []
    wb_list = []
    # Run the detector batch by batch to bound memory use.
    for start in tqdm(range(0, len(padded_crops_list), batch_size)):
        batch = padded_crops_list[start:start + batch_size]
        window_borders, scores = md.DetectAny(batch, threshold)
        scores_list.extend(scores)
        wb_list.extend(window_borders)

    # change crops to original image coordinates
    bb_list, s_list = postprocess(wb_list, scores_list, crops_info_list)

    # Nothing detected on this page: record an empty result and skip NMS,
    # which cannot run on an empty box tensor of the wrong shape.
    if len(bb_list) == 0:
        res.append(([], []))
        continue

    # convert to torch tensors
    bb_torch = torch.tensor(bb_list).float()
    scores_torch = torch.tensor(s_list)

    # perform non-maximum suppression; nms needs boxes and scores of the same
    # dtype, so the scores are cast for this call only (reported scores keep
    # their original values).
    indices = nms(bb_torch, scores_torch.to(bb_torch.dtype), iou)

    new_bb_list = bb_torch[indices].int().tolist()
    new_s_list = scores_torch[indices].tolist()

    res.append((new_bb_list, new_s_list))

### [Optional] Save detected equations

In [None]:
# Output directory: the PDF path without its extension
# (e.g. './demo/paper.pdf' -> './demo/paper').
path2save = os.path.splitext(test_path)[0]

# Create the directory (and any missing parents); no error if it exists.
os.makedirs(path2save, exist_ok=True)

for idx, temp_image in enumerate(new_images):
    # Work on a copy so the pages used for recognition stay unannotated.
    # The pages come from PIL via np.array and are presumably RGB, while
    # cv2.imwrite expects BGR, so 3-channel pages are converted first.
    if temp_image.ndim == 3 and temp_image.shape[2] == 3:
        img_c = cv2.cvtColor(temp_image, cv2.COLOR_RGB2BGR)
    else:
        img_c = np.copy(temp_image)

    bb_list, score_list = res[idx]
    for i, (bb, score) in enumerate(zip(bb_list, score_list)):
        top_left = (bb[0], bb[1])
        # draw bounding boxes (colours below are BGR: yellow box)
        cv2.rectangle(img_c, top_left, (bb[2], bb[3]), (0, 255, 255), 2)
        # put the index of the bounding boxes (blue) at the top-left corner
        cv2.putText(img_c, str(i), top_left, cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1, cv2.LINE_AA)
        # put the score of the bounding boxes (red) at the top-right corner
        cv2.putText(img_c, str(round(score, 2)), (bb[2], bb[1]), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1, cv2.LINE_AA)

    out_path = os.path.join(path2save, f'{idx}.png')
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(out_path, img_c):
        print(f"Warning: could not write {out_path}")

### Convert equations to LaTeX

In [None]:
def _padded_crop_bounds(box, img_w, img_h, pad_frac=0.05):
    """Return (x1, y1, x2, y2) for *box* grown by pad_frac and clipped to the image.

    The padding on every side is computed from the ORIGINAL box width/height,
    so it is symmetric. The result always spans at least one pixel in each
    direction, so the crop taken from it is never empty.
    """
    x1, y1, x2, y2 = (int(v) for v in box)
    pad_x = int(pad_frac * (x2 - x1))
    pad_y = int(pad_frac * (y2 - y1))
    x1 = min(max(0, x1 - pad_x), img_w - 1)
    y1 = min(max(0, y1 - pad_y), img_h - 1)
    x2 = min(img_w, max(x2 + pad_x, x1 + 1))
    y2 = min(img_h, max(y2 + pad_y, y1 + 1))
    return x1, y1, x2, y2


def _resize_to_height(img, height=128):
    """Resize *img* to the given height, keeping the aspect ratio (width >= 1)."""
    src_h, src_w = img.shape[:2]
    # Guard against a zero target width for very tall, thin crops.
    new_w = max(1, int(height * src_w / src_h))
    return cv2.resize(img, (new_w, height))


# final_img_crops[page][k] is the PIL crop for box k of res[page], so the
# indices stay aligned with the detection results.
final_img_crops = []

for idx, temp_image in enumerate(new_images):
    bb_list, _ = res[idx]
    page_h, page_w = temp_image.shape[:2]
    temp = []
    for bb in bb_list:
        x1, y1, x2, y2 = _padded_crop_bounds(bb, page_w, page_h)

        # crop the image (a view is enough: the resize below allocates a
        # new array, so the page itself is never modified)
        cropped_img = temp_image[y1:y2, x1:x2]

        # resize the image to height 128
        cropped_img = _resize_to_height(cropped_img, 128)

        # convert to PIL image
        temp.append(Image.fromarray(cropped_img))
    final_img_crops.append(temp)


In [None]:
# results[page][k] is what the LaTeX-OCR model returns for crop k of that page.
results = []

for page_crops in tqdm(final_img_crops):
    # Recognise every crop of the page; pages are appended one at a time so
    # finished pages are kept if the run is interrupted.
    results.append([model(crop) for crop in page_crops])