# ICIP 2022, notebook materials


  - Reproduce the Proposed Method and PypiDeskew on two DISE 2021 datasets.
  
 
**Download dataset**:
- [DISE2021_15](https://drive.google.com/file/d/1WvaSgMY-JXB5t8pHwK5HzbgSG4YDdxw5/view)
- [DISE2021_45](https://drive.google.com/file/d/1S4VApXsIlUaCQZuuyedCQmcobdGZCpoJ/view)


**Before running the Jupyter Notebook, run the following commands in your shell (e.g. bash):**
```
# create a new virtual environment and activate it
python3.9 -m venv env
source env/bin/activate

# install jupyter-notebook and run this file
pip install jupyter
jupyter-notebook
```

In [1]:
!pip install opencv-python numpy tqdm

Collecting opencv-python
  Downloading opencv_python-4.5.4.58-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (60.3 MB)
[K     |████████████████████████████████| 60.3 MB 5.0 MB/s eta 0:00:01
[?25hCollecting numpy
  Downloading numpy-1.21.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[K     |████████████████████████████████| 15.7 MB 107.3 MB/s eta 0:00:01
[?25hCollecting tqdm
  Downloading tqdm-4.62.3-py2.py3-none-any.whl (76 kB)
[K     |████████████████████████████████| 76 kB 9.6 MB/s s eta 0:00:01
[?25hInstalling collected packages: numpy, tqdm, opencv-python
Successfully installed numpy-1.21.4 opencv-python-4.5.4.58 tqdm-4.62.3
You should consider upgrading via the '/home/ubuntu/research/lib-table/env/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [3]:
import cv2
import numpy as np


def ensure_gray(image):
    """Return a single-channel version of ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Either an image that is already single-channel (shape ``(H, W)`` or
        ``(H, W, 1)``) or a multi-channel image that ``cv2.cvtColor`` can
        convert with ``cv2.COLOR_BGR2GRAY``.

    Returns
    -------
    numpy.ndarray
        ``image`` itself when it is already single-channel, otherwise the
        result of the BGR-to-gray conversion.

    Raises
    ------
    cv2.error
        If a multi-channel image cannot be converted. Previously this was
        swallowed and the unconverted image was returned, which only moved
        the failure to a later, harder-to-read place.
    """
    # Decide from the array shape instead of catching cv2.error, so that
    # genuine conversion failures are no longer hidden.
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def ensure_optimal_square(image):
    """Pad ``image`` to a square whose side is an FFT-friendly length.

    The side is ``cv2.getOptimalDFTSize`` of the larger of the two image
    dimensions. The original pixels stay in the top-left corner; the area
    added at the bottom and on the right is filled with the constant 255.
    """
    assert image is not None, image
    height, width = image.shape[:2]
    side = cv2.getOptimalDFTSize(max(height, width))
    return cv2.copyMakeBorder(
        src=image,
        top=0,
        bottom=side - height,
        left=0,
        right=side - width,
        borderType=cv2.BORDER_CONSTANT,
        value=255,
    )


def get_fft_magnitude(image):
    """Return the centred 2-D FFT magnitude of a binarised copy of ``image``.

    The image is converted to gray, padded to an FFT-friendly square,
    inverted and adaptively thresholded before the transform is taken.
    """
    padded = ensure_optimal_square(ensure_gray(image))

    # Gaussian adaptive threshold (block size 15, constant -10) applied to
    # the bitwise-inverted image.
    binary = cv2.adaptiveThreshold(
        ~padded, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -10
    )

    # fftshift moves the zero-frequency term to the centre of the array.
    spectrum = np.fft.fftshift(np.fft.fft2(binary))

    # Magnitude (modulus) of the complex spectrum.
    return np.abs(spectrum)


def _get_angle_adaptive(m, amax=None, num=None, W=None):
    assert m.shape[0] == m.shape[1]
    r = c = m.shape[0] // 2

    if W is None:
        W = m.shape[0] // 10

    if amax is None:
        amax = 15

    if num is None:
        num = 20

    tr = np.linspace(-1 * amax, amax, amax * num * 2) / 180 * np.pi
    profile_arr = tr.copy()

    def f(t):
        _f = np.vectorize(
            lambda x: m[c + int(x * np.cos(t)), c + int(-1 * x * np.sin(t))]
        )
        _l = _f(range(0, r))
        val_init = np.sum(_l)
        val_correct = np.sum(_l[W:])
        return val_init, val_correct

    vf = np.vectorize(f)

    li = vf(profile_arr)
    li_init = li[0]
    li_correct = li[1]

    a_init = tr[np.argmax(li_init)] / np.pi * 180
    a_correct = tr[np.argmax(li_correct)] / np.pi * 180

    dist = a_init - a_correct
    if a_init < a_correct:
        dist = a_correct - a_init

    return -1 * a_init, -1 * a_correct, dist


def get_angle(image, amax=None, V=None, W=None, D=None, train_D=False):
    """Estimate an angle (in degrees) for ``image`` from its FFT magnitude.

    Parameters
    ----------
    image : numpy.ndarray
        Input image.
    amax : optional
        Forwarded to ``_get_angle_adaptive``; defaults to 15.
    V : optional
        The image is rescaled uniformly by ``V / image.shape[0]`` before the
        FFT; defaults to 1024.
    W : optional
        Forwarded to ``_get_angle_adaptive``; defaults to 0.
    D : optional
        The corrected estimate is returned only when the two estimates differ
        by at most ``D``; defaults to 0.45.
    train_D : bool
        When exactly ``True``, return ``(a_init, a_correct, dist)`` instead
        of a single angle.
    """
    assert isinstance(image, np.ndarray), image

    amax = 15 if amax is None else amax
    V = 1024 if V is None else V
    W = 0 if W is None else W
    D = 0.45 if D is None else D

    # Same scale factor on both axes, derived from the image height.
    scale = V / image.shape[0]
    resized = cv2.resize(image, None, fx=scale, fy=scale)

    spectrum = get_fft_magnitude(resized)
    a_init, a_correct, dist = _get_angle_adaptive(spectrum, amax=amax, W=W)

    if train_D is True:
        return a_init, a_correct, dist

    return a_correct if dist <= D else a_init

In [4]:
import glob
import os
from multiprocessing import Manager
from multiprocessing.pool import Pool

from tqdm import tqdm


# Shared list that the pool workers append their per-image errors to.
manager = Manager()
elist = manager.list()

# Module-level settings read by w() and eval_func().
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 1024
W = 274
D = 0.7

def w(image_path):
    """Score one image and append its absolute angle error to ``elist``.

    The ground-truth angle is the number found between the first "[" and the
    first "]" of the file name. The prediction comes from ``get_angle`` with
    the module-level ``amax``, ``V``, ``W`` and ``D``.
    """
    file_name = os.path.basename(image_path)
    start = file_name.find("[") + 1
    stop = file_name.find("]")
    gt = float(file_name[start:stop])

    predicted = get_angle(cv2.imread(image_path), amax=amax, V=V, W=W, D=D)

    elist.append(round(abs(gt - predicted), 2))

def eval_func():
    """Run ``w`` on every path matching ``data_dir`` and print a summary.

    The shared ``elist`` is cleared, filled by a process pool, and then
    summarised as:

    * AED   - mean of all errors
    * TOP80 - mean of the smallest 80% of the errors
    * CE    - fraction of errors that are <= 0.1
    * WORST - largest error

    If no error was collected (no matching file, or every worker skipped its
    image) a message is printed instead of the summary.
    """
    elist[:] = []
    image_paths = glob.glob(data_dir)

    # int(cpu_count * 0.98) is 0 on a single-core machine and cpu_count() may
    # return None; Pool requires at least one process.
    n_workers = max(1, int((os.cpu_count() or 1) * 0.98))
    with Pool(n_workers) as p:
        list(tqdm(p.imap_unordered(w, image_paths), total=len(image_paths)))

    # Take one plain-list snapshot instead of iterating the manager proxy
    # once per statistic.
    errors = elist[:]
    if not errors:
        print(f"No results collected for {data_dir!r}")
        return

    ce = sum(e <= 0.1 for e in errors) / len(errors)
    aed = sum(errors) / len(errors)
    # Keep at least one value so a single result does not divide by zero.
    best80 = sorted(errors)[: max(1, int(len(errors) * 0.8))]
    top80 = sum(best80) / len(best80)

    print(f"AED: {aed:.2f}")
    print(f"TOP80: {top80:.2f}")
    print(f"CE: {ce:.2f}")
    print(f"WORST: {max(errors)}")

# Table 4: Ours - DISE (15)

In [5]:
# Proposed method on dise2021_15, V = 1024.
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 1024
W = 274
D = 0.7
eval_func()

100%|███████████████████████████████████████| 1491/1491 [00:39<00:00, 37.64it/s]


AED: 0.11
TOP80: 0.07
CE: 0.67
WORST: 1.13


In [6]:
# Proposed method on dise2021_15, V = 1500.
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 1500
W = 328
D = 0.55
eval_func()

100%|███████████████████████████████████████| 1491/1491 [00:58<00:00, 25.56it/s]


AED: 0.09
TOP80: 0.05
CE: 0.78
WORST: 1.13


In [7]:
# Proposed method on dise2021_15, V = 2048.
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 2048
W = 304
D = 0.55
eval_func()

100%|███████████████████████████████████████| 1491/1491 [01:22<00:00, 18.16it/s]


AED: 0.08
TOP80: 0.04
CE: 0.84
WORST: 1.13


In [9]:
# Proposed method on dise2021_15, V = 3072.
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 3072
W = 328
D = 0.55
eval_func()

100%|███████████████████████████████████████| 1491/1491 [02:09<00:00, 11.50it/s]


AED: 0.07
TOP80: 0.03
CE: 0.86
WORST: 1.13


In [10]:
# Proposed method on dise2021_15, V = 4096.
data_dir = "./data/dise2021_15/dise2021_test/*"
amax = 15
V = 4096
W = 250
D = 0.5
eval_func()

100%|███████████████████████████████████████| 1491/1491 [03:17<00:00,  7.54it/s]


AED: 0.08
TOP80: 0.04
CE: 0.83
WORST: 1.18


# Table 4: Ours - DISE (44.9)

In [11]:
# Proposed method on dise2021_45, V = 1024.
data_dir = "./data/dise2021_45/test/*"
amax = 45
V = 1024
W = 274
D = 0.7
eval_func()

100%|███████████████████████████████████████| 2800/2800 [03:32<00:00, 13.19it/s]


AED: 0.09
TOP80: 0.05
CE: 0.73
WORST: 0.91


In [12]:
# Proposed method on dise2021_45, V = 1500.
data_dir = "./data/dise2021_45/test/*"
amax = 45
V = 1500
W = 328
D = 0.55
eval_func()

100%|███████████████████████████████████████| 2800/2800 [05:07<00:00,  9.11it/s]


AED: 0.08
TOP80: 0.04
CE: 0.82
WORST: 0.89


In [13]:
# Proposed method on dise2021_45, V = 2048.
data_dir = "./data/dise2021_45/test/*"
amax = 45
V = 2048
W = 304
D = 0.55
eval_func()

100%|███████████████████████████████████████| 2800/2800 [07:02<00:00,  6.63it/s]


AED: 0.06
TOP80: 0.03
CE: 0.87
WORST: 1.06


In [14]:
# Proposed method on dise2021_45, V = 3072.
data_dir = "./data/dise2021_45/test/*"
amax = 45
V = 3072
W = 328
D = 0.55
eval_func()

100%|███████████████████████████████████████| 2800/2800 [10:38<00:00,  4.39it/s]


AED: 0.05
TOP80: 0.02
CE: 0.89
WORST: 1.06


In [15]:
# Proposed method on dise2021_45, V = 4096.
data_dir = "./data/dise2021_45/test/*"
amax = 45
V = 4096
W = 250
D = 0.5
eval_func()

100%|███████████████████████████████████████| 2800/2800 [14:58<00:00,  3.12it/s]


AED: 0.06
TOP80: 0.03
CE: 0.86
WORST: 1.06


# Table 4 - PypiDeskew

In [16]:
!pip install -q deskew

You should consider upgrading via the '/home/ubuntu/research/lib-table/env/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [17]:
import warnings
warnings.filterwarnings("ignore")

In [18]:
# according to the README at https://github.com/sbrunner/deskew
from deskew import determine_skew
from skimage import io
from skimage.color import rgb2gray

def w(image_path):
    """Score one image with ``determine_skew`` and append the error to ``elist``.

    The ground-truth angle is the number found between the first "[" and the
    first "]" of the file name. Images for which ``determine_skew`` returns
    ``None`` are skipped and add nothing to ``elist``.
    """
    file_name = os.path.basename(image_path)
    start = file_name.find("[") + 1
    stop = file_name.find("]")
    gt = float(file_name[start:stop])

    predicted = determine_skew(rgb2gray(io.imread(image_path)))

    # Benefit of the doubt: images the library gives no answer for are not
    # counted against it.
    if predicted is not None:
        # The error is |gt + prediction| here (a sum, not a difference) --
        # presumably deskew reports angles with the opposite sign; confirm.
        elist.append(round(abs(gt + predicted), 2))

# PypiDeskew baseline on both test sets, one summary per set.
for data_dir in (
    "./data/dise2021_15/dise2021_test/*",
    "./data/dise2021_45/test/*",
):
    eval_func()

100%|███████████████████████████████████████| 1491/1491 [00:48<00:00, 30.46it/s]


AED: 16.59
TOP80: 0.24
CE: 0.20
WORST: 141.24


100%|███████████████████████████████████████| 2800/2800 [01:29<00:00, 31.44it/s]


AED: 21.79
TOP80: 2.51
CE: 0.14
WORST: 179.45
