<img src="python_logo.svg">
<img src="c_logo.png">
<img src="cython_logo.svg">

# Prerequisites

## 1. Install GCC

### INSTALLATION ON OSX
- https://www.mkyong.com/mac/how-to-install-gcc-compiler-on-mac-os-x/

### INSTALLATION ON WINDOWS 10
- http://www.mingw.org/
- https://www.rose-hulman.edu/class/csse/resources/MinGW/installation.htm

## 2. Install Cython
INSTALL WITH: `pip install Cython`

Useful references:

- https://cython.org/

- https://cython.readthedocs.io/en/latest/src/tutorial/cython_tutorial.html

- https://www.pyimagesearch.com/2017/08/28/fast-optimized-for-pixel-loops-with-opencv-and-python/

# PERFORMANCE IMPROVEMENT WITH CYTHON

In [1]:
# Substitute with your custom functions package 
# ---------------------------------------
import sys ; sys.path.append("../")
import cvlib
# ---------------------------------------

In [2]:
sys.path.append("../")

In [3]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import time

%matplotlib inline
%load_ext cython

# ignore distracting warnings
import warnings
warnings.filterwarnings('ignore')

In [4]:
path = "C:\GitHub\pythonPrograms\computerVision2023\clases\imagenes"

In [5]:
filename = path + '\cameraman_face.jpg'
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [6]:
img.shape

(339, 338)

# THRESHOLDING FUNCTION SPEEDUP
Evaluate speedup of cython with a thresholding function.

In [7]:
%%cython -a
 
def threshold_python(img, THRESH):
    r, c = img.shape
    
    for i in range(0, r):
        for j in range(0, c):
            img[i, j] = 255 if img[i, j] >= THRESH else 0
            
    return img

In [8]:
%timeit threshold_python(img, 5)

326 ms ± 9.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%cython -a
import cython
@cython.boundscheck(False)
cpdef unsigned char[:, :] threshold_cython(unsigned char[:, :] img, int THRESH):
    cdef int r, c, i, j
    
    r = img.shape[0]
    c = img.shape[1]

    for i in range(0, r):
        for j in range(0, c):
            img[i, j] = 255 if img[i, j] >= THRESH else 0

    return img

In [10]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [11]:
%timeit threshold_cython(img, 5)

292 µs ± 14.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [12]:
# compare speedup


# Aplying a Kernel to an image

In [13]:
kernel = np.ones((3,3), dtype=np.uint8)
print(kernel)

[[1 1 1]
 [1 1 1]
 [1 1 1]]


In [14]:
# Neighbors example
kernel_size = 3

assert kernel_size%2 != 0 ,"Kernel size not odd"

i=100
j=200

R = kernel_size//2

img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

# get neighborhood
img[i-R:i+R+1, j-R:j+R+1]

array([[ 99,  93, 104],
       [ 95,  87,  88],
       [115, 134, 145]], dtype=uint8)

In [15]:
%%cython -a
def apply_kernel_naive(img, kernel):
    k_r, k_c = kernel.shape

    R = k_r//2
    r, c = img.shape

    for i in range(R,r-R):
        for j in range(R,c-R):
            n = img[i-R:i+R+1, j-R:j+R+1]
            t = 0
            for x in range(k_r):
                for y in range(k_c):
                        t += n[x,y]*kernel[x,y]
            img[i,j] = t
    return img

In [16]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [17]:
%timeit apply_kernel_naive(img, kernel)

1.1 s ± 22.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# store performance for speedup computation
naive_ms = 394

In [19]:
%%cython -a

import numpy as np    # imprort all packages used in function to evaluate

def apply_kernel_np(img, kernel):
    k_r, k_c = kernel.shape

    R = k_r//2
    
    r = img.shape[0]
    c = img.shape[1]

    for i in range(R, r-R):
        for j in range(R, c-R):
            n = img[i-R:i+R+1, j-R:j+R+1]
            v = np.multiply(n, kernel).sum()
            img[i,j] = v
    
    return img

In [20]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [21]:
%timeit apply_kernel_np(img, kernel)

The slowest run took 5.16 times longer than the fastest. This could mean that an intermediate result is being cached.
1.65 s ± 1.29 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
# speedup
394/206

1.912621359223301

In [23]:
%%cython -a
import cython
import numpy as np

@cython.boundscheck(False)
cpdef unsigned char[:, :] apply_kernel_cython(unsigned char[:, :] img, unsigned char[:, :] kernel):
    cdef int k_r, k_c, x, y, R, r, c, i , j, t
    
    k_r = kernel.shape[0]
    k_c = kernel.shape[1]

    R = k_r//2
    
    r = img.shape[0]
    c = img.shape[1]

    for i in range(R, r-R):
        for j in range(R, c-R):
            n = img[i-R:i+R+1, j-R:j+R+1]
            
            # numpy
            t = np.multiply(n, kernel).sum()

            # loop
#             t = 0
#             for x in range(k_r):
#                 for y in range(k_c):
#                         t += n[x, y] * kernel[x, y]

            img[i,j] = t
    
    return img

In [24]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [25]:
%timeit apply_kernel_cython(img, kernel)

1.48 s ± 113 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
%%cython -a
import cython
import numpy as np
@cython.boundscheck(False)
cpdef unsigned char[:, :] apply_kernel_cython_fast(unsigned char[:, :] img, unsigned char[:, :] kernel):
    cdef int k_r, k_c, x, y, R, r, c, i , j, t
    cdef unsigned char[:, :] n
    
    k_r = kernel.shape[0]
    k_c = kernel.shape[1]

    R = k_r//2
    
    r = img.shape[0]
    c = img.shape[1]

    for i in range(R, r-R):
        for j in range(R, c-R):
            n = img[i-R:i+R+1, j-R:j+R+1]
            
            # numpy
            #t = np.multiply(n, kernel).sum()

            # loop
            t = 0
            for x in range(k_r):
                for y in range(k_c):
                        t += n[x, y] * kernel[x, y]
                        
            img[i,j] = t
    
    return img

In [27]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [28]:
%timeit apply_kernel_cython_fast(img, kernel)

6.95 ms ± 562 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [29]:

speedup =  394/1.69
print("Total Speedup is {0}".format(round(speedup,4)))

Total Speedup is 233.1361


# COMPILED Cython code 
- external.pyx : Contains Cython code
- setup.py : Contains a cython makefile style file

**setup.py** contains
```
from distutils.core import setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize("external.pyx")
)
```

Execute ```python setup.py build_ext --inplace``` to compile

Generates a **.so** file for Linux or **.pyd** file for Windows, witch can be imported as a module

In [30]:
from external import apply_kernel_cythonized

In [31]:
img = cv.imread(filename, cv.IMREAD_GRAYSCALE)

In [32]:
%timeit apply_kernel_cythonized(img, kernel)

6.49 ms ± 579 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [33]:
cython_us = 7.41
speedup = ((naive_ms)*1000)/cython_us
print("Total Speedup is {0}".format(round(speedup,4)))

Total Speedup is 53171.39
