# Preprocess: mobile pics
input: mobile pics

goal: remove the parts of the image that do not contain the scanned record

output: new image

## Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv2
import zipfile
import os
from PIL import Image

In [None]:
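# Define data_path here: the path prefix (including its trailing separator) that is
# prepended to 'mobile_pics.zip' when reading and to each picture name when saving.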

## Get input

In [None]:
# Open the zip archive that holds the mobile pictures.
# Read-only binary mode is sufficient because the archive is only read from
# (the previous 'r+b' mode needlessly required write permission on the file).
# The handle and the ZipFile are left open on purpose: individual entries are
# read from `pics` later on.
imgzip = open(data_path+'mobile_pics.zip', 'rb')

pics = zipfile.ZipFile(imgzip)
ids_p = pics.namelist()  # names of every entry stored in the archive
print("No. of images = ", len(ids_p))

## Preprocess

### Predefined functions

In [None]:
def _nearest_edge(edges, rows, cols, corner_row, corner_col, borde, label):
    """Return the edge pixel of one quadrant that lies closest to a corner.

    Args:
        edges: 2-D boolean array, True where an edge pixel is present.
        rows: slice selecting the quadrant's rows inside ``edges``.
        cols: slice selecting the quadrant's columns inside ``edges``.
        corner_row: row coordinate of the reference corner.
        corner_col: column coordinate of the reference corner.
        borde: offset added back to both coordinates of the result.
        label: quadrant name, used only in the error message.

    Returns:
        ``[x, y]`` (column, row) of the closest edge pixel, shifted by ``borde``.

    Raises:
        ValueError: if the quadrant contains no edge pixel.
    """
    ii, jj = np.nonzero(edges[rows, cols])
    if ii.size == 0:
        raise ValueError(
            "find_points: no edge pixel found in the {} quadrant".format(label)
        )
    # Back to coordinates of the whole (border-stripped) edge map.
    ii = ii + rows.start
    jj = jj + cols.start
    # Squared distances order candidates exactly like Euclidean distances.
    dist2 = (ii - corner_row) ** 2 + (jj - corner_col) ** 2
    # np.nonzero lists pixels row by row and argmin keeps the first minimum,
    # so ties are resolved in favour of the first pixel in row-major order.
    k = int(np.argmin(dist2))
    return [int(jj[k]) + borde, int(ii[k]) + borde]


def find_points(c, borde):
    """Locate, in each image quadrant, the edge pixel nearest to its corner.

    A margin of ``borde`` pixels is ignored on every side of ``c``. The
    remaining area is split into four quadrants at its integer mid-row and
    mid-column, and in each quadrant the non-zero pixel with the smallest
    Euclidean distance to that quadrant's outer corner is selected.

    Args:
        c: 2-D array in which values greater than 0 mark edge pixels
            (e.g. the output of an edge detector).
        borde: width, in pixels, of the margin to ignore on every side.

    Returns:
        A list ``[point_1, point_2, point_3, point_4]`` for the upper-left,
        upper-right, lower-left and lower-right quadrants. Each point is
        ``[x, y]`` (column, row) expressed in the coordinates of ``c``.

    Raises:
        ValueError: if any quadrant contains no edge pixel (this used to
            surface as an ``UnboundLocalError``).
    """
    edges = c[borde:c.shape[0]-borde, borde:c.shape[1]-borde] > 0
    height, width = edges.shape

    top, bottom = slice(0, height // 2), slice(height // 2, height)
    left, right = slice(0, width // 2), slice(width // 2, width)

    # Reference corners sit at (0, 0), (0, width), (height, 0) and
    # (height, width), i.e. on the outer boundary of the cropped edge map.
    return [
        _nearest_edge(edges, top, left, 0, 0, borde, "upper-left"),
        _nearest_edge(edges, top, right, 0, width, borde, "upper-right"),
        _nearest_edge(edges, bottom, left, height, 0, borde, "lower-left"),
        _nearest_edge(edges, bottom, right, height, width, borde, "lower-right"),
    ]

In [None]:
def extract_part(coords, img):
    """Warp the quadrilateral described by ``coords`` into an upright rectangle.

    Args:
        coords: four ``[x, y]`` points delimiting the region to extract,
            in any order.
        img: image array the region is taken from.

    Returns:
        The perspective-corrected crop, sized to the longer of each pair of
        opposite sides of the quadrilateral.
    """
    pts = np.asarray(coords)

    # Order the corners as top-left, top-right, bottom-right, bottom-left:
    # x + y is smallest at the top-left and largest at the bottom-right,
    # x - y is largest at the top-right and smallest at the bottom-left.
    rect = np.zeros((4, 2), dtype="float32")
    suma = np.sum(pts, axis=1)
    rect[0] = pts[np.argmin(suma)]
    rect[2] = pts[np.argmax(suma)]

    diff = -np.diff(pts, axis=1)
    rect[1] = pts[np.argmax(diff)]
    rect[3] = pts[np.argmin(diff)]

    (tl, tr, br, bl) = rect

    # Output width: the longer of the bottom and top sides.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Output height: the longer of the right and left sides.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    dst = np.array(
        [[0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]],
        dtype="float32",
    )

    # Compute the perspective transform matrix and then apply it.
    M = cv2.getPerspectiveTransform(rect, dst)
    # flags/borderMode must be passed by keyword: the 4th positional parameter
    # of warpPerspective is the optional `dst` output array, so positional
    # values would be shifted by one and the interpolation flag ignored.
    # WARP_INVERSE_MAP is deliberately not set, because M maps source points
    # to destination points (forward transform).
    warped = cv2.warpPerspective(
        img,
        M,
        (maxWidth, maxHeight),
        flags=cv2.INTER_LANCZOS4,
        borderMode=cv2.BORDER_CONSTANT,
    )

    return warped

### apply to all pics

In [None]:
# Crop every picture of the archive down to the scanned record and save it.
# Iterates over `ids_p`, the name list read from the archive, and avoids
# naming the loop variable `id`, which would shadow the builtin.
for pic_name in ids_p:
    # Directory entries of the archive carry no image data.
    if pic_name.endswith('/'):
        continue
    #1 GET PIC (decoded as a single-channel grayscale image)
    im = pics.read(pic_name)
    gray = cv2.imdecode(np.frombuffer(im, np.uint8), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # imdecode yields None for data it cannot decode (non-image members).
        print("Skipping (not a readable image): ", pic_name)
        continue
    #2 REMOVE BACKGROUND
    canny = cv2.Canny(gray, 30, 200)
    pp = find_points(canny, 20)
    crop = extract_part(pp, gray)
    #3 REMOVE MARGINS
    canny2 = cv2.Canny(crop, 30, 100)
    p = find_points(canny2, 10)
    fcrop = extract_part(p, crop)
    #4 SAVE
    ims = Image.fromarray(fcrop)
    ims.save(data_path+pic_name)