# Collect Pain data from web

In [None]:
# import

import os
import cv2
import time
import shutil
import numpy as np
from math import *
import sys, traceback
from PIL import Image, ImageDraw


## web crawling
from selenium import webdriver
import urllib.request

## DPSR
from demo_test_dpsr_crawl import dpsr

## torch
import torch
import torchvision
from torchvision import models
import torchvision.transforms as T
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches

## dlib
import dlib

In [None]:
## selenium
# Raw string: "\c" is not a valid escape sequence and triggers a warning on
# recent Python versions. The resulting value is unchanged.
CHROME_DRIVER_PATH = r"C:\chromedriver\chromedriver.exe"
COMMON_URL = "https://google.com"

## path - one folder per pipeline stage, numbered in processing order
COMMON_RAW_DATA_PATH = "./data/0. raw_web_data/"
COMMON_RAW_ISR_PATH = "./data/1. sr_web_data/"
COMMON_ROUGH_CROP_PATH = "./data/2. rough_crop_data/"
COMMON_ROUGH_ISR_PATH = "./data/3. sr_crop_data/"
COMMON_ALIGNED_PATH = "./data/4. aligned_data/"
COMMON_TOTAL_CROP_PATH = "./data/5. total_crop_data/"
# NOTE(review): the next two names look swapped relative to their folder names
# (RESULT -> "face_only_data", FACE_ONLY -> "result_data"). The values are kept
# as they are because data already on disk may depend on them - confirm first.
COMMON_RESULT_PATH = "./data/6. face_only_data/"
COMMON_FACE_ONLY_PATH = "./data/7. result_data/"

# Width in pixels of the replicated border added on every side of an image
# before it is passed to the keypoint detector.
BORDER = 300

## dlib - landmark index groups of the 68-point shape predictor
ALL = list(range(0, 68))
RIGHT_EYEBROW = list(range(17, 22))
LEFT_EYEBROW = list(range(22, 27))
NOSE = list(range(27, 36))
RIGHT_EYE = list(range(36, 42))
LEFT_EYE = list(range(42, 48))
# NOTE(review): this range starts at 61; the mouth landmarks of the 68-point
# layout are usually given as 48-67 - confirm the intended subset.
MOUTH_OUTLINE = list(range(61, 68))
JAWLINE = list(range(0, 17))
index = ALL

In [None]:
## Define the dlib face detector and landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

## KRCNN pretrained model (switched to eval mode right after construction)
krcnn_model = models.detection.keypointrcnn_resnet50_fpn(pretrained=True).eval()
# Detections whose score is below this value are skipped
THRESHOLD = 0.90

In [None]:
### search list
search_list = ['soccer injury', 'olympic injury', 'women soccer injury', 'baseball injury',
               'boy soccer injury', 'girl soccer injury', 'headache', 'headache expression',
               'toothache', 'toothache expression', 'stomachache', 'stomachache expression']

# Extended list: every base term followed by additional pain-expression queries.
# Built from search_list so the shared terms cannot drift apart.
more_search_list = search_list + [
    'heartburn expression', 'ouch face', 'sharp pain expression',
    'cramping pain expression', 'sore expression', 'severe pain face',
    'excruciating pain face', 'irritated pain expression', 'stiff pain expression',
]

### 0. Basic Functions

In [None]:
# check whether file is image or not
def is_image(file) :
    """Return True when `file` has a .png, .jpg or .jpeg extension.

    The check is case-insensitive and looks only at the end of the name.
    The file name is printed in both cases (prefixed with "NOT IMAGE " when
    it is rejected).
    """
    # The previous test `'.png' or '.jpg' or '.jpeg' in file` was always
    # truthy because the non-empty literal '.png' short-circuits the `or`.
    if file.lower().endswith(('.png', '.jpg', '.jpeg')) :
        print(file)
        return True

    print("NOT IMAGE ", file)
    return False
    

# Resize so that the shorter of width / height equals `size`
def resize(img, size) :
    """Scale `img` so its shorter side becomes `size`, keeping the aspect ratio.

    Args:
        img: image as a numpy array; only the first two axes (rows, columns)
            are read, so both 2-D and 3-D arrays are accepted.
        size: target length in pixels of the shorter side.

    Returns:
        The resized image as a PIL Image.
    """
    # numpy order is (rows, columns) = (height, width); PIL sizes are
    # (width, height), hence the swapped order in the tuples below.
    rows, cols = img.shape[:2]
    pil_img = Image.fromarray(img)

    if rows <= cols :
        target = (int(cols * size / rows), size)
    else :
        target = (size, int(rows * size / cols))

    return pil_img.resize(target)


# Euclidean distance between two points
def pointDist(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance from (x1, y1) to (x2, y2)."""
    # hypot comes from the file-level `from math import *`, like sqrt/pow did.
    return hypot(x2 - x1, y2 - y1)

# mirror a point inside a w x h area (180 degree turn)
def reverse_point(w, h, point):
    """Replace point[0] with w - point[0] and point[1] with h - point[1].

    The update is done in place; the same `point` object is returned.
    """
    mirrored_x = w - point[0]
    mirrored_y = h - point[1]
    point[0], point[1] = mirrored_x, mirrored_y
    return point

## 1. Web Crawl - Selenium

In [None]:
def screenshot(driver, element, save_path) :
    """Save a screenshot of `element` to the file `save_path`.

    `driver` is accepted for call-site symmetry but is not used here.
    """
    element.screenshot(save_path)
    return None

    
## scroll down for more imgs
def scroll_down(driver) :
    """Scroll the current page down 100 times so more results get loaded.

    Each step scrolls by 1000 pixels. Every tenth step waits one second so
    images can load. After each step the element with class "mye4qd"
    (the 'MORE' button) is looked up and clicked when it is enabled.
    """
    for step in range(100) :
        driver.execute_script("window.scrollBy(0, 1000)")

        if step % 10 == 0 :
            # give the images time to load
            time.sleep(1.0)

        try :
            # Look for the 'MORE' button. The generic find_element call is used
            # because the find_element_by_* helpers were removed in Selenium 4.3;
            # "class name" is the locator strategy string (By.CLASS_NAME).
            more_btn = driver.find_element("class name", "mye4qd")

            if more_btn.is_enabled() :
                more_btn.click()
                time.sleep(1.0)

        except Exception :
            # Deliberate best effort: the button may be missing or not
            # clickable yet, so just keep scrolling.
            continue


## search
def search(search_term) :
    """Open an image search for `search_term` and scroll through the results.

    Args:
        search_term: free-text query; it is URL-encoded before use.

    Returns:
        The Chrome webdriver with the scrolled result page loaded. The caller
        owns the driver and is responsible for calling quit() on it.

    Raises:
        Whatever loading or scrolling raises; the browser is closed first.
    """
    from urllib.parse import quote_plus

    # Encode the term so spaces and characters such as '&' or '#' cannot
    # break the query string.
    url = COMMON_URL + '/search?q=' + quote_plus(search_term) + "&tbm=isch"
    driver = webdriver.Chrome(CHROME_DRIVER_PATH)

    try :
        driver.get(url)

        ## scroll down
        scroll_down(driver)
    except Exception :
        # Do not leave an orphaned browser behind when the page fails.
        driver.quit()
        raise

    return driver


def crawl_image(search_term, save_folder) :
    """Screenshot every result tile of an image search into `save_folder`.

    Files are named "<search_term>_<NNN>.png" with a zero-padded running
    number. The browser opened for the search is always closed, including
    when a screenshot fails.

    Args:
        search_term: query passed to search().
        save_folder: target folder path including the trailing separator.
    """
    driver = search(search_term)

    try :
        # "class name" is the locator strategy string (By.CLASS_NAME); the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        tiles = driver.find_elements("class name", "isv-r")

        for img_index, img in enumerate(tiles) :
            number = str(img_index).zfill(3)
            crawl_data_file = save_folder + search_term + "_" + number + ".png"
            screenshot(driver, img, crawl_data_file)
    finally :
        # One browser is started per search term; release it here.
        driver.quit()

    print(search_term + " crawling complete\n")

## 2. DPSR * 4

In [None]:
def image_super_resolution(original_path, save_path) :
    """Run dpsr on every file in `original_path`, writing into `save_path`.

    When dpsr raises for a file, the failure is reported and the original
    file is copied to `save_path` unchanged instead.

    Args:
        original_path: source folder path including the trailing separator.
        save_path: destination folder path.
    """
    for file in os.listdir(original_path) :
        try :
            dpsr(file, original_path, save_path)
        except Exception as err :
            # `except Exception` (not a bare except) so Ctrl+C still stops the
            # run; the message makes the silent fallback visible.
            print("DPSR failed for " + file + ", copying original : " + str(err))
            shutil.copy(original_path + file, save_path)
        
def isr_to_size(original_path, save_path, size=256):
    """Resize every image in `original_path` and save it under `save_path`.

    An image whose longer side is below `size` is first passed through dpsr
    and re-read from `save_path` before it is resized with resize().

    Args:
        original_path: source folder path including the trailing separator.
        save_path: destination folder path including the trailing separator.
        size: target length passed to resize() (default 256).
    """
    for file in os.listdir(original_path) :
        if not is_image(file) :
            continue

        origin_img = np.array(Image.open(original_path + file))
        rows, cols = origin_img.shape[:2]

        # The earlier condition compared the sides with strict `<` in both
        # directions and therefore never upscaled small *square* images.
        if max(rows, cols) < size :
            dpsr(file, original_path, save_path)
            # presumably dpsr writes its output as save_path + file - verify
            origin_img = np.array(Image.open(save_path + file))

        result_img = resize(origin_img, size)
        result_img.save(save_path + file)

## 3. KRCNN

In [None]:
def krcnn_extract_face(img):
    """Run the Keypoint R-CNN model on one image and return its detections.

    Args:
        img: image accepted by torchvision's ToTensor transform.

    Returns:
        The model output for this image; callers read its 'boxes', 'scores'
        and 'keypoints' entries.
    """
    input_img = T.ToTensor()(img)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        return krcnn_model([input_img])[0]

# face crop - margin : (keypoint span width) * per
def face_crop(ori_img, face, per):
    """Crop the region around five face keypoints, padded by a margin.

    The bounding box of the first five points in `face` is grown on every
    side by int(box_width * per) pixels and clipped to the image.

    Args:
        ori_img: image convertible with np.array (rows x columns [x channels]).
        face: sequence of at least five (x, y) points.
        per: margin as a multiple of the keypoint bounding-box width.

    Returns:
        The cropped numpy array; it can be empty when the box has no area.
    """
    img = np.array(ori_img)
    # numpy order is (rows, columns): rows bound y, columns bound x. The
    # earlier code unpacked them as (w, h) and clamped each axis with the
    # other one's size, which broke crops of non-square images.
    img_h, img_w = img.shape[:2]

    xs = [point[0] for point in face[:5]]
    ys = [point[1] for point in face[:5]]
    left_x, right_x = min(xs), max(xs)
    top_y, bottom_y = min(ys), max(ys)

    margin = int((right_x - left_x) * per)

    # clip to the image when the padded box runs past an edge
    from_h = max(int(top_y - margin), 0)
    to_h = min(int(bottom_y + margin), img_h)
    from_w = max(int(left_x - margin), 0)
    to_w = min(int(right_x + margin), img_w)

    return img[from_h : to_h, from_w : to_w]

## Align the face using the angle of the line through both eyes
# The midpoint between the two eyes is the centre of rotation
def face_align(ori_img, face) :
    """Rotate the image so the line through both eyes becomes horizontal.

    face[1] and face[2] are treated as the eye points. When face[1] lies to
    the left of face[2] the face is considered upside down and an extra
    180 degree turn is applied. Tilts smaller than 3 degrees are ignored.

    Args:
        ori_img: image convertible with np.array.
        face: sequence of (x, y) keypoints; only indices 1 and 2 are read.
            The points are not modified.

    Returns:
        The aligned image as a numpy array with the same height and width;
        the unrotated array when no rotation is needed.
    """
    img = np.array(ori_img)
    # numpy order is (rows, columns); cv2.warpAffine wants dsize as
    # (width, height). Passing them the other way round produced a
    # transposed canvas for non-square images.
    height, width = img.shape[:2]

    right_eye, left_eye = face[1], face[2]

    # mid point between the eyes = rotation centre
    mid_x = int((right_eye[0] + left_eye[0]) / 2)
    mid_y = int((right_eye[1] + left_eye[1]) / 2)

    dx = float(right_eye[0] - left_eye[0])
    dy = float(right_eye[1] - left_eye[1])

    # upside-down face -> needs an extra half turn
    flipped = dx < 0
    if flipped :
        # A half turn about the eye midpoint negates the eye-to-eye vector
        # and leaves the midpoint where it is.
        dx, dy = -dx, -dy

    # atan2 avoids the division by zero of atan(dy / dx) when both eyes
    # share the same x coordinate.
    tilt = degrees(atan2(dy, dx))
    if abs(tilt) < 3 :
        tilt = 0.0

    if not flipped and tilt == 0.0 :
        return img

    # Both rotations share one centre, so they are applied as a single warp,
    # which also interpolates the image only once.
    angle = tilt + 180 if flipped else tilt
    matrix = cv2.getRotationMatrix2D((mid_x, mid_y), angle, 1)
    return cv2.warpAffine(img, matrix, (width, height))
    
    
# crop or align every face found by the keypoint detector
def krcnn_crop_or_align(original_path, save_path, iscrop, per):
    """Detect faces in each image of a folder and save one result per face.

    Every image gets a replicated border of BORDER pixels, is run through
    krcnn_extract_face, and each detection scoring at least THRESHOLD is
    either cropped (face_crop) or aligned (face_align). Results are saved as
    "<original name without extension><NN>.png" in `save_path`.

    Args:
        original_path: source folder path including the trailing separator.
        save_path: destination folder path including the trailing separator.
        iscrop: True to crop faces, False to align them.
        per: margin factor forwarded to face_crop (unused when aligning).
    """
    for file in os.listdir(original_path) :
        # only image files are processed
        if not is_image(file) :
            continue

        # make border
        # NOTE(review): RGB is forced so files with an alpha channel or a
        # single channel reach the detector with three channels - confirm
        # this matches the expected inputs.
        origin_img = np.array(Image.open(original_path + file).convert("RGB"))
        bordered_img = cv2.copyMakeBorder(origin_img, BORDER, BORDER, BORDER, BORDER, cv2.BORDER_REPLICATE)

        # extract face with krcnn
        faces = krcnn_extract_face(bordered_img)

        # splitext handles any extension; replace(".png", "") left names
        # such as "a.jpg00.png" for non-PNG inputs.
        base_name = os.path.splitext(file)[0]

        saved = 0
        for score, keypoints in zip(faces['scores'], faces['keypoints']) :
            if score.detach().numpy() < THRESHOLD :
                continue

            # first five keypoints, x and y columns only
            face = keypoints.detach().numpy()[:5, :2]

            if iscrop :
                ## crop face
                result_img = face_crop(bordered_img, face, per)
            else :
                ## align face
                result_img = face_align(bordered_img, face)

            # skip empty results
            if (result_img.shape[0] == 0) or (result_img.shape[1] == 0) :
                continue

            # save result img
            save_file_path = save_path + base_name + str(saved).zfill(2) + ".png"
            Image.fromarray(result_img).save(save_file_path)
            saved += 1

        ## no usable face
        if saved == 0 :
            print("No face for " + file)

## 4. Dlib

In [None]:
#### tight crop preprocessing - make face only dataset

# count face
def check_face(faces, file) :
    """Classify the number of detected faces and report unusual counts.

    Returns -1 when `faces` is empty, 1 when it holds more than one face and
    0 for exactly one face. A message naming `file` is printed for the first
    two cases.
    """
    face_count = len(faces)

    if face_count > 1 :
        print("many faces detected in ", file)
        return 1

    if face_count == 0 :
        print("zero face detected in ", file)
        return -1

    return 0

# tight crop
def crop_face_only(img, face) :
    """Cut the detected face rectangle out of `img` and scale it to 128x128.

    Args:
        img: image as a numpy array indexed [row, column].
        face: rectangle exposing top(), left(), bottom() and right().

    Returns:
        The cropped face resized to 128x128 with bicubic interpolation.
    """
    # Clamp at 0: a negative start index would be read by numpy as
    # "counted from the end" and select the wrong region.
    from_y = max(int(face.top()), 0)
    from_x = max(int(face.left()), 0)
    to_y = int(face.bottom())
    to_x = int(face.right())

    # rows are sliced by y, columns by x (the column slice used to start at
    # from_y by mistake)
    cropped_img = img[from_y:to_y, from_x:to_x]
    # cv2.INTER_CUBIC is the actual flag name; cv2.INTERCUBIC does not exist
    resized_img = cv2.resize(cropped_img, dsize=(128, 128), interpolation=cv2.INTER_CUBIC)
    return resized_img

def make_face_only_dataset(original_path, save_path) :
    """Detect faces with dlib and save a tight crop of every face found.

    Crops are written to `save_path` as "webdata_<NNNN>.jpg" with a running
    number shared across all input files. Files that are not images, cannot
    be read, or contain no face are skipped.

    Args:
        original_path: source folder path including the trailing separator.
        save_path: destination folder path including the trailing separator.
    """
    index = 0
    for file in os.listdir(original_path) :
        if not is_image(file) :
            continue

        # detect face with dlib
        origin_img = cv2.imread(original_path + file)
        if origin_img is None :
            # imread gives None instead of raising when a file cannot be read
            print("cannot read ", file)
            continue

        img_gray = cv2.cvtColor(origin_img, cv2.COLOR_BGR2GRAY)
        faces = detector(img_gray, 1)

        if check_face(faces, file) < 0 :
            continue

        for face in faces :
            result_img = crop_face_only(origin_img, face)
            # Build the target in its own variable: the earlier code used an
            # undefined `save_folder` and overwrote `save_path` in the loop.
            out_file = save_path + "webdata_" + str(index).zfill(4) + ".jpg"
            cv2.imwrite(out_file, result_img)
            index += 1

    return

## Total Method
### ** Please check the results between steps

In [None]:
def _continue_confirmed() :
    """Prompt the user; return False only when the answer is N (any case)."""
    return input("CONTINUE? (Y/N) : ").strip().upper() != 'N'


def crawl_data(search_list) :
    """Run the full collection pipeline for every search term.

    For each term the stages run in order: crawl, super resolution, rough
    crop, super resolution, align, crop, resize to 256 and dlib tight crop.
    After every stage the user is asked whether to continue; answering N
    stops the whole run immediately.

    Args:
        search_list: iterable of search terms; each term also names the
            per-term sub-folder of every stage.
    """
    for search_term in search_list :
        print(">>>>> search term : ", search_term)
        # make_path
        crawl_folder = COMMON_RAW_DATA_PATH + search_term + "/"
        crawl_isr_folder = COMMON_RAW_ISR_PATH + search_term + "/"
        rough_crop_folder = COMMON_ROUGH_CROP_PATH + search_term + "/"
        rough_isr_folder = COMMON_ROUGH_ISR_PATH + search_term + "/"
        aligned_folder = COMMON_ALIGNED_PATH + search_term + "/"
        total_crop_folder = COMMON_TOTAL_CROP_PATH + search_term + "/"
        result_folder = COMMON_RESULT_PATH + search_term + "/"
        face_only_folder = COMMON_FACE_ONLY_PATH + search_term + "/"

        # The stages write into these folders but never create them.
        for folder in (crawl_folder, crawl_isr_folder, rough_crop_folder,
                       rough_isr_folder, aligned_folder, total_crop_folder,
                       result_folder, face_only_folder) :
            os.makedirs(folder, exist_ok=True)

        stages = (
            ## search + screenshot for the search term
            (crawl_image, (search_term, crawl_folder)),
            ## DPSR
            (image_super_resolution, (crawl_folder, crawl_isr_folder)),
            ## KRCNN Rough Crop
            ## (delete unneeded images at the prompt that follows - disk space)
            (krcnn_crop_or_align, (crawl_isr_folder, rough_crop_folder, True, 2.0)),
            ## DPSR
            (image_super_resolution, (rough_crop_folder, rough_isr_folder)),
            ## KRCNN Align
            (krcnn_crop_or_align, (rough_isr_folder, aligned_folder, False, 0.0)),
            ## KRCNN Crop
            (krcnn_crop_or_align, (aligned_folder, total_crop_folder, True, 1.0)),
            ## resize to 256
            (isr_to_size, (total_crop_folder, result_folder, 256)),
            ## Dlib Tight crop
            (make_face_only_dataset, (result_folder, face_only_folder)),
        )

        for stage, args in stages :
            stage(*args)
            if not _continue_confirmed() :
                return
        

In [None]:
# Run the whole pipeline for every term in search_list (prompts after each stage)
crawl_data(search_list)