# The MIT License

Copyright 2020 Akari Shimono, Yuki Kakui

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Attention

Before running this file, make sure you have API keys for all three services: **Face API, remove.bg, and TSUNA**.

In [None]:
# API Keys
# Read the keys with getpass instead of input() so the secrets are not echoed
# back and stored in the notebook output.
from getpass import getpass

faceAPI_key = getpass("API Key of Face: ")
removebg_key = getpass("API Key of remove.bg: ")
tsuna_key = getpass("API Key of TSUNA: ")

In [None]:
import cv2
import time
import glob
import requests
import argparse, os
import bs4, shutil, ssl
import numpy as np
import matplotlib.pyplot as plt
import http.client, urllib.request, urllib.parse, json
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont

In [None]:
# Path of the video file to sample frames from (read by movie_to_image()).
video_path = input("Video Filename (e.g. ****.mp4): ") #Filename
# Title text sent to the headline-generation API in summary().
summary_text = input("Video Title (Japanese only): ")
# Phrase used for the Google image search in step 3.
search_word = input("Video Key Phrase (This will be searched on Google): ")

# Step 1: Frame Sampling
Sampled images are saved in the "processing/step1" folder.

In [None]:
def movie_to_image():
    """Sample evenly spaced frames from the video and save them as JPEGs.

    Reads the video named by the module-level ``video_path`` and writes every
    ``num_cut``-th frame to ``processing/step1/<index>.jpg`` (index starts at
    0), where ``num_cut`` is the frame count divided by 300, rounded down.

    Returns:
        The number of images written.
    """
    output_path = "processing/step1/"
    os.makedirs(output_path, exist_ok=True)

    # load the video
    capture = cv2.VideoCapture(video_path)

    img_count = 0  # number of sampled images
    frame_count = 0  # number of frames read so far
    try:
        # Sample approximately 300 images. Clamp the step to 1: a video with
        # fewer than 300 frames would otherwise give a step of 0 and make the
        # modulo below raise ZeroDivisionError.
        num_cut = max(1, int(capture.get(cv2.CAP_PROP_FRAME_COUNT) / 300))

        while capture.isOpened():
            ret, frame = capture.read()
            if not ret:
                break
            if frame_count % num_cut == 0:
                img_file_name = output_path + str(img_count) + ".jpg"
                cv2.imwrite(img_file_name, frame)
                img_count += 1
            frame_count += 1
    finally:
        # Release the capture even if reading or writing a frame fails.
        capture.release()
    return img_count

# Run step 1; img_sum is the number of sampled frames and bounds the loop in step 2.
img_sum = movie_to_image()
print("Frame-Sampling Finished.")

# Step 2: Emotion Recognition

The free tier of the Face API processes at most 20 images per minute.

The frame with the highest happiness score and the frame with the highest surprise score are saved as "happiness.jpg" and "surprise.jpg" in the "processing/step2" folder.

In [None]:
# Directory holding the frames sampled in step 1; image paths are built as file + "<i>.jpg".
file = "processing/step1/" 
# Directory where the selected happiness/surprise frames are written.
output_path = "processing/step2/"
os.makedirs(output_path, exist_ok=True)

# extract the image with the highest emotion for each emotion
# Maps each emotion name to [image path, best score seen so far].
max_emotion = {"anger":["", 0.0], "contempt":["", 0.0], "disgust":["", 0.0], "fear":["", 0.0], 
               "happiness":["", 0.0],  "neutral":["", 0.0], "sadness":["", 0.0], "surprise": ["", 0.0]}
# Emotion names in the fixed order used when flattening and comparing scores.
emo_1 = ["anger","contempt","disgust","fear","happiness","neutral","sadness","surprise"]
# Path of the image currently being processed; read by Recognize().
img_name = ""
# Number of requests sent since the last rate-limit pause.
cnt = 0

def RaadJson(datas):
    """Flatten the emotion scores found in a decoded Face API response.

    Each item of ``datas`` is expected to carry its scores under
    ``item["faceAttributes"]["emotion"]``; they are appended in ``emo_1``
    order, so the result holds one group of scores per item.

    Returns:
        The flat list of scores (empty when ``datas`` is empty), or ``0`` as
        soon as an item equals the string ``"error"`` -- which is what
        iterating a dict with an ``"error"`` key yields (presumably the
        service's error payload; verify against the API).
    """
    scores = []
    for entry in datas:
        if entry == "error":
            return 0
        per_entry = entry["faceAttributes"]["emotion"]
        scores.extend(per_entry[label] for label in emo_1)
    return scores


def Recognize(emotion):
    """Record the current image as the best one for every emotion it tops.

    Compares the first ``len(emo_1)`` scores of ``emotion`` (in ``emo_1``
    order) with the best scores stored in ``max_emotion`` and, where the new
    score is strictly higher, stores the module-level ``img_name`` together
    with that score. An empty ``emotion`` leaves ``max_emotion`` untouched.
    """
    scores = np.array(emotion)
    if scores.size == 0:
        return
    for idx, label in enumerate(emo_1):
        best = max_emotion[label]
        if scores[idx] > best[1]:
            best[0] = img_name
            best[1] = scores[idx]
    
    
# Request headers: the image is uploaded as raw bytes and authenticated with the Face API key.
headers = {
    "Content-Type": "application/octet-stream",
    "Ocp-Apim-Subscription-Key": faceAPI_key,
}

# Request parameters: only the emotion attribute is requested.
_face_query = {
    "returnFaceId": "false",
    "returnFaceLandmarks": "false",
    "returnFaceAttributes": "emotion",
}
params = urllib.parse.urlencode(_face_query)

# Send every sampled frame to the Face API, track the best frame per emotion,
# then copy the best "happiness" and "surprise" frames into output_path.
conn = None
try:
    conn = http.client.HTTPSConnection('westcentralus.api.cognitive.microsoft.com')
    for i in range(0, img_sum):
        img_name = file + str(i) + ".jpg"
        # The free tier allows 20 requests per minute, so pause once 20 have
        # been sent (the previous "> 20" check let a 21st request through).
        if cnt >= 20:
            time.sleep(60)
            cnt = 0
        # Close each frame file as soon as its request has been answered.
        with open(img_name, "rb") as f:
            conn.request("POST", "/face/v1.0/detect?%s" % params, f, headers)
            response = conn.getresponse()
            data = response.read()
        cnt += 1
        data = json.loads(data)
        # Parse once; RaadJson returns 0 for an error payload.
        emotion = RaadJson(data)
        if emotion == 0:
            continue
        Recognize(emotion)

    for key in ("happiness", "surprise"):
        value = max_emotion[key]
        # No frame scored above 0 for this emotion: nothing to save.
        if value[0] == "":
            continue
        pic = cv2.imread(value[0])
        cv2.imwrite(output_path + key + ".jpg", pic)
except OSError as e:
    print("[Errno {0}] {1}".format(e.errno, e.strerror))
except Exception as e:
    # Non-OS errors (bad JSON, unexpected payload, HTTP protocol errors) have
    # no errno/strerror; formatting them as such raised inside the handler.
    print("[Error] {0}: {1}".format(type(e).__name__, e))
finally:
    if conn is not None:
        conn.close()

print("Emotion-Recognition Finished.")

# Step 3: Image Insertion

Images downloaded from Google are saved in the "processing/step3/search_data" folder.

The representative object image, with its background removed, is saved as the "paste_img.png" file in the "processing/step3" folder.

In [None]:
# Working directory for step 3; created up front so later cells can write into it.
output_path = "processing/step3/"
os.makedirs(output_path, exist_ok=True)

def detect_face(img):
    """Run the module-level ``face_cascade`` detector on a copy of ``img``.

    Returns:
        Whatever ``face_cascade.detectMultiScale`` returns for the image
        (presumably one rectangle per detected face -- confirm against OpenCV).
    """
    return face_cascade.detectMultiScale(img.copy())

def _text_extent(draw, message, font):
    """Return the (width, height) reached by ``message`` when drawn at the origin."""
    if hasattr(draw, "textbbox"):
        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        # The right/bottom edges are used so the extent includes any offset
        # between the drawing origin and the glyphs.
        _, _, right, bottom = draw.textbbox((0, 0), message, font=font)
        return right, bottom
    return draw.textsize(message, font=font)


def img_add_msg(img, message, location, emotion, j):
    """Draw ``message`` onto ``img`` away from the faces and save the result.

    Placement priority is bottom centre (font size 140), then upper left, then
    upper right (both font size 60). The text is drawn with a 1-pixel black
    outline and written to ``result/recommend_<emotion>/000<j>.jpg``; that
    folder must already exist.

    Args:
        img: Image array as used by OpenCV; it is not modified.
        message: Text to draw.
        location: Face rectangles; indices 0, 1 and 3 of each are read
            (presumably x, y and height from ``detectMultiScale``).
        emotion: Name used to pick the output folder.
        j: Index appended to the output file name.
    """
    font_path = "system/NotoSansCJKjp-Bold.otf"
    canvas = Image.fromarray(img)
    draw = ImageDraw.Draw(canvas)

    font = ImageFont.truetype(font_path, 140)
    largex, largey = _text_extent(draw, message, font)

    # determine where the string must be placed
    # priority: bottom -> upper left -> upper right
    fits_bottom = all(
        face[1] + face[3] + largey + 60 < canvas.height for face in location
    )
    if fits_bottom:
        textloc = ((canvas.width - largex) / 2, canvas.height - largey - 60)  # bottom
    else:
        # The top corners use a smaller font than the bottom banner.
        font = ImageFont.truetype(font_path, 60)
        x, y = _text_extent(draw, message, font)
        fits_upper_left = all(
            x + 100 < face[0] or y + 60 < face[1] for face in location
        )
        if fits_upper_left:
            textloc = (100, 60)  # upper left
        else:
            textloc = (canvas.width - x - 50, 30)  # upper right

    # insert the text: four black copies shifted diagonally form the outline
    bw = 1
    for dx, dy in ((bw, bw), (bw, -bw), (-bw, bw), (-bw, -bw)):
        draw.text((textloc[0] + dx, textloc[1] + dy), message, font=font, fill='black')
    # The array stays in OpenCV channel order from input to cv2.imwrite, so
    # this fill value is interpreted in that order when the file is written.
    draw.text(textloc, message, font=font, fill=(0, 0, 255, 0))
    result = np.array(canvas)  # convert PIL into cv2(NumPy)
    cv2.imwrite("result/recommend_" + emotion + "/000" + str(j) + ".jpg", result)

In [None]:
# Folder that receives the images downloaded from the search results.
data_dir = "processing/step3/search_data/"
os.makedirs(data_dir, exist_ok=True)

# NOTE(review): this replaces the ssl module's default HTTPS context factory
# with the unverified one, i.e. certificate verification is switched off for
# any code in this process that relies on that default. Confirm it is needed.
ssl._create_default_https_context = ssl._create_unverified_context

def image(search_word, num):
    """Collect image URLs from a Google image search for ``search_word``.

    Args:
        search_word: Phrase to search for. It is passed as a query parameter
            so spaces, ``&`` and non-ASCII characters are URL-encoded.
        num: Maximum number of URLs to return.

    Returns:
        Up to ``num`` ``src`` values of the ``<img>`` tags on the result page,
        in page order, skipping tags without a ``src`` and sources ending in
        "gif". Fewer than ``num`` are returned when the page has fewer.
    """
    response = requests.get(
        "https://www.google.com/search",
        params={
            "hl": "jp",
            "q": search_word,
            "btnG": "Google Search",
            "tbs": "0",
            "safe": "off",
            "tbm": "isch",
        },
    )
    soup = bs4.BeautifulSoup(response.text, 'lxml')

    src_list = []
    for tag in soup.find_all("img"):
        # Stop at the requested count instead of indexing past the end of the
        # tag list when the page offers fewer usable images.
        if len(src_list) >= num:
            break
        src = tag.get("src")
        if not src or src.endswith("gif"):
            continue
        src_list.append(src)
    return src_list

def download_img(url, file_name):
    """Stream ``url`` into ``<file_name>.jpg``; do nothing unless the status is 200."""
    reply = requests.get(url, stream=True)
    if reply.status_code != 200:
        return
    # Have the raw stream undo any content encoding before it is copied.
    reply.raw.decode_content = True
    with open(file_name + ".jpg", 'wb') as target:
        shutil.copyfileobj(reply.raw, target)
            
# Download up to `num` search results into data_dir as 0.jpg, 1.jpg, ...
num = 5
srcs = image(search_word, num)
# Iterate over what was actually returned rather than indexing by range(num),
# so a shorter result list cannot raise IndexError here.
for i, src in enumerate(srcs[:num]):
    file_name = data_dir + str(i)
    download_img(src, file_name)
print("Image-Search Finished.")

In [None]:
# calculation of the similarity
def average_hash(target_file, size):
    """Return a 0/1 average-hash array for an image file.

    The image is converted to RGB, resized to ``size`` x ``size``, and each
    channel value is compared with the mean over all values.

    Args:
        target_file: Path of the image to hash.
        size: Edge length of the square the image is resized to.

    Returns:
        Integer array of shape ``(size, size, 3)`` holding 1 where the value
        is above the mean and 0 elsewhere.
    """
    # The context manager closes the file once the resized copy exists.
    with Image.open(target_file) as source:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
        # was an alias for.
        img = source.convert('RGB').resize((size, size), Image.LANCZOS)
    px = np.asarray(img)
    avg = px.mean()
    return 1 * (px > avg)

def hamming_dist(a, b):
    """Count the positions at which the flattened arrays ``a`` and ``b`` differ."""
    flat_a = a.reshape(1, -1)
    flat_b = b.reshape(1, -1)
    return np.not_equal(flat_a, flat_b).sum()

# Pick the two downloaded images that are most alike (smallest non-zero
# hash distance); choice2 is the one sent on for background removal.
size = 64
images = glob.glob(os.path.join(data_dir, "*.jpg"))
rate = 20.0
result = []
diffmin = 10000
# Defined up front so a missing pair is detected below instead of surfacing
# later as a NameError.
choice1 = None
choice2 = None

# Hash every image once instead of re-hashing it for each comparison.
image_hashes = [average_hash(path, size) for path in images]

for targetf, target_dist in zip(images, image_hashes):
    for fname, dist in zip(images, image_hashes):
        diff = hamming_dist(target_dist, dist)/256
        result.append([diff, targetf, fname])
        # diff == 0 means the image itself or an exact duplicate; skip those.
        if diff < diffmin and diff != 0:
            diffmin = diff
            choice1 = targetf
            choice2 = fname

if choice2 is None:
    raise RuntimeError(
        "No pair of distinct images found in " + data_dir
        + "; at least two different downloaded images are required."
    )

print("Successful Image Selected.")

In [None]:
# Inserting a specified image on an image in OpenCV format
class CvOverlayImage(object):
    """Helper for pasting one OpenCV image onto another through PIL."""

    def __init__(self):
        pass

    @classmethod
    def overlay(cls, cv_background_image, cv_overlay_image, point):
        """Composite ``cv_overlay_image`` onto ``cv_background_image`` at ``point``.

        Args:
            cv_background_image: Background array, converted with COLOR_BGR2RGB.
            cv_overlay_image: Foreground array, converted with
                COLOR_BGRA2RGBA; it also serves as its own paste mask.
            point: Position handed to ``Image.paste`` for the foreground.

        Returns:
            The composited image converted back with COLOR_RGBA2BGRA.
        """
        # Unused, but kept so an invalid overlay fails at the same point as before.
        _overlay_h, _overlay_w = cv_overlay_image.shape[:2]

        # background image -> RGBA PIL image
        background = Image.fromarray(
            cv2.cvtColor(cv_background_image, cv2.COLOR_BGR2RGB)
        ).convert('RGBA')
        # foreground image -> RGBA PIL image
        foreground = Image.fromarray(
            cv2.cvtColor(cv_overlay_image, cv2.COLOR_BGRA2RGBA)
        ).convert('RGBA')

        # Paste the foreground onto a fully transparent layer of background
        # size, then alpha-composite that layer over the background.
        layer = Image.new('RGBA', background.size, (255, 255, 255, 0))
        layer.paste(foreground, point, foreground)
        composed = Image.alpha_composite(background, layer)

        return cv2.cvtColor(np.asarray(composed), cv2.COLOR_RGBA2BGRA)

In [None]:
# Send the selected image to remove.bg and store the cut-out as paste_img.png.
# The upload is opened in a with-block so its file handle is closed again.
with open(choice2, 'rb') as image_file:
    response = requests.post(
        'https://api.remove.bg/v1.0/removebg',
        files={'image_file': image_file},
        data={'size': 'auto'},
        headers={'X-Api-Key': removebg_key},
    )
if response.status_code == requests.codes.ok:
    with open("processing/step3/paste_img.png", 'wb') as out:
        out.write(response.content)
    print("Successful Cutout")
else:
    print("Error:", response.status_code, response.text)

In [None]:
# Trim the cut-out to the bounding box of its last band (presumably the alpha
# channel of the remove.bg PNG) and overwrite the file with the result.
paste_path = "processing/step3/paste_img.png"
cutimage = Image.open(paste_path)

last_band = cutimage.split()[-1]
crop = last_band.getbbox()
newimage = cutimage.crop(crop)
newimage.save(paste_path, quality=95)

In [None]:
# paste the foreground image
def img_paste(background, location):
    """Overlay the cut-out image on ``background``, beside the faces.

    The cut-out is read from ``processing/step3/paste_img.png`` and scaled to
    70%, 55% or 40% of the background height depending on its height/width
    ratio (above 1.5, above 1, otherwise), keeping its proportions. It is
    centred vertically and placed 100 px from the left edge when it fits to
    the left of every rectangle in ``location``, else 100 px from the right.

    Args:
        background: Background image array.
        location: Face rectangles; only index 0 of each is read.

    Returns:
        The result of ``CvOverlayImage.overlay`` for the chosen position.
    """
    foreground = cv2.imread("processing/step3/paste_img.png", cv2.IMREAD_UNCHANGED)
    original_h, original_w = foreground.shape[:2]

    # Taller cut-outs are allowed to take up more of the background height.
    aspect = original_h / original_w
    if aspect > 1.5:
        scale = 0.7
    elif aspect > 1:
        scale = 0.55
    else:
        scale = 0.4

    bg_h = background.shape[0]
    new_size = (int(bg_h * scale * original_w / original_h), int(bg_h * scale))
    foreground = cv2.resize(foreground, new_size)
    fore_h, fore_w = foreground.shape[:2]

    top = int((bg_h - fore_h) / 2)
    fits_left = all(fore_w + 100 < face[0] for face in location)
    if fits_left:
        point = (100, top)  # left
    else:
        point = (background.shape[1] - fore_w - 100, top)  # right
    return CvOverlayImage.overlay(background, foreground, point)

# Step 4: Text Insertion

The thumbnails which are recommended to users are saved in the following folder:

- **happiness**: the "result/recommend_happiness" folder

- **surprise**: the "result/recommend_surprise" folder

In [None]:
# summarize the title
def summary():
    """Request headlines for ``summary_text`` from the headline-generation API.

    Posts the module-level ``summary_text`` with the ``tsuna_key`` API key and
    returns the ``"headline"`` entry of the decoded JSON response.
    """
    reply = requests.post(
        "https://clapi.asahi.com/headline-generation",
        headers={"x-api-key": tsuna_key},
        data={"text": summary_text, "types": "paper", "length": "8", "n_head": 5},
    )
    return json.loads(reply.text)["headline"]

In [None]:
# Headlines generated from the video title; indexed when rendering the thumbnails.
title_data = summary()

In [None]:
# original images selected in step 2
happiness = 'processing/step2/happiness.jpg'
surprise = 'processing/step2/surprise.jpg'

# Detector used by detect_face().
face_cascade = cv2.CascadeClassifier('system/haarcascade_frontalface_default.xml')

for emotion_name, source_path in (("happiness", happiness), ("surprise", surprise)):
    source_img = cv2.imread(source_path, cv2.IMREAD_UNCHANGED)
    # Step 2 writes no file for an emotion that no frame scored on, and
    # cv2.imread then returns None; skip that emotion instead of crashing.
    if source_img is None:
        print("No " + emotion_name + " image found at " + source_path + "; skipped.")
        continue
    face_locations = detect_face(source_img)
    os.makedirs("result/recommend_" + emotion_name, exist_ok=True)
    pasted_img = img_paste(source_img, face_locations)
    # Render one thumbnail per headline, at most five, without assuming that
    # exactly five headlines were returned.
    for i, title in enumerate(title_data[:5]):
        img_add_msg(pasted_img, title, face_locations, emotion_name, i)

print("Output Finished.")