In [1]:
import cv2  # openCV 4.5.1
import numpy as np
from numpy import prod
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import time
from skimage.io import imread
from skimage.transform import resize
from PIL import Image, ImageFont, ImageDraw  # add caption by using custom font

from collections import deque

import os
# GPU selection: enumerate devices in PCI bus order and expose only device "2".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

# Thresholds read by pedestrian_detection():
#   MIN_CONFIDENCE - minimum class score for a detection to be kept
#   NMS_THRESHOLD  - threshold handed to cv2.dnn.NMSBoxes
MIN_CONFIDENCE = 0.2
NMS_THRESHOLD = 0.3

In [2]:
def pedestrian_detection(image, model, layer_name, personidz=0):
    """Detect instances of one class in ``image`` with a cv2.dnn network.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) and the
            pixel data passed to ``cv2.dnn.blobFromImage`` are used.
        model: Network object exposing ``setInput`` and ``forward``.
        layer_name: Output layer name(s) passed to ``model.forward``.
        personidz: Class index to keep; detections whose best-scoring class
            differs are discarded.

    Returns:
        A list of ``(confidence, (x1, y1, x2, y2), (center_x, center_y))``
        tuples for the boxes that survive non-maxima suppression. The list is
        empty when nothing is detected.
    """
    img_h, img_w = image.shape[:2]

    # The network is fed a 416x416 blob scaled to [0, 1] with R/B swapped.
    net_input = cv2.dnn.blobFromImage(
        image, 1 / 255.0, (416, 416), swapRB=True, crop=False
    )
    model.setInput(net_input)
    layer_outputs = model.forward(layer_name)

    # Box coordinates come out relative to the image size; this rescales them.
    to_pixels = np.array([img_w, img_h, img_w, img_h])

    boxes, centroids, confidences = [], [], []
    for layer_output in layer_outputs:
        for detection in layer_output:
            class_scores = detection[5:]
            best_class = np.argmax(class_scores)
            confidence = class_scores[best_class]

            # Skip anything that is not a confident hit on the wanted class.
            if best_class != personidz or not confidence > MIN_CONFIDENCE:
                continue

            (cx, cy, box_w, box_h) = (detection[0:4] * to_pixels).astype("int")
            left = int(cx - (box_w / 2))
            top = int(cy - (box_h / 2))

            boxes.append([left, top, int(box_w), int(box_h)])
            centroids.append((cx, cy))
            confidences.append(float(confidence))

    # Non-maxima suppression drops weak boxes that overlap stronger ones.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONFIDENCE, NMS_THRESHOLD)
    if len(kept) == 0:
        return []

    return [
        (
            confidences[k],
            (boxes[k][0], boxes[k][1], boxes[k][0] + boxes[k][2], boxes[k][1] + boxes[k][3]),
            centroids[k],
        )
        for k in kept.flatten()
    ]

In [22]:
# Video pipeline: for every frame, compute a person-masked dense optical-flow
# image; every SEQ_LEN frames, extract MobileNetV3 features from the buffered
# flow images and classify the sequence with the loaded Keras model. The latest
# prediction is drawn as a caption on each output frame.

IMG_SIZE = 224     # side length of the square frames fed to the feature extractor
SEQ_LEN = 30       # number of flow frames buffered before each prediction
th = 0.65          # preds[0][0] below this value is captioned 'Normal'
PRE_MASK_PAD = 5   # padding (px) around detections on the very first frame
CUR_MASK_PAD = 10  # padding (px) around detections on every later frame
FONT_SIZE = 160

# Frozen per-frame feature extractor: MobileNetV3Small backbone + Flatten.
base = tf.keras.applications.MobileNetV3Small(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights='imagenet')
base_model = tf.keras.Sequential([
    base,
    tf.keras.layers.Flatten()
])
base_model.trainable = False

# Sequence classifier applied to the stacked per-frame features.
model = keras.models.load_model('../model/new_model.h5')
model.trainable = False

input_path = 'input_어린이.mp4'
output_path = 'output_어린이.mp4'

# Person detector (Darknet config/weights loaded through cv2.dnn).
labelsPath = "coco.names"
with open(labelsPath) as labels_file:  # close the file instead of leaking the handle
    LABELS = labels_file.read().strip().split("\n")
PERSON_ID = LABELS.index("person")  # looked up once instead of once per frame

weights_path = "yolov4-tiny.weights"
config_path = "yolov4-tiny.cfg"

cv2_model = cv2.dnn.readNetFromDarknet(config_path, weights_path)
layer_name = cv2_model.getLayerNames()
# getUnconnectedOutLayers() returns a flat array on some OpenCV builds and an
# Nx1 array on others; flattening makes the 1-based index lookup work for both.
layer_name = [layer_name[int(idx) - 1] for idx in np.array(cv2_model.getUnconnectedOutLayers()).flatten()]


def _person_mask(frame, pad):
    """Return a boolean (rows, cols) mask covering each detected person box grown by ``pad`` pixels."""
    mask = np.zeros(frame.shape[:2], np.uint8)
    for _, (x1, y1, x2, y2), _ in pedestrian_detection(frame, cv2_model, layer_name, personidz=PERSON_ID):
        # cv2.fillPoly needs 32-bit integer points; the platform default may be int64.
        polygon = np.array(
            [[x1 - pad, y1 - pad], [x2 + pad, y1 - pad], [x2 + pad, y2 + pad], [x1 - pad, y2 + pad]],
            dtype=np.int32,
        )
        cv2.fillPoly(mask, [polygon], 1)
    return mask.astype(bool)


def _draw_caption(image, lines, fill, font, width, height):
    """Return a copy of ``image`` with the two caption ``lines`` drawn near the top-left corner."""
    img_pil = Image.fromarray(image)
    draw = ImageDraw.Draw(img_pil)
    draw.text((int(0.025 * width), int(0.025 * height)), lines[0], font=font, fill=fill)
    draw.text((int(0.025 * width), int(0.105 * height)), lines[1], font=font, fill=fill)
    return np.array(img_pil)


# Loaded once; the original reloaded the font file for every frame.
font = ImageFont.truetype("DejaVuSans.ttf", FONT_SIZE)

vid = cv2.VideoCapture(input_path)
source_fps = vid.get(cv2.CAP_PROP_FPS)  # frame rate reported by the input file
print(f'fps : {source_fps}')
# Write the output at the source frame rate so playback speed matches the
# input; fall back to 30 when the container reports no usable value.
writer_fps = source_fps if source_fps > 0 else 30

writer = None
(W, H) = (None, None)
i = 0                  # number of completed prediction windows
Q = deque(maxlen=128)  # currently unused; kept for the disabled prediction smoothing
frame_counter = 0      # frames accumulated in the current window
frame_list = []        # masked optical-flow images of the current window
preds = None
maxprob = None

try:
    # The first frame only seeds the optical flow; it is not written to the output.
    grabbed, frm = vid.read()
    if not grabbed:
        raise RuntimeError(f'Could not read any frame from {input_path!r}')
    (H, W) = frm.shape[:2]
    pre_frm = cv2.resize(frm, (IMG_SIZE, IMG_SIZE))
    pre_mask = _person_mask(pre_frm, PRE_MASK_PAD)

    while True:
        frame_counter += 1
        grabbed, frm = vid.read()

        if not grabbed:
            print('There is no frame. Streaming ends.')
            break

        output = frm.copy()  # full-resolution frame that receives the caption
        frame = cv2.resize(frm, (IMG_SIZE, IMG_SIZE))
        cur_mask = _person_mask(frame, CUR_MASK_PAD)

        # Dense optical flow between the previous and current frame, encoded
        # as HSV (hue = direction, value = normalized magnitude) and then
        # converted with COLOR_HSV2BGR.
        hsv = np.zeros(frame.shape, dtype=np.float32)
        hsv[..., 1] = 255
        pre_gray = cv2.cvtColor(np.array(pre_frm, dtype=np.float32), cv2.COLOR_BGR2GRAY)
        cur_gray = cv2.cvtColor(np.array(frame, dtype=np.float32), cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(pre_gray, cur_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        hsv[..., 0] = ang * 180 / np.pi / 2
        hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        # Keep flow only where a person was detected in either frame.
        rgb = rgb * (cur_mask | pre_mask)[:, :, np.newaxis]

        frame_list.append(rgb)
        pre_frm = frame
        pre_mask = cur_mask

        if frame_counter >= SEQ_LEN:
            # A full window is buffered: stack, scale, extract features, classify.
            frame_ar = np.array(frame_list, dtype=np.float16)  # (SEQ_LEN, IMG_SIZE, IMG_SIZE, 3)
            frame_list = []

            if np.max(frame_ar) > 1:
                frame_ar = frame_ar / 255.0

            pred_imgarr = base_model.predict(frame_ar)  # one flattened feature vector per frame
            # Add a batch axis: (1, SEQ_LEN, features). With IMG_SIZE=224 the
            # feature length equals the 7 * 7 * 576 the original hard-coded.
            pred_imgarr_dim = pred_imgarr.reshape(1, pred_imgarr.shape[0], -1)

            preds = model.predict(pred_imgarr_dim)
            print(f'preds:{preds}')

            maxprob = np.max(preds)
            print(f'Maximum Probability : {maxprob}')
            print('')

            frame_counter = 0
            i += 1

        # The caption shows the most recent prediction and so changes once per window.
        if preds is not None and maxprob is not None:
            if preds[0][0] < th:
                output = _draw_caption(
                    output,
                    ('Normal', '{:.2f}%'.format(preds[0][1] * 100)),
                    (0, 255, 0, 0),  # green
                    font, W, H,
                )
            else:
                output = _draw_caption(
                    output,
                    ('Abuse Detected', '{:.2f}%'.format(maxprob * 100)),
                    (0, 0, 255, 0),  # red: the array is BGR, so 255 lands in the red channel
                    font, W, H,
                )

        # The writer is created lazily, once the frame size is known.
        if writer is None:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_path, fourcc, writer_fps, (W, H), True)

        writer.write(output)
finally:
    # Always release the capture/writer, even if processing failed part-way.
    print('Video recording ends. Release Memory.')
    if writer is not None:  # no writer exists if no frame was ever written
        writer.release()
    vid.release()
    cv2.destroyAllWindows()

fps : 15.0
preds:[[0.43992567 0.56874067]]
Maximum Probability : 0.5687406659126282

preds:[[0.21950346 0.7645569 ]]
Maximum Probability : 0.764556884765625

preds:[[0.2803715  0.67306995]]
Maximum Probability : 0.673069953918457

preds:[[0.557877  0.4616517]]
Maximum Probability : 0.5578770041465759

preds:[[0.64704925 0.35387042]]
Maximum Probability : 0.6470492482185364

preds:[[0.7067796 0.3146906]]
Maximum Probability : 0.7067795991897583

preds:[[0.55726624 0.42058933]]
Maximum Probability : 0.5572662353515625

preds:[[0.33377582 0.6308277 ]]
Maximum Probability : 0.6308277249336243

preds:[[0.4149173 0.5711393]]
Maximum Probability : 0.5711392760276794

preds:[[0.71442074 0.30978835]]
Maximum Probability : 0.714420735836029

preds:[[0.7104612 0.353718 ]]
Maximum Probability : 0.7104611992835999

preds:[[0.42370555 0.55689985]]
Maximum Probability : 0.5568998456001282

preds:[[0.65366554 0.31376737]]
Maximum Probability : 0.6536655426025391

preds:[[0.5390591  0.47832018]]
Maximu

In [2]:
import requests
import pandas as pd
import numpy as np
import folium
from folium.plugins import MiniMap

In [3]:
def elec_location(region,page_num):
    """Query the Kakao Local keyword-search API for one page of results.

    Args:
        region: Search keyword sent as the ``query`` parameter.
        page_num: Result page number sent as the ``page`` parameter.

    Returns:
        The ``documents`` list from the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    import os  # local import: this notebook's import cell does not import os

    url = 'https://dapi.kakao.com/v2/local/search/keyword.json'
    params = {'query': region,'page': page_num}
    # FIXME(security): the REST key should come from the environment only.
    # The literal fallback keeps the notebook runnable as before, but this key
    # has been committed in plain text and should be rotated and removed.
    api_key = os.environ.get('KAKAO_REST_API_KEY', '45ea69e7bbfdab6fde7b9cc67c6656c9')
    headers = {"Authorization": f"KakaoAK {api_key}"}

    # One request serves both the documents and the metadata; the original
    # sent the identical request twice.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.raise_for_status()
    payload = response.json()

    places = payload['documents']
    total = payload['meta']['total_count']
    if total > 45:
        print(total,'개 중 45개 데이터밖에 가져오지 못했습니다!')
    else :
        print('모든 데이터를 가져왔습니다!')
    return places

In [4]:
def elec_info(places):
    """Convert place records into a DataFrame.

    Args:
        places: Iterable of dicts with the keys ``id``, ``place_name``, ``x``,
            ``y``, ``road_address_name`` and ``place_url``.

    Returns:
        A DataFrame with columns ``ID, stores, X, Y, road_address, place_url``
        and one row per place. ``X`` and ``Y`` are floats. An empty ``places``
        yields an empty frame with the same columns.
    """
    columns = ['ID', 'stores', 'X', 'Y', 'road_address', 'place_url']
    # Build the frame from row tuples. Going through np.array([...]).T, as the
    # original did, forces one common dtype and turns the coordinates into strings.
    records = [
        (
            place['id'],
            place['place_name'],
            float(place['x']),
            float(place['y']),
            place['road_address_name'],
            place['place_url'],
        )
        for place in places
    ]
    return pd.DataFrame(records, columns=columns)

In [5]:
def keywords(location_name):
    """Collect search results for every keyword into a single DataFrame.

    For each keyword, pages 1-3 are fetched with ``elec_location`` and
    converted with ``elec_info``.

    Args:
        location_name: A keyword string or an iterable of keyword strings.

    Returns:
        One DataFrame with all pages of all keywords concatenated (index
        renumbered), or ``None`` when no keyword was given.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(location_name, str):
        location_name = [location_name]

    frames = []
    # Iterate the argument itself; the original looped over the global
    # `location` and ignored its parameter.
    for loca in location_name:
        for page in range(1,4):
            local_name = elec_location(loca, page)
            frames.append(elec_info(local_name))

    if not frames:
        return None
    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(frames, join='outer', ignore_index = True)

In [6]:
def make_map(dfs, x, y):
    """Build a folium map centered on (y, x) with one marker per row of ``dfs``.

    Args:
        dfs: DataFrame with the columns ``X``, ``Y``, ``stores`` and ``place_url``.
        x: Value for the second element of the map center; converted with ``float``.
        y: Value for the first element of the map center; converted with ``float``.

    Returns:
        The ``folium.Map`` with a minimap and the markers added.
    """
    # Create the map.
    m = folium.Map(location=[float(y), float(x)],
                   zoom_start=12)

    # Add the minimap.
    m.add_child(MiniMap())

    # Add one marker per row. Coordinates are read from `dfs` (the original
    # read them from the global `df`), and rows are walked positionally so a
    # non-default index does not matter.
    for lat, lon, store, url in zip(dfs['Y'], dfs['X'], dfs['stores'], dfs['place_url']):
        folium.Marker([float(lat), float(lon)],
                      tooltip=store,
                      popup=url,
                      ).add_to(m)
    return m

In [7]:
# Search keyword(s) handed to keywords().
location = ['금오공과대학교 어린이집']

# Fetch all results, keep one row per place ID, and renumber the rows
# (reset_index() keeps the old labels in an extra 'index' column).
df = (
    keywords(location)
    .drop_duplicates(['ID'])
    .reset_index()
)

# Center the map on the first result; the map is the cell's displayed output.
make_map(df, x=df["X"][0], y=df["Y"][0])

모든 데이터를 가져왔습니다!
모든 데이터를 가져왔습니다!
모든 데이터를 가져왔습니다!
