In [1]:
%pip install mediapipe opencv-python

Collecting mediapipe
  Downloading mediapipe-0.10.11-cp310-cp310-win_amd64.whl.metadata (9.8 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.6-py3-none-win_amd64.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.11-cp310-cp310-win_amd64.whl (50.8 MB)
   ---------------------------------------- 0.0/50.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/50.8 MB 1.3 MB/s eta 0:00:40
   ---------------------------------------- 0.1/50.8 MB 1.2 MB/s eta 0:00:43
   ---------------------------------------- 0.3/50.8 MB 2.0 MB/s eta 0:00:26
    --------------------------------------- 0.7/50.8 MB 3.4 MB/s eta 0:00:15
   - -------------------------------------- 1.4/50.8 MB 5.9 MB/s eta 0:00:09
   -- ------------------------------------- 2.8/50.8 MB 10.0 MB/s eta 0:00:05
   ---- ----------------------------------- 5.6/50.8 MB 17.1 MB/s eta 0:00:03
   ------- -------------------------------- 9.8/50.8 MB 26.1 MB/s eta 0:00:02
   ----------- -------------



In [1]:
import os
import glob
import cv2 
import pandas as pd
import numpy as np
import mediapipe as mp

In [None]:
# multiple videos: lip landmarks only on a black background (no crop, no background video) (ex1)
def video_from_dataframe(output_path, df, width, height, fps, fourcc, points_per_frame=40):
    """Render keypoints from ``df`` as dots on black frames and write them to a video.

    Rows are consumed in order, ``points_per_frame`` rows per output frame. Only the
    ``x`` and ``y`` columns are used for drawing.

    Args:
        output_path: File name handed to ``cv2.VideoWriter``.
        df: DataFrame with at least ``x`` and ``y`` columns (pixel coordinates).
            An empty frame produces a video with no frames instead of raising.
        width: Output frame width in pixels.
        height: Output frame height in pixels.
        fps: Frame rate handed to ``cv2.VideoWriter``.
        fourcc: Codec code handed to ``cv2.VideoWriter``.
        points_per_frame: Number of consecutive rows that make up one frame.
            Defaults to 40, the value that was previously hard-coded.

    Raises:
        ValueError: If ``points_per_frame`` is not positive or the number of rows
            is not a multiple of ``points_per_frame``.
    """
    if points_per_frame <= 0:
        raise ValueError("points_per_frame must be a positive integer")

    if df.empty:
        # No detections at all: avoid the KeyError a column lookup would raise on a
        # column-less frame and simply emit zero frames.
        xy_per_frame = np.empty((0, points_per_frame, 2), dtype=int)
    else:
        total_rows = df.shape[0]
        if total_rows % points_per_frame != 0:
            raise ValueError(
                f"expected a multiple of {points_per_frame} rows, got {total_rows}"
            )
        xy_per_frame = df[['x', 'y']].to_numpy().reshape(-1, points_per_frame, 2)

    out_video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        for frame_points in xy_per_frame:
            frame = np.zeros((height, width, 3), dtype=np.uint8)
            for px, py in frame_points:
                cv2.circle(frame, (int(px), int(py)), 1, (100, 100, 0), -1)
            out_video.write(frame)
    finally:
        # Release even if drawing fails so the output file handle is not leaked.
        out_video.release()

# Batch job: for every video in data/s1, detect the lip landmarks frame by frame and
# write a landmarks-only video with the same file name into processed_videos/.
os.makedirs('processed_videos', exist_ok=True)

# FaceMesh landmark indices that outline the lips; the set() drops the indices
# shared between the four contours.
lipsUpperOuter = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips = list(set(lipsUpperOuter + lipsLowerOuter + lipsUpperInner + lipsLowerInner))

videos = sorted(glob.glob('data/s1/*.mpg'))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# The context manager closes the FaceMesh graph even if a video fails mid-way.
with mp.solutions.face_mesh.FaceMesh() as face_mesh:
    for video_path in videos:
        data_list = []
        cap = cv2.VideoCapture(video_path)
        try:
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)

            while True:
                check, test_image = cap.read()
                if check is not True:
                    break

                height, width, _ = test_image.shape

                rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(rgb_image)
                if not results.multi_face_landmarks:
                    continue

                # Position reported after the read; queried once per frame rather
                # than once per landmark.
                frame_idx = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
                for face_landmarks in results.multi_face_landmarks:
                    for i in lips:
                        pt = face_landmarks.landmark[i]
                        data_list.append({
                            'frame': frame_idx,
                            'x': int(pt.x * width),
                            'y': int(pt.y * height),
                            'z': float(pt.z),
                            # Kept as float: int() would truncate a fractional score to 0.
                            'visibility': float(pt.visibility),
                        })
        finally:
            cap.release()

        if not data_list:
            # Unreadable video or no face found: skip it instead of crashing the batch
            # or re-using the previous video's landmarks.
            print(f"Skipping {video_path}: no lip landmarks detected")
            continue

        # Build the DataFrame once per video, after all frames have been collected.
        df = pd.DataFrame(data_list)

        # NOTE(review): the output keeps the .mpg file name while the codec is 'mp4v';
        # confirm the resulting files play as expected.
        output_path = os.path.join('processed_videos', os.path.basename(video_path))
        video_from_dataframe(output_path, df, width, height, fps, fourcc)

cv2.destroyAllWindows()

In [20]:
# single video, lip landmarks only: writes an un-cropped and a cropped rendering (ex1-2)
def video_from_dataframe(df, width, height, fps, fourcc,
                         output_path="bbizzn_lip_no_crop.mp4", points_per_frame=40):
    """Render keypoints from ``df`` as dots on black full-size frames and save a video.

    Rows are consumed in order, ``points_per_frame`` rows per output frame. Only the
    ``x`` and ``y`` columns are used for drawing.

    Args:
        df: DataFrame with at least ``x`` and ``y`` columns (pixel coordinates).
            An empty frame produces a video with no frames instead of raising.
        width: Output frame width in pixels.
        height: Output frame height in pixels.
        fps: Frame rate handed to ``cv2.VideoWriter``.
        fourcc: Codec code handed to ``cv2.VideoWriter``.
        output_path: Destination file; defaults to the previously hard-coded name.
        points_per_frame: Number of consecutive rows that make up one frame.
            Defaults to 40, the value that was previously hard-coded.

    Raises:
        ValueError: If ``points_per_frame`` is not positive or the number of rows
            is not a multiple of ``points_per_frame``.
    """
    if points_per_frame <= 0:
        raise ValueError("points_per_frame must be a positive integer")

    if df.empty:
        # No detections at all: avoid the KeyError a column lookup would raise on a
        # column-less frame and simply emit zero frames.
        xy_per_frame = np.empty((0, points_per_frame, 2), dtype=int)
    else:
        total_rows = df.shape[0]
        if total_rows % points_per_frame != 0:
            raise ValueError(
                f"expected a multiple of {points_per_frame} rows, got {total_rows}"
            )
        xy_per_frame = df[['x', 'y']].to_numpy().reshape(-1, points_per_frame, 2)

    out_video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        for frame_points in xy_per_frame:
            frame = np.zeros((height, width, 3), dtype=np.uint8)
            for px, py in frame_points:
                cv2.circle(frame, (int(px), int(py)), 1, (100, 100, 0), -1)
            out_video.write(frame)
    finally:
        # Release even if drawing fails so the output file handle is not leaked.
        out_video.release()

def video_from_dataframe_lip(df, fps, fourcc, output_path='bbizzn_lip_cropped.mp4',
                             crop_origin=(80, 190), crop_size=(140, 46), points_per_frame=40):
    """Render keypoints from ``df`` inside a fixed crop window and save them as a video.

    Each keypoint is shifted by ``crop_origin`` so the window's top-left corner becomes
    (0, 0); keypoints that fall outside the window are not drawn. Rows are consumed in
    order, ``points_per_frame`` rows per output frame.

    Args:
        df: DataFrame with at least ``x`` and ``y`` columns (pixel coordinates in the
            un-cropped frame). An empty frame produces a video with no frames.
        fps: Frame rate handed to ``cv2.VideoWriter``.
        fourcc: Codec code handed to ``cv2.VideoWriter``.
        output_path: Destination file; defaults to the previously hard-coded name.
        crop_origin: ``(x, y)`` of the crop window's top-left corner.
        crop_size: ``(width, height)`` of the crop window and of the output video.
        points_per_frame: Number of consecutive rows that make up one frame.

    Raises:
        ValueError: If ``points_per_frame`` is not positive or the number of rows
            is not a multiple of ``points_per_frame``.
    """
    if points_per_frame <= 0:
        raise ValueError("points_per_frame must be a positive integer")

    crop_x, crop_y = crop_origin
    crop_w, crop_h = crop_size

    if df.empty:
        # No detections at all: avoid the KeyError a column lookup would raise on a
        # column-less frame and simply emit zero frames.
        xy_per_frame = np.empty((0, points_per_frame, 2), dtype=int)
    else:
        total_rows = df.shape[0]
        if total_rows % points_per_frame != 0:
            raise ValueError(
                f"expected a multiple of {points_per_frame} rows, got {total_rows}"
            )
        xy_per_frame = df[['x', 'y']].to_numpy().reshape(-1, points_per_frame, 2)

    out_video = cv2.VideoWriter(output_path, fourcc, fps, (crop_w, crop_h))
    try:
        for frame_points in xy_per_frame:
            frame = np.zeros((crop_h, crop_w, 3), dtype=np.uint8)
            for px, py in frame_points:
                x, y = int(px) - crop_x, int(py) - crop_y
                # Boundary check: only draw points that land inside the crop window.
                if 0 <= x < crop_w and 0 <= y < crop_h:
                    cv2.circle(frame, (x, y), 1, (100, 100, 0), -1)
            out_video.write(frame)
    finally:
        # Release even if drawing fails so the output file handle is not leaked.
        out_video.release()

# Single video: collect the lip landmarks of every frame, then render them twice
# (full-size frame and fixed crop window).
video_path = "data/s1/bbizzn.mpg"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early: otherwise the loop below never runs and `df` would be undefined
    # or left over from a previously executed cell.
    raise OSError(f"Could not open video: {video_path}")

# FaceMesh landmark indices that outline the lips; the set() drops the indices
# shared between the four contours.
lipsUpperOuter = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips = list(set(lipsUpperOuter + lipsLowerOuter + lipsUpperInner + lipsLowerInner))

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
data_list = []

try:
    # The context manager closes the FaceMesh graph when processing ends or fails.
    with mp.solutions.face_mesh.FaceMesh() as face_mesh:
        while True:
            check, test_image = cap.read()
            if check is not True:
                break

            height, width, _ = test_image.shape

            rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_image)
            if not results.multi_face_landmarks:
                continue

            # Position reported after the read; queried once per frame rather than
            # once per landmark.
            frame_idx = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            for face_landmarks in results.multi_face_landmarks:
                for i in lips:
                    pt = face_landmarks.landmark[i]
                    data_list.append({
                        'frame': frame_idx,
                        'x': int(pt.x * width),
                        'y': int(pt.y * height),
                        'z': float(pt.z),
                        # Kept as float: int() would truncate a fractional score to 0.
                        'visibility': float(pt.visibility),
                    })
finally:
    cap.release()

if not data_list:
    raise RuntimeError(f"No lip landmarks detected in {video_path}")

# Build the DataFrame once, after all frames have been collected.
df = pd.DataFrame(data_list)

video_from_dataframe(df, width, height, fps, fourcc)
video_from_dataframe_lip(df, fps, fourcc)
cv2.destroyAllWindows()



In [7]:
# full vid (ex3)
import cv2
import mediapipe as mp

# Full video: draw the lip landmarks on top of the original frames and save every
# frame, resized to OUTPUT_SIZE, to a new video.
video_path = "data/s1/bbizzn.mpg"
OUTPUT_SIZE = (140, 46)  # (width, height) of the written video

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early instead of silently writing a video with no frames.
    raise OSError(f"Could not open video: {video_path}")

# FaceMesh landmark indices that outline the lips; the set() drops the indices
# shared between the four contours.
lipsUpperOuter = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips = list(set(lipsUpperOuter + lipsLowerOuter + lipsUpperInner + lipsLowerInner))

fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

out_video = cv2.VideoWriter('bbizzn_full_vid_with_lip.mp4', fourcc, fps, OUTPUT_SIZE)

try:
    # The context manager closes the FaceMesh graph when processing ends or fails.
    with mp.solutions.face_mesh.FaceMesh() as face_mesh:
        while True:
            check, test_image = cap.read()
            if check is not True:
                break

            height, width, _ = test_image.shape

            rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_image)

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    for i in lips:
                        pt = face_landmarks.landmark[i]
                        # Landmarks are multiplied by the frame size to get pixel positions.
                        x = int(pt.x * width)
                        y = int(pt.y * height)
                        cv2.circle(test_image, (x, y), 1, (100, 100, 0), -1)

            # Frames without a detection are still written, just without dots.
            out_video.write(cv2.resize(test_image, OUTPUT_SIZE))
finally:
    # Always release the writer and the capture so the files are closed on errors too.
    out_video.release()
    cap.release()
    cv2.destroyAllWindows()


In [31]:
# Sanity check: the cells above append one row per lip landmark (40 unique indices in
# `lips`) for each frame with a detection, so this ratio is the number of such frames
# and should be a whole number.
# NOTE(review): relies on `df` left in the kernel by a previously executed cell.
df.shape[0] / 40

63.0

In [None]:
# prev

# Live preview: draw the lip landmarks on each frame of the video and show it in an
# OpenCV window until the video ends or 'q' is pressed.
video_path = "Download.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early instead of exiting silently without showing anything.
    raise OSError(f"Could not open video: {video_path}")

# FaceMesh landmark indices that outline the lips; the set() drops the indices
# shared between the four contours.
lipsUpperOuter = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips = list(set(lipsUpperOuter + lipsLowerOuter + lipsUpperInner + lipsLowerInner))

try:
    # The context manager closes the FaceMesh graph when the preview ends or fails.
    with mp.solutions.face_mesh.FaceMesh() as face_mesh:
        while True:
            check, test_image = cap.read()
            if check is not True:
                break

            height, width, _ = test_image.shape

            rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_image)

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    for i in lips:
                        pt = face_landmarks.landmark[i]
                        # Landmarks are multiplied by the frame size to get pixel positions.
                        x = int(pt.x * width)
                        y = int(pt.y * height)
                        cv2.circle(test_image, (x, y), 1, (100, 100, 0), -1)

            cv2.imshow("Result", test_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Runs on normal exit, on 'q', and when the cell is interrupted, so the capture
    # and the preview window never stay open.
    cap.release()
    cv2.destroyAllWindows()

