In [1]:
print("hello world")

hello world


In [2]:
import requests

url = "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite"
output_path = "detector.tflite"

response = requests.get(url)
if response.status_code == 200:
    with open(output_path, "wb") as f:
        f.write(response.content)
    print(f"Downloaded to {output_path}")
else:
    print(f"Failed to download. HTTP Status Code: {response.status_code}")



Downloaded to detector.tflite


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import json
import time
from deepface import DeepFace
from yt_dlp import YoutubeDL
from scipy.spatial.distance import euclidean

# Initialize MediaPipe solutions
mp_face_detection = mp.solutions.face_detection
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
pose = mp_pose.Pose(min_detection_confidence=0.5)

# Profiles for tracking people
profiles = {}
max_inactive_frames = 30  # Maximum frames to keep a profile if not detected

# Function to assign or update ID based on face embedding and spatial proximity
def assign_id(embedding, bbox, profiles, threshold=0.6, spatial_threshold=50):
    best_match_id = None
    best_similarity = float("inf")
    best_distance = float("inf")

    bbox_center = ((bbox[0] + bbox[2] / 2), (bbox[1] + bbox[3] / 2))

    for profile_id, profile in profiles.items():
        # Compare embedding similarity
        similarity = np.linalg.norm(np.array(profile['face_embedding']) - np.array(embedding))
        # Compare spatial proximity
        profile_bbox = profile["bbox"]
        profile_center = ((profile_bbox[0] + profile_bbox[2] / 2), (profile_bbox[1] + profile_bbox[3] / 2))
        spatial_distance = euclidean(profile_center, bbox_center)

        # Find the best match
        if similarity < threshold and spatial_distance < spatial_threshold:
            if similarity < best_similarity or (similarity == best_similarity and spatial_distance < best_distance):
                best_match_id = profile_id
                best_similarity = similarity
                best_distance = spatial_distance

    if best_match_id is not None:
        # Update the matched profile
        profiles[best_match_id]["bbox"] = bbox
        profiles[best_match_id]["last_seen"] = frame_count
        return best_match_id
    else:
        # Create a new profile
        new_id = len(profiles) + 1
        profiles[new_id] = {"face_embedding": embedding, "actions": [], "gaze": {}, "pose": {}, "bbox": bbox, "last_seen": frame_count}
        return new_id

# Function to fetch video URL using yt-dlp
def get_video_url(youtube_url):
    ydl_opts = {'quiet': True, 'format': 'best[ext=mp4]'}
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=False)
        return info['url']

# Process a YouTube video
youtube_url = "https://www.youtube.com/watch?v=96Y6mc3C1Bg"  # Replace with your URL
video_url = get_video_url(youtube_url)
print("Video URL:", video_url)

cap = cv2.VideoCapture(video_url)

if not cap.isOpened():
    print("Error: Unable to open the YouTube video stream.")
else:
    print("Processing YouTube video. Press 'q' to quit.")
    frame_count = 0
    start_time = time.time()

    while True:
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading the video stream.")
            break

        # Skip frames for efficiency
        if frame_count % 5 != 0:
            frame_count += 1
            continue

        # Validate frame dimensions
        if frame is None or frame.shape[0] == 0 or frame.shape[1] == 0:
            print(f"Skipped frame {frame_count}: invalid dimensions.")
            continue

        # Get frame dimensions
        ih, iw, _ = frame.shape

        # Convert frame to RGB for MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Face Detection
        face_results = face_detection.process(rgb_frame)
        detected_person_ids = []
        if face_results.detections:
            for detection in face_results.detections:
                bboxC = detection.location_data.relative_bounding_box
                bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
                
                # Extract face region
                x, y, w, h = bbox
                face_roi = rgb_frame[y:y+h, x:x+w]

                try:
                    # Generate face embedding
                    embedding = DeepFace.represent(face_roi, model_name="Facenet", enforce_detection=False)[0]['embedding']
                    person_id = assign_id(embedding, bbox, profiles)
                    detected_person_ids.append(person_id)

                    # Debugging: Log assigned person ID
                    print(f"Frame {frame_count}: Assigned Person ID {person_id}")
                except Exception as e:
                    print(f"Error processing face embedding in frame {frame_count}: {e}")
                    continue

        # Pose Detection
        pose_results = pose.process(rgb_frame)
        if pose_results.pose_landmarks:
            mp_drawing.draw_landmarks(frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # Assign pose landmarks to profiles
            for lm in pose_results.pose_landmarks.landmark:
                pose_coords = (lm.x * iw, lm.y * ih)
                # Find the nearest face and associate
                for person_id in detected_person_ids:
                    profiles[person_id]["pose"] = {"landmarks": pose_coords}

        # Remove stale profiles
        profiles = {k: v for k, v in profiles.items() if frame_count - v["last_seen"] <= max_inactive_frames}

        # Debugging: Draw bounding boxes and IDs on the frame for visualization
        for person_id, profile in profiles.items():
            bbox = profile["bbox"]
            x, y, w, h = bbox
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {person_id}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Display frame
        cv2.imshow("MediaPipe YouTube Video Processing", frame)

        # Save profiles every 100 frames
        if frame_count % 100 == 0:
            with open("profiles_temp.json", "w") as f:
                json.dump(profiles, f, indent=4)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("Exiting video processing.")
            break

        frame_count += 1

cap.release()
cv2.destroyAllWindows()

# Final Save: Save profiles to JSON
with open("profiles.json", "w") as f:
    json.dump(profiles, f, indent=4)
print(f"Profiles saved to profiles.json")

end_time = time.time()
print(f"Processed {frame_count} frames in {end_time - start_time:.2f} seconds.")


2024-12-18 23:33:07.257365: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1734593598.761831 3453900 gl_context.cc:357] GL version: 2.1 (2.1 ATI-4.14.1), renderer: AMD Radeon Pro 555 OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1734593598.795860 3456626 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1734593598.825625 3453900 gl_context.cc:357] GL version: 2.1 (2.1 ATI-4.14.1), renderer: AMD Radeon Pro 555 OpenGL Engine
W0000 00:00:1734593599.282087 3456791 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Video URL: https://rr4---sn-jxopj-n5oe.googlevideo.com/videoplayback?expire=1734615201&ei=QcxjZ9a7MrC_sfIPmcW6mAg&ip=2607%3Af140%3A400%3A6b%3A8481%3Aaa15%3A119c%3Ac39f&id=o-AHMtIE2B7rt86xcjQJ74X5F3mHkVBszdTBqUfYH2_EoT&itag=18&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&met=1734593601%2C&mh=KN&mm=31%2C29&mn=sn-jxopj-n5oe%2Csn-n4v7snls&ms=au%2Crdu&mv=m&mvi=4&pl=32&rms=au%2Cau&initcwndbps=5768750&bui=AfMhrI9A5NVVLnMo-BCaK2c4dDcabu4rmNZfaRaFIqUwFRnHzBjf50n_2jAyGNV1apB6Srpu07tVYUmP&vprv=1&svpuc=1&mime=video%2Fmp4&ns=DDY6Y3snuKkYblvJbwsvXHIQ&rqh=1&cnr=14&ratebypass=yes&dur=5104.187&lmt=1729255083328319&mt=1734593377&fvip=1&fexp=51326932%2C51331020%2C51335594%2C51371293&c=MWEB&sefc=1&txp=5538434&n=2Xa3z-X7KImdWw&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cbui%2Cvprv%2Csvpuc%2Cmime%2Cns%2Crqh%2Ccnr%2Cratebypass%2Cdur%2Clmt&sig=AJfQdSswRAIgaEgtD658MA7VNtYs6yY6FpRK8joQGSzUEnObec79hoACIGylz7ddkCa1ZlUHwXKmKU1AgP73gNdrFfge501UjpTB&lsparams=met%2Cmh%2Cmm%2Cmn%2Cms%2Cmv%

W0000 00:00:1734593605.258694 3456789 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Frame 40: Assigned Person ID 1
Frame 45: Assigned Person ID 2
Frame 65: Assigned Person ID 3
Frame 85: Assigned Person ID 2
Frame 90: Assigned Person ID 3
Frame 95: Assigned Person ID 3
Frame 100: Assigned Person ID 3
Frame 100: Assigned Person ID 3
Frame 105: Assigned Person ID 3
Frame 105: Assigned Person ID 3
Frame 110: Assigned Person ID 3
Frame 110: Assigned Person ID 3
Frame 115: Assigned Person ID 3
Frame 120: Assigned Person ID 3
Frame 125: Assigned Person ID 2
Frame 130: Assigned Person ID 3
Frame 135: Assigned Person ID 3
Frame 140: Assigned Person ID 3
Frame 185: Assigned Person ID 1
Frame 225: Assigned Person ID 1
Frame 230: Assigned Person ID 2
Frame 235: Assigned Person ID 3
Frame 240: Assigned Person ID 4
Frame 245: Assigned Person ID 5
Frame 250: Assigned Person ID 6
Frame 255: Assigned Person ID 7
Exiting video processing.
Profiles saved to profiles.json
Processed 255 frames in 21.69 seconds.


: 

In [None]:
#old code that just streams without mediapipe facial recognition
# Get the direct video URL
import yt_dlp
import cv2

def get_video_url(youtube_url):
    ydl_opts = {
        'quiet': True,
        'format': 'best[ext=mp4]'
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=False)
        return info['url']

# Get the direct video URL
youtube_url = "https://www.youtube.com/watch?v=m34ZKJNyxac"
video_url = get_video_url(youtube_url)
print("Video URL:", video_url)

# Open video stream with OpenCV
cap = cv2.VideoCapture(video_url)

if not cap.isOpened():
    print("Error: Unable to open video stream.")
else:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        cv2.imshow("YouTube Video", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
video_url = get_video_url(youtube_url)
print("Video URL:", video_url)

# Open video stream with OpenCV
cap = cv2.VideoCapture(video_url)

if not cap.isOpened():
    print("Error: Unable to open video stream.")
else:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        cv2.imshow("YouTube Video", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


Video URL: https://rr2---sn-o097znsr.googlevideo.com/videoplayback?expire=1733935790&ei=Tm5ZZ8OCHvr-sfIP1OPhoAg&ip=2601%3A644%3A601%3Afa0%3A18f1%3A6d9c%3A9455%3Ae968&id=o-AM0VWy9id_RhIja_CuAX_7jCOsmuB3nF0M_O12ZHnnTH&itag=18&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&met=1733914190%2C&mh=Vq&mm=31%2C29&mn=sn-o097znsr%2Csn-n4v7sns7&ms=au%2Crdu&mv=m&mvi=2&pl=34&rms=au%2Cau&initcwndbps=3915000&bui=AQn3pFQKhi7MVAR6il70JWVEwJTRLPZJ7nzHT55pqdE6sdGYDy3fT88Js2ixpJEj8o83imVFwTnhby9Q&vprv=1&svpuc=1&mime=video%2Fmp4&ns=dgnEsjCi-frwcKg6mSObwzwQ&rqh=1&gir=yes&clen=36612911&ratebypass=yes&dur=398.593&lmt=1733879672190924&mt=1733913906&fvip=3&fexp=51326932%2C51331020%2C51335594%2C51347747&c=MWEB&sefc=1&txp=5538434&n=jbygYr9NxynRtw&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cbui%2Cvprv%2Csvpuc%2Cmime%2Cns%2Crqh%2Cgir%2Cclen%2Cratebypass%2Cdur%2Clmt&sig=AJfQdSswRQIgR_sLg4GdoV2OPdKEWhxKh-0u7jr7-NyGlLR3-mI-5a4CIQCMCLTi1P8C5kClsDZoxqTwkw4zeYhfluOoJaAbPTohlA%3D%3D&lsparams=met%

KeyboardInterrupt: 

: 

In [None]:
#Todo, need to add cookies for age restricted content