In [None]:
!pip install mediapipe
!pip install deepface
!pip install tqdm
!pip install opencv-python

ERROR: Could not find a version that satisfies the requirement openpose (from versions: none)
ERROR: No matching distribution found for openpose

In [None]:
import cv2
import mediapipe as mp
from deepface import DeepFace
from collections import Counter
from tqdm import tqdm
import numpy as np

def main():
    """Annotate a video with face/emotion and pose/activity labels.

    Reads the hard-coded input video frame by frame, runs face + emotion
    detection and pose-based activity detection on every frame, writes the
    annotated frames to ``output_video_path`` and finally writes a text
    summary of the collected labels to ``output_summary_path``.

    Raises:
        OSError: if the input video cannot be opened.
    """
    video_path = "/content/Unlocking Facial Recognition_ Diverse Activities Analysis.mp4"
    output_video_path = "video_tech_challenge_modified.mp4"
    output_summary_path = "video_summary.txt"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently producing an empty video/summary.
        cap.release()
        raise OSError(f"Não foi possível abrir o vídeo: {video_path}")

    activity_summary = []
    emotions_summary = []
    # NOTE: no anomaly detection is implemented yet, so this stays at 0.
    anomalies_count = 0
    # Count the frames actually read; CAP_PROP_FRAME_COUNT is only what the
    # container reports and may differ from what can be decoded.
    frames_processed = 0

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Fall back to a sane default when the container reports no FPS (0).
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Context managers close the MediaPipe graphs even if processing fails.
        with mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.5) as mp_face_detection:
            with mp.solutions.pose.Pose() as mp_pose:
                progress_total = frame_count if frame_count > 0 else None
                with tqdm(total=progress_total, desc="Processando vídeo") as pbar:
                    while True:
                        ret, frame = cap.read()
                        if not ret:
                            break
                        frames_processed += 1

                        # Process all frames for emotions
                        frame, detected_emotions = detect_and_mark_faces_and_emotions(frame, mp_face_detection)
                        if detected_emotions:
                            emotions_summary.extend(detected_emotions)

                        # Process all frames for activities
                        activity, _ = detect_activity(frame, mp_pose)
                        if activity:
                            activity_summary.append(activity)

                            # Display detected activity on video
                            cv2.putText(frame, f"Atividade: {activity}", (10, 50),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)

                        # Write processed frame to output
                        out.write(frame)
                        pbar.update(1)
    finally:
        # Always release the capture/writer so the output file is finalized.
        # No GUI windows are ever created, so there is nothing else to destroy.
        cap.release()
        if out is not None:
            out.release()

    # Generate activity and emotions summary (generate_summary itself reports
    # where the summary was saved, so it is not printed again here).
    generate_summary(activity_summary, emotions_summary, output_summary_path, frames_processed, anomalies_count)
    print(f"Vídeo processado e salvo como {output_video_path}")


def detect_and_mark_faces_and_emotions(frame, mp_face_detection):
    """Detect faces, classify each face's dominant emotion and annotate the frame.

    Args:
        frame: BGR image (as read by OpenCV); it is annotated in place.
        mp_face_detection: a MediaPipe ``FaceDetection`` instance whose
            ``process`` method is called on the RGB version of ``frame``.

    Returns:
        A ``(frame, detected_emotions)`` tuple where ``detected_emotions`` is
        the list of dominant-emotion labels, one per face that DeepFace could
        analyze.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    detected_emotions = []

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = mp_face_detection.process(frame_rgb)
    if not results.detections:
        return frame, detected_emotions

    frame_h, frame_w = frame.shape[:2]

    # First pass: analyze every face on the still-unannotated frame, so that
    # rectangles/labels drawn for one face never end up in a crop that is fed
    # to the emotion model.
    annotations = []
    for detection in results.detections:
        bbox = detection.location_data.relative_bounding_box
        left, top = int(bbox.xmin * frame_w), int(bbox.ymin * frame_h)
        right = left + int(bbox.width * frame_w)
        bottom = top + int(bbox.height * frame_h)

        # The relative box can extend past the frame borders; clamp before
        # slicing, otherwise a negative index silently wraps around and
        # yields an empty or wrong crop.
        crop_left, crop_top = max(left, 0), max(top, 0)
        crop_right, crop_bottom = min(right, frame_w), min(bottom, frame_h)

        emotion = None
        if crop_right > crop_left and crop_bottom > crop_top:
            face_crop = frame[crop_top:crop_bottom, crop_left:crop_right].copy()
            try:
                result = DeepFace.analyze(face_crop, actions=['emotion'], enforce_detection=False)
                # Depending on the DeepFace version the result is either a
                # dict or a list of dicts; normalize to a list.
                if isinstance(result, dict):
                    result = [result]
                if result:
                    emotion = result[0]['dominant_emotion']
            except Exception as exc:
                # Best effort: a face that cannot be analyzed is still boxed
                # but contributes no emotion. Record why instead of hiding it.
                logger.debug("Emotion analysis failed for a face: %s", exc)

        annotations.append((left, top, right, bottom, emotion))

    # Second pass: draw all overlays.
    for left, top, right, bottom, emotion in annotations:
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        if emotion is not None:
            detected_emotions.append(emotion)
            # Keep the label inside the frame when the face touches the top.
            label_y = max(top - 10, 25)
            cv2.putText(frame, emotion, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    return frame, detected_emotions


def detect_activity(frame, mp_pose):
    """Classify posture (standing, sitting, lying) and a simple "dancing" cue.

    The frame is converted to RGB and passed to ``mp_pose.process``. Posture
    and the dancing flag are derived from the landmarks' ``y`` coordinates
    (smaller ``y`` means higher in the image). When at least one wrist has
    ``visibility > 0.5`` the pose skeleton is drawn onto ``frame`` in place.

    Args:
        frame: BGR image; may be annotated in place with the pose skeleton.
        mp_pose: a MediaPipe ``Pose`` instance.

    Returns:
        A ``(label, None)`` tuple. ``label`` is
        ``"Postura: <posture>, <dancing>"`` when a pose is found and
        ``"Nao detectado"`` otherwise; the second element is always ``None``.
    """
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pose_results = mp_pose.process(frame_rgb)

    if not pose_results.pose_landmarks:
        return "Nao detectado", None

    landmarks = pose_results.pose_landmarks.landmark
    pose_landmark = mp.solutions.pose.PoseLandmark

    # Extract relevant landmarks
    left_hip = landmarks[pose_landmark.LEFT_HIP]
    right_hip = landmarks[pose_landmark.RIGHT_HIP]
    left_shoulder = landmarks[pose_landmark.LEFT_SHOULDER]
    right_shoulder = landmarks[pose_landmark.RIGHT_SHOULDER]
    left_knee = landmarks[pose_landmark.LEFT_KNEE]
    right_knee = landmarks[pose_landmark.RIGHT_KNEE]
    left_wrist = landmarks[pose_landmark.LEFT_WRIST]
    right_wrist = landmarks[pose_landmark.RIGHT_WRIST]

    # Posture detection from the average heights of shoulders, hips and knees.
    hips_y = (left_hip.y + right_hip.y) / 2
    shoulders_y = (left_shoulder.y + right_shoulder.y) / 2
    knees_y = (left_knee.y + right_knee.y) / 2

    if abs(hips_y - shoulders_y) < 0.1:  # Shoulders and hips at nearly the same height
        posture = "Deitada"
    elif abs(hips_y - knees_y) < 0.2:  # Hips and knees close in height
        posture = "Sentada"
    else:
        posture = "Em pe"

    # Dance detection: at least one wrist above its shoulder.
    # NOTE(review): wrist visibility is not taken into account here, so an
    # occluded/low-confidence wrist can still trigger "Dancando" — confirm
    # whether that is intended.
    left_hand_up = left_wrist.y < left_shoulder.y
    right_hand_up = right_wrist.y < right_shoulder.y
    dancing = "Dancando" if (left_hand_up or right_hand_up) else "Nao dancando"

    # Draw landmarks only when hands are visible
    if left_wrist.visibility > 0.5 or right_wrist.visibility > 0.5:
        drawing = mp.solutions.drawing_utils
        drawing.draw_landmarks(
            frame,
            pose_results.pose_landmarks,
            mp.solutions.pose.POSE_CONNECTIONS,
            drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
            drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)
        )

    # Combine results
    return f"Postura: {posture}, {dancing}", None


def generate_summary(activities, emotions, output_path, frame_count, anomalies_count):
    """Write a plain-text summary of the detected activities and emotions.

    Args:
        activities: iterable of activity labels, one entry per detection.
        emotions: iterable of emotion labels, one entry per detection.
        output_path: path of the text file to create (overwritten if present).
        frame_count: number of frames to report as analyzed.
        anomalies_count: number of anomalies to report.

    The report lists every distinct label with its count (in first-seen
    order) and, when available, the most frequent activity and emotion.
    A confirmation message is printed after the file is written.
    """
    activity_counter = Counter(activities)
    emotion_counter = Counter(emotions)

    lines = [
        "Resumo do vídeo:\n\n",
        f"Total de frames analisados: {frame_count}\n",
        f"Número de anomalias detectadas: {anomalies_count}\n\n",
        "Atividades detectadas:\n",
    ]
    lines.extend(
        f"- {activity}: detectado {count} vezes\n"
        for activity, count in activity_counter.items()
    )

    lines.append("\nEmoções predominantes:\n")
    lines.extend(
        f"- {emotion}: detectado {count} vezes\n"
        for emotion, count in emotion_counter.items()
    )

    lines.append("\nAnálise geral:\n")
    if activity_counter:
        top_activity, top_activity_count = activity_counter.most_common(1)[0]
        lines.append(
            f"A atividade mais frequente no vídeo foi '{top_activity}', "
            f"ocorrendo aproximadamente {top_activity_count} vezes.\n"
        )

    if emotion_counter:
        top_emotion, top_emotion_count = emotion_counter.most_common(1)[0]
        lines.append(
            f"A emoção predominante foi '{top_emotion}', "
            f"aparecendo em aproximadamente {top_emotion_count} quadros.\n"
        )

    # The report contains non-ASCII characters (e.g. "vídeo", "Emoções"), so
    # the encoding is pinned instead of relying on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    print(f"Resumo salvo em {output_path}")

if __name__ == "__main__":
    main()


Processando vídeo: 100%|██████████| 3326/3326 [09:03<00:00,  6.12it/s]

Resumo salvo em video_summary.txt
Vídeo processado e salvo como video_tech_challenge_modified.mp4
Resumo salvo em video_summary.txt



