# Task 1: Motion Estimation and Event Detection in a Video

In [4]:
from google.colab.patches import cv2_imshow
import cv2
import numpy as np

# Task 1 pipeline: compare each frame with the previous one, flag frames in
# which a large enough fraction of pixels changed, outline the changed regions,
# and write the annotated frames to a new video file.

# Input / output locations and detection parameters.
video_path = '/content/video1.webm'
output_path = 'output_video.mp4'
motion_threshold = 0.02  # fraction of changed pixels above which a frame is an event
fallback_fps = 30.0      # used only when the container reports no usable frame rate

frame_number = 1   # index of the frame currently compared (the first frame is 0)
event_frames = []  # (frame_number, timestamp_seconds) for every flagged frame

cap = cv2.VideoCapture(video_path)
out = None
try:
    if not cap.isOpened():
        raise RuntimeError(f"Error: Could not open video {video_path}")

    ret, prev_frame = cap.read()
    if not ret:
        # Raise instead of calling exit(): the finally block below still
        # releases the capture and the notebook session keeps running.
        raise RuntimeError("Error: Could not read the first frame.")

    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    fps = cap.get(cv2.CAP_PROP_FPS)
    if not np.isfinite(fps) or fps <= 0:
        fps = fallback_fps

    # Size the writer from the decoded frame itself so it always matches
    # the frames that are written below.
    height, width = prev_frame.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise RuntimeError(f"Error: Could not open {output_path} for writing.")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Binary mask of pixels whose grey level changed by more than 30.
        frame_diff = cv2.absdiff(prev_gray, gray)
        _, thresh = cv2.threshold(frame_diff, 30, 255, cv2.THRESH_BINARY)

        # The mask only holds 0 or 255, so the share of non-zero pixels is the
        # same quantity as sum(mask) / (pixel_count * 255).
        motion_intensity = np.count_nonzero(thresh) / thresh.size

        if motion_intensity > motion_threshold:
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            cv2.putText(frame, f"Event Detected at {timestamp:.2f}s",
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            event_frames.append((frame_number, timestamp))

        # Outline every changed region, whether or not the frame is an event.
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(frame, contours, -1, (0, 255, 0), 2)

        out.write(frame)

        # No cv2.waitKey / destroyAllWindows here: this cell never opens a
        # HighGUI window, so there is no keyboard event to poll for.

        prev_gray = gray
        frame_number += 1
finally:
    # Always flush and close the files, including on errors.
    cap.release()
    if out is not None:
        out.release()

print("Detected Events:")
if not event_frames:
    print(f"  (none above motion threshold {motion_threshold})")
for frame_num, timestamp in event_frames:
    print(f"Event at frame {frame_num}, timestamp {timestamp:.2f}s")


Detected Events:


# Task 2: Estimating Sentiments of People in a Crowd – Gesture Analysis and Image Categorization

In [8]:
import cv2
import numpy as np

# Task 2 pipeline: find face-like skin-coloured regions, look for a mouth-like
# dark feature in the lower half of each one, and label the region
# Happy / Sad / Neutral from where the edge pixels of that feature concentrate.

image_path = '/content/img4.webp'
output_image_path = 'annotated_groupofpeople.jpg'

img = cv2.imread(image_path)
if img is None:
    print(f"Error loading image {image_path}")
else:
    # --- Candidate faces: skin-coloured blobs with a roughly square box -----
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_skin = np.array([0, 20, 70], dtype=np.uint8)
    upper_skin = np.array([20, 255, 255], dtype=np.uint8)
    skin_mask = cv2.inRange(hsv, lower_skin, upper_skin)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    skin_mask = cv2.dilate(skin_mask, kernel, iterations=2)
    skin_mask = cv2.GaussianBlur(skin_mask, (3, 3), 0)

    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    face_regions = []
    for cnt in contours:
        if cv2.contourArea(cnt) > 500:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect_ratio = w / float(h)
            if 0.75 < aspect_ratio < 1.3:
                face_regions.append((x, y, w, h))

    sentiments = []
    annotated_img = img.copy()

    for (x, y, w, h) in face_regions:
        # face_img is a NumPy view into img. It is only read from here on:
        # all drawing goes to annotated_img, so the pixels being analysed
        # (for this face and for any later, overlapping one) stay untouched.
        face_img = img[y:y+h, x:x+w]

        gray_face = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_face, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        contours_feat, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Dark features inside the face box, in face-local coordinates.
        features = []
        for cnt_feat in contours_feat:
            if cv2.contourArea(cnt_feat) > 50:
                fx, fy, fw, fh = cv2.boundingRect(cnt_feat)
                features.append((fx, fy, fw, fh))
                # Shift by the face origin to draw on the full-size annotation.
                cv2.rectangle(annotated_img, (x + fx, y + fy),
                              (x + fx + fw, y + fy + fh), (255, 0, 0), 1)

        # Mouth candidate: the largest feature that starts in the lower half.
        face_height = face_img.shape[0]
        mouth_candidates = [rect for rect in features if rect[1] > face_height / 2]

        if mouth_candidates:
            mx, my, mw, mh = max(mouth_candidates, key=lambda rect: rect[2] * rect[3])
            # Grey conversion is per-pixel, so cropping the grey face gives the
            # same values as converting the cropped colour patch.
            gray_mouth = gray_face[my:my+mh, mx:mx+mw]
            edges = cv2.Canny(gray_mouth, 50, 150)
            # np.count_nonzero also copes with an empty half (mh == 1).
            top_count = np.count_nonzero(edges[0:mh//2, :])
            bottom_count = np.count_nonzero(edges[mh//2:mh, :])
            if bottom_count > top_count:
                sentiment = "Happy"
            elif top_count > bottom_count:
                sentiment = "Sad"
            else:
                sentiment = "Neutral"
        else:
            sentiment = "Neutral"

        sentiments.append(sentiment)
        cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Keep the label inside the image when the face touches the top edge.
        label_y = max(y - 10, 15)
        cv2.putText(annotated_img, sentiment, (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    print(f"Image: {image_path}")
    for idx, sentiment in enumerate(sentiments):
        print(f"  Person {idx+1}: {sentiment}")
    if sentiments:
        # Ties resolve to the first key in this dict (Happy, then Sad, then Neutral).
        sentiment_counts = {'Happy': 0, 'Sad': 0, 'Neutral': 0}
        for sentiment in sentiments:
            sentiment_counts[sentiment] += 1
        overall_sentiment = max(sentiment_counts, key=sentiment_counts.get)
        print(f"Overall sentiment: {overall_sentiment}\n")
    else:
        print("No faces detected.\n")

    success = cv2.imwrite(output_image_path, annotated_img)
    if success:
        print(f"Annotated image saved to {output_image_path}")
    else:
        print(f"Error saving annotated image to {output_image_path}")

    # No cv2.waitKey / destroyAllWindows: this cell never opens a HighGUI
    # window; the result is the file written above.


Image: /content/img4.webp
  Person 1: Neutral
  Person 2: Sad
Overall sentiment: Sad

Annotated image saved to annotated_groupofpeople.jpg


# Task 3: Gender Identification from Facial Features

In [11]:
import cv2
import numpy as np

def detect_faces(img):
    """Return bounding boxes of skin-coloured, roughly square regions.

    The BGR image is converted to HSV and thresholded to a fixed colour range;
    the mask is eroded and then dilated twice each with a 5x5 elliptical
    kernel, and the external contours of the result are filtered by area,
    bounding-box aspect ratio and minimum side length.

    Args:
        img: BGR image array as returned by ``cv2.imread``.

    Returns:
        List of ``(x, y, w, h)`` tuples, one per accepted contour.
    """
    skin_lo = np.array([0, 30, 60], dtype=np.uint8)
    skin_hi = np.array([20, 150, 255], dtype=np.uint8)
    mask = cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), skin_lo, skin_hi)

    # Erode first to drop small specks, then dilate to restore blob size.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.erode(mask, ellipse, iterations=2)
    mask = cv2.dilate(mask, ellipse, iterations=2)

    blobs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for blob in blobs:
        if cv2.contourArea(blob) <= 1000:
            continue
        x, y, w, h = cv2.boundingRect(blob)
        ratio = w / float(h)
        if not (0.75 < ratio < 1.3):
            continue
        if w <= 50 or h <= 50:
            continue
        boxes.append((x, y, w, h))
    return boxes

def extract_geometric_features(landmarks):
    """Measure eye spacing and jaw width from a landmark mapping.

    Args:
        landmarks: mapping that provides point coordinates under the keys
            'left_eye', 'right_eye', 'jaw_left' and 'jaw_right'.

    Returns:
        Dict with 'eye_distance' and 'jaw_width', each the Euclidean distance
        between the corresponding pair of points.
    """
    def _distance(first_key, second_key):
        # Straight-line distance between two named landmarks.
        delta = np.array(landmarks[first_key]) - np.array(landmarks[second_key])
        return np.linalg.norm(delta)

    return {
        'eye_distance': _distance('left_eye', 'right_eye'),
        'jaw_width': _distance('jaw_left', 'jaw_right'),
    }

def detect_landmarks(face_img):
    """Place seven named landmarks at fixed relative positions in a face crop.

    No image content is inspected: every point is a constant fraction of the
    crop's width and height, truncated to integers. Distances derived from
    these points therefore depend only on the size of the crop.

    Args:
        face_img: array-like whose ``shape`` starts with (height, width).

    Returns:
        Dict mapping landmark name to an ``(x, y)`` tuple in crop coordinates.
    """
    rows, cols = face_img.shape[:2]

    # (name, fraction of width, fraction of height)
    layout = (
        ('left_eye', 0.3, 0.4),
        ('right_eye', 0.7, 0.4),
        ('nose_tip', 0.5, 0.6),
        ('jaw_left', 0.2, 0.9),
        ('jaw_right', 0.8, 0.9),
        ('mouth_left', 0.4, 0.75),
        ('mouth_right', 0.6, 0.75),
    )
    return {
        name: (int(cols * frac_x), int(rows * frac_y))
        for name, frac_x, frac_y in layout
    }

def classify_gender(features, ratio_threshold=1.8):
    """Label a face 'Male' or 'Female' from its jaw-width / eye-distance ratio.

    Args:
        features: mapping with numeric 'jaw_width' and 'eye_distance' entries.
            'eye_distance' is used as a divisor, so it must be non-zero.
        ratio_threshold: ratio above which the face is labelled 'Male'.
            Defaults to 1.8, the value that used to be hard-coded.

    Returns:
        Tuple ``(label, reason)`` where ``reason`` is a one-sentence
        explanation of the decision.
    """
    jaw_to_eye_ratio = features['jaw_width'] / features['eye_distance']
    if jaw_to_eye_ratio > ratio_threshold:
        return 'Male', 'Broad jawline relative to eye distance suggests male.'
    return 'Female', 'Narrower jawline relative to eye distance suggests female.'

def process_image(image_path, output_path=None):
    """Detect faces in an image, predict a gender per face and annotate it.

    For every region returned by ``detect_faces`` the function derives
    landmarks and geometric features, classifies them, prints the prediction,
    and draws the box, label and landmark points on a copy of the image.

    Args:
        image_path: path of the image to read with ``cv2.imread``.
        output_path: optional path; when given, the annotated image is written
            there with ``cv2.imwrite``. Nothing is written when it is None.

    Returns:
        The annotated BGR image, or None when the image could not be loaded
        or no face region was found.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error loading image {image_path}")
        return None

    face_regions = detect_faces(img)
    if len(face_regions) == 0:
        print(f"No face detected in {image_path}")
        return None

    annotated_img = img.copy()
    for (x, y, w, h) in face_regions:
        face_img = img[y:y+h, x:x+w]
        landmarks = detect_landmarks(face_img)
        geometric_features = extract_geometric_features(landmarks)
        gender, reason = classify_gender(geometric_features)

        cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Keep the label inside the image when the face touches the top edge.
        label_y = max(y - 10, 15)
        cv2.putText(annotated_img, f"Predicted: {gender}", (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Landmarks are in crop coordinates; shift them into image coordinates.
        for point in landmarks.values():
            point_abs = (point[0] + x, point[1] + y)
            cv2.circle(annotated_img, point_abs, 2, (255, 0, 0), -1)

        print(f"Image: {image_path}")
        print(f"Predicted Gender: {gender}")
        print(f"Reason: {reason}")
        print(f"Geometric Features: {geometric_features}")

    # No cv2.waitKey / destroyAllWindows: no HighGUI window is ever opened
    # here, so the annotated image is handed back (and optionally saved).
    if output_path is not None:
        if cv2.imwrite(output_path, annotated_img):
            print(f"Annotated image saved to {output_path}")
        else:
            print(f"Error saving annotated image to {output_path}")

    return annotated_img

image_path = '/content/img6.jpg'
# Bind the result to a name so the array is not echoed as the cell's output.
gender_annotated_img = process_image(image_path, output_path='annotated_gender.jpg')


Image: /content/img6.jpg
Predicted Gender: Female
Reason: Narrower jawline relative to eye distance suggests female.
Geometric Features: {'eye_distance': 21.0, 'jaw_width': 33.0}
Image: /content/img6.jpg
Predicted Gender: Female
Reason: Narrower jawline relative to eye distance suggests female.
Geometric Features: {'eye_distance': 33.0, 'jaw_width': 49.0}
