In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # must come BEFORE TensorFlow
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"   # optional: suppress logs

import cv2
import numpy as np
from deepface import DeepFace
import albumentations as A
import json
from tensorflow.keras.models import load_model
import joblib
import glob
from tensorflow.keras.mixed_precision import Policy as DTypePolicy
from tensorflow.keras.layers import InputLayer


# 1. Load your trained model and tools (EXACTLY like your unit test)
# model = load_model("/home/ayombalima/ml_models/student_recognition_model.h5")
class CustomInputLayer(InputLayer):
    """InputLayer variant that accepts a ``batch_shape`` keyword.

    When the layer config carries a non-None ``batch_shape`` it is forwarded to
    the parent constructor under the name ``batch_input_shape``; a ``None``
    value is simply discarded. Every other argument is passed through as-is.

    NOTE(review): presumably needed because the saved model's config uses a
    keyword the installed Keras ``InputLayer`` does not accept -- confirm.
    """

    def __init__(self, *args, **kwargs):
        if "batch_shape" in kwargs:
            shape = kwargs.pop("batch_shape")
            if shape is not None:
                kwargs["batch_input_shape"] = shape
        super().__init__(*args, **kwargs)
# Absolute path of the saved Keras classifier (.h5 file).
ML_MODEL_PATH = "/home/ayombalima/ml_models/student_recognition_model.h5"
# Load the classifier for inference only (compile=False). The custom_objects
# mapping makes deserialisation resolve "InputLayer" to CustomInputLayer and
# "DTypePolicy" to the mixed-precision Policy class imported above.
# NOTE(review): safe_mode=False turns off Keras' safe-deserialisation checks;
# only load model files from a trusted source.
model     = load_model(
    ML_MODEL_PATH,
    compile=False,
    safe_mode=False,
    custom_objects={"InputLayer": CustomInputLayer, "DTypePolicy": DTypePolicy}
)
# Objects used by identify_student(): `le.inverse_transform` maps the predicted
# class index to a student id and `scaler.transform` preprocesses the embedding.
# NOTE(review): joblib.load unpickles these files, so they must be trusted.
le = joblib.load("/home/ayombalima/ml_models/label_encoder.pkl")
scaler = joblib.load("/home/ayombalima/ml_models/scaler.pkl")

# Clustering results; only the "student_id_mapping" entry is used below.
with open("/home/ayombalima/ml_models/final_clustered_results.json", "r") as f:
    clustered_data = json.load(f)

# Invert student_id_mapping (value -> key) so identify_student() can look up
# the original source entry from a predicted id.
# NOTE(review): if two keys share the same value, the later one silently wins.
id_to_path = {
    v: k for k, v in clustered_data["student_id_mapping"].items()
}



# 2. Same augmenter used for Mariam
# Albumentations pipeline applied by generate_embeddings() to create augmented
# copies of a face crop. Each transform fires independently with probability p.
# NOTE(review): no random seed is set in the visible code, so the augmented
# images (and therefore the averaged embedding) can differ between runs.
augmenter = A.Compose([
    A.HorizontalFlip(p=0.5),            # mirror left/right half of the time
    A.Rotate(limit=15, p=0.6),          # rotation limit of 15
    A.RandomBrightnessContrast(p=0.6),
    A.GaussNoise(p=0.2),
    A.HueSaturationValue(p=0.3),
    A.RandomShadow(p=0.2)
])


2025-04-27 19:17:09.765823: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-27 19:17:09.765901: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-27 19:17:09.765987: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# 3. YOLO Detection (EXACTLY like your command)
def run_yolo_detection(
    source,
    python_bin="/home/ayombalima/backend_env/bin/python",
    detect_script="/home/ayombalima/YOLO-FaceV2-master/detect.py",
    weights="/home/ayombalima/YOLO-FaceV2-master/yolov5s_v2.pt",
    img_size=640,
    conf_thres=0.25,
):
    """Run the YOLO face detector on ``source`` and return its output folder.

    The detector is launched as a child process with ``--save-txt --save-conf``.
    The command is passed as an argument list (no shell), so a ``source``
    containing spaces or shell metacharacters is handed over verbatim instead
    of being re-parsed by a shell.

    Args:
        source: Image, directory or video path given to ``--source``.
        python_bin: Interpreter used to run the detection script.
        detect_script: Path of the YOLO ``detect.py`` script.
        weights: Path of the weights file given to ``--weights``.
        img_size: Value given to ``--img``.
        conf_thres: Value given to ``--conf``.

    Returns:
        The most recently created path matching ``runs/detect/exp*``, resolved
        relative to the current working directory.

    Raises:
        RuntimeError: The detector exited with a non-zero status. Without this
            check a failed run would silently return a stale ``exp*`` folder
            left behind by an earlier run.
        FileNotFoundError: ``python_bin`` does not exist, or no
            ``runs/detect/exp*`` folder exists after the run.
    """
    import subprocess  # local import: not part of the notebook's import cell

    command = [
        python_bin, detect_script,
        "--weights", weights,
        "--img", str(img_size),
        "--conf", str(conf_thres),
        "--source", str(source),
        "--save-txt", "--save-conf",
    ]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        raise RuntimeError(
            f"YOLO detection failed with exit code {completed.returncode} "
            f"for source {source!r}"
        )

    detect_folders = glob.glob("runs/detect/exp*")
    if not detect_folders:
        raise FileNotFoundError(
            "YOLO detection finished but no 'runs/detect/exp*' folder was found "
            f"under {os.getcwd()!r}"
        )
    # The newest folder is taken to be the one written by the run above.
    return max(detect_folders, key=os.path.getctime)




In [3]:
# 4. Generate embeddings (IDENTICAL to Mariam's approach)
def generate_embeddings(face_img, n_augmentations=9):
    """Compute Facenet embeddings for a face crop and augmented copies of it.

    The first embedding comes from ``face_img`` itself; each of the following
    ``n_augmentations`` embeddings comes from a fresh pass of the module-level
    ``augmenter`` over ``face_img``.

    Processing is best-effort: if any step raises, generation stops and the
    embeddings collected so far are returned. Only ``Exception`` subclasses are
    handled, so ``KeyboardInterrupt`` / ``SystemExit`` still propagate and the
    notebook stays interruptible; the failure is reported instead of being
    silently discarded.

    Args:
        face_img: Face crop as an image array accepted by ``DeepFace.represent``
            and by ``augmenter``.
        n_augmentations: Number of augmented copies to embed (default 9, i.e.
            up to 10 embeddings in total).

    Returns:
        ``np.ndarray`` with one row per embedding, or ``None`` when no
        embedding could be produced.
    """
    embeddings = []
    try:
        # Original, un-augmented crop
        original_emb = DeepFace.represent(
            face_img, model_name="Facenet", enforce_detection=False
        )[0]["embedding"]
        embeddings.append(original_emb)

        # Augmented copies
        for _ in range(n_augmentations):
            aug_img = augmenter(image=face_img)['image']
            aug_emb = DeepFace.represent(
                aug_img, model_name="Facenet", enforce_detection=False
            )[0]["embedding"]
            embeddings.append(aug_emb)
    except Exception as exc:
        # Keep whatever was collected, but make the failure visible.
        print(f"Warning: embedding generation stopped after {len(embeddings)} embedding(s): {exc!r}")
    return np.array(embeddings) if embeddings else None

In [4]:
# # 5. Identify student using your model
# def identify_student(embeddings):
#     if embeddings is None: 
#         return "Unknown", 0.0
#     avg_embed = np.mean(embeddings, axis=0).reshape(1, -1)
#     scaled = scaler.transform(avg_embed)
#     pred_probs = model.predict(scaled)[0]
#     return le.inverse_transform([np.argmax(pred_probs)])[0], float(np.max(pred_probs))

def identify_student(embeddings):
    """Classify a face from its embeddings.

    The embeddings are averaged row-wise into a single vector, passed through
    the module-level ``scaler`` and ``model``, and the highest-scoring class is
    decoded with ``le``.

    Args:
        embeddings: Array of embeddings (one per row) or ``None``.

    Returns:
        ``(student_id, confidence, source)`` where ``confidence`` is the highest
        predicted score as a float and ``source`` is the ``id_to_path`` entry
        for the id (``"N/A"`` when the id has no entry). When ``embeddings`` is
        ``None`` the result is ``("Unknown", 0.0, None)``.
    """
    if embeddings is not None:
        mean_vector = np.mean(embeddings, axis=0)
        features = scaler.transform(mean_vector.reshape(1, -1))
        class_scores = model.predict(features)[0]

        best_index = np.argmax(class_scores)
        student_id = le.inverse_transform([best_index])[0]
        source_entry = id_to_path.get(student_id, "N/A")
        top_score = float(np.max(class_scores))
        return student_id, top_score, source_entry

    return "Unknown", 0.0, None


In [5]:
# # 6. MAIN VIDEO PROCESSING FUNCTION
# def process_video(video_path, output_dir="video_results"):
#     # Create output dir
#     os.makedirs(output_dir, exist_ok=True)
    
#     # Extract frames at 2 FPS
#     cap = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frames = []
    
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret: break
        
#         frame_count += 1
#         if frame_count % int(cap.get(cv2.CAP_PROP_FPS)/2) != 0:  # 2 FPS
#             continue
            
#         frame_path = os.path.join(output_dir, f"frame_{frame_count:05d}.jpg")
#         cv2.imwrite(frame_path, frame)
#         saved_frames.append(frame_path)
    
#     cap.release()
#     print(f"Extracted {len(saved_frames)} frames")
    
#     # Process frames with YOLO
#     detect_dir = run_yolo_detection(output_dir)
#     labels_dir = os.path.join(detect_dir, "labels")
    
#     # Process detections
#     results = []
#     for frame_path in saved_frames:
#         frame_name = os.path.basename(frame_path).split('.')[0]
#         label_path = os.path.join(labels_dir, f"{frame_name}.txt")
        
#         if not os.path.exists(label_path):
#             continue
            
#         # Read YOLO detections
#         with open(label_path, 'r') as f:
#             detections = [list(map(float, line.strip().split())) for line in f.readlines()]
        
#         # Process each face
#         frame = cv2.imread(frame_path)
#         h, w = frame.shape[:2]
        
#         for det in detections:
#             # Convert YOLO to pixels (EXACTLY like your Mariam code)
#             x_center, y_center, width, height = det[1:5]
#             x1 = int((x_center - width/2) * w)
#             y1 = int((y_center - height/2) * h)
#             x2 = int((x_center + width/2) * w)
#             y2 = int((y_center + height/2) * h)
            
#             # Extract and process face
#             face_img = frame[y1:y2, x1:x2]
#             if face_img.size == 0: continue
            
#             # Generate embeddings (SAME as Mariam)
#             face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
#             embeddings = generate_embeddings(face_rgb)
            
#             # Identify student
#             name, confidence, source_file = identify_student(embeddings)
#             print(f"✅ Frame: {frame_path} | Predicted: {name} ({confidence:.2f}) from source file: {source_file}")

#             # if confidence >= 0.80:
#             #     print(f"✅ Frame: {frame_path} | Predicted: {name} ({confidence:.2f}) from source file: {source_file}")
            
#             # Save results
#             results.append({
#                 "frame": frame_path,
#                 "student": name,
#                 "confidence": confidence,
#                 "source_file": source_file,
#                 "bbox": [x1, y1, x2, y2]
#             })
            
#             # Draw on frame
#             color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
#             cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#             cv2.putText(frame, f"{name} ({confidence:.2f})", (x1, y1-10), 
#                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            
#             # Save annotated frame
#             cv2.imwrite(frame_path, frame)
    
#     # Save all results
#     with open(os.path.join(output_dir, "results.json"), 'w') as f:
#         json.dump(results, f, indent=2)
    
#     return results

In [6]:
def _extract_frames(video_path, output_dir, target_fps):
    """Save about ``target_fps`` frames per second of the video as JPEGs; return their paths."""
    cap = cv2.VideoCapture(video_path)
    saved_frames = []
    try:
        # Query the FPS once. A reported FPS that is 0, NaN or below target_fps
        # would otherwise yield a step of 0 and a modulo-by-zero below.
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        if native_fps and native_fps > 0 and target_fps > 0:
            step = max(1, int(native_fps / target_fps))
        else:
            step = 1

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % step != 0:
                continue

            frame_path = os.path.join(output_dir, f"frame_{frame_count:05d}.jpg")
            cv2.imwrite(frame_path, frame)
            saved_frames.append(frame_path)
    finally:
        # Release the capture even if reading or writing a frame fails.
        cap.release()
    return saved_frames


def _yolo_box_to_pixels(det, frame_w, frame_h):
    """Turn a label row (class, x_center, y_center, width, height, ...) into a clamped pixel box."""
    x_center, y_center, width, height = det[1:5]
    x1 = int((x_center - width / 2) * frame_w)
    y1 = int((y_center - height / 2) * frame_h)
    x2 = int((x_center + width / 2) * frame_w)
    y2 = int((y_center + height / 2) * frame_h)
    # Clamp to the frame: a negative start index would make NumPy slice from
    # the opposite edge and produce a wrong (or empty) crop.
    x1 = min(max(x1, 0), frame_w)
    x2 = min(max(x2, 0), frame_w)
    y1 = min(max(y1, 0), frame_h)
    y2 = min(max(y2, 0), frame_h)
    return x1, y1, x2, y2


def _identify_faces_in_frame(frame_path, labels_dir, min_confidence):
    """Identify every detected face in one saved frame, annotate the frame, return result dicts."""
    frame_name = os.path.splitext(os.path.basename(frame_path))[0]
    label_path = os.path.join(labels_dir, f"{frame_name}.txt")
    if not os.path.exists(label_path):
        return []  # no label file was written for this frame

    # Read detections, ignoring blank or truncated rows.
    with open(label_path, 'r') as f:
        rows = [line.split() for line in f]
    detections = [list(map(float, row)) for row in rows if len(row) >= 5]

    frame = cv2.imread(frame_path)
    if frame is None:
        print(f"Warning: could not read frame {frame_path}; skipping")
        return []
    h, w = frame.shape[:2]

    # Draw on a copy so that later face crops are taken from clean pixels and
    # never contain boxes or text drawn for earlier detections.
    annotated = frame.copy()
    frame_results = []

    for det in detections:
        x1, y1, x2, y2 = _yolo_box_to_pixels(det, w, h)

        face_img = frame[y1:y2, x1:x2]
        if face_img.size == 0:
            continue

        face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
        embeddings = generate_embeddings(face_rgb)

        name, confidence, source_file = identify_student(embeddings)
        # Presumably the decoded id can be a NumPy scalar; convert it to a
        # plain Python value so json.dump below cannot fail on it.
        if hasattr(name, "item"):
            name = name.item()

        if confidence < min_confidence:
            name = "Unknown"

        frame_results.append({
            "frame": frame_path,
            "student": name,
            "confidence": confidence,
            "source_file": source_file,
            "bbox": [x1, y1, x2, y2]
        })

        if name == "Unknown":
            print(f"❌ Frame: {frame_path} | Prediction below threshold - Marked as Unknown")
        else:
            print(f"✅ Frame: {frame_path} | Predicted: {name} ({confidence:.2f}) from source file: {source_file}")

        color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
        # Keep the caption inside the image for boxes touching the top edge.
        cv2.putText(annotated, f"{name} ({confidence:.2f})", (x1, max(y1 - 10, 20)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    if frame_results:
        # Overwrite the saved frame once, after all faces have been drawn.
        cv2.imwrite(frame_path, annotated)
    return frame_results


def process_video(video_path, output_dir="video_results", target_fps=2, min_confidence=0.80):
    """Extract frames from a video, detect faces with YOLO and identify each face.

    Steps:
      1. Sample about ``target_fps`` frames per second into ``output_dir``.
      2. Run ``run_yolo_detection`` on ``output_dir`` and read the per-frame
         label files from the ``labels`` sub-folder of its output folder.
      3. For each detection, crop the face, build embeddings with
         ``generate_embeddings`` and classify with ``identify_student``;
         predictions below ``min_confidence`` are recorded as ``"Unknown"``.
      4. Overwrite each processed frame with an annotated copy, write
         ``results.json`` into ``output_dir`` and print a summary via
         ``summarize_final_identifications``.

    NOTE(review): the detector is run on the whole ``output_dir``; frames left
    there by earlier runs are also scanned by YOLO, although only the frames
    extracted in this call are used for identification.

    Args:
        video_path: Path of the video file to process.
        output_dir: Directory for extracted frames and ``results.json``.
        target_fps: Approximate number of frames to sample per second.
        min_confidence: Minimum score for a prediction to keep its identity.

    Returns:
        List of dicts with keys ``frame``, ``student``, ``confidence``,
        ``source_file`` and ``bbox`` (``[x1, y1, x2, y2]`` in pixels). The list
        is empty when no frame could be extracted; detection is skipped then.
    """
    # Create output dir
    os.makedirs(output_dir, exist_ok=True)

    saved_frames = _extract_frames(video_path, output_dir, target_fps)
    print(f"Extracted {len(saved_frames)} frames")

    results = []
    if saved_frames:
        # Process frames with YOLO
        detect_dir = run_yolo_detection(output_dir)
        labels_dir = os.path.join(detect_dir, "labels")

        for frame_path in saved_frames:
            results.extend(_identify_faces_in_frame(frame_path, labels_dir, min_confidence))
    else:
        print(f"Warning: no frames could be read from {video_path}; skipping detection")

    # Save results
    with open(os.path.join(output_dir, "results.json"), 'w') as f:
        json.dump(results, f, indent=2)

    summarize_final_identifications(results)

    return results


def summarize_final_identifications(results):
    """Print one summary line per identified student.

    Entries whose ``"student"`` is ``"Unknown"`` are ignored. For every other
    student the summary keeps the highest ``"confidence"`` seen, the
    ``"source_file"`` belonging to that highest-confidence entry, and the
    number of entries for the student. Students are listed in the order in
    which they first appear in ``results``.

    Args:
        results: Iterable of dicts with the keys ``"student"``,
            ``"confidence"`` and ``"source_file"``.

    Returns:
        None. The summary is written to standard output.
    """
    tally = {}

    for record in results:
        label = record["student"]
        if label != "Unknown":
            score, origin = record["confidence"], record["source_file"]
            stats = tally.setdefault(
                label, {"source_file": None, "max_confidence": 0.0, "count": 0}
            )
            # Remember the source that goes with the best score so far.
            if score > stats["max_confidence"]:
                stats.update(max_confidence=score, source_file=origin)
            stats["count"] += 1

    if tally:
        print("\n Final Identification Results:\n")
        position = 0
        for stats in tally.values():
            print(f"- Student_{position}: {stats['source_file']}.jpg (confidence: {stats['max_confidence']:.2f}, seen in {stats['count']} frames)")
            position += 1
    else:
        print("\n Final Identification Results: None found.\n")

# def summarize_final_identifications(results):
#     from collections import defaultdict

#     student_summary = defaultdict(lambda: {"source_file": None, "max_confidence": 0.0, "count": 0})

#     for result in results:
#         student = result["student"]
#         if student == "Unknown":
#             continue
        
#         conf = result["confidence"]
#         source = result["source_file"]

#         if conf > student_summary[student]["max_confidence"]:
#             student_summary[student]["max_confidence"] = conf
#             student_summary[student]["source_file"] = source
        
#         student_summary[student]["count"] += 1

#     if not student_summary:
#         print("\n🎯 Final Identification Results: None found.\n")
#         return

#     # 👉 NEW FILTER: Accept students with max_confidence >= 0.80
#     filtered_students = {student: info for student, info in student_summary.items() if info["max_confidence"] >= 0.80}

#     if not filtered_students:
#         print("\n🎯 Final Identification Results: None passed filtering (≥ 80% confidence).\n")
#         return

#     # Sort by highest confidence among accepted
#     sorted_students = sorted(filtered_students.items(), key=lambda x: x[1]["max_confidence"], reverse=True)

#     print("\n🎯 Final Identification Results:\n")
#     for idx, (student, info) in enumerate(sorted_students):
#         print(f"- Student_{idx}: {info['source_file']}.jpg (confidence: {info['max_confidence']:.2f}, seen in {info['count']} frames)")


In [7]:
# NOTE: The commented-out cell that follows is the chat-assistant-edited version of the
# first commented-out process_video. The active cell above is the version that reports one
# prediction per detected face and only shows an identity when its confidence is at least 80%.

In [8]:



# # 6. MAIN VIDEO PROCESSING FUNCTION
# def process_video(video_path, output_dir="video_results"):
#     import cv2
#     import os
#     import json
#     from collections import defaultdict

#     # Create output dir
#     os.makedirs(output_dir, exist_ok=True)
    
#     # Extract frames at 2 FPS
#     cap = cv2.VideoCapture(video_path)
#     frame_count = 0
#     saved_frames = []
    
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break
        
#         frame_count += 1
#         if frame_count % int(cap.get(cv2.CAP_PROP_FPS)/2) != 0:  # 2 FPS
#             continue
            
#         frame_path = os.path.join(output_dir, f"frame_{frame_count:05d}.jpg")
#         cv2.imwrite(frame_path, frame)
#         saved_frames.append(frame_path)
    
#     cap.release()
#     print(f"Extracted {len(saved_frames)} frames")
    
#     # Process frames with YOLO
#     detect_dir = run_yolo_detection(output_dir)
#     labels_dir = os.path.join(detect_dir, "labels")
    
#     # Process detections
#     results = []
#     for frame_path in saved_frames:
#         frame_name = os.path.basename(frame_path).split('.')[0]
#         label_path = os.path.join(labels_dir, f"{frame_name}.txt")
        
#         if not os.path.exists(label_path):
#             continue
            
#         # Read YOLO detections
#         with open(label_path, 'r') as f:
#             detections = [list(map(float, line.strip().split())) for line in f.readlines()]
        
#         # Process each face
#         frame = cv2.imread(frame_path)
#         h, w = frame.shape[:2]
        
#         for det in detections:
#             # Convert YOLO to pixel coordinates
#             x_center, y_center, width, height = det[1:5]
#             x1 = int((x_center - width/2) * w)
#             y1 = int((y_center - height/2) * h)
#             x2 = int((x_center + width/2) * w)
#             y2 = int((y_center + height/2) * h)
            
#             # Extract and process face
#             face_img = frame[y1:y2, x1:x2]
#             if face_img.size == 0:
#                 continue
            
#             face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
#             embeddings = generate_embeddings(face_rgb)
            
#             # Predict identity
#             name, confidence, source_file = identify_student(embeddings)
#             print(f"✅ Frame: {frame_path} | Predicted: {name} ({confidence:.2f}) from source file: {source_file}")
            
#             # Save results
#             results.append({
#                 "frame": frame_path,
#                 "student": name,
#                 "confidence": confidence,
#                 "source_file": source_file,
#                 "bbox": [x1, y1, x2, y2]
#             })
            
#             # Annotate frame
#             color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
#             cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#             cv2.putText(frame, f"{name} ({confidence:.2f})", (x1, y1-10), 
#                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
#             cv2.imwrite(frame_path, frame)

#     # === Post-process results: Filter based on frequency and confidence ===

#     # Step 1: Group predictions by student ID
#     student_hits = defaultdict(list)
#     for result in results:
#         sid = result["student"]
#         conf = result["confidence"]
#         if sid != "Unknown":
#             student_hits[sid].append(conf)

#     # Step 2: Decide valid IDs — at least 2 hits and one ≥ 0.8
#     valid_ids = set()
#     for sid, confs in student_hits.items():
#         if len(confs) >= 2 and any(c >= 0.8 for c in confs):
#             valid_ids.add(sid)

#     # Step 3: Rewrite results based on the valid IDs
#     for r in results:
#         if r["student"] not in valid_ids:
#             r["student"] = "Unknown"

#     # Step 4: Save results
#     with open(os.path.join(output_dir, "results.json"), 'w') as f:
#         json.dump(results, f, indent=2)

#     return results


In [9]:
# Testing Video Upload with gathered sample
VIDEO_PATH = "/home/ayombalima/video_uploads/VID-20221017-WA0003.mp4" 
results = process_video(VIDEO_PATH)


Extracted 13 frames
Namespace(weights=['/home/ayombalima/YOLO-FaceV2-master/yolov5s_v2.pt'], source='video_results', img_size=640, conf_thres=0.25, iou_thres=0.45, device='', view_img=False, plot_label=False, save_txt=True, save_conf=True, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False)
[31m[1mrequirements:[0m /home/ayombalima/backend/tasks/requirements.txt not found, check failed.


YOLOv5 🚀 2024-2-11 torch 2.6.0+cu124 CPU



Fusing layers... 
image 1/23 /home/ayombalima/backend/tasks/video_results/frame_00014.jpg: 

Model Summary: 394 layers, 18706966 parameters, 0 gradients
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


384x640 Done. (0.420s)
image 2/23 /home/ayombalima/backend/tasks/video_results/frame_00028.jpg: 384x640 Done. (0.297s)
image 3/23 /home/ayombalima/backend/tasks/video_results/frame_00042.jpg: 384x640 Done. (0.311s)
image 4/23 /home/ayombalima/backend/tasks/video_results/frame_00056.jpg: 384x640 Done. (0.324s)
image 5/23 /home/ayombalima/backend/tasks/video_results/frame_00070.jpg: 384x640 1 face, Done. (0.329s)
image 6/23 /home/ayombalima/backend/tasks/video_results/frame_00084.jpg: 384x640 2 faces, Done. (0.301s)
image 7/23 /home/ayombalima/backend/tasks/video_results/frame_00098.jpg: 384x640 2 faces, Done. (0.290s)
image 8/23 /home/ayombalima/backend/tasks/video_results/frame_00112.jpg: 384x640 2 faces, Done. (0.291s)
image 9/23 /home/ayombalima/backend/tasks/video_results/frame_00126.jpg: 384x640 2 faces, Done. (0.264s)
image 10/23 /home/ayombalima/backend/tasks/video_results/frame_00140.jpg: 384x640 2 faces, Done. (0.299s)
image 11/23 /home/ayombalima/backend/tasks/video_results/fr