# **NEW**

In [11]:
import torch, os, cv2, numpy as np, matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
from scipy import ndimage
from google.colab import files
import time

# Install required libraries
try:
    from segment_anything import sam_model_registry, SamPredictor
except ModuleNotFoundError:
    !pip install git+https://github.com/facebookresearch/segment-anything.git
    from segment_anything import sam_model_registry, SamPredictor

# Install YOLO if not available
try:
    from ultralytics import YOLO
except ModuleNotFoundError:
    !pip install ultralytics
    from ultralytics import YOLO

# Download SAM model if needed
sam_checkpoint = "sam_vit_h_4b8939.pth"
if not os.path.exists(sam_checkpoint):
    !wget -O sam_vit_h_4b8939.pth https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def load_models():
    # Load SAM model
    sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint)
    sam.to(device)
    sam_predictor = SamPredictor(sam)

    # Load MiDaS depth model
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.to(device)
    midas.eval()

    # Load YOLO model for object detection
    yolo = YOLO("yolov8x.pt")  # Load the largest YOLOv8 model

    return sam_predictor, midas, yolo

def preprocess_image(image_path):
    img = Image.open(image_path).convert('RGB')
    img_array = np.array(img)

    # Resize keeping aspect ratio
    width, height = img.size
    ratio = min(520 / width, 520 / height)
    new_size = (int(width * ratio), int(height * ratio))
    resized_img = img.resize(new_size, Image.LANCZOS)

    # Process for MiDaS
    midas_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((384, 384), antialias=True),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    midas_input = midas_transform(img).unsqueeze(0).to(device)

    return {
        'original': img, 'array': img_array,
        'resized': resized_img, 'resized_array': np.array(resized_img),
        'midas_input': midas_input
    }

def detect_sports_objects(yolo, img_data):
    """Detect sports-related objects using YOLO"""
    results = yolo(img_data['resized_array'], conf=0.25)

    # Extract detections
    boxes = []
    classes = []
    scores = []
    sports_classes = ['person', 'sports ball', 'tennis racket', 'baseball bat', 'baseball glove',
                      'skateboard', 'surfboard', 'tennis ball', 'bottle', 'wine glass', 'cup',
                      'frisbee', 'skis', 'snowboard', 'kite']

    result = results[0]  # First image result

    detections = {
        'boxes': [],
        'classes': [],
        'scores': [],
        'sports_objects': 0,
        'athletes': 0
    }

    if hasattr(result, 'boxes') and len(result.boxes) > 0:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            cls = int(box.cls[0])
            class_name = result.names[cls]

            detections['boxes'].append([x1, y1, x2, y2])
            detections['classes'].append(class_name)
            detections['scores'].append(conf)

            if class_name == 'person':
                detections['athletes'] += 1
            if class_name in sports_classes:
                detections['sports_objects'] += 1

    return detections

def generate_depth_map(midas, img_data):
    with torch.no_grad():
        depth_map = midas(img_data['midas_input'])
        depth_map = torch.nn.functional.interpolate(
            depth_map.unsqueeze(1),
            size=img_data['array'].shape[:2],
            mode="bicubic",
            align_corners=False
        ).squeeze().cpu().numpy()

    normalized_depth = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-5)

    depth_8bit = (normalized_depth * 255).astype(np.uint8)
    _, depth_mask = cv2.threshold(depth_8bit, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    kernel = np.ones((5, 5), np.uint8)
    depth_mask = cv2.morphologyEx(depth_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(depth_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    main_contour = max(contours, key=cv2.contourArea) if contours else None

    return normalized_depth, depth_mask, main_contour

def segment_with_sam(sam_predictor, img_data):
    sam_predictor.set_image(img_data['resized_array'])

    height, width = img_data['resized_array'].shape[:2]
    points = []
    grid_size = 3

    for i in range(1, grid_size + 1):
        for j in range(1, grid_size + 1):
            points.append([width // (grid_size + 1) * i, height // (grid_size + 1) * j])
    points.append([width // 2, height // 2])

    input_points = np.array(points)
    input_labels = np.ones(len(input_points), dtype=int)

    masks, scores, _ = sam_predictor.predict(
        point_coords=input_points,
        point_labels=input_labels,
        multimask_output=True
    )

    best_mask = masks[np.argmax(scores)].astype(np.uint8)
    if np.mean(best_mask) > 0.5:
        best_mask = 1 - best_mask

    labeled, num = ndimage.label(best_mask)
    if num > 1:
        sizes = ndimage.sum(best_mask, labeled, range(1, num + 1))
        best_mask = (labeled == np.argmax(sizes) + 1).astype(np.uint8)

    color_mask = np.zeros((best_mask.shape[0], best_mask.shape[1], 3), dtype=np.uint8)
    color_mask[best_mask == 1] = [0, 255, 0]

    return best_mask, color_mask

def analyze_sports_scene(detections, depth_map, img_data):
    """Analyze the sports scene based on detected objects and depth"""
    height, width = depth_map.shape[:2]  # Lấy kích thước từ depth_map thay vì img_data

    # Analyze player distribution
    player_positions = []
    for i, cls in enumerate(detections['classes']):
        if cls == 'person':
            x1, y1, x2, y2 = detections['boxes'][i]
            # Chuyển đổi tọa độ hộp giới hạn để phù hợp với kích thước depth_map
            x1 = int(x1 * depth_map.shape[1] / img_data['resized_array'].shape[1])
            y1 = int(y1 * depth_map.shape[0] / img_data['resized_array'].shape[0])
            x2 = int(x2 * depth_map.shape[1] / img_data['resized_array'].shape[1])
            y2 = int(y2 * depth_map.shape[0] / img_data['resized_array'].shape[0])

            center_x = (x1 + x2) / 2 / width
            center_y = (y1 + y2) / 2 / height
            player_positions.append((center_x, center_y))

    # Khởi tạo player_dispersion với giá trị mặc định
    player_dispersion = 0

    # Calculate player dispersion (if multiple players)
    if len(player_positions) > 1:
        # Calculate average pairwise distance
        total_distance = 0
        count = 0
        for i in range(len(player_positions)):
            for j in range(i+1, len(player_positions)):
                p1 = player_positions[i]
                p2 = player_positions[j]
                dist = np.sqrt((p1[0]-p2[0])**2 + (p1[1]-p2[1])**2)
                total_distance += dist
                count += 1
        if count > 0:
            player_dispersion = total_distance / count

    # Identify key subjects (based on size and position)
    key_subjects = []
    for i, box in enumerate(detections['boxes']):
        x1, y1, x2, y2 = box
        area = (x2-x1) * (y2-y1)
        area_ratio = area / (img_data['resized_array'].shape[1] * img_data['resized_array'].shape[0])

        # Chuyển đổi tọa độ để phù hợp với depth_map
        depth_x1 = int(x1 * depth_map.shape[1] / img_data['resized_array'].shape[1])
        depth_y1 = int(y1 * depth_map.shape[0] / img_data['resized_array'].shape[0])
        depth_x2 = int(x2 * depth_map.shape[1] / img_data['resized_array'].shape[1])
        depth_y2 = int(y2 * depth_map.shape[0] / img_data['resized_array'].shape[0])

        # Đảm bảo các tọa độ nằm trong giới hạn của depth_map
        depth_x1 = max(0, min(depth_x1, depth_map.shape[1]-1))
        depth_y1 = max(0, min(depth_y1, depth_map.shape[0]-1))
        depth_x2 = max(0, min(depth_x2, depth_map.shape[1]-1))
        depth_y2 = max(0, min(depth_y2, depth_map.shape[0]-1))

        # Tạo mask với kích thước của depth_map
        mask = np.zeros((depth_map.shape[0], depth_map.shape[1]), dtype=np.uint8)
        if depth_y2 > depth_y1 and depth_x2 > depth_x1:  # Đảm bảo hộp có kích thước hợp lệ
            mask[depth_y1:depth_y2, depth_x1:depth_x2] = 1

        # Tính toán độ sâu trung bình
        obj_depth = np.mean(depth_map[mask > 0]) if np.sum(mask) > 0 else 0

        subject_info = {
            'class': detections['classes'][i],
            'box': box,
            'area_ratio': area_ratio,
            'depth': obj_depth,
            'position': ((x1+x2)/2/img_data['resized_array'].shape[1],
                         (y1+y2)/2/img_data['resized_array'].shape[0])
        }

        # Add prominence score based on size and position
        center_dist = np.sqrt((subject_info['position'][0]-0.5)**2 +
                             (subject_info['position'][1]-0.5)**2)
        subject_info['prominence'] = area_ratio * (1 - center_dist)

        key_subjects.append(subject_info)

    # Sort by prominence
    key_subjects.sort(key=lambda x: x['prominence'], reverse=True)

    return {
        'player_count': detections['athletes'],
        'player_positions': player_positions,
        'player_dispersion': player_dispersion,
        'key_subjects': key_subjects[:5] if key_subjects else []
    }

def analyze_action_quality(detections, img_data):
    """Phân tích chất lượng hành động trong ảnh thể thao với cách tiếp cận cải tiến"""
    height, width = img_data['resized_array'].shape[:2]

    # 1. Kiểm tra thiết bị thể thao (giữ nguyên như cũ)
    has_equipment = False
    equipment_types = []
    for cls in detections['classes']:
        if cls in ['sports ball', 'tennis racket', 'baseball bat', 'baseball glove',
                  'skateboard', 'surfboard', 'tennis ball', 'frisbee', 'skis', 'snowboard']:
            has_equipment = True
            if cls not in equipment_types:
                equipment_types.append(cls)

    # 2. Phân tích tư thế cá nhân thay vì so sánh giữa nhiều người
    action_posture_score = 0
    dynamic_posture_count = 0
    total_players = 0

    for i, cls in enumerate(detections['classes']):
        if cls == 'person':
            total_players += 1
            x1, y1, x2, y2 = detections['boxes'][i]

            # a. Tính tỷ lệ chiều cao/chiều rộng
            aspect_ratio = (y2-y1)/(x2-x1) if (x2-x1) > 0 else 0

            # b. Đánh giá tư thế dựa trên tỷ lệ khung hình
            # Tư thế không điển hình (có thể đang nhảy, cúi, nằm...)
            if aspect_ratio < 1.2 or aspect_ratio > 2.5:
                dynamic_posture_count += 1

            # c. Tính diện tích tương đối (lớn = hành động gần hơn)
            area_ratio = ((y2-y1) * (x2-x1)) / (height * width)
            if area_ratio > 0.2:  # Vận động viên chiếm diện tích lớn, thường là hành động gần
                action_posture_score += 0.2

    # Nếu có người trong tư thế không điển hình, đó có thể là hành động năng động
    if total_players > 0:
        dynamic_posture_ratio = dynamic_posture_count / total_players
        action_posture_score += dynamic_posture_ratio * 0.5

    # 3. Tính action_level cải tiến
    action_level = 0

    # Nếu có thiết bị thể thao (giữ nguyên)
    if has_equipment:
        action_level += 0.4

    # Thêm điểm từ phân tích tư thế
    action_level += min(0.6, action_posture_score)

    # 4. Phân loại chất lượng hành động
    return {
        'has_equipment': has_equipment,
        'equipment_types': equipment_types,
        'dynamic_posture_score': action_posture_score,
        'dynamic_posture_count': dynamic_posture_count,
        'total_players': total_players,
        'action_level': action_level,
        'action_quality': "High" if action_level > 0.7 else
                         "Medium" if action_level > 0.3 else "Low"
    }

def analyze_sports_composition(detections, analysis, img_data):
    """Analyze the composition with sports-specific context"""

    # Basic composition from existing analysis
    composition = analysis["composition_analysis"] if "composition_analysis" in analysis else {}

    # Sports specific enhancements
    result = {
        'sport_type': 'Unknown',
        'framing_quality': 'Unknown',
        'recommended_crop': None,
        'action_focus': 'Unknown'
    }

    # Try to determine sport type
    sport_equipment = {
        'tennis racket': 'Tennis',
        'tennis ball': 'Tennis',
        'sports ball': 'Ball Sport',
        'baseball bat': 'Baseball',
        'baseball glove': 'Baseball',
        'skateboard': 'Skateboarding',
        'surfboard': 'Surfing',
        'frisbee': 'Frisbee',
        'skis': 'Skiing',
        'snowboard': 'Snowboarding'
    }

    for cls in detections['classes']:
        if cls in sport_equipment:
            result['sport_type'] = sport_equipment[cls]
            break

    # Evaluate framing quality for sports action
    if "key_subjects" in analysis and analysis['key_subjects']:
        subject_positions = [subject['position'] for subject in analysis['key_subjects']]

        # Check if key subjects are well placed (rule of thirds or centered)
        well_placed_count = 0
        for pos in subject_positions:
            # Check rule of thirds points
            thirds_points = [
                (1/3, 1/3), (2/3, 1/3),
                (1/3, 2/3), (2/3, 2/3)
            ]

            center_point = (0.5, 0.5)

            # Check if close to rule of thirds points or center
            for third in thirds_points:
                dist = np.sqrt((pos[0]-third[0])**2 + (pos[1]-third[1])**2)
                if dist < 0.1:  # 10% of image width/height
                    well_placed_count += 1
                    break

            # Check if centered
            dist_to_center = np.sqrt((pos[0]-center_point[0])**2 + (pos[1]-center_point[1])**2)
            if dist_to_center < 0.1:
                well_placed_count += 1

        if well_placed_count / len(subject_positions) > 0.7:
            result['framing_quality'] = 'Excellent'
        elif well_placed_count / len(subject_positions) > 0.4:
            result['framing_quality'] = 'Good'
        else:
            result['framing_quality'] = 'Could be improved'

    # Recommend crop if needed
    if "key_subjects" in analysis and analysis['key_subjects']:
        main_subject = analysis['key_subjects'][0]
        x_pos = main_subject['position'][0]
        y_pos = main_subject['position'][1]

        # If subject is too far from ideal positions, suggest crop
        if not (0.3 < x_pos < 0.7 or 0.3 < y_pos < 0.7):
            # Calculate ideal center point
            if x_pos < 0.33:
                ideal_x = 0.33
            elif x_pos > 0.67:
                ideal_x = 0.67
            else:
                ideal_x = 0.5

            if y_pos < 0.33:
                ideal_y = 0.33
            elif y_pos > 0.67:
                ideal_y = 0.67
            else:
                ideal_y = 0.5

            # Calculate shift needed
            shift_x = ideal_x - x_pos
            shift_y = ideal_y - y_pos

            result['recommended_crop'] = {
                'shift_x': shift_x,
                'shift_y': shift_y
            }

    # Evaluate action focus
    if "action_quality" in analysis:
        result['action_focus'] = analysis['action_quality']

    return result

def analyze_facial_expression(detections, img_data):
    """Phân tích biểu cảm khuôn mặt trong ảnh thể thao"""
    try:
        # Import thư viện phân tích khuôn mặt
        from deepface import DeepFace
        import tensorflow as tf
    except ModuleNotFoundError:
        !pip install deepface
        from deepface import DeepFace

    image = img_data['resized_array']

    # Kết quả phân tích biểu cảm
    expression_results = {
        'has_faces': False,
        'expressions': [],
        'dominant_emotion': 'unknown',
        'emotion_intensity': 0,
        'emotional_value': 'Low'
    }

    # Phát hiện khuôn mặt từ hộp giới hạn người
    faces_detected = 0
    face_regions = []

    for i, cls in enumerate(detections['classes']):
        if cls == 'person':
            x1, y1, x2, y2 = detections['boxes'][i]

            # Ước tính vùng khuôn mặt (thường ở phần trên của hộp người)
            face_h = (y2 - y1) // 4  # Ước lượng chiều cao khuôn mặt
            face_y2 = y1 + face_h + face_h // 2  # Giới hạn dưới của khuôn mặt
            face_region = image[max(0, y1):min(face_y2, image.shape[0]),
                              max(0, x1):min(x2, image.shape[1])]

            # Kiểm tra kích thước vùng mặt
            if face_region.shape[0] > 20 and face_region.shape[1] > 20:
                face_regions.append({
                    'region': face_region,
                    'box': (x1, y1, x2, y2)
                })

    # Phân tích biểu cảm cho từng khuôn mặt
    significant_emotions = ['happy', 'sad', 'angry', 'surprise', 'fear', 'disgust']
    emotion_scores = []

    for face in face_regions:
        try:
            # Sử dụng DeepFace để phân tích biểu cảm
            result = DeepFace.analyze(face['region'], actions=['emotion'], enforce_detection=False, silent=True)

            if isinstance(result, list):
                result = result[0]  # Lấy kết quả đầu tiên nếu có nhiều

            # Lấy thông tin biểu cảm
            emotion = result['dominant_emotion']
            emotion_data = {
                'box': face['box'],
                'emotion': emotion,
                'scores': result['emotion']
            }

            # Tính cường độ cảm xúc (emotion intensity)
            # Cảm xúc mạnh hơn khi một cảm xúc vượt trội hẳn so với các cảm xúc khác
            max_score = max(result['emotion'].values())
            avg_other = sum([s for e, s in result['emotion'].items() if e != emotion]) / (len(result['emotion']) - 1)
            intensity = (max_score - avg_other) / 100  # Chuẩn hóa về dải [0, 1]
            emotion_data['intensity'] = intensity

            # Thêm vào danh sách kết quả
            expression_results['expressions'].append(emotion_data)

            # Thêm vào danh sách điểm cảm xúc có ý nghĩa
            if emotion in significant_emotions:
                emotion_scores.append({
                    'emotion': emotion,
                    'intensity': intensity
                })

            faces_detected += 1

        except Exception as e:
            continue

    # Tổng hợp kết quả
    if faces_detected > 0:
        expression_results['has_faces'] = True

        # Xác định cảm xúc chủ đạo (nếu có nhiều mặt)
        if emotion_scores:
            # Sắp xếp theo cường độ
            emotion_scores.sort(key=lambda x: x['intensity'], reverse=True)
            dominant = emotion_scores[0]
            expression_results['dominant_emotion'] = dominant['emotion']
            expression_results['emotion_intensity'] = dominant['intensity']

            # Đánh giá giá trị cảm xúc
            if dominant['intensity'] > 0.7:
                expression_results['emotional_value'] = 'Very High'
            elif dominant['intensity'] > 0.5:
                expression_results['emotional_value'] = 'High'
            elif dominant['intensity'] > 0.3:
                expression_results['emotional_value'] = 'Medium'
            else:
                expression_results['emotional_value'] = 'Low'

    return expression_results

def visualize_sports_results(img_data, detections, depth_map, sports_analysis, action_analysis, composition_analysis):
    """Create sports-specific visualization"""
    img = np.array(img_data['resized']).copy()
    height, width = img.shape[:2]

    # Create detection visualization
    det_viz = img.copy()

    # Draw bounding boxes
    for i, box in enumerate(detections['boxes']):
        x1, y1, x2, y2 = box
        label = detections['classes'][i]
        conf = detections['scores'][i]

        # Different colors for different classes
        if label == 'person':
            color = (0, 255, 0)  # Green for people
        elif 'ball' in label:
            color = (0, 0, 255)  # Red for balls
        else:
            color = (255, 0, 0)  # Blue for other equipment

        cv2.rectangle(det_viz, (x1, y1), (x2, y2), color, 2)
        cv2.putText(det_viz, f"{label} {conf:.2f}", (x1, y1-10),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Create composition analysis visualization
    comp_viz = img.copy()

    # Draw rule of thirds grid
    for i in range(1, 3):
        cv2.line(comp_viz, (0, int(height*i/3)), (width, int(height*i/3)), (255, 255, 255), 1)
        cv2.line(comp_viz, (int(width*i/3), 0), (int(width*i/3), height), (255, 255, 255), 1)

    # Draw key subjects with prominence
    if "key_subjects" in sports_analysis:
        for subject in sports_analysis['key_subjects']:
            box = subject['box']
            x1, y1, x2, y2 = box
            # Color based on prominence - more red = more important
            prominence = min(1.0, subject['prominence'] * 10)  # Scale for visibility
            color = (0, int(255 * (1-prominence)), int(255 * prominence))
            cv2.rectangle(comp_viz, (x1, y1), (x2, y2), color, 2)

    # Display results
    plt.figure(figsize=(15, 10))

    plt.subplot(2, 2, 1)
    plt.imshow(img)
    plt.title("Original Image")
    plt.axis('off')

    plt.subplot(2, 2, 2)
    plt.imshow(det_viz)
    plt.title(f"Detections: {detections['athletes']} athletes, {detections['sports_objects']} sports objects")
    plt.axis('off')

    plt.subplot(2, 2, 3)
    plt.imshow(comp_viz)
    plt.title("Composition Analysis")
    plt.axis('off')

    plt.subplot(2, 2, 4)
    plt.imshow(depth_map, cmap='plasma')
    plt.title("Depth Map")
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    # Print detailed analysis
    print("\n==== SPORTS IMAGE ANALYSIS ====")
    print(f"Detected {detections['athletes']} athletes and {len(detections['classes'])-detections['athletes']} other objects")

    if "sport_type" in composition_analysis:
        print(f"\nSport type: {composition_analysis['sport_type']}")

    if detections['athletes'] > 0:
        print("\nPlayer Analysis:")
        print(f"- Number of players: {detections['athletes']}")
        if detections['athletes'] > 1:
            print(f"- Player dispersion: {sports_analysis['player_dispersion']:.2f}")

    print("\nAction Analysis:")
    print(f"- Equipment detected: {', '.join(action_analysis['equipment_types']) if action_analysis['equipment_types'] else 'None'}")
    print(f"- Action level: {action_analysis['action_quality']} ({action_analysis['action_level']:.2f})")

    print("\nComposition Analysis:")
    print(f"- Framing quality: {composition_analysis['framing_quality']}")

    if composition_analysis['recommended_crop']:
        crop = composition_analysis['recommended_crop']
        direction_x = "right" if crop['shift_x'] < 0 else "left"
        direction_y = "down" if crop['shift_y'] < 0 else "up"
        print(f"- Recommended crop: Shift {abs(crop['shift_x'])*100:.1f}% {direction_x} and {abs(crop['shift_y'])*100:.1f}% {direction_y}")

    # Key subjects
    if sports_analysis['key_subjects']:
        print("\nKey Subjects by Prominence:")
        for i, subject in enumerate(sports_analysis['key_subjects']):
            print(f"{i+1}. {subject['class']} (Prominence: {subject['prominence']:.2f})")

    if facial_analysis and facial_analysis.get('has_faces', False):
        print("\nPhân tích biểu cảm:")
        print(f"- Cảm xúc chủ đạo: {facial_analysis['dominant_emotion']}")
        print(f"- Cường độ cảm xúc: {facial_analysis['emotion_intensity']:.2f}")
        print(f"- Giá trị cảm xúc: {facial_analysis['emotional_value']}")

def analyze_sports_image(file_path):
    """Main function to analyze sports images"""
    t_start = time.time()

    # Load models
    sam_predictor, midas, yolo = load_models()
    img_data = preprocess_image(file_path)

    # Step 1: Object detection with YOLO
    detections = detect_sports_objects(yolo, img_data)

    # Step 2: Generate depth map
    depth_map, depth_mask, depth_contour = generate_depth_map(midas, img_data)

    # Step 3: Optional - Segment with SAM if needed
    # seg_mask, color_mask = segment_with_sam(sam_predictor, img_data)

    # Step 4: Analyze sports scene
    sports_analysis = analyze_sports_scene(detections, depth_map, img_data)

    # Step 5: Analyze action quality
    action_analysis = analyze_action_quality(detections, img_data)

    # Step 6: Sports composition analysis
    composition_analysis = analyze_sports_composition(detections, sports_analysis, img_data)

    # Bổ sung: Phân tích biểu cảm (tuỳ chọn)
    facial_analysis = None
    try:
        facial_analysis = analyze_facial_expression(detections, img_data)
    except Exception as e:
        print(f"Không thể phân tích biểu cảm: {str(e)}")

    # Step 7: Visualize results - thêm facial_analysis vào tham số
    visualize_sports_results(img_data, detections, depth_map,
                            sports_analysis, action_analysis, composition_analysis,
                            facial_analysis)

    t_end = time.time()
    print(f"\nAnalysis completed in {t_end - t_start:.2f} seconds")

    return {
        'detections': detections,
        'sports_analysis': sports_analysis,
        'action_analysis': action_analysis,
        'composition_analysis': composition_analysis,
        'facial_analysis': facial_analysis
    }

def main():
    # Upload image
    print("Please upload a sports image:")
    uploaded = files.upload()
    file_name = next(iter(uploaded))

    # Analyze image
    analysis = analyze_sports_image(file_name)
    return analysis

if __name__ == "__main__":
    main()

Please upload a sports image:


Saving 2166064646.webp to 2166064646 (3).webp


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master



0: 480x640 5 persons, 1 baseball bat, 74.9ms
Speed: 24.9ms preprocess, 74.9ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)
Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.4.4-

Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/facial_expression_model_weights.h5
To: /root/.deepface/weights/facial_expression_model_weights.h5
100%|██████████| 5.98M/5.98M [00:00<00:00, 103MB/s]


TypeError: visualize_sports_results() takes 6 positional arguments but 7 were given