In [1]:
import torch
from ultralytics import YOLO
from PIL import Image
import os
import cv2
import numpy as np
from pathlib import Path

In [2]:
4719+47745+66193+140503+211021

470181

In [1]:
import torch
from ultralytics import YOLO
from PIL import Image
import os
import cv2
import numpy as np
from pathlib import Path
class FaceDetector:
    def __init__(self, model_path=None):

        # 确保CUDA可用
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")
        
        # 加载或下载模型
        if model_path is None:
            self.model = YOLO('yolov11s-face.pt') 
        else:
            self.model = YOLO(model_path)
        
        # 将模型移动到GPU
        self.model.to(self.device)

    def detect_and_crop_faces(self, image_path, output_dir, conf_threshold=0.5, file_prefix='', scale_factor=2.2):
        """
        检测图片中的人脸并保存裁剪结果
        
        参数:
        image_path: 输入图片路径
        output_dir: 输出目录
        conf_threshold: 置信度阈值
        file_prefix: 文件名前缀，用于区分不同源图片的人脸
        scale_factor: 扩大截取区域的倍数，默认1.5倍
        
        返回:
        保存的图片路径列表
        """
        # 创建输出目录
        os.makedirs(output_dir, exist_ok=True)
        
        # 读取图片
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Cannot read image: {image_path}")
        
        # BGR转RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        # 进行检测
        results = self.model(img_rgb, conf=conf_threshold)
        
        saved_paths = []
        # 处理检测结果
        for i, det in enumerate(results[0].boxes.xyxy):
            x1, y1, x2, y2 = map(int, det)
            
            # 计算原始检测框的中心点和尺寸
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            width = x2 - x1
            height = y2 - y1
            
            # 使用最大边长作为基准，并应用缩放因子
            size = int(max(width, height) * scale_factor)
            
            # 计算扩展后的边界
            new_x1 = max(0, center_x - size // 2)
            new_y1 = max(0, center_y - size // 2)
            new_x2 = min(img.shape[1], new_x1 + size)
            new_y2 = min(img.shape[0], new_y1 + size)
            
            # 如果边界超出图片范围，调整以保持正方形
            if new_x2 - new_x1 != new_y2 - new_y1:
                size = min(new_x2 - new_x1, new_y2 - new_y1)
                new_x2 = new_x1 + size
                new_y2 = new_y1 + size
            
            # 裁剪人脸
            face = img[new_y1:new_y2, new_x1:new_x2]
            face = cv2.resize(face, (600, 600))
            # 使用前缀和序号构建输出文件名
            output_path = os.path.join(output_dir, f'{file_prefix}face_{i}.jpg')
            cv2.imwrite(output_path, face)
            saved_paths.append(output_path)
            
        return saved_paths

    def process_directory(self, input_dir, output_dir, conf_threshold=0.5, scale_factor=1.35):
        """
        处理整个目录中的图片，所有人脸直接保存到输出目录
        
        参数:
        input_dir: 输入图片目录
        output_dir: 输出目录
        conf_threshold: 置信度阈值
        scale_factor: 扩大截取区域的倍数，默认1.5倍
        
        返回:
        处理的图片数量和检测到的人脸总数
        """
        processed_images = 0
        total_faces = 0
        
        # 支持的图片格式
        img_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}
        
        # 创建输出目录
        os.makedirs(output_dir, exist_ok=True)
        
        for img_path in Path(input_dir).rglob('*'):
            if img_path.suffix.lower() in img_extensions:
                try:
                    # 使用源图片名称作为前缀，确保不同图片的人脸不会互相覆盖
                    file_prefix = f'{img_path.stem}_'
                    faces = self.detect_and_crop_faces(
                        str(img_path), 
                        output_dir, 
                        conf_threshold,
                        file_prefix,
                        scale_factor
                    )
                    
                    processed_images += 1
                    total_faces += len(faces)
                    print(f"Processed {img_path.name}: found {len(faces)} faces")
                except Exception as e:
                    print(f"Error processing {img_path}: {str(e)}")
        
        return processed_images, total_faces

def main():
    # 使用示例
    detector = FaceDetector()  # 首次运行会自动下载模型
    name = "李理真"
    # 处理整个目录
    input_dir = "H:\\sd\\%s\\og" %name
    output_dir = "H:\\sd\\%s\\og2" %name
    try:
        processed, faces = detector.process_directory(input_dir, output_dir, scale_factor=2.0)
        print(f"Processed {processed} images, found {faces} faces in total")
    except Exception as e:
        print(f"Error processing directory: {str(e)}")

if __name__ == "__main__":
    main()

Using device: cuda

0: 640x448 1 face, 118.2ms
Speed: 7.0ms preprocess, 118.2ms inference, 106.6ms postprocess per image at shape (1, 3, 640, 448)
Processed 微信截图_20231008202456.png: found 1 faces

0: 640x512 1 face, 114.1ms
Speed: 2.0ms preprocess, 114.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
Processed 微信截图_20231008202519.png: found 1 faces

0: 640x480 1 face, 115.2ms
Speed: 2.0ms preprocess, 115.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
Processed 微信截图_20231008202531.png: found 1 faces

0: 640x512 3 faces, 11.0ms
Speed: 2.0ms preprocess, 11.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 512)
Processed 微信截图_20231008202541.png: found 3 faces

0: 640x512 1 face, 11.0ms
Speed: 1.0ms preprocess, 11.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
Processed 微信截图_20231008202549.png: found 1 faces

0: 640x512 1 face, 11.0ms
Speed: 1.0ms preprocess, 11.0ms inference, 1.0ms postprocess per image at shape 

In [2]:
import torch
from ultralytics import YOLO
import cv2
import os
from pathlib import Path
from rembg import remove
import numpy as np
from PIL import Image

class FaceDetector:
    def __init__(self, model_path=None):
        """
        初始化人脸检测器
        model_path: YOLOv8-face模型路径，如果为None则自动下载
        """
        # 确保CUDA可用
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")
        
        # 加载或下载模型
        if model_path is None:
            self.model = YOLO('yolov11s-face.pt')  # 下载并加载模型
        else:
            self.model = YOLO(model_path)
        
        # 将模型移动到GPU
        self.model.to(self.device)

    def remove_background(self, img):
        """
        移除图片背景
        
        参数:
        img: OpenCV格式的图片（BGR）
        
        返回:
        去除背景后的图片（RGBA）
        """
        # 将OpenCV的BGR转为PIL的RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(img_rgb)
        
        # 移除背景
        output = remove(pil_img)
        
        # 转回OpenCV格式（带alpha通道）
        output_np = np.array(output)
        return output_np

    def remove_background_with_threshold(img, threshold=128):
        # Step 1: 去除背景
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(img_rgb)
        output = remove(pil_img)
    
        # Step 2: 转换为NumPy数组并提取Alpha通道
        output_np = np.array(output)
        alpha_channel = output_np[:, :, 3]  # 获取alpha通道
    
        # Step 3: 根据阈值控制透明度（提高背景去除的严格性）
        output_np[:, :, 3] = np.where(alpha_channel > threshold, 255, 0)
    
        return output_np

    def detect_and_crop_faces(self, image_path, output_dir, conf_threshold=0.5, file_prefix='', scale_factor=1.5, remove_bg=True):
        """
        检测图片中的人脸并保存裁剪结果
        
        参数:
        image_path: 输入图片路径
        output_dir: 输出目录
        conf_threshold: 置信度阈值
        file_prefix: 文件名前缀，用于区分不同源图片的人脸
        scale_factor: 扩大截取区域的倍数，默认1.5倍
        remove_bg: 是否移除背景
        
        返回:
        保存的图片路径列表
        """
        # 创建输出目录
        os.makedirs(output_dir, exist_ok=True)
        
        # 读取图片
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Cannot read image: {image_path}")
        
        # BGR转RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        # 进行检测
        results = self.model(img_rgb, conf=conf_threshold)
        
        saved_paths = []
        # 处理检测结果
        for i, det in enumerate(results[0].boxes.xyxy):
            x1, y1, x2, y2 = map(int, det)
            
            # 计算原始检测框的中心点和尺寸
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            width = x2 - x1
            height = y2 - y1
            
            # 使用最大边长作为基准，并应用缩放因子
            size = int(max(width, height) * scale_factor)
            
            # 计算扩展后的边界
            new_x1 = max(0, center_x - size // 2)
            new_y1 = max(0, center_y - size // 2)
            new_x2 = min(img.shape[1], new_x1 + size)
            new_y2 = min(img.shape[0], new_y1 + size)
            
            # 如果边界超出图片范围，调整以保持正方形
            if new_x2 - new_x1 != new_y2 - new_y1:
                size = min(new_x2 - new_x1, new_y2 - new_y1)
                new_x2 = new_x1 + size
                new_y2 = new_y1 + size
            
            # 裁剪人脸
            face = img[new_y1:new_y2, new_x1:new_x2]
            
            # 如果需要去除背景
            if remove_bg:
                face = self.remove_background_with_threshold(face)
                # 构建输出文件名（使用png以支持透明通道）
                output_path = os.path.join(output_dir, f'{file_prefix}face_{i}.png')
                # 保存带透明通道的PNG
                cv2.imwrite(output_path, cv2.cvtColor(face, cv2.COLOR_RGBA2BGRA))
            else:
                # 保存普通JPG
                output_path = os.path.join(output_dir, f'{file_prefix}face_{i}.jpg')
                cv2.imwrite(output_path, face)
            
            saved_paths.append(output_path)
            
        return saved_paths

    def process_directory(self, input_dir, output_dir, conf_threshold=0.5, scale_factor=1.5, remove_bg=True):
        """
        处理整个目录中的图片，所有人脸直接保存到输出目录
        
        参数:
        input_dir: 输入图片目录
        output_dir: 输出目录
        conf_threshold: 置信度阈值
        scale_factor: 扩大截取区域的倍数，默认1.5倍
        remove_bg: 是否移除背景
        
        返回:
        处理的图片数量和检测到的人脸总数
        """
        processed_images = 0
        total_faces = 0
        
        # 支持的图片格式
        img_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}
        
        # 创建输出目录
        os.makedirs(output_dir, exist_ok=True)
        
        for img_path in Path(input_dir).rglob('*'):
            if img_path.suffix.lower() in img_extensions:
                try:
                    # 使用源图片名称作为前缀，确保不同图片的人脸不会互相覆盖
                    file_prefix = f'{img_path.stem}_'
                    faces = self.detect_and_crop_faces(
                        str(img_path), 
                        output_dir, 
                        conf_threshold,
                        file_prefix,
                        scale_factor,
                        remove_bg
                    )
                    
                    processed_images += 1
                    total_faces += len(faces)
                    print(f"Processed {img_path.name}: found {len(faces)} faces")
                except Exception as e:
                    print(f"Error processing {img_path}: {str(e)}")
        
        return processed_images, total_faces

def main():
    # 使用示例
    detector = FaceDetector()  # 首次运行会自动下载模型
    
    # 处理整个目录
    input_dir = "H:\\sd\\口水狗狗3\\og"
    output_dir = "H:\\sd\\口水狗狗3\\test"
    
    try:
        # 设置scale_factor为2.0，将截取区域扩大为检测框的2倍
        # 设置remove_bg为True，启用背景移除
        processed, faces = detector.process_directory(
            input_dir, 
            output_dir, 
            scale_factor=2.0,
            remove_bg=True
        )
        print(f"Processed {processed} images, found {faces} faces in total")
    except Exception as e:
        print(f"Error processing directory: {str(e)}")

if __name__ == "__main__":
    main()

Using device: cuda

0: 640x448 1 face, 103.6ms
Speed: 4.1ms preprocess, 103.6ms inference, 62.4ms postprocess per image at shape (1, 3, 640, 448)
Error processing H:\sd\口水狗狗3\og\chrome_AxTg216frc.jpg: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'


0: 640x448 1 face, 10.9ms
Speed: 2.0ms preprocess, 10.9ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 448)
Error processing H:\sd\口水狗狗3\og\chrome_Fk4kC5beab.jpg: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'


0: 640x448 1 face, 10.0ms
Speed: 2.0ms preprocess, 10.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 448)
Error processing H:\sd\口水狗狗3\og\chrome_VsCFEfvnP6.jpg: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in funct

In [5]:
512*768 - 600*600

33216

In [1]:

import keyboard


ModuleNotFoundError: No module named 'keyboard'

In [None]:
import os
import cv2
import torch
from ultralytics import YOLO
from pathlib import Path
import keyboard
from PIL import ImageGrab
import numpy as np
from datetime import datetime

class FaceDetector:
    def __init__(self, model_path=None):
        """初始化人脸检测器"""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")
        
        if model_path is None:
            self.model = YOLO('yolov8n-face.pt')
        else:
            self.model = YOLO(model_path)
        
        self.model.to(self.device)

    def detect_and_save_faces(self, img, output_dir, conf_threshold=0.5, scale_factor=1.5):
        """检测并保存人脸"""
        os.makedirs(output_dir, exist_ok=True)
        
        # BGR转RGB (如果输入是BGR格式)
        if len(img.shape) == 3 and img.shape[2] == 3:
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            img_rgb = img
        
        # 进行检测
        results = self.model(img_rgb, conf=conf_threshold)
        
        saved_paths = []
        for i, det in enumerate(results[0].boxes.xyxy):
            x1, y1, x2, y2 = map(int, det)
            
            # 计算中心点和尺寸
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            width = x2 - x1
            height = y2 - y1
            
            # 使用最大边长作为基准并应用缩放因子
            size = int(max(width, height) * scale_factor)
            
            # 计算扩展后的边界
            new_x1 = max(0, center_x - size // 2)
            new_y1 = max(0, center_y - size // 2)
            new_x2 = min(img.shape[1], new_x1 + size)
            new_y2 = min(img.shape[0], new_y1 + size)
            
            # 保持正方形
            if new_x2 - new_x1 != new_y2 - new_y1:
                size = min(new_x2 - new_x1, new_y2 - new_y1)
                new_x2 = new_x1 + size
                new_y2 = new_y1 + size
            
            # 裁剪并保存人脸
            face = img[new_y1:new_y2, new_x1:new_x2]
            face = cv2.resize(face, (600, 600))
            
            # 使用时间戳作为文件名
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = os.path.join(output_dir, f'face_{timestamp}_{i}.jpg')
            cv2.imwrite(output_path, face)
            saved_paths.append(output_path)
            
        return saved_paths

class ScreenFaceCapture:
    def __init__(self, output_dir):
        self.detector = FaceDetector()
        self.output_dir = output_dir
        self.is_running = True

    def capture_screen(self):
        """捕获当前屏幕并返回numpy数组"""
        screenshot = ImageGrab.grab()
        return np.array(screenshot)

    def process_screen(self):
        """处理当前屏幕内容"""
        print("Capturing screen...")
        screen = self.capture_screen()
        faces = self.detector.detect_and_save_faces(screen, self.output_dir)
        if faces:
            print(f"Found and saved {len(faces)} faces")
        else:
            print("No faces detected")

    def run(self):
        """运行主循环，监听快捷键"""
        print("Screen Face Capture is running...")
        print("Press Alt+Q to capture faces")
        print("Press Esc to exit")

        # 注册快捷键
        keyboard.add_hotkey('alt+q', self.process_screen)
        
        # 等待Esc键退出
        keyboard.wait('esc')
        self.is_running = False
        print("Application terminated")

def main():
    # 设置输出目录
    output_dir = os.path.join(os.path.expanduser("~"), "Desktop", "captured_faces")
    
    # 创建并运行应用
    app = ScreenFaceCapture(output_dir)
    try:
        app.run()
    except Exception as e:
        print(f"Error: {str(e)}")

if __name__ == "__main__":
    main()