## YOLO

### PyTorch 기반 물체인식 모델
- CNN, R-CNN(Regions with CNN features)
- https://github.com/ultralytics/ultralytics 참조

#### YOLOv5 이상 설치
```shell
> pip install ultralytics
```

In [1]:
# Install YOLO (the ultralytics package) from inside the notebook
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.109-py3-none-any.whl.metadata (37 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Downloading PyYAML-6.0.2-cp311-cp311-win_amd64.whl.metadata (2.1 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.109-py3-none-any.whl (974 kB)
   ---------------------------------------- 0.0/974.8 kB ? eta -:--:--
   --------------------------------------- 974.8/974.8 kB 30.1 MB/s eta 0:00:00
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ----- ---------------------------------- 5.1/39.5 MB 107.8 MB/s eta 0:00:01


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


#### 콘솔에서 예측

In [9]:
# 콘솔에서 예측
## yolo11n.pt - pretrained YOLO model
## 자동으로 yolo11n.pt 다운로드
## 웹 URL에 있는 이미지도 예측이 가능
!yolo predict model=yolo11n.pt source-'https://ultralytics.com/images/bus.jpg'

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Source\iot-dataanalysis-2025\mlvenv\Scripts\yolo.exe\__main__.py", line 7, in <module>
  File "C:\Source\iot-dataanalysis-2025\mlvenv\Lib\site-packages\ultralytics\cfg\__init__.py", line 911, in entrypoint
    check_dict_alignment(full_args_dict, {a: ""})
  File "C:\Source\iot-dataanalysis-2025\mlvenv\Lib\site-packages\ultralytics\cfg\__init__.py", line 499, in check_dict_alignment
    raise SyntaxError(string + CLI_HELP_MSG) from e
SyntaxError: '[31m[1msource-'https://ultralytics.com/images/bus.jpg'[0m' is not a valid YOLO argument. 

    Arguments received: ['yolo', 'predict', 'model=yolo11n.pt', "source-'https://ultralytics.com/images/bus.jpg'"]. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of frozenset({'obb', 'pose', 'classify', 'segment', 'detect'})
  

#### 파이썬으로 예측

In [10]:
# YOLO 모듈 로드
from ultralytics import YOLO

In [11]:
# The YOLO class builds the matching prediction-model object by itself,
# depending on the version of the model file it is given
model = YOLO('./yolo11n.pt')

##### coco8.yaml
- https://github.com/ultralytics/assets/releases/download/v0.0.0/coco8.zip
- 같은 방식으로 전체 COCO 데이터셋을 훈련시킨 결과 -> yolo11n.pt (coco8은 COCO에서 이미지 8장만 추린 테스트용 소형 데이터셋)

In [None]:
# coco8.yaml - dataset definition file used for YOLO training
import torch

# Train on the first CUDA GPU when one is available; otherwise fall back to
# the CPU so this cell does not fail on machines without CUDA.
train_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

train_results = model.train(
    data='./coco8.yaml',    # dataset definition
    epochs=100,             # number of training epochs
    imgsz=640,              # training image size
    device=train_device,
)

#### 이미지 예측

In [15]:
# Run prediction on a local image file; the returned object is indexed with
# [0] further below to get the result for this single image
result = model('./0000001.jpg')


image 1/1 c:\Source\iot-dataanalysis-2025\day08\0000001.jpg: 480x640 1 cat, 46.9ms
Speed: 2.0ms preprocess, 46.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


In [26]:
# matplotlib, PIL(Pillow) 모듈 로드
import matplotlib.pyplot as plt
from PIL import Image

In [28]:
# Draw the detections of the first result onto its image
img = result[0].plot()
# Reverse the last (channel) axis before building the PIL image
# NOTE(review): presumably converts BGR -> RGB; confirm plot()'s channel order
img_pil = Image.fromarray(img[..., ::-1])
# Write the annotated image to disk; it is read back with OpenCV later on
img_pil.save('./predict_result.jpg')

### OpenCV
- Open Source Computer Vision의 약자. 실시간 컴퓨터 비전(시각처리)을 목적으로 한 프로그래밍 라이브러리
- 인텔에서 2000년에 C, C++에서 사용하기 위해 개발
- 파이썬에서 사용할 수 있게 래핑
```shell
> pip install opencv-python
```

In [29]:
# Install OpenCV (Python bindings) from inside the notebook
!pip install opencv-python




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [30]:
import cv2
# Display the installed OpenCV version as the cell output
cv2.__version__

'4.11.0'

In [32]:
# Read back the annotated image that was saved earlier
img2 = cv2.imread('./predict_result.jpg')
img2.shape      # (464, 640, 3) -> (height, width, channel)

(464, 640, 3)

In [34]:
# Show the image in a window titled 'predict result'
cv2.imshow('predict result', img2)
# Wait for a key press (0 = no timeout), then close every OpenCV window
cv2.waitKey(0)
cv2.destroyAllWindows()

#### YOLO 예측

In [37]:
# Load a second image and resize it; cv2.resize takes the size as (width, height)
img = cv2.imread('./0000002.jpg')
resized_img = cv2.resize(img, (640, 400))

# Run prediction on the in-memory array and draw the detections of the first result
result = model(resized_img)
plots = result[0].plot()

# Show the annotated image until a key is pressed, then close the window
cv2.imshow('predict_result.jpg', plots)
cv2.waitKey(0)
cv2.destroyAllWindows()


0: 416x640 1 cup, 1 mouse, 63.6ms
Speed: 2.6ms preprocess, 63.6ms inference, 1.4ms postprocess per image at shape (1, 3, 416, 640)


#### 동영상 플레이
- 라즈베리파이에서 동일하게 사용 가능
- 라즈베리파이 웹캠 사용추천

In [48]:
# Video file paths
video_path = './sample01.mp4'           # input video
output_path = './sample01_output.mp4'   # output of the real-time detection cell
count_path = './sample01_count.mp4'     # output of the object-counting cell

In [42]:
# Play the video frame by frame in an OpenCV window
cap = cv2.VideoCapture(video_path)  # pass 0 (a device index) instead to use a webcam/camera

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:     # no frame was returned: stop playing
            break

        cv2.imshow('Video play', frame)

        # waitKey also lets the window process its events, so it must be
        # called once per frame even when no key is expected
        if cv2.waitKey(1) & 0xFF == ord('q'):   # stop when the q key is pressed
            break
finally:
    # Always free the capture and close the window, even when the loop
    # raises or the kernel is interrupted
    cap.release()
    cv2.destroyAllWindows()

#### YOLO 실시간 예측

In [44]:
# 시간 모듈 로드
import time

In [45]:
# Run YOLO on every frame of the video, show the original and the annotated
# frame side by side, and save the annotated frames to output_path.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early with a clear message instead of a ZeroDivisionError below
    raise RuntimeError('Error reading video file')

fps = cap.get(cv2.CAP_PROP_FPS)     # video FPS (frames per second)
if fps <= 0:
    # Some sources report no frame rate; 30 is an assumed fallback so the
    # division below and the VideoWriter still get a usable value
    fps = 30.0
frame_time = 1.0 / fps              # duration of one frame in seconds
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))      # 1200
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))    # 720

# VideoWriter saves the annotated frames as a video file
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

try:
    while cap.isOpened():
        start_time = time.time()    # start of this frame's processing
        ret, frame = cap.read()
        if not ret:     # no frame was returned: stop
            break

        # Object detection
        results = model(frame)
        # Draw the detections; a single frame is passed in, so the first
        # result is the one for this frame
        detect_frame = results[0].plot()
        # Save the annotated frame to the output file
        out.write(detect_frame)
        # Show the annotated frame and the original frame
        cv2.imshow('YOLO Object Detection', detect_frame)
        cv2.imshow('Video play', frame)

        # Wait only for what is left of the frame's time slot (at least 1 ms)
        # so playback is not faster than the source FPS
        elapsed_time = time.time() - start_time
        delay = max(int((frame_time - elapsed_time) * 1000), 1)

        if cv2.waitKey(delay) & 0xFF == ord('q'):   # stop when the q key is pressed
            break
finally:
    # Always release the capture and the writer and close the windows, even
    # when the loop raises or the kernel is interrupted
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 384x640 1 train, 1 clock, 31.0ms
Speed: 3.3ms preprocess, 31.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 63.3ms
Speed: 2.5ms preprocess, 63.3ms inference, 15.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 43.9ms
Speed: 2.3ms preprocess, 43.9ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 44.0ms
Speed: 2.3ms preprocess, 44.0ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 34.1ms
Speed: 2.3ms preprocess, 34.1ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 8.1ms
Speed: 1.1ms preprocess, 8.1ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 7.4ms
Speed: 1.1ms preprocess, 7.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 train, 1 clock, 7.9ms
Speed: 1.1ms preprocess, 7.9m

#### Car Counting
- 지정된 라인 아래로 내려오는 자동차 개수 카운팅

- shapely 설치
```shell
> pip install shapely==2.0.1
```

In [51]:
# Install shapely (pinned to version 2.0.1)
!pip install shapely==2.0.1




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [53]:
# Install lap
!pip install lap

Collecting lap
  Downloading lap-0.5.12-cp311-cp311-win_amd64.whl.metadata (6.3 kB)
Downloading lap-0.5.12-cp311-cp311-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
    --------------------------------------- 0.0/1.5 MB 1.3 MB/s eta 0:00:02
   ------ --------------------------------- 0.3/1.5 MB 3.9 MB/s eta 0:00:01
   ------------- -------------------------- 0.5/1.5 MB 3.8 MB/s eta 0:00:01
   ----------------- ---------------------- 0.6/1.5 MB 4.1 MB/s eta 0:00:01
   ---------------------- ----------------- 0.8/1.5 MB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 5.5 MB/s eta 0:00:00
Installing collected packages: lap
Successfully installed lap-0.5.12



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [54]:
# Count objects crossing a line in the video and save the annotated frames
# to count_path.
import cv2
from ultralytics.solutions import ObjectCounter

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Explicit raise instead of assert: asserts are removed under `python -O`
    raise RuntimeError('Error reading video file')

region_points = [(20, 400), (1080, 400)]    # two end points of the counting line
fps = cap.get(cv2.CAP_PROP_FPS)     # video FPS (frames per second)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))      # 1200
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))    # 720
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(count_path, fourcc, fps, (width, height))

try:
    # Core object of the counting solution
    counter = ObjectCounter(
        show=True,              # display the frames while processing
        region=region_points,   # where to count
        model='yolo11n.pt',     # YOLO model
        # classes=[0, 2],
        # tracker='botsort.yaml',
    )

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:     # no frame was returned: stop
            break

        # The counter returns a result object carrying the annotated frame
        results = counter(frame)
        # Unlike the plain detection loop, the annotated frame to save is
        # read from results.plot_im
        out.write(results.plot_im)
finally:
    # Always release the capture and the writer and close the windows, even
    # when processing raises or the kernel is interrupted
    cap.release()
    out.release()
    cv2.destroyAllWindows()


Ultralytics Solutions:  {'region': [(20, 400), (1080, 400)], 'show_in': True, 'show_out': True, 'colormap': None, 'up_angle': 145.0, 'down_angle': 90, 'kpts': [6, 8, 10], 'analytics_type': 'line', 'json_file': None, 'records': 5, 'show': True, 'model': 'yolo11n.pt'}

0: 384x640 1 train, 8.6ms
Speed: 1.3ms preprocess, 8.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
 Results: SolutionResults(classwise_count={'train': {'IN': 0, 'OUT': 0}}, total_tracks=1)

0: 384x640 1 train, 8.0ms
Speed: 1.3ms preprocess, 8.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
 Results: SolutionResults(classwise_count={'train': {'IN': 0, 'OUT': 0}}, total_tracks=1)

0: 384x640 1 train, 7.5ms
Speed: 1.1ms preprocess, 7.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
 Results: SolutionResults(classwise_count={'train': {'IN': 0, 'OUT': 0}}, total_tracks=1)

0: 384x640 1 train, 8.1ms
Speed: 1.0ms preprocess, 8.1ms inference, 1.3ms postprocess per ima