In [1]:
# !pip install ultralytics opencv-python matplotlib


In [14]:
from ultralytics import YOLO

pose_model = YOLO("yolov8l-pose.pt")  # โหลดครั้งแรกจะดาวน์โหลด weight


In [3]:
import cv2
import matplotlib.pyplot as plt
from IPython.display import clear_output

cap = cv2.VideoCapture("Hoang_Hearn.mp4")
frame_id = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # ------------------------
    # Pose inference
    # ------------------------
    results = pose_model(
        frame,
        conf=0.25,
        device=0,       # ใช้ "cpu" ถ้าไม่มี GPU
        verbose=False
    )

    r = results[0]

    # ------------------------
    # วาด bbox + skeleton (built-in)
    # ------------------------
    vis = r.plot()   # BGR image

    # แปลงเป็น RGB สำหรับ notebook
    vis = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 6))
    plt.imshow(vis)
    plt.axis("off")
    plt.title(f"Frame {frame_id}")
    plt.show()

    clear_output(wait=True)
    frame_id += 1

cap.release()

print(f"Processed {frame_id} frames")


Processed 722 frames


In [4]:
keypoints = r.keypoints.xy.cpu().numpy()     # shape: [N, 17, 2]
scores    = r.keypoints.conf.cpu().numpy()   # shape: [N, 17]

for pid, kpts in enumerate(keypoints):
    print(f"Person {pid}")
    print(kpts)


Person 0
[[     1524.4      613.21]
 [     1530.7      606.23]
 [     1521.5      605.98]
 [     1546.8       609.5]
 [     1522.7      609.61]
 [     1569.6      648.28]
 [       1510      638.92]
 [     1608.4      677.15]
 [     1476.9      666.54]
 [     1561.6      657.78]
 [       1479      664.86]
 [     1548.8      754.57]
 [     1507.8      744.64]
 [     1550.7      837.79]
 [     1460.5      811.23]
 [     1547.5      921.68]
 [     1504.4      850.87]]
Person 1
[[       1170      625.02]
 [     1176.6      617.59]
 [     1165.7      618.23]
 [     1190.4      619.53]
 [     1165.8      620.47]
 [     1213.4      653.94]
 [     1164.5      638.22]
 [     1234.2      692.88]
 [     1131.1      679.11]
 [       1184      668.74]
 [     1137.7      664.42]
 [     1204.7      757.41]
 [     1169.4      738.02]
 [     1184.3      840.21]
 [       1108      761.08]
 [     1191.1      921.73]
 [     1144.2      814.91]]
Person 2
[[     1341.9      530.93]
 [     1347.1      525.05]

In [5]:
video_files = [
    "Mek_Khara_Lor_Kaew.mp4",
    "Kum_Pa_Gun_Poong_Hork.mp4",
    "Narai_Kwang_Jug.mp4",
    "Hoang_Hearn.mp4",
    "Phra_Ram_Phang_Sorn.mp4",
    "Tad_Mai_Khom_Nam.mp4"
]


In [6]:
rows = []

for video_path in video_files:

    video_name = video_path.replace(".mp4", "")
    cap = cv2.VideoCapture(video_path)
    frame_id = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = pose_model(
            frame,
            conf=0.25,
            device=0,
            verbose=False
        )

        r = results[0]

        if r.keypoints is None:
            frame_id += 1
            continue

        keypoints = r.keypoints.xy.cpu().numpy()     # [N,17,2]
        scores    = r.keypoints.conf.cpu().numpy()   # [N,17]

        for person_id, (person_kpts, person_scores) in enumerate(zip(keypoints, scores)):
            for joint_id, ((x, y), s) in enumerate(zip(person_kpts, person_scores)):
                rows.append({
                    "video_name": video_name,
                    "frame_id": frame_id,
                    "person_id": person_id,
                    "joint_id": joint_id,
                    "x": float(x),
                    "y": float(y),
                    "score": float(s)
                })

        frame_id += 1

    cap.release()


In [7]:
import pandas as pd

df = pd.DataFrame(rows)
df.head()

df.to_csv("pose_data.csv", index=False)


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 564094 entries, 0 to 564093
Data columns (total 7 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   video_name  564094 non-null  object 
 1   frame_id    564094 non-null  int64  
 2   person_id   564094 non-null  int64  
 3   joint_id    564094 non-null  int64  
 4   x           564094 non-null  float64
 5   y           564094 non-null  float64
 6   score       564094 non-null  float64
dtypes: float64(3), int64(3), object(1)
memory usage: 30.1+ MB


In [9]:
df.head()

Unnamed: 0,video_name,frame_id,person_id,joint_id,x,y,score
0,Mek_Khara_Lor_Kaew,0,0,0,1567.591797,627.675171,0.972176
1,Mek_Khara_Lor_Kaew,0,0,1,1574.658691,620.556152,0.967867
2,Mek_Khara_Lor_Kaew,0,0,2,1564.053589,620.256836,0.765548
3,Mek_Khara_Lor_Kaew,0,0,3,1592.332397,623.721985,0.946861
4,Mek_Khara_Lor_Kaew,0,0,4,1563.698364,623.566528,0.185544


In [10]:
df['person_id'].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [12]:
import pandas as pd

df = pd.read_csv("pose_data.csv")
track_len = (
    df.groupby("person_id")["frame_id"]
      .nunique()
      .sort_values(ascending=False)
)

track_len


person_id
0     811
1     811
2     811
3     811
4     811
5     811
6     811
7     811
8     811
9     146
10     10
Name: frame_id, dtype: int64

In [13]:
df["frame_id"].nunique()

811

In [15]:
import cv2
import matplotlib.pyplot as plt
from IPython.display import clear_output
import pandas as pd

video_path = "muay_thai_clip.mp4"

cap = cv2.VideoCapture(video_path)
frame_id = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = pose_model(
        frame,
        conf=0.25,
        device=0,
        verbose=False
    )

    r = results[0]

    if r.keypoints is None:
        frame_id += 1
        continue

    keypoints = r.keypoints.xy.cpu().numpy()     # [N,17,2]
    scores    = r.keypoints.conf.cpu().numpy()   # [N,17]

    for person_id, (person_kpts, person_scores) in enumerate(zip(keypoints, scores)):
        for joint_id, ((x, y), s) in enumerate(zip(person_kpts, person_scores)):
            rows.append({
                "video_name": video_name,
                "frame_id": frame_id,
                "person_id": person_id,
                "joint_id": joint_id,
                "x": float(x),
                "y": float(y),
                "score": float(s)
            })

    frame_id += 1

cap.release()

In [16]:
df = pd.DataFrame(rows)
df.head()

df.to_csv("long_pose_data.csv", index=False)

In [17]:
df["frame_id"].nunique()

3866

In [18]:
track_len = (
    df.groupby("person_id")["frame_id"]
      .nunique()
      .sort_values(ascending=False)
)

track_len

person_id
0     3866
1     3866
2     3866
3     3866
4     3866
5     3866
6     3866
7     3866
8     3862
9      273
10      20
Name: frame_id, dtype: int64

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1158329 entries, 0 to 1158328
Data columns (total 7 columns):
 #   Column      Non-Null Count    Dtype  
---  ------      --------------    -----  
 0   video_name  1158329 non-null  object 
 1   frame_id    1158329 non-null  int64  
 2   person_id   1158329 non-null  int64  
 3   joint_id    1158329 non-null  int64  
 4   x           1158329 non-null  float64
 5   y           1158329 non-null  float64
 6   score       1158329 non-null  float64
dtypes: float64(3), int64(3), object(1)
memory usage: 61.9+ MB
