In [2]:
def convert_to_3fps(input_video_path, output_dir):
    """Re-encode a video at 3 FPS by keeping every N-th frame of the input.

    The result is written to ``<output_dir>/<input base name>_3fps.mp4`` using
    the 'mp4v' codec and the frame size reported by the input capture.

    Args:
        input_video_path: Path of the video to read.
        output_dir: Directory that receives the converted file. It is not
            created here.

    Returns:
        The path of the written file.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly instead of writing an empty file and reporting success.
        raise IOError(f"Cannot open video: {input_video_path}")

    output_fps = 3
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    # Clamp to 1: a source slower than the target rate (or one whose rate is
    # reported as 0) would otherwise give a step of 0 and `frame_idx % 0`.
    frame_skip = max(1, int(input_fps / output_fps))

    base_name = os.path.splitext(os.path.basename(input_video_path))[0]
    output_path = os.path.join(output_dir, f"{base_name}_3fps.mp4")

    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, output_fps, frame_size)

    try:
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Keep only every `frame_skip`-th frame.
            if frame_idx % frame_skip == 0:
                out.write(frame)
            frame_idx += 1
    finally:
        # Release both handles even if reading or writing raised.
        cap.release()
        out.release()
    return output_path
import os
import cv2
# Source clip and the directory that receives the down-sampled copy.
input_video_path = '/home/alpaco/osh/final/final1.mp4'
output_dir = '/home/alpaco/osh/final/converted'

# Make sure the output directory exists before the converter writes into it.
os.makedirs(output_dir, exist_ok=True)

# Convert to 3 frames per second
converted_video_path = convert_to_3fps(input_video_path, output_dir)
print(f"초당 3프레임으로 변환된 비디오 저장 경로: {converted_video_path}")


초당 3프레임으로 변환된 비디오 저장 경로: /home/alpaco/osh/final/converted/final1_3fps.mp4


In [13]:
import cv2
import csv
import torch
import gc
from ultralytics import YOLO

def process_video_with_dual_csv(input_video_path, output_video_path, abs_csv_path, rel_csv_path):
    """Track people in a video, estimate one pose per person, and log keypoints.

    Every frame is passed to the module-level ``model`` (tracking). For each
    tracked box of class 0 (person), a copy of the frame blacked out outside
    that box is passed to the module-level ``pose``; only its first detection
    is used. When at least 17 keypoints come back, one row is appended to each
    CSV with the columns::

        frame, x1, y1, ..., x17, y17, label

    * ``abs_csv_path`` receives the keypoints as integer frame coordinates.
    * ``rel_csv_path`` receives the same keypoints with the box's top-left
      corner subtracted. Coordinates that are not greater than 0 are written
      unchanged (presumably 0 marks an undetected keypoint -- TODO confirm).

    An annotated copy of each frame (boxes, labels, keypoints) is written to
    ``output_video_path`` with the 'mp4v' codec at the source FPS and size.

    Args:
        input_video_path: Video to analyse.
        output_video_path: Destination of the annotated video.
        abs_csv_path: Destination of the absolute-coordinate CSV.
        rel_csv_path: Destination of the box-relative CSV.

    Returns:
        None. If the capture reports an FPS of 0, an error is printed and the
        function returns before creating any output.
    """
    # Open the video file
    cap = cv2.VideoCapture(input_video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0:
        # Do not leak the capture handle on the early-exit path.
        cap.release()
        print(f"Error: {input_video_path} 비디오 파일을 열 수 없습니다.")
        return
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Video writer setup
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    try:
        # Create both CSV files and write their headers
        with open(abs_csv_path, mode='w', newline='') as abs_csv, \
                open(rel_csv_path, mode='w', newline='') as rel_csv:
            abs_writer = csv.writer(abs_csv)
            rel_writer = csv.writer(rel_csv)

            header = ['frame']
            for i in range(1, 18):
                header.extend([f'x{i}', f'y{i}'])
            header.append('label')
            abs_writer.writerow(header)
            rel_writer.writerow(header)

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                results = model.track(frame, persist=True, conf=0.1)

                # Draw on a copy so that boxes/keypoints painted for one person
                # never end up inside the masked image fed to the pose model
                # for the next person in the same frame.
                annotated = frame.copy()
                existing_labels = set()

                for result in results:
                    if result.boxes.id is None:
                        continue
                    for box, track_id, cls_id in zip(result.boxes.xyxy, result.boxes.id, result.boxes.cls):
                        if int(cls_id) != 0:  # keep the person class only
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        label = f'ID: {track_id}'
                        if label in existing_labels:
                            continue
                        existing_labels.add(label)

                        # Black out everything outside the box. The lower
                        # bounds are clamped because a negative value would be
                        # interpreted as a from-the-end slice and wipe the box.
                        mask_x1, mask_y1 = max(x1, 0), max(y1, 0)
                        mask_x2, mask_y2 = max(x2, 0), max(y2, 0)
                        masked_frame = frame.copy()
                        masked_frame[:mask_y1, :] = 0
                        masked_frame[mask_y2:, :] = 0
                        masked_frame[:, :mask_x1] = 0
                        masked_frame[:, mask_x2:] = 0

                        # Run the pose model on this person's masked image
                        keypoints_results = pose(masked_frame, imgsz=800)
                        kp_container = getattr(keypoints_results[0], 'keypoints', None)
                        kp_xy = kp_container.xy.cpu().numpy() if kp_container is not None else []
                        # Skip when the pose model found nobody, instead of
                        # indexing into an empty result.
                        if len(kp_xy) > 0 and kp_xy[0].shape[0] >= 17:
                            keypoints = kp_xy[0]
                            abs_row_data = [frame_count]  # absolute coordinates
                            rel_row_data = [frame_count]  # box-relative coordinates

                            for point in keypoints:
                                x_kp, y_kp = int(point[0]), int(point[1])
                                abs_row_data.extend([x_kp, y_kp])
                                # Only shift positive coordinates; others are
                                # written through unchanged.
                                rel_x_kp = x_kp - x1 if x_kp > 0 else x_kp
                                rel_y_kp = y_kp - y1 if y_kp > 0 else y_kp
                                rel_row_data.extend([rel_x_kp, rel_y_kp])

                            abs_row_data.append(label)
                            rel_row_data.append(label)

                            abs_writer.writerow(abs_row_data)
                            rel_writer.writerow(rel_row_data)

                            # Draw the keypoints on the output frame
                            for point in keypoints:
                                x, y = int(point[0]), int(point[1])
                                cv2.circle(annotated, (x, y), radius=5, color=(255, 0, 0), thickness=-1)

                        # Draw the bounding box and its track label
                        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        cv2.putText(annotated, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

                # Save the annotated frame to the output video
                out.write(annotated)
                frame_count += 1
                torch.cuda.empty_cache()
                gc.collect()
    finally:
        # Release the capture and writer even if inference or I/O raised.
        cap.release()
        out.release()

    print(f"{output_video_path} 파일로 동영상 저장이 완료되었습니다.")
    print(f"절대 좌표 CSV: {abs_csv_path}")
    print(f"상대 좌표 CSV: {rel_csv_path}")

# Model setup: `model` is used for tracking and `pose` for keypoint estimation
# inside process_video_with_dual_csv, which reads both as globals.
model = YOLO('yolov8l.pt')
model.overrides['imgsz'] = 800
# NOTE(review): "cuda:1" assumes a second GPU is present -- confirm on the target machine.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
model.to(device)
pose = YOLO('yolov8l-pose.pt')
pose.to(device)

#video_path = "/home/alpaco/osh/final/converted/C_32_7_smp_su_09-11_10-41-00_a_for_DF2_3fps.mp4" # ordinary person
#video_path = "/home/alpaco/osh/final/converted/Convert_PXL_20241123_090504860_3fps.mp4" # intoxicated person
video_path = "/home/alpaco/osh/final/converted/final1_3fps.mp4" # mixed


# Output locations: annotated video, absolute-coordinate CSV, box-relative CSV.
output = "/home/alpaco/osh/final/test.mp4"  
abscsv_path = "/home/alpaco/osh/final/abscsv.csv"
relcsv_path = "/home/alpaco/osh/final/relcsv.csv"


process_video_with_dual_csv(video_path,output,abscsv_path,relcsv_path)



0: 480x800 4 persons, 3 cars, 18.7ms
Speed: 4.4ms preprocess, 18.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 person, 24.6ms
Speed: 3.8ms preprocess, 24.6ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 2 persons, 15.2ms
Speed: 3.9ms preprocess, 15.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 person, 14.9ms
Speed: 3.8ms preprocess, 14.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 person, 15.0ms
Speed: 3.8ms preprocess, 15.0ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 4 persons, 3 cars, 14.9ms
Speed: 4.4ms preprocess, 14.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 1 person, 15.6ms
Speed: 3.9ms preprocess, 15.6ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 800)

0: 480x800 2 persons, 15.3ms
Speed: 4.0ms preprocess, 15.3ms inference, 2.0ms postprocess per imag

In [14]:
# After scaling (the StandardScaler step below is currently commented out)
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# NOTE(review): this relative path resolves against the kernel's working
# directory; presumably it is the relcsv.csv written by the pose cell -- confirm.
test_data= pd.read_csv("final/relcsv.csv")

# x1..x17 followed by y1..y17 (note: grouped by axis, not interleaved as in the CSV header).
coordinate_cols = [f'x{i}' for i in range(1, 18)] + [f'y{i}' for i in range(1, 18)]
X = test_data[coordinate_cols].values  # 34 coordinate features

#scaler_X = StandardScaler()
#X_normalized = scaler_X.fit_transform(X)


#test_data[coordinate_cols] = X_normalized


In [16]:
# Number of CSV rows recorded per track label.
test_data.label.value_counts()

label
ID: 35.0    177
ID: 3.0      43
ID: 1.0      25
ID: 49.0     16
ID: 5.0      12
ID: 2.0       9
ID: 39.0      1
ID: 43.0      1
ID: 58.0      1
Name: count, dtype: int64

In [15]:
# Display the loaded keypoint table.
test_data

Unnamed: 0,frame,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,y13,x14,y14,x15,y15,x16,y16,x17,y17,label
0,0,0,0,0,0,0,0,36,64,0,...,276,77,384,165,379,94,479,207,481,ID: 1.0
1,0,166,40,178,36,164,30,195,52,0,...,210,85,308,72,300,69,399,87,381,ID: 2.0
2,0,168,58,178,51,0,0,202,58,0,...,190,125,272,80,237,108,358,43,335,ID: 3.0
3,0,0,0,0,0,0,0,0,0,0,...,220,128,298,111,299,171,364,143,364,ID: 5.0
4,1,0,0,0,0,0,0,34,57,86,...,269,110,385,127,368,164,483,147,469,ID: 1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,198,97,78,107,66,83,66,0,0,55,...,270,108,355,99,361,99,436,111,441,ID: 35.0
281,199,80,78,95,68,72,63,119,75,0,...,268,115,371,60,361,116,470,103,422,ID: 35.0
282,200,66,80,82,69,57,66,106,74,0,...,255,138,355,47,361,147,455,35,463,ID: 35.0
283,201,55,78,71,67,46,64,98,73,0,...,267,131,369,54,388,179,432,71,470,ID: 35.0


In [6]:
# Every column except the string `label` column.
columns_to_convert = test_data.columns.difference(['label'])

# Convert to float (this overwrites the columns of test_data in place)
test_data[columns_to_convert] = test_data[columns_to_convert].astype(float)



In [7]:
# Display the table again to check the float conversion.
test_data

Unnamed: 0,frame,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,y13,x14,y14,x15,y15,x16,y16,x17,y17,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,41.0,66.0,0.0,...,275.0,81.0,381.0,164.0,375.0,96.0,478.0,205.0,469.0,ID: 1.0
1,0.0,160.0,40.0,173.0,33.0,158.0,28.0,191.0,46.0,0.0,...,207.0,83.0,311.0,85.0,293.0,64.0,399.0,77.0,378.0,ID: 2.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,247.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ID: 3.0
3,0.0,92.0,49.0,97.0,41.0,84.0,43.0,0.0,0.0,0.0,...,215.0,159.0,306.0,91.0,308.0,213.0,383.0,119.0,385.0,ID: 5.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,59.0,0.0,...,268.0,104.0,386.0,126.0,380.0,142.0,484.0,150.0,474.0,ID: 1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,198.0,89.0,76.0,102.0,64.0,73.0,64.0,0.0,0.0,48.0,...,273.0,107.0,355.0,96.0,360.0,93.0,428.0,100.0,433.0,ID: 35.0
260,199.0,79.0,78.0,96.0,66.0,68.0,63.0,124.0,74.0,0.0,...,270.0,122.0,378.0,77.0,379.0,109.0,457.0,98.0,458.0,ID: 35.0
261,200.0,68.0,80.0,82.0,67.0,57.0,67.0,109.0,71.0,0.0,...,266.0,142.0,368.0,46.0,366.0,140.0,464.0,47.0,459.0,ID: 35.0
262,201.0,55.0,75.0,72.0,63.0,46.0,62.0,101.0,70.0,0.0,...,269.0,128.0,369.0,36.0,368.0,168.0,460.0,74.0,454.0,ID: 35.0


In [125]:
import numpy as np
import pandas as pd

def create_sequences(df, seq_length, min_track_length=5, verbose=True):
    """Build one fixed-length feature sequence per tracked person.

    Rows of ``df`` are grouped by ``label``. Each group is aligned onto the
    frame window ``0 .. seq_length - 1`` with a left merge on ``frame``: rows
    whose frame lies outside the window are dropped, and window frames with no
    observation are filled with 0. A track with no observation inside the
    window therefore yields an all-zero sequence.

    Args:
        df: Frame with a ``frame`` column, a ``label`` column formatted like
            ``"ID: 35.0"``, and the numeric feature columns.
        seq_length: Number of frames in every output sequence.
        min_track_length: Groups with fewer rows than this are skipped.
        verbose: When True, print each kept group's row count and id string.

    Returns:
        ``(xs, pid)``: ``xs`` is ``np.array`` of the per-track feature
        matrices (each ``seq_length`` rows, all columns except ``frame`` and
        ``label``); ``pid`` is the list of integer track ids in the same order.
    """
    xs, pid = [], []

    # The same frame window is used for every track.
    all_frames = pd.DataFrame({'frame': np.arange(0, seq_length)})

    for label, group in df.groupby('label'):
        if len(group) < min_track_length:
            continue
        if verbose:
            print(len(group))

        # Take the id from the group key. Re-parsing it from the merged label
        # column fails when the track has no rows inside the window, because
        # that column then holds no strings at all.
        id_text = str(label).replace("ID: ", "")
        if verbose:
            print([id_text])

        group = group.sort_values(by=['frame']).reset_index(drop=True)

        # Align the track onto the window; missing frames become NaN rows.
        aligned = all_frames.merge(group, on='frame', how='left')

        # Drop the non-feature columns first so the zero fill only ever
        # touches numeric data.
        data_X = aligned.drop(columns=['frame', 'label'], errors='ignore').fillna(0).values

        xs.append(data_X)
        pid.append(int(float(id_text)))
    return np.array(xs), pid

# Frames per sequence. Presumably matches the window the "90frame" LSTM
# checkpoint loaded later was trained on -- TODO confirm.
sequence_length = 90


In [126]:
# Build the sequences: one per track, plus the matching list of track ids
X_seq, pid= create_sequences(test_data, sequence_length)

25
['1.0']
9
['2.0']
43
['3.0']
177
['35.0']
16
['49.0']
12
['5.0']


In [127]:
# Track ids, in the same order as the sequences in X_seq.
pid

[1, 2, 3, 35, 49, 5]

In [130]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from tqdm import tqdm

# Wrap the sequence array as a float tensor for the model.
test_X_tensor = torch.FloatTensor(X_seq)


# Device setup
# NOTE(review): "cuda:1" assumes a second GPU is present -- confirm on the target machine.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

class BinaryLSTMModel(nn.Module):
    """LSTM followed by a single-output linear layer for binary classification.

    ``forward`` returns the raw (pre-sigmoid) output of shape ``(batch, 1)``,
    computed from the LSTM output at the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)  # binary classification: a single output node

    def forward(self, x):
        """Run ``x`` (batch-first: batch, sequence, features) through the network."""
        # Allocate the initial states next to the input rather than on a
        # notebook-global `device`, so the module works wherever `x` lives.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # use the output of the last time step
        return out

# Model initialisation: the hyperparameters are declared once and passed to the
# constructor, instead of repeating the literals in the call.
input_size = X_seq.shape[2]
hidden_size = 50
num_layers = 1

loaded_model = BinaryLSTMModel(input_size, hidden_size, num_layers)

# Load the weights, using map_location to place the tensors on `device`.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
loaded_model.load_state_dict(
    torch.load('/home/alpaco/project/jsw_model/90frame000_LSTM.pt', map_location=device)
)

loaded_model.to(device)
loaded_model.eval()


BinaryLSTMModel(
  (lstm): LSTM(34, 50, batch_first=True)
  (fc): Linear(in_features=50, out_features=1, bias=True)
)

In [131]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# NOTE(review): no ground-truth labels are available in this cell, so
# `correct`, `total` and `all_labels` are never updated.
correct = 0
total = 0
all_preds = []
all_labels = []

with torch.no_grad():
    for i in range(len(test_X_tensor)):
        data = test_X_tensor[i]
        track_id = pid[i]
        # Add the batch dimension and move the sequence to the model's device.
        inputs = data.unsqueeze(0).to(device)
        # Model prediction
        outputs = loaded_model(inputs)
        # Evaluate the sigmoid once and reuse it for the decision and the printout.
        probs = torch.sigmoid(outputs)
        preds = probs.cpu().numpy() > 0.5  # convert to a binary decision
        # Keep the decision instead of discarding it.
        all_preds.append(bool(preds.item()))
        print(probs, track_id)

tensor([[0.0031]], device='cuda:1') 1
tensor([[0.0022]], device='cuda:1') 2
tensor([[0.1099]], device='cuda:1') 3
tensor([[0.9999]], device='cuda:1') 35
tensor([[0.0017]], device='cuda:1') 49
tensor([[0.0080]], device='cuda:1') 5
