In [1]:
import cv2
import time
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

import os
# Set before any heavy numeric work runs; presumably a workaround for the
# "duplicate OpenMP runtime" abort seen on some installs -- TODO confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Side length (in pixels) of the square images fed to the network.
center_size = 300

class CustomDataset(Dataset):
    """Image / (x, y)-label dataset read from two parallel folders.

    Every file in ``image_folder`` is paired with the file in ``label_folder``
    that has the same stem (file name without its extension). Pairing by stem
    rather than by position in two independently sorted listings guarantees
    that an image is never matched with another image's label when the two
    listings sort differently or when one folder contains extra files.

    Args:
        image_folder: Directory containing the input images.
        label_folder: Directory containing one text label file per image.
        transform: Callable applied to the opened PIL image, or a falsy value
            to return the PIL image unchanged.

    Raises:
        FileNotFoundError: If any image has no label file with the same stem.
    """

    def __init__(self, image_folder, label_folder, transform):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.transform = transform

        self.image_files = sorted(os.listdir(image_folder))

        # Index the labels by stem so each image can look up its own label.
        labels_by_stem = {
            os.path.splitext(label_file)[0]: label_file
            for label_file in sorted(os.listdir(label_folder))
        }

        unlabeled = [
            image_file
            for image_file in self.image_files
            if os.path.splitext(image_file)[0] not in labels_by_stem
        ]
        if unlabeled:
            raise FileNotFoundError(
                f"No label file in {label_folder!r} for: {unlabeled[:5]}"
                + (" ..." if len(unlabeled) > 5 else "")
            )

        # Same order and length as image_files, so index i refers to one sample.
        self.label_files = [
            labels_by_stem[os.path.splitext(image_file)[0]]
            for image_file in self.image_files
        ]

    def __len__(self):
        """Number of samples (one per image file)."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a float32 tensor holding the first two whitespace-separated
        numbers of the label file, in file order (used as x, y by the caller).

        Raises:
            ValueError: If the label file holds fewer than two values.
        """
        img_name = os.path.join(self.image_folder, self.image_files[idx])
        label_name = os.path.join(self.label_folder, self.label_files[idx])

        img = Image.open(img_name)
        if self.transform:
            img = self.transform(img)

        with open(label_name, 'r') as label_file:
            fields = label_file.read().split()
        if len(fields) < 2:
            raise ValueError(f"Label file {label_name!r} must contain at least two values")
        x, y = float(fields[0]), float(fields[1])

        return img, torch.tensor([x, y], dtype=torch.float32)
    
class SimpleCNN(nn.Module):
    """Small CNN that regresses two values (x, y) from a 1-channel 300x300 image.

    Spatial size through the feature extractor: 300 -> conv1 -> 147 -> conv2 -> 74
    -> 2x2 max-pool -> 37, which is where the ``60 * 37 * 37`` flatten size comes from.

    A ReLU follows every layer except the output layer. Without any activation the
    four fully connected layers are equivalent to a single linear layer, so the
    extra depth adds parameters but no modelling capacity.

    NOTE: parameter names and shapes are unchanged, so existing checkpoints still
    load, but weights trained without the activations will not give meaningful
    predictions here -- retrain after this change.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=30, kernel_size=10, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=30, out_channels=60, kernel_size=3, stride=2, padding=1)

        self.pool = nn.MaxPool2d(2, 2)

        # Convolution weights are trainable by default (requires_grad=True),
        # so no explicit flag is needed.

        self.fc1 = nn.Linear(60 * 37 * 37, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 250)
        self.fc4 = nn.Linear(250, 2)  # two outputs: (x, y)

    def forward(self, x):
        """Map a ``(N, 1, 300, 300)`` or unbatched ``(1, 300, 300)`` tensor to ``(N, 2)``."""
        x = F.relu(self.conv1(x))             # 300 -> 147
        x = self.pool(F.relu(self.conv2(x)))  # 147 -> 74 -> 37

        # view(-1, ...) rather than view(N, -1): it also gives an unbatched
        # (60, 37, 37) feature map a leading batch dimension of 1.
        x = x.view(-1, 60 * 37 * 37)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.fc4(x)  # linear output for regression

# Training-time preprocessing: grayscale -> tensor -> resize to the network input size.
#
# No RandomHorizontalFlip here: the dataset returns the (x, y) label exactly as
# stored, so mirroring only the image would pair flipped images with un-flipped
# x coordinates and corrupt the regression targets. Flip augmentation has to be
# done where image and label can be mirrored together.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Resize((center_size, center_size)),
])

def cal(n=center_size, p=1, k=2, s=2):
    """Output length of a convolution / pooling layer along one spatial axis.

    Computes ``floor((n + 2*p - k) / s) + 1``. The floor matters whenever
    ``n + 2*p - k`` is not a multiple of ``s``; plain division would report a
    fractional size that the layer never produces.

    Args:
        n: Input length along the axis.
        p: Padding added to each side.
        k: Kernel (window) size.
        s: Stride.

    Returns:
        The output length (an ``int`` when all arguments are ints).
    """
    return (n + 2 * p - k) // s + 1

In [14]:
# Spatial size after each stage of SimpleCNN, for a 300x300 input.
print(cal(n=300, k=10, s=2))       # conv1: kernel 10, stride 2, padding 1
print(cal(n=147, k=3, s=2))        # conv2: kernel 3, stride 2, padding 1
print(cal(n=74, p=0, k=2, s=2))    # MaxPool2d(2, 2): kernel 2, stride 2, no padding

147.0
74.0
37.0


학습

In [17]:
# Train on the edge-detected images; switch image_path to "./images" to use the raw images.
image_path, label_path = "./canny", "./labels"

custom_dataset = CustomDataset(image_path, label_path, transform)
data_loader = DataLoader(custom_dataset, batch_size=32, shuffle=True, num_workers=0)

model = SimpleCNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: 60 passes over the data, logging the loss of every mini-batch.
for epoch in range(60):
    # n is the 1-based batch number within the current epoch.
    for n, (images, labels) in enumerate(data_loader, start=1):
        images = images.float().to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch+1}, Batch {n}, Loss: {loss.item()}")

# Persist only the learned parameters; the later cells rebuild the model and load them.
torch.save(model.state_dict(), "face_center_net.pth")
print("학습 완료")

Epoch 1, Batch 1, Loss: 0.27441543340682983
Epoch 1, Batch 2, Loss: 100.61473846435547
Epoch 1, Batch 3, Loss: 119.94292449951172
Epoch 1, Batch 4, Loss: 5.714507579803467
Epoch 1, Batch 5, Loss: 1.0933119058609009
Epoch 1, Batch 6, Loss: 0.28139618039131165
Epoch 1, Batch 7, Loss: 6.815382957458496
Epoch 1, Batch 8, Loss: 6.184061527252197
Epoch 1, Batch 9, Loss: 3.754985809326172
Epoch 1, Batch 10, Loss: 12.527873992919922
Epoch 1, Batch 11, Loss: 1.3441247940063477
Epoch 1, Batch 12, Loss: 8.158312797546387
Epoch 1, Batch 13, Loss: 6.677063465118408
Epoch 1, Batch 14, Loss: 0.730346143245697
Epoch 1, Batch 15, Loss: 4.48703145980835
Epoch 1, Batch 16, Loss: 4.196167945861816
Epoch 1, Batch 17, Loss: 1.1028870344161987
Epoch 1, Batch 18, Loss: 1.0308284759521484
Epoch 1, Batch 19, Loss: 2.0976643562316895
Epoch 1, Batch 20, Loss: 1.7610297203063965
Epoch 1, Batch 21, Loss: 1.1019501686096191
Epoch 1, Batch 22, Loss: 1.0620453357696533
Epoch 1, Batch 23, Loss: 0.8107064962387085
Epoch

모델 결과 확인(train)

In [None]:
# Rebuild the network on the CPU and load the trained weights.
# map_location="cpu" lets a checkpoint saved from a CUDA model load on a
# machine without a GPU.
model = SimpleCNN()
model.load_state_dict(torch.load("face_center_net.pth", map_location="cpu"))
model.eval()

# Same preprocessing as training, without any random augmentation.
transform2 = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Resize((center_size, center_size))
])

pth = "./canny"
lth = "./labels"

img_list = sorted(os.listdir(pth))
lab_list = sorted(os.listdir(lth))

# zip stops at the shorter listing instead of raising IndexError when the
# two folders do not contain the same number of files.
for img_name, lab_name in zip(img_list, lab_list):
    start_time = time.perf_counter()
    image_path = os.path.join(pth, img_name)
    label_path = os.path.join(lth, lab_name)
    print(img_name, lab_name)

    # OpenCV copy of the image, used only for drawing and display.
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"skipped (not a readable image): {image_path}")
        continue
    image = cv2.resize(image, dsize=(center_size, center_size))

    # Labels are multiplied by the image side length to get pixel coordinates.
    with open(label_path, 'r') as label_file:
        label = list(map(float, label_file.read().split()))
    real_x, real_y = round(center_size * label[0]), round(center_size * label[1])

    # Convert the image to a PyTorch tensor and add the batch dimension explicitly.
    imagein = transform2(Image.open(image_path)).unsqueeze(0)

    with torch.no_grad():
        output = model(imagein)
        model_output = output.squeeze().tolist()
        model_output = [round(x * center_size) for x in model_output]

    # Green: ground truth. Red: model prediction.
    cv2.circle(image, (real_x, real_y), 5, (0, 255, 0), 2)
    cv2.circle(image, (model_output[0], model_output[1]), 5, (0, 0, 255), 2)

    cv2.imshow("model", image)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time

    print(f"실행 시간: {elapsed_time:.3f} 초")
    # Wait for a key press before moving on; 'q' stops the review early.
    key = cv2.waitKey(0)
    if key == ord('q'):
        break
cv2.destroyAllWindows()

모델 결과 확인(video)

In [None]:
# Rebuild the network on the CPU and load the trained weights.
# map_location="cpu" lets a checkpoint saved from a CUDA model load on a
# machine without a GPU.
model = SimpleCNN()
model.load_state_dict(torch.load("face_center_net.pth", map_location="cpu"))
model.eval()

transform2 = transforms.Compose([
    transforms.Grayscale(),  # ensure a single-channel input
    transforms.ToTensor(),
    transforms.Resize((center_size, center_size)),
])


readvideo = cv2.VideoCapture("center_screen_recording.avi")
try:
    ret, image = readvideo.read()

    while ret:
        start_time = time.perf_counter()

        # Edge map for the model, computed on an RGB copy as before. The frame
        # itself stays in BGR because cv2.imshow interprets its input as BGR;
        # showing the RGB copy would swap red and blue on screen.
        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image2 = cv2.Canny(rgb_frame, 100, 200)
        imagein = transform2(Image.fromarray(image2)).unsqueeze(0)

        with torch.no_grad():
            output = model(imagein)
            model_output = output.squeeze().tolist()

        # The frame is displayed at its native resolution, so the prediction is
        # scaled by the frame's own width and height rather than by center_size
        # (which is only correct for a center_size x center_size image).
        frame_h, frame_w = image.shape[:2]
        model_output = [round(model_output[0] * frame_w), round(model_output[1] * frame_h)]

        cv2.circle(image, (model_output[0], model_output[1]), 5, (255, 255, 255), 2)
        cv2.imshow("win", image)
        key = cv2.waitKey(1)

        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"실행 시간: {elapsed_time:.3f} 초")

        if key == ord("q"):
            break

        ret, image = readvideo.read()
finally:
    # Release the capture and close the window even if a frame fails to process.
    readvideo.release()
    cv2.destroyAllWindows()

모델 결과 확인(Canny)

In [7]:
# Folders holding the source images and their labels.
pth = "./images"
lth = "./labels"

# Sorted listings of both folders.
img_list = sorted(os.listdir(pth))
lab_list = sorted(os.listdir(lth))

# Show the Canny edge map of every image with its labelled point drawn on top.
for i in img_list:
    # splitext handles names with several dots or none, and the image is read
    # under its real file name instead of assuming a ".jpg" extension.
    name = os.path.splitext(i)[0]
    im = cv2.imread(os.path.join(pth, i))
    if im is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"skipped (not a readable image): {i}")
        continue
    im = cv2.resize(im, (center_size, center_size))
    im = cv2.Canny(im, 100, 200)
    # Canny returns a single-channel image. On one channel only the first colour
    # component is used, so the green circle and red text below would be drawn
    # with intensity 0 (black). Expand to 3 channels so they are visible.
    im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

    with open(os.path.join(lth, name + ".txt"), "r") as lbr:
        ll = lbr.read().split()
    # Labels are multiplied by the image side length to get pixel coordinates.
    ll = [round(center_size * float(x)) for x in ll]
    cv2.circle(im, (ll[0], ll[1]), 5, (0, 255, 0), 2)
    # Overlay the file name.
    cv2.putText(im, str(i), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Display; any key advances, 'q' quits.
    cv2.imshow("chek", im)
    key = cv2.waitKey(0)
    if key == ord('q'):
        break

# Close every window.
cv2.destroyAllWindows()