我想用python做一個程式:
1. 批量讀入照片
2. 依序顯示每張照片，並可以手動選取圖片上的點
3. 記錄點座標並寫成 .csv檔
4. .csv檔格式為 image_path, x, y

In [1]:
import cv2
import pandas as pd
import glob

# Initialize an empty list to store the data
data = []

# Define initial display scale factor
scale_factor = 1.0
min_scale = 0.1
max_scale = 3.0

# Callback function to record points
def select_point(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # Calculate original coordinates based on current scale factor
        orig_x = int(x / scale_factor)
        orig_y = int(y / scale_factor)
        data.append((param, orig_x, orig_y))
        print(f"Point selected at: x={orig_x}, y={orig_y}")

# Load images from a directory (replace 'your_image_folder_path' with your folder path)
image_files = glob.glob('..\..\fantest_pic\pic_train_1127\*.png')  # Adjust file extension as needed

# Process each image
for img_path in image_files:
    # Read the original image
    img = cv2.imread(img_path)
    h, w = img.shape[:2]
    
    # Display the image with interactive scaling
    while True:
        # Resize image according to the current scale factor
        display_img = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)))
        
        # Show the image and set the callback
        cv2.imshow("Image", display_img)
        cv2.setMouseCallback("Image", select_point, img_path)
        
        print("Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.")
        key = cv2.waitKey(0)
        
        if key == ord('n'):
            break
        elif key == ord('+') and scale_factor < max_scale:
            scale_factor += 0.1
        elif key == ord('-') and scale_factor > min_scale:
            scale_factor -= 0.1
        elif key == ord('q'):  # Optional: press 'q' to quit early
            break
    
    if key == ord('q'):
        break

cv2.destroyAllWindows()

# Save the data to a CSV file
df = pd.DataFrame(data, columns=["image_path", "x", "y"])
df.to_csv("selected_points.csv", index=False)
print("Data saved to selected_points.csv")


Data saved to selected_points.csv


In [3]:
import cv2
import pandas as pd
import glob
import sys

# Initialize an empty list to store the data
data = []

# Define initial display scale factor
scale_factor = 1.0
min_scale = 0.1
max_scale = 3.0

# Global quit flag
quit_flag = False

# Callback function to record points
def select_point(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # Calculate original coordinates based on current scale factor
        orig_x = int(x / scale_factor)
        orig_y = int(y / scale_factor)
        data.append((param, orig_x, orig_y))
        print(f"Point selected at: x={orig_x}, y={orig_y}")

# Load images from a directory (adjust the path)
# image_files = glob.glob(r'..\..\fantest_pic\pic_train\*.png')  # Adjust file extension as needed

image_files = glob.glob(r'D:\GitHub\0000WorkSpace\test1127\pic_train_100less\*.png')

if not image_files:
    print("No images found. Please check the path and try again.")
    sys.exit()

# Process each image
for img_path in image_files:
    if quit_flag:  # If quit_flag is True, exit the loop
        break
    
    # Read the original image
    img = cv2.imread(img_path)
    if img is None:
        print(f"Failed to load image: {img_path}")
        continue
    h, w = img.shape[:2]
    
    # Display the image with interactive scaling
    while True:
        if quit_flag:  # If quit_flag is True, exit the loop
            break
        
        # Resize image according to the current scale factor
        display_img = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)))
        
        # Show the image and set the callback
        window_name = f"Image: {img_path}"
        cv2.imshow(window_name, display_img)
        cv2.setMouseCallback(window_name, select_point, img_path)
        
        print("Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.")
        key = cv2.waitKey(0) & 0xFF  # Handle multi-byte input
        
        if key == ord('n'):  # Move to the next image
            break
        elif key == ord('+') and scale_factor < max_scale:  # Zoom in
            scale_factor += 0.1
            print(f"Zoom in: Current scale factor = {scale_factor:.1f}")
        elif key == ord('-') and scale_factor > min_scale:  # Zoom out
            scale_factor -= 0.1
            print(f"Zoom out: Current scale factor = {scale_factor:.1f}")
        elif key == ord('q'):  # Quit the program
            quit_flag = True
            break
    
    cv2.destroyWindow(window_name)  # Close current image window

# Cleanup and save data if not quitting early
if not quit_flag:
    cv2.destroyAllWindows()

# Save the data to a CSV file
if data:
    df = pd.DataFrame(data, columns=["image_path", "x", "y"])
    df.to_csv("selected_points.csv", index=False)
    print("Data saved to selected_points.csv")
else:
    print("No points selected.")


Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom out: Current scale factor = 0.9
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom out: Current scale factor = 0.8
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom out: Current scale factor = 0.7
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom out: Current scale factor = 0.6
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom in: Current scale factor = 0.7
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Zoom in: Current scale factor = 0.8
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image, 'q' to quit.
Point selected at: x=149

=================================

In [9]:
import pandas as pd
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class FanDataset(Dataset):
    def __init__(self, annotations_file, img_size=64):
        self.annotations = pd.read_csv(annotations_file)
        self.img_size = img_size
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_path = self.annotations.iloc[idx, 0]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (self.img_size, self.img_size)) / 2047.0  # Resize and normalize
        image = image.reshape(1, self.img_size, self.img_size)  # Reshape for CNN input

        # Label: x and y coordinates of the corner
        label = self.annotations.iloc[idx, 1:3].values.astype(np.float32)
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 使用自己的标注文件
dataset = FanDataset('selected_points_1126test.csv')
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [14]:
import torch.nn as nn
import torch.nn.functional as F

class CNNPointDetector(nn.Module):
    def __init__(self):
        super(CNNPointDetector, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # Output (x, y) coordinates
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(F.max_pool2d(self.conv3(x), 2))
        x = x.view(-1, 64 * 8 * 8)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model
model = CNNPointDetector()

In [16]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 550
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}", 'about',(num_epochs-epoch)/1.5,'min left')

Epoch [1/550], Loss: 5239423.0000 about 366.6666666666667 min left
Epoch [2/550], Loss: 5011712.3000 about 366.0 min left
Epoch [3/550], Loss: 4704962.5000 about 365.3333333333333 min left
Epoch [4/550], Loss: 4042241.2500 about 364.6666666666667 min left
Epoch [5/550], Loss: 2753140.6750 about 364.0 min left
Epoch [6/550], Loss: 1119652.4625 about 363.3333333333333 min left
Epoch [7/550], Loss: 437938.6688 about 362.6666666666667 min left
Epoch [8/550], Loss: 593359.1500 about 362.0 min left
Epoch [9/550], Loss: 209908.4625 about 361.3333333333333 min left
Epoch [10/550], Loss: 242582.7313 about 360.6666666666667 min left
Epoch [11/550], Loss: 236099.0000 about 360.0 min left
Epoch [12/550], Loss: 182278.4500 about 359.3333333333333 min left
Epoch [13/550], Loss: 196909.8906 about 358.6666666666667 min left
Epoch [14/550], Loss: 177374.2500 about 358.0 min left
Epoch [15/550], Loss: 164212.3906 about 357.3333333333333 min left
Epoch [16/550], Loss: 163962.3875 about 356.6666666666667 

In [11]:
def evaluate(model, dataset, num_samples=30):
    model.eval()
    with torch.no_grad():
        for i in range(num_samples):
            img, label = dataset[i]
            img = img.unsqueeze(0)  # Add batch dimension
            
            # Model prediction
            prediction = model(img)
            print(f"True: {label.numpy()}, Predicted: {prediction.numpy().flatten()}")

# Test on some sample
evaluate(model, dataset)


True: [2374. 2429.], Predicted: [2347.8853 2483.734 ]
True: [2869. 2437.], Predicted: [2901.2551 2456.575 ]
True: [2907. 2384.], Predicted: [2895.3508 2492.4067]
True: [2924. 2309.], Predicted: [2816.6062 2343.421 ]
True: [2662. 1697.], Predicted: [2616.9502 1664.3225]
True: [2543. 1531.], Predicted: [2621.806  1521.9154]
True: [2411. 1355.], Predicted: [2428.1208 1337.626 ]
True: [2469. 1651.], Predicted: [2424.9668 1711.144 ]
True: [2491. 1765.], Predicted: [2388.5059 1757.7783]
True: [2492. 1862.], Predicted: [2412.645  1784.1642]
True: [2219. 2124.], Predicted: [2155.366  2132.2395]
True: [2112. 2129.], Predicted: [2128.967  2082.1562]
True: [2422. 2634.], Predicted: [2431.132  2592.3887]
True: [1992. 2117.], Predicted: [1971.51  2097.458]
True: [1164. 1824.], Predicted: [1221.2678 1787.3195]
True: [1252. 1934.], Predicted: [1197.6165 1944.7664]
True: [1394. 2077.], Predicted: [1368.2308 2023.23  ]
True: [2496. 2736.], Predicted: [2528.5356 2761.9802]
True: [1943. 2556.], Predicted

In [17]:
torch.save(model.state_dict(), './fan_corner_detector_1125.pth')

===================================================1114

In [7]:
import torch
model = CNNPointDetector()
model.load_state_dict(torch.load('fan_corner_detector_1126.pth'))
model.eval() 

NameError: name 'CNNPointDetector' is not defined

In [3]:
import pandas as pd
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [4]:
class FanDataset(Dataset):
    def __init__(self, annotations_file, img_size=64):
        self.annotations = pd.read_csv(annotations_file)
        self.img_size = img_size
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_path = self.annotations.iloc[idx, 0]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (self.img_size, self.img_size)) / 2047.0  # Resize and normalize
        image = image.reshape(1, self.img_size, self.img_size)  # Reshape for CNN input

        # Label: x and y coordinates of the corner
        label = self.annotations.iloc[idx, 1:3].values.astype(np.float32)
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 使用自己的标注文件
dataset = FanDataset('selected_points_2.csv')

In [7]:
def evaluate(model, dataset, num_samples=40):
    model.eval()
    with torch.no_grad():
        for i in range(num_samples):
            img, label = dataset[i]
            img = img.unsqueeze(0)  # Add batch dimension
            
            # Model prediction
            prediction = model(img)
            print(f"True: {label.numpy()}, Predicted: {prediction.numpy().flatten()}")

# Test on some sample
evaluate(model, dataset)

True: [1078. 1428.], Predicted: [1068.0464 1406.3397]
True: [1094. 1444.], Predicted: [1084.3955 1422.9469]
True: [1119. 1459.], Predicted: [1101.6335 1460.2899]
True: [1136. 1474.], Predicted: [1131.2876 1477.3566]
True: [1153. 1486.], Predicted: [1150.9265 1484.466 ]
True: [1169. 1501.], Predicted: [1159.7096 1487.786 ]
True: [1188. 1513.], Predicted: [1164.613 1512.206]
True: [1203. 1524.], Predicted: [1185.8314 1508.0747]
True: [1219. 1538.], Predicted: [1197.6195 1522.8973]
True: [1239. 1551.], Predicted: [1203.9678 1532.4856]
True: [1253. 1561.], Predicted: [1220.4241 1540.379 ]
True: [1266. 1571.], Predicted: [1253.1174 1555.8607]
True: [1281. 1583.], Predicted: [1251.1953 1553.0763]
True: [1296. 1591.], Predicted: [1284.8665 1588.1522]
True: [1311. 1601.], Predicted: [1304.0955 1608.6276]
True: [1318. 1608.], Predicted: [1322.063  1620.7413]
True: [1336. 1616.], Predicted: [1340.7534 1620.39  ]
True: [1348. 1626.], Predicted: [1347.2761 1613.0803]
True: [1358. 1633.], Predicted

## load

In [8]:
import torch
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F



class CNNPointDetector(nn.Module):
    def __init__(self):
        super(CNNPointDetector, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # Output (x, y) coordinates
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(F.max_pool2d(self.conv3(x), 2))
        x = x.view(-1, 64 * 8 * 8)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model
model = CNNPointDetector()
# 初始化模型
model = CNNPointDetector()
model.load_state_dict(torch.load('fan_corner_detector_1126.pth'))  # 加载模型参数
model.train()

  model.load_state_dict(torch.load('fan_corner_detector_1126.pth'))  # 加载模型参数


CNNPointDetector(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)