我想用python做一個程式:
1. 批量讀入照片
2.  依序顯示每張照片，並可以手動選取圖片上的點
3. 記錄點座標並寫成 .csv檔
4. .csv檔格式為 image_path, x, y

In [19]:
import cv2
import pandas as pd
import glob

# Initialize an empty list to store the data
data = []

# Define initial display scale factor
scale_factor = 1.0
min_scale = 0.1
max_scale = 3.0

# Callback function to record points
def select_point(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # Calculate original coordinates based on current scale factor
        orig_x = int(x / scale_factor)
        orig_y = int(y / scale_factor)
        data.append((param, orig_x, orig_y))
        print(f"Point selected at: x={orig_x}, y={orig_y}")

# Load images from a directory (replace 'your_image_folder_path' with your folder path)
image_files = glob.glob('.\pic\*.png')  # Adjust file extension as needed

# Process each image
for img_path in image_files:
    # Read the original image
    img = cv2.imread(img_path)
    h, w = img.shape[:2]
    
    # Display the image with interactive scaling
    while True:
        # Resize image according to the current scale factor
        display_img = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)))
        
        # Show the image and set the callback
        cv2.imshow("Image", display_img)
        cv2.setMouseCallback("Image", select_point, img_path)
        
        print("Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.")
        key = cv2.waitKey(0)
        
        if key == ord('n'):
            break
        elif key == ord('+') and scale_factor < max_scale:
            scale_factor += 0.1
        elif key == ord('-') and scale_factor > min_scale:
            scale_factor -= 0.1
        elif key == ord('q'):  # Optional: press 'q' to quit early
            break
    
    if key == ord('q'):
        break

cv2.destroyAllWindows()

# Save the data to a CSV file
df = pd.DataFrame(data, columns=["image_path", "x", "y"])
df.to_csv("selected_points.csv", index=False)
print("Data saved to selected_points.csv")


Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=874, y=1251
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=759, y=894
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=885, y=1075
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=897, y=1095
Click on the i

=================================

In [20]:
import pandas as pd
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class FanDataset(Dataset):
    def __init__(self, annotations_file, img_size=64):
        self.annotations = pd.read_csv(annotations_file)
        self.img_size = img_size
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_path = self.annotations.iloc[idx, 0]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (self.img_size, self.img_size)) / 2047.0  # Resize and normalize
        image = image.reshape(1, self.img_size, self.img_size)  # Reshape for CNN input

        # Label: x and y coordinates of the corner
        label = self.annotations.iloc[idx, 1:3].values.astype(np.float32)
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 使用自己的标注文件
dataset = FanDataset('selected_points.csv')
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [21]:
import torch.nn as nn
import torch.nn.functional as F

class CNNPointDetector(nn.Module):
    def __init__(self):
        super(CNNPointDetector, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # Output (x, y) coordinates
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(F.max_pool2d(self.conv3(x), 2))
        x = x.view(-1, 64 * 8 * 8)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model
model = CNNPointDetector()

In [24]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

Epoch [1/1000], Loss: 1184355.4375
Epoch [2/1000], Loss: 1088865.6250
Epoch [3/1000], Loss: 991240.9375
Epoch [4/1000], Loss: 861901.9375
Epoch [5/1000], Loss: 721252.0938
Epoch [6/1000], Loss: 553222.9688
Epoch [7/1000], Loss: 366246.7500
Epoch [8/1000], Loss: 197410.6953
Epoch [9/1000], Loss: 73881.1680
Epoch [10/1000], Loss: 31509.8867
Epoch [11/1000], Loss: 81084.6836
Epoch [12/1000], Loss: 158630.5703
Epoch [13/1000], Loss: 154425.6719
Epoch [14/1000], Loss: 84663.8066
Epoch [15/1000], Loss: 41562.8184
Epoch [16/1000], Loss: 27468.2637
Epoch [17/1000], Loss: 37901.6113
Epoch [18/1000], Loss: 52145.6445
Epoch [19/1000], Loss: 56986.0820
Epoch [20/1000], Loss: 57230.7695
Epoch [21/1000], Loss: 49032.2188
Epoch [22/1000], Loss: 37042.7266
Epoch [23/1000], Loss: 28662.0508
Epoch [24/1000], Loss: 27054.1494
Epoch [25/1000], Loss: 31395.0049
Epoch [26/1000], Loss: 37415.1602
Epoch [27/1000], Loss: 35384.4668
Epoch [28/1000], Loss: 31872.3818
Epoch [29/1000], Loss: 29880.4834
Epoch [30/1

In [25]:
def evaluate(model, dataset, num_samples=30):
    model.eval()
    with torch.no_grad():
        for i in range(num_samples):
            img, label = dataset[i]
            img = img.unsqueeze(0)  # Add batch dimension
            
            # Model prediction
            prediction = model(img)
            print(f"True: {label.numpy()}, Predicted: {prediction.numpy().flatten()}")

# Test on some samples
evaluate(model, dataset)


True: [ 874. 1251.], Predicted: [ 873.7272 1187.0973]
True: [759. 894.], Predicted: [782.4226  943.75964]
True: [ 885. 1075.], Predicted: [ 887.7649 1076.9205]
True: [ 897. 1095.], Predicted: [ 898.4181 1092.9603]
True: [ 909. 1112.], Predicted: [ 909.65405 1109.308  ]
True: [ 922. 1131.], Predicted: [ 923.1651 1131.4318]
True: [ 932. 1150.], Predicted: [ 930.1482 1141.2787]
True: [ 947. 1165.], Predicted: [ 946.23413 1166.8523 ]
True: [ 957. 1184.], Predicted: [ 960.1891 1186.9962]
True: [ 967. 1198.], Predicted: [ 965.09576 1194.1664 ]
True: [ 981. 1217.], Predicted: [ 977.06464 1210.7899 ]
True: [ 988. 1232.], Predicted: [ 989.1035 1227.6542]
True: [774. 915.], Predicted: [778.44006 933.1844 ]
True: [ 999. 1247.], Predicted: [1000.2354 1245.9167]
True: [1007. 1262.], Predicted: [1010.62305 1266.0848 ]
True: [1015. 1277.], Predicted: [1017.8845 1272.5126]
True: [1028. 1292.], Predicted: [1025.0977 1286.0288]
True: [1038. 1305.], Predicted: [1033.9382 1300.0475]
True: [1044. 1318.], P

In [26]:
torch.save(model.state_dict(), './fan_corner_detector.pth')

In [27]:
model = CNNPointDetector()  # 重新初始化模型
model.load_state_dict(torch.load('fan_corner_detector.pth'))
model.eval()  # 切换到评估模式

  model.load_state_dict(torch.load('fan_corner_detector.pth'))


CNNPointDetector(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)