我想用python做一個程式:
1. 批量讀入照片
2. 依序顯示每張照片，並可以手動選取圖片上的點
3. 記錄點座標並寫成 .csv檔
4. .csv檔格式為 image_path, x, y

In [2]:
import cv2
import pandas as pd
import glob

# Initialize an empty list to store the data
data = []

# Define initial display scale factor
scale_factor = 1.0
min_scale = 0.1
max_scale = 3.0

# Callback function to record points
def select_point(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # Calculate original coordinates based on current scale factor
        orig_x = int(x / scale_factor)
        orig_y = int(y / scale_factor)
        data.append((param, orig_x, orig_y))
        print(f"Point selected at: x={orig_x}, y={orig_y}")

# Load images from a directory (replace 'your_image_folder_path' with your folder path)
image_files = glob.glob('.\pic_3\*.png')  # Adjust file extension as needed

# Process each image
for img_path in image_files:
    # Read the original image
    img = cv2.imread(img_path)
    h, w = img.shape[:2]
    
    # Display the image with interactive scaling
    while True:
        # Resize image according to the current scale factor
        display_img = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)))
        
        # Show the image and set the callback
        cv2.imshow("Image", display_img)
        cv2.setMouseCallback("Image", select_point, img_path)
        
        print("Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.")
        key = cv2.waitKey(0)
        
        if key == ord('n'):
            break
        elif key == ord('+') and scale_factor < max_scale:
            scale_factor += 0.1
        elif key == ord('-') and scale_factor > min_scale:
            scale_factor -= 0.1
        elif key == ord('q'):  # Optional: press 'q' to quit early
            break
    
    if key == ord('q'):
        break

cv2.destroyAllWindows()

# Save the data to a CSV file
df = pd.DataFrame(data, columns=["image_path", "x", "y"])
df.to_csv("selected_points.csv", index=False)
print("Data saved to selected_points.csv")


Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=1151, y=1486
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=1249, y=1561
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=1336, y=1619
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Point selected at: x=1401, y=1658
Click on the image to select a point. Use '+' to zoom in, '-' to zoom out, 'n' for next image.
Click on 

=================================

In [18]:
import pandas as pd
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class FanDataset(Dataset):
    def __init__(self, annotations_file, img_size=64):
        self.annotations = pd.read_csv(annotations_file)
        self.img_size = img_size
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_path = self.annotations.iloc[idx, 0]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (self.img_size, self.img_size)) / 2047.0  # Resize and normalize
        image = image.reshape(1, self.img_size, self.img_size)  # Reshape for CNN input

        # Label: x and y coordinates of the corner
        label = self.annotations.iloc[idx, 1:3].values.astype(np.float32)
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 使用自己的标注文件
dataset = FanDataset('selected_points_1114.csv')
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [1]:
import torch.nn as nn
import torch.nn.functional as F

class CNNPointDetector(nn.Module):
    def __init__(self):
        super(CNNPointDetector, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # Output (x, y) coordinates
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(F.max_pool2d(self.conv3(x), 2))
        x = x.view(-1, 64 * 8 * 8)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model
model = CNNPointDetector()

In [22]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1500
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}", 'about',(1499-epoch)/12,'min left')

Epoch [1/1500], Loss: 76365.6191 about 124.91666666666667 min left
Epoch [2/1500], Loss: 70663.3320 about 124.83333333333333 min left
Epoch [3/1500], Loss: 67505.6562 about 124.75 min left
Epoch [4/1500], Loss: 56694.7188 about 124.66666666666667 min left
Epoch [5/1500], Loss: 62997.4082 about 124.58333333333333 min left
Epoch [6/1500], Loss: 64038.4688 about 124.5 min left
Epoch [7/1500], Loss: 58909.6016 about 124.41666666666667 min left
Epoch [8/1500], Loss: 55721.2422 about 124.33333333333333 min left
Epoch [9/1500], Loss: 59215.7500 about 124.25 min left
Epoch [10/1500], Loss: 58352.9824 about 124.16666666666667 min left
Epoch [11/1500], Loss: 56927.4277 about 124.08333333333333 min left
Epoch [12/1500], Loss: 57815.1992 about 124.0 min left
Epoch [13/1500], Loss: 58136.8535 about 123.91666666666667 min left
Epoch [14/1500], Loss: 57067.2070 about 123.83333333333333 min left
Epoch [15/1500], Loss: 56808.9922 about 123.75 min left
Epoch [16/1500], Loss: 56690.3066 about 123.6666666

In [25]:
def evaluate(model, dataset, num_samples=30):
    model.eval()
    with torch.no_grad():
        for i in range(num_samples):
            img, label = dataset[i]
            img = img.unsqueeze(0)  # Add batch dimension
            
            # Model prediction
            prediction = model(img)
            print(f"True: {label.numpy()}, Predicted: {prediction.numpy().flatten()}")

# Test on some sample
evaluate(model, dataset)


True: [ 874. 1251.], Predicted: [ 873.7272 1187.0973]
True: [759. 894.], Predicted: [782.4226  943.75964]
True: [ 885. 1075.], Predicted: [ 887.7649 1076.9205]
True: [ 897. 1095.], Predicted: [ 898.4181 1092.9603]
True: [ 909. 1112.], Predicted: [ 909.65405 1109.308  ]
True: [ 922. 1131.], Predicted: [ 923.1651 1131.4318]
True: [ 932. 1150.], Predicted: [ 930.1482 1141.2787]
True: [ 947. 1165.], Predicted: [ 946.23413 1166.8523 ]
True: [ 957. 1184.], Predicted: [ 960.1891 1186.9962]
True: [ 967. 1198.], Predicted: [ 965.09576 1194.1664 ]
True: [ 981. 1217.], Predicted: [ 977.06464 1210.7899 ]
True: [ 988. 1232.], Predicted: [ 989.1035 1227.6542]
True: [774. 915.], Predicted: [778.44006 933.1844 ]
True: [ 999. 1247.], Predicted: [1000.2354 1245.9167]
True: [1007. 1262.], Predicted: [1010.62305 1266.0848 ]
True: [1015. 1277.], Predicted: [1017.8845 1272.5126]
True: [1028. 1292.], Predicted: [1025.0977 1286.0288]
True: [1038. 1305.], Predicted: [1033.9382 1300.0475]
True: [1044. 1318.], P

In [23]:
torch.save(model.state_dict(), './fan_corner_detector_1114.pth')

===================================================1114

In [6]:
import torch
model = CNNPointDetector()
model.load_state_dict(torch.load('fan_corner_detector_1114.pth'))
model.eval() 

  model.load_state_dict(torch.load('fan_corner_detector_1114.pth'))


CNNPointDetector(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [3]:
import pandas as pd
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [4]:
class FanDataset(Dataset):
    def __init__(self, annotations_file, img_size=64):
        self.annotations = pd.read_csv(annotations_file)
        self.img_size = img_size
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_path = self.annotations.iloc[idx, 0]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (self.img_size, self.img_size)) / 2047.0  # Resize and normalize
        image = image.reshape(1, self.img_size, self.img_size)  # Reshape for CNN input

        # Label: x and y coordinates of the corner
        label = self.annotations.iloc[idx, 1:3].values.astype(np.float32)
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# 使用自己的标注文件
dataset = FanDataset('selected_points_2.csv')

In [7]:
def evaluate(model, dataset, num_samples=40):
    model.eval()
    with torch.no_grad():
        for i in range(num_samples):
            img, label = dataset[i]
            img = img.unsqueeze(0)  # Add batch dimension
            
            # Model prediction
            prediction = model(img)
            print(f"True: {label.numpy()}, Predicted: {prediction.numpy().flatten()}")

# Test on some sample
evaluate(model, dataset)

True: [1078. 1428.], Predicted: [1068.0464 1406.3397]
True: [1094. 1444.], Predicted: [1084.3955 1422.9469]
True: [1119. 1459.], Predicted: [1101.6335 1460.2899]
True: [1136. 1474.], Predicted: [1131.2876 1477.3566]
True: [1153. 1486.], Predicted: [1150.9265 1484.466 ]
True: [1169. 1501.], Predicted: [1159.7096 1487.786 ]
True: [1188. 1513.], Predicted: [1164.613 1512.206]
True: [1203. 1524.], Predicted: [1185.8314 1508.0747]
True: [1219. 1538.], Predicted: [1197.6195 1522.8973]
True: [1239. 1551.], Predicted: [1203.9678 1532.4856]
True: [1253. 1561.], Predicted: [1220.4241 1540.379 ]
True: [1266. 1571.], Predicted: [1253.1174 1555.8607]
True: [1281. 1583.], Predicted: [1251.1953 1553.0763]
True: [1296. 1591.], Predicted: [1284.8665 1588.1522]
True: [1311. 1601.], Predicted: [1304.0955 1608.6276]
True: [1318. 1608.], Predicted: [1322.063  1620.7413]
True: [1336. 1616.], Predicted: [1340.7534 1620.39  ]
True: [1348. 1626.], Predicted: [1347.2761 1613.0803]
True: [1358. 1633.], Predicted