# 

In [1]:
import os
import torch
from torchvision import transforms
from PIL import Image
import numpy as np

In [2]:
def read_images_to_tensor(directory):
    transform = transforms.ToTensor()
    image_tensors = []
    for filename in os.listdir(directory):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(directory, filename)
            image = Image.open(image_path)
            image_tensor = transform(image)
            image_tensors.append(image_tensor)
    return torch.stack(image_tensors)

In [51]:
images= read_images_to_tensor("./data/test")

In [4]:
images.shape

torch.Size([1, 4, 1064, 1900])

In [5]:
def compute_vertical_gradient(images_tensor):
    kernel = (
        torch.tensor([[1, 0, -1], [1, 0, -1], [1, 0, -1]], dtype=torch.float32)
        .unsqueeze(0)
        .unsqueeze(0)
    )
    gradient = torch.nn.functional.conv2d(images_tensor, kernel, padding=1)
    gradient_image = gradient.abs().sum(dim=1, keepdim=True)
    return gradient, gradient_image

In [6]:
ver_grad,ver_grad_img = compute_vertical_gradient(images)

RuntimeError: Given groups=1, weight of size [1, 1, 3, 3], expected input[1, 4, 1064, 1900] to have 1 channels, but got 4 channels instead

In [9]:
def compute_vertical_gradient(images_tensor):
    channels = images_tensor.shape[1]
    kernel = (
        torch.tensor([[1, 0, -1], [1, 0, -1], [1, 0, -1]], dtype=torch.float32)
        .unsqueeze(0)
        .unsqueeze(0)
    )
    kernel = kernel.repeat( 1,channels, 1, 1)  # Repeat the kernel for each channel
    gradient = torch.nn.functional.conv2d(images_tensor, kernel, padding=1)
    gradient_image = gradient.abs().sum(dim=1, keepdim=True)
    return gradient, gradient_image

In [10]:
ver_grad,ver_grad_img = compute_vertical_gradient(images)

In [11]:
ver_grad.shape

torch.Size([1, 1, 1064, 1900])

In [12]:
ver_grad_img.shape

torch.Size([1, 1, 1064, 1900])

In [13]:
def save_images(tensor, directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
    to_pil = transforms.ToPILImage()
    for i, img_tensor in enumerate(tensor):
        img = to_pil(img_tensor)
        img.save(os.path.join(directory, f"image_{i}.png"))

In [14]:
save_images(ver_grad_img,"./fig")

In [15]:
def compute_horizontal_gradient(images_tensor):
    channels = images_tensor.shape[1]
    kernel = (
        torch.tensor([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=torch.float32)
        .unsqueeze(0)
        .unsqueeze(0)
    )
    kernel = kernel.repeat(1, channels, 1, 1)  # Repeat the kernel for each channel
    gradient = torch.nn.functional.conv2d(images_tensor, kernel, padding=1)
    gradient_image = gradient.abs().sum(dim=1, keepdim=True)
    return gradient, gradient_image

In [16]:
ver_grad,ver_grad_img = compute_horizontal_gradient(images)

In [17]:
save_images(ver_grad_img,"./fig")

In [18]:
ver_grad,ver_grad_img = compute_vertical_gradient(images)

In [19]:
hor_grad,hor_grad_img = compute_horizontal_gradient(images)

In [20]:
grad_img=torch.cat([ver_grad_img,hor_grad_img],dim=0)

In [21]:
grad_img.shape

torch.Size([2, 1, 1064, 1900])

In [22]:
save_images(grad_img,"./fig")

In [23]:
def unpickle(file):
        import pickle
        with open(file, 'rb') as fo:
            dict = pickle.load(fo, encoding='bytes')
        return dict

In [25]:
data=unpickle("./data/cifar-10-batches-py/data_batch_1")

In [27]:
type(data)

dict

In [28]:
data.keys()

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])

In [30]:
image_data = data[b'data']

In [31]:
data_tensor = torch.tensor(image_data, dtype=torch.float32)

In [32]:
data_tensor.shape

torch.Size([10000, 3072])

In [33]:
data_tensor=data_tensor.reshape(data_tensor.shape[0],3,32,32)

In [34]:
data_tensor.shape

torch.Size([10000, 3, 32, 32])

In [35]:
save_images(data_tensor,"./fig")

In [36]:
labels = data[b'labels']

In [37]:
labels = torch.tensor(labels)

In [38]:
labels.shape

torch.Size([10000])

In [39]:
labels[:10]

tensor([6, 9, 9, 4, 1, 1, 2, 7, 8, 3])

In [7]:
def read_cifar10_to_tensor(directory):
    def unpickle(file):
        import pickle

        with open(file, "rb") as fo:
            dict = pickle.load(fo, encoding="bytes")
        return dict

    def extract_data_and_labels(batch):
        data = batch[b"data"]
        labels = batch[b"labels"]
        return data, labels

    all_images = []
    all_labels = []
    test_images = []
    test_labels = []
    for filename in os.listdir(directory):
        if filename.startswith("data_batch"):
            data = unpickle(os.path.join(directory, filename))
            images, labels = extract_data_and_labels(data)
            images = torch.tensor(images, dtype=torch.float32).reshape(-1, 3, 32, 32)
            labels = torch.tensor(labels)
            all_images.append(images)
            all_labels.append(labels)
        if filename.startswith("test_batch"):
            data = unpickle(os.path.join(directory, filename))
            images, labels = extract_data_and_labels(data)
            images = torch.tensor(images, dtype=torch.float32).reshape(-1, 3, 32, 32)
            labels = torch.tensor(labels)
            test_images.append(images)
            test_labels.append(labels)
    all_images = torch.cat(all_images, dim=0)
    all_labels = torch.cat(all_labels, dim=0)
    test_images = test_images[0]
    test_labels = test_labels[0]
    return all_images, all_labels, test_images, test_labels

In [8]:
train_images,train_labels,test_images,test_labels = read_cifar10_to_tensor("./data/cifar-10-batches-py/")

In [10]:
import cv2

In [11]:
def calcSiftFeature(img):
    # 设置图像sift特征关键点最大为200
    sift = cv2.SIFT_create()
    # 计算图片的特征点和特征点描述
    # Ensure the input tensor is on CPU and convert to numpy array
    img = img.cpu().numpy()

    # Initialize SIFT detector
    img = np.transpose(img, (1, 2, 0))
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype("uint8")
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # print(img.shape)
    # break

    # Detect SIFT features
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    return descriptors

In [12]:
# 计算词袋
def learnVocabulary(features):
    wordCnt = 50
    # criteria表示迭代停止的模式   eps---精度0.1，max_iter---满足超过最大迭代次数20
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 0.1)
    # 得到k-means聚类的初始中心点
    flags = cv2.KMEANS_RANDOM_CENTERS
    # 标签，中心 = kmeans(输入数据（特征)、聚类的个数K,预设标签，聚类停止条件、重复聚类次数、初始聚类中心点
    compactness, labels, centers = cv2.kmeans(
        features, wordCnt, None, criteria, 20, flags
    )
    return centers


# 计算特征向量
def calcFeatVec(features, centers):
    featVec = np.zeros((1, 50))
    for i in range(0, features.shape[0]):
        # 第i张图片的特征点
        fi = features[i]
        diffMat = np.tile(fi, (50, 1)) - centers
        # axis=1按行求和，即求特征到每个中心点的距离
        sqSum = (diffMat**2).sum(axis=1)
        dist = sqSum**0.5
        # 升序排序
        sortedIndices = dist.argsort()
        # 取出最小的距离，即找到最近的中心点
        idx = sortedIndices[0]
        # 该中心点对应+1
        featVec[0][idx] += 1
    return featVec


# 建立词袋
def build_center(images):
    features = np.float32([]).reshape(0, 128)
    for idx in range(images.shape[0]):
        img = images[idx]
        # 获取图片sift特征点
        img_f = calcSiftFeature(img)
        # 特征点加入训练数据
        # print(img_f.shape)
        # break
        if img_f is None:
            continue
        features = np.append(features, img_f, axis=0)
    # 训练集的词袋
    centers = learnVocabulary(features)
    # #将词袋保存
    filename = "./svm_centers.npy"
    np.save(filename, centers)
    print("词袋:", centers.shape)
    return centers


# 计算训练集图片特征向量
def cal_vec(images):
    centers = np.load("./svm_centers.npy")
    data_vec = np.float32([]).reshape(0, 50)  # 存放训练集图片的特征
    labels = np.float32([])
    # cate=[path+'/'+x for x in os.listdir(path) if os.path.isdir(path+'/'+x)]
    for idx in range(images.shape[0]):
        # 获取图片sift特征点
        # print(idx)
        img_f = calcSiftFeature(images[idx])
        if img_f is None:
            continue
        img_vec = calcFeatVec(img_f, centers)
        data_vec = np.append(data_vec, img_vec, axis=0)
        labels = np.append(labels, idx)
    print("data_vec:", data_vec.shape)
    print("image features vector done!")
    return data_vec, labels

In [13]:
build_center(train_images)

词袋: (50, 128)


array([[16.843645 , 15.270925 , 15.28492  , ...,  4.868086 ,  4.606535 ,
         6.266907 ],
       [41.428413 , 24.268282 , 16.101927 , ...,  5.388106 ,  4.769053 ,
         6.2546806],
       [23.229614 , 49.211975 , 43.35914  , ..., 13.342103 , 14.902228 ,
        18.714949 ],
       ...,
       [18.13149  , 52.194286 , 88.70355  , ..., 10.448127 , 11.786611 ,
        12.380846 ],
       [16.309874 , 16.54392  , 18.711662 , ...,  3.780453 ,  3.5309558,
         8.99455  ],
       [ 8.055738 ,  6.537564 ,  8.386214 , ...,  4.802091 ,  5.3674326,
         7.7283006]], dtype=float32)

In [43]:
data_vec,labels = cal_vec(train_images)

data_vec: (49945, 50)
image features vector done!


In [45]:
y_train = train_labels

In [47]:
data_vec.shape

(49945, 50)

In [46]:
# train the svm model
SVM_Train(data_vec,y_train)

ValueError: Found input variables with inconsistent numbers of samples: [49945, 50000]

In [14]:
import torchvision

In [16]:
from torchvision import transforms

In [17]:
transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

In [18]:
trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )

Files already downloaded and verified


In [21]:
batch_size=16

In [22]:
trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True
    )

In [23]:
for d in trainloader:
    a=d
    break

In [26]:
a[0].shape

torch.Size([16, 3, 32, 32])

In [31]:
a[0][0].shape

torch.Size([3, 32, 32])

In [32]:
trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )

Files already downloaded and verified


In [35]:
trainset.data.shape

(50000, 32, 32, 3)

In [38]:
trainset.targets.shape

AttributeError: 'list' object has no attribute 'shape'

In [40]:
from sklearn import svm

ImportError: cannot import name 'joblib' from 'sklearn.externals' (C:\Users\Admin\miniconda3\envs\data_process\lib\site-packages\sklearn\externals\__init__.py)

In [41]:
#训练SVM分类器
def SVM_Train(data_vec,labels):
    #设置SVM模型参数
    clf = svm.SVC(decision_function_shape='ovo')
    #利用x_train,y_train训练SVM分类器，获得参数
    clf.fit(data_vec,labels)
    joblib.dump(clf, "e:/flowers/svm/svm_model.m")


In [42]:
# train the svm model
SVM_Train(data_vec,y_train)

NameError: name 'data_vec' is not defined

In [None]:
if __name__ == "__main__":
    train_path = 'e:/flowers/train'
    test_path = 'e:/flowers/test'
    #A建立词袋
    build_center(train_path)
    #构建训练集特征向量
    data_vec,labels = cal_vec(train_path)
    #将特征向量和标签输入到SVM分类器中
    SVM_Train(data_vec,y_train)
    print(x_train.shape)
    print(y_train)
    #计算测试集的正确率
    acc,res = SVM_Test(test_path)
    print(acc)
    print(res)
