In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, f1_score
import cv2
import numpy as np
import os
from skimage.feature import graycomatrix, graycoprops
from sklearn.preprocessing import StandardScaler
from torchvision import datasets
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Fruits360Dataset(Dataset):
    """Hand-crafted texture features for a subset of fruit image classes.

    Every class lives in its own sub-directory of ``data_dir``. An item is a
    ``float32`` tensor made of the mean SIFT descriptor (128 values) followed
    by GLCM contrast and homogeneity (2 values), paired with the integer
    position of its class in ``selected_classes``.

    Args:
        data_dir: Root directory holding one sub-directory per class.
        transform: Optional callable applied to the resized grayscale image
            before feature extraction. Its result is fed to OpenCV SIFT and
            to ``graycomatrix(levels=256)``, so it must be a 2-D ``uint8``
            NumPy array; anything else is rejected with ``TypeError``.
        selected_classes: Class directory names to load, in label order.
            ``None`` loads every sub-directory of ``data_dir``, sorted by name.
    """

    def __init__(self, data_dir, transform=None, selected_classes=None):
        self.data_dir = data_dir
        self.transform = transform
        if selected_classes is None:
            # The signature advertises None as a valid default; interpret it
            # as "all classes" instead of failing inside enumerate().
            selected_classes = sorted(
                entry
                for entry in os.listdir(data_dir)
                if os.path.isdir(os.path.join(data_dir, entry))
            )
        self.selected_classes = list(selected_classes)
        self.image_paths = []
        self.labels = []

        for label, class_name in enumerate(self.selected_classes):
            class_dir = os.path.join(self.data_dir, class_name)
            # os.listdir() order is arbitrary; sorting keeps index -> sample
            # stable between runs and machines.
            for img_name in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_name)
                if not os.path.isfile(img_path):
                    # Nested directories cannot be decoded as images.
                    continue
                self.image_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(features, label)``.

        Raises:
            OSError: If OpenCV cannot read or decode the file.
            TypeError: If ``transform`` returns something other than a 2-D
                ``uint8`` NumPy array.
        """
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"cv2.imread could not read or decode {img_path!r}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        label = self.labels[idx]

        image = cv2.resize(image, (100, 100))

        if self.transform is not None:
            image = self.transform(image)
            is_gray_u8 = (
                isinstance(image, np.ndarray)
                and image.ndim == 2
                and image.dtype == np.uint8
            )
            if not is_gray_u8:
                raise TypeError(
                    "transform must return a 2-D uint8 numpy array for "
                    f"SIFT/GLCM extraction, got {type(image).__name__}"
                )

        sift_features = self.extract_sift(image)
        glcm_features = self.extract_glcm(image)

        features = np.concatenate((sift_features, glcm_features), axis=0)

        return torch.tensor(features, dtype=torch.float32), label

    def extract_sift(self, image):
        """Return the mean SIFT descriptor of ``image`` as a 128-vector.

        Falls back to a zero vector when no keypoint is detected, so every
        item has the same feature length.
        """
        # Created per call on purpose: keeping a cv2 object on the instance
        # could stop the dataset from being pickled for DataLoader workers.
        sift = cv2.SIFT_create()
        _, descriptors = sift.detectAndCompute(image, None)
        # Guard both "no result" forms; averaging an empty array would give NaN.
        if descriptors is None or len(descriptors) == 0:
            return np.zeros((128,), dtype=np.float32)
        return np.mean(descriptors, axis=0)

    def extract_glcm(self, image):
        """Return ``[contrast, homogeneity]`` of the grey-level co-occurrence matrix.

        The matrix is computed for a single offset (distance 1, angle 0) over
        256 grey levels, symmetric and normalised.
        """
        glcm = graycomatrix(image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
        return np.array([contrast, homogeneity])

    def pca_transform(self, image):
        """Project a single flattened image with a freshly fitted 20-component PCA.

        NOTE(review): not called anywhere in this class. The PCA is fitted on
        exactly one sample, and scikit-learn requires
        ``n_components <= min(n_samples, n_features)``, so this is expected to
        raise ``ValueError`` as written. A usable version would need a PCA
        fitted once over the whole training set — confirm before wiring it in.
        """
        flat_img = image.flatten().reshape(1, -1)
        pca = PCA(n_components=20)
        return pca.fit_transform(flat_img).flatten()

class CNNModel(nn.Module):
    """Fully connected classifier over a flat feature vector.

    Despite the name, the network contains no convolutions: it is a stack of
    linear layers narrowing ``input_size -> 512 -> 256 -> 128 -> 64 ->
    num_classes`` with ReLU between them.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Layers are declared in forward order under the names fc1..fc5.
        self.fc1 = nn.Linear(in_features=input_size, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=64)
        self.fc5 = nn.Linear(in_features=64, out_features=num_classes)

    def forward(self, x):
        """Return the raw output of the last linear layer (no softmax applied)."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        return self.fc5(hidden)


In [3]:
# Seed every RNG in play so weight initialisation and batch shuffling repeat
# on a fresh "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

data_dir = "./fruits"
test_dir = "./test_fruits"
selected_classes = ["Banana 1", "Banana Lady Finger 1", "Banana Red 1"]
# Not passed to the datasets below (both use transform=None): feature
# extraction in Fruits360Dataset runs directly on the OpenCV image array.
transform = transforms.Compose([transforms.ToTensor()])

dataset = Fruits360Dataset(data_dir, transform=None, selected_classes=selected_classes)
test_dataset = Fruits360Dataset(test_dir, transform=None, selected_classes=selected_classes)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
# Aggregate metrics do not depend on sample order, so evaluation data is
# left unshuffled.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_size = 128 + 2  # SIFT(128), GLCM(2)
num_classes = len(selected_classes)
model = CNNModel(input_size=input_size, num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [4]:
def train_model(model, train_loader, num_epochs=10, loss_fn=None, opt=None):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Module to optimise; switched to training mode every epoch.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        num_epochs: Number of full passes over ``train_loader``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
            Defaults to the notebook-level ``criterion``.
        opt: Optimizer whose parameters belong to ``model``. Defaults to the
            notebook-level ``optimizer``; pass one explicitly when training a
            model other than the one that global optimizer was built for.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    # Fall back to the notebook globals so existing three-argument calls keep
    # behaving exactly as before. The globals are only looked up when needed.
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            opt.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            opt.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Previously this surfaced as a ZeroDivisionError in the print below.
            raise ValueError("train_loader yielded no batches; nothing to train on")
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/num_batches}')


In [5]:
def evaluate_model(model, train_loader):
    """Print accuracy, macro precision and macro F1 of ``model`` over a loader.

    Args:
        model: Module to score; it is switched to evaluation mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. Despite
            the parameter name, any loader can be passed.
    """
    model.eval()
    targets, predictions = [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in train_loader:
            logits = model(batch_inputs)
            # Index of the largest score along the class dimension.
            best = logits.max(dim=1).indices
            targets += list(batch_labels.numpy())
            predictions += list(best.numpy())

    # All three metrics are computed before anything is printed.
    report = (
        ('Accuracy', accuracy_score(targets, predictions)),
        ('Precision', precision_score(targets, predictions, average='macro')),
        ('F1 Score', f1_score(targets, predictions, average='macro')),
    )
    for title, score in report:
        print(f'{title}: {score}')

In [6]:
# Train for 10 epochs, then print metrics computed on train_loader — the same
# data the model was just fitted on, so these numbers are a training score,
# not an estimate of generalisation.
train_model(model, train_loader, num_epochs=10)
evaluate_model(model, train_loader)

Epoch 1/10, Loss: 0.5057341373629041
Epoch 2/10, Loss: 0.19403033388985527
Epoch 3/10, Loss: 0.20189508414930768
Epoch 4/10, Loss: 0.09502367230339183
Epoch 5/10, Loss: 0.072396225689186
Epoch 6/10, Loss: 0.06587015695145561
Epoch 7/10, Loss: 0.07433514073491096
Epoch 8/10, Loss: 0.09086800155540307
Epoch 9/10, Loss: 0.04204151162670718
Epoch 10/10, Loss: 0.05186029197906868
Accuracy: 0.993006993006993
Precision: 0.9932900325582162
F1 Score: 0.993088905270743


In [8]:
# Score the trained model on test_loader (built from test_dir). The model
# state is whatever the training cell left behind, so run that cell first.
print("Test score : ")
evaluate_model(model, test_loader)

Test score : 
Accuracy: 0.9256198347107438
Precision: 0.9311926843841737
F1 Score: 0.9257751037412055
