In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Resize, Normalize
from transformers import ViTModel
from torch.optim import Adam
from PIL import Image
import numpy as np
import os

# 1. Dataset that yields (anchor, positive, negative) triplets
class TripletDataset(Dataset):
    """Serve (anchor, positive, negative) image triplets from a labelled dataset.

    The wrapped ``dataset`` must expose ``classes`` (class names), ``targets``
    (one class index per sample) and ``samples`` (``(path, class_index)``
    pairs); those are the attributes read below.  Item ``idx`` uses sample
    ``idx`` as the anchor, a random sample of the same class as the positive
    and a random sample of a different class as the negative.
    """

    def __init__(self, dataset, transform=None):
        """Index the samples of ``dataset`` by class and set up preprocessing.

        Args:
            dataset: Labelled image dataset to draw triplets from.
            transform: Optional callable applied to every RGB PIL image.
                Defaults to a 224x224 resize, tensor conversion and
                normalisation with mean 0.5 / std 0.5 per channel.
        """
        self.dataset = dataset
        self.classes = dataset.classes
        # Convert the targets once instead of once per class.
        targets = np.asarray(dataset.targets)
        self.class_to_indices = {
            cls: np.where(targets == class_idx)[0]
            for class_idx, cls in enumerate(self.classes)
        }
        if transform is None:
            transform = Compose([
                Resize((224, 224)),  # resize images to 224x224
                ToTensor(),  # convert images to tensors
                Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # normalise
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of anchors, i.e. the size of the wrapped dataset."""
        return len(self.dataset)

    def _sample_triplet_indices(self, idx):
        """Return ``(anchor_idx, positive_idx, negative_idx)`` for anchor ``idx``.

        Raises:
            ValueError: If no other class has any sample to act as negative.
        """
        anchor_label = self.dataset.samples[idx][1]
        anchor_class = self.classes[anchor_label]

        # Positive: same class, but not the anchor itself whenever the class
        # has another sample; an (x, x) pair has zero distance and teaches
        # the model nothing.
        same_class = self.class_to_indices[anchor_class]
        candidates = same_class[same_class != idx]
        if candidates.size == 0:
            candidates = same_class  # singleton class: fall back to the anchor
        positive_idx = int(np.random.choice(candidates))

        # Negative: pick another class uniformly, then a sample inside it.
        other_classes = [
            cls for cls in self.classes
            if cls != anchor_class and len(self.class_to_indices[cls]) > 0
        ]
        if not other_classes:
            raise ValueError(
                "TripletDataset needs at least two non-empty classes "
                "to sample a negative example"
            )
        negative_class = other_classes[np.random.randint(len(other_classes))]
        negative_idx = int(np.random.choice(self.class_to_indices[negative_class]))

        return idx, positive_idx, negative_idx

    def _load_image(self, path):
        """Open ``path``, convert it to RGB and apply the transform."""
        # The context manager closes the file handle once the pixels have
        # been copied into the converted image.
        with Image.open(path) as image:
            return self.transform(image.convert("RGB"))

    def __getitem__(self, idx):
        """Return the transformed ``(anchor, positive, negative)`` images."""
        anchor_idx, positive_idx, negative_idx = self._sample_triplet_indices(idx)

        anchor_image = self._load_image(self.dataset.samples[anchor_idx][0])
        positive_image = self._load_image(self.dataset.samples[positive_idx][0])
        negative_image = self._load_image(self.dataset.samples[negative_idx][0])

        return anchor_image, positive_image, negative_image

# 2. Attention layer definition
class AttentionLayer(nn.Module):
    """Attention pooling that collapses a token sequence into one vector.

    A two-layer MLP scores every token, a softmax over dim 1 turns the
    scores into weights, and the output is the weighted sum of the tokens.
    """

    def __init__(self, feature_dim):
        """Build the scoring MLP for tokens of size ``feature_dim``."""
        super().__init__()
        hidden_dim = feature_dim // 2
        scorer = [
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1),  # normalise across dim 1 (the token axis)
        ]
        self.attention = nn.Sequential(*scorer)

    def forward(self, x):
        """Pool ``x`` of shape (batch, sequence, feature) to (batch, feature)."""
        # One weight per token: (batch, sequence, 1), broadcast over features.
        weights = self.attention(x)
        return torch.sum(weights * x, dim=1)

# 3. Model definition with the attention mechanism
class ViTWithAttention(nn.Module):
    """Pretrained ViT backbone followed by attention pooling over its tokens."""

    def __init__(self, model_name):
        """Load the ViT checkpoint ``model_name`` and size the pooling head to it."""
        super().__init__()
        backbone = ViTModel.from_pretrained(model_name)
        self.vit = backbone
        self.attention = AttentionLayer(backbone.config.hidden_size)

    def forward(self, x):
        """Return attention-pooled features for the image batch ``x``."""
        # Token outputs: (batch_size, sequence_length, hidden_size)
        tokens = self.vit(x).last_hidden_state
        # Pooled: (batch_size, hidden_size)
        return self.attention(tokens)

    def get_embedding(self, x):
        """Return gradient-free embeddings: the mean of the ViT token outputs.

        NOTE(review): this averages the tokens and does not go through
        ``self.attention``, unlike ``forward`` -- confirm that is intended.
        """
        with torch.no_grad():
            tokens = self.vit(x).last_hidden_state
            return torch.mean(tokens, dim=1)  # average over the tokens

# 4. Triplet loss definition
class TripletLoss(nn.Module):
    """Hinge loss on the gap between anchor-positive and anchor-negative distance."""

    def __init__(self, margin=1.0):
        """Store ``margin``, the amount added to the distance gap before the hinge."""
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the batch mean of ``relu(d(a, p) - d(a, n) + margin)``."""
        d_ap = F.pairwise_distance(anchor, positive)
        d_an = F.pairwise_distance(anchor, negative)
        gap = d_ap - d_an
        hinge = F.relu(gap + self.margin)
        return hinge.mean()

# 5. Load the data from the split folders
data_dir = 'f:/Meysam-Khodarahi/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/src/dataset'
train_dir, val_dir, test_dir = (
    os.path.join(data_dir, split) for split in ("train", "val", "test")
)

# Load each split with ImageFolder
train_dataset = ImageFolder(root=train_dir)
val_dataset = ImageFolder(root=val_dir)
test_dataset = ImageFolder(root=test_dir)

# Wrap the training split so that it yields triplets
train_triplet_dataset = TripletDataset(train_dataset)
train_loader = DataLoader(
    dataset=train_triplet_dataset,
    batch_size=32,
    shuffle=True,
)

# 6. Model, optimizer and loss function
model_name = "google/vit-base-patch16-224"
model = ViTWithAttention(model_name)
optimizer = Adam(params=model.parameters(), lr=1e-5)
triplet_loss = TripletLoss(margin=1.0)

# 7. Train the model and report the per-epoch triplet accuracy
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0
    correct = 0
    total = 0

    for batch in train_loader:
        anchor, positive, negative = batch

        # Embed the three images of every triplet
        anchor_features = model(anchor)
        positive_features = model(positive)
        negative_features = model(negative)

        # Compute the loss and update the model
        loss = triplet_loss(anchor_features, positive_features, negative_features)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        # Accuracy bookkeeping only: run it without autograd so the distance
        # computations do not build graph nodes that are never back-propagated.
        with torch.no_grad():
            distance_positive = F.pairwise_distance(anchor_features, positive_features)
            distance_negative = F.pairwise_distance(anchor_features, negative_features)
            # 1 when the positive is closer than the negative, otherwise 0
            predictions = (distance_positive < distance_negative).float()
        correct += predictions.sum().item()
        total += predictions.size(0)

    # Guard the division so an empty loader reports 0 instead of crashing.
    accuracy = correct / total if total else 0.0
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / max(len(train_loader), 1)}, Accuracy: {accuracy * 100:.2f}%")

# 8. Save the fine-tuned model
torch.save(model.state_dict(), "f:/Meysam-Khodarahi/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/vitWithAttentionState.pth")
# NOTE(review): saving the module object itself relies on pickle, so loading
# this file needs the same class definitions importable; the state_dict file
# above is the more portable artefact.
torch.save(model, "f:/Meysam-Khodarahi/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/vitWithAttention.pth")

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.2731054686009884, Accuracy: 89.33%
Epoch 2, Loss: 0.2093217857182026, Accuracy: 92.89%
Epoch 3, Loss: 0.2727052979171276, Accuracy: 90.67%
Epoch 4, Loss: 0.22591569274663925, Accuracy: 90.67%
Epoch 5, Loss: 0.1403079628944397, Accuracy: 93.33%
Epoch 6, Loss: 0.14155160263180733, Accuracy: 95.11%
Epoch 7, Loss: 0.2088945060968399, Accuracy: 92.44%
Epoch 8, Loss: 0.1017427071928978, Accuracy: 96.44%
Epoch 9, Loss: 0.2026631087064743, Accuracy: 92.44%
Epoch 10, Loss: 0.31031903252005577, Accuracy: 94.67%


In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader

# Assumes test_loader supplies the test data
# and that the model has already been trained.

def evaluate_triplet_model(model, test_loader, device):
    """Score a triplet model on held-out triplets and print the metrics.

    A triplet counts as correct when the anchor embedding is strictly closer
    to the positive embedding than to the negative one.

    Args:
        model: Module exposing ``get_embedding(batch)``; this call switches
            it to eval mode.
        test_loader: Iterable yielding ``(anchor, positive, negative)`` batches.
        device: Device the batches are moved to before embedding.

    Returns:
        The tuple ``(accuracy, precision, recall, f1)``.

    NOTE(review): every ground-truth label is 1, so the weighted
    precision/recall/F1 may carry little information beyond accuracy --
    confirm this is the intended protocol.
    NOTE(review): embeddings come from ``get_embedding`` rather than from
    calling the model directly -- confirm that is the intended embedding.
    """
    model.eval()

    y_true, y_pred = [], []

    with torch.no_grad():
        for batch in test_loader:
            # Move the three image batches to the target device
            anchor, positive, negative = (part.to(device) for part in batch)

            emb_anchor = model.get_embedding(anchor)
            emb_positive = model.get_embedding(positive)
            emb_negative = model.get_embedding(negative)

            # 1.0 where the positive is closer to the anchor than the negative
            closer_to_positive = (
                F.pairwise_distance(emb_anchor, emb_positive)
                < F.pairwise_distance(emb_anchor, emb_negative)
            ).float()

            y_pred.extend(closer_to_positive.cpu().numpy())
            y_true.extend([1] * len(anchor))  # the expected outcome is always 1

    # Convert to numpy arrays for the metric functions
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1 = (
        metric(y_true, y_pred, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    )

    print(f'Accuracy: {accuracy * 100:.2f}%')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1 Score: {f1:.2f}')

    return accuracy, precision, recall, f1

# Run the evaluation on the held-out test split
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Build the test loader from the test split; evaluation order does not
# matter, so the triplets are not shuffled.
test_triplet_dataset = TripletDataset(test_dataset)
test_loader = DataLoader(test_triplet_dataset, batch_size=32, shuffle=False)

evaluate_triplet_model(model, test_loader, device)
