In [4]:
from torch import nn
import sys
sys.path.insert(0,'f:/Meysam-Khodarahi/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/src/')
import numpy as np
from sklearn.manifold import TSNE
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import transforms
from torch.autograd import Variable
import os
import pandas as pd
import seaborn as sns
from dataloaders import get_train_transforms, get_val_transforms, get_triplet_dataloader
from transformers import ViTForImageClassification, ViTFeatureExtractor
import torch
from sklearn.metrics import accuracy_score, f1_score , precision_score , recall_score
from sklearn.manifold import TSNE
import torch
import torch.nn as nn
import torchvision

path_data = 'f:/Meysam-Khodarahi/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/src/dataset'
device = torch.cuda.is_available()

def generate_embeddings(data_loader, model):
    with torch.no_grad():
        #device = 'cuda'
        model.eval()
        #model.to(device)
        embeddings = []
        labels = []
        for batch_imgs, batch_labels in data_loader:
            if device:
                batch_imgs = batch_imgs.cuda()
            batch_E = model.get_embedding(batch_imgs)
            embeddings.append(batch_E.cpu().numpy())
            labels.append(batch_labels.numpy())
    return np.concatenate(embeddings), np.concatenate(labels)

##############################

class PatchExtractor(nn.Module):
    def __init__(self, patch_size=14):
        super(PatchExtractor, self).__init__()
        self.patch_size = patch_size

    def forward(self, x):
        """
        تصویر ورودی را به پچ های 14x14 تقسیم می کند.
        ورودی: x با ابعاد (batch_size, channels, height, width)
        خروجی: پچ ها با ابعاد (batch_size, num_patches_h, num_patches_w, channels, patch_size, patch_size)
        """
        batch_size, channels, height, width = x.shape
        
        # اطمینان از اینکه ابعاد تصویر بر patch_size بخش پذیر است
        assert height % self.patch_size == 0 and width % self.patch_size == 0, \
            "Image dimensions must be divisible by patch_size"
        
        # تعداد پچ ها در هر بعد
        num_patches_h = height // self.patch_size
        num_patches_w = width // self.patch_size
        
        # استفاده از unfold برای تقسیم تصویر به پچ ها
        patches = x.unfold(2, self.patch_size, self.patch_size).unfold(3, self.patch_size, self.patch_size)
        # patches shape: (batch_size, channels, num_patches_h, num_patches_w, patch_size, patch_size)
        
        # تغییر شکل پچ ها به (batch_size, num_patches_h, num_patches_w, channels, patch_size, patch_size)
        patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
        
        return patches

def generate_embeddings(data_loader, patch_extractor, model):
    """
    تصاویر را به پچ های 14x14 تقسیم می کند و هر پچ را به امبدینگ تبدیل می کند.
    سپس امبدینگ ها را در یک ماتریس به همان ترتیب مکانی قرار می دهد.
    """
    patch_extractor.eval()
    model.eval()
    all_embeddings = []
    all_labels = []

    with torch.no_grad():
        for batch_imgs, batch_labels in data_loader:
            if torch.cuda.is_available():
                batch_imgs = batch_imgs.cuda()
            # استخراج پچ ها
            patches = patch_extractor(batch_imgs)  # (batch_size, num_patches_h, num_patches_w, channels, patch_size, patch_size)
            batch_size, num_patches_h, num_patches_w, channels, _, _ = patches.shape

            # تبدیل هر پچ به امبدینگ
            patch_embeddings = []
            for i in range(batch_size):
                single_image_patches = patches[i]  # (num_patches_h, num_patches_w, channels, patch_size, patch_size)
                single_image_embeddings = []
                for h in range(num_patches_h):
                    for w in range(num_patches_w):
                        # هر پچ را به model.get_embedding می دهیم
                        patch = single_image_patches[h, w]  # (channels, patch_size, patch_size)
                        patch = patch.unsqueeze(0)  # (1, channels, patch_size, patch_size)
                        embedding = model.get_embedding(patch)  # (1, embedding_dim)
                        single_image_embeddings.append(embedding.squeeze(0))  # (embedding_dim)
                
                # تبدیل لیست امبدینگ ها به یک ماتریس
                single_image_embeddings = torch.stack(single_image_embeddings, dim=0)  # (num_patches_h * num_patches_w, embedding_dim)
                single_image_embeddings = single_image_embeddings.view(num_patches_h, num_patches_w, -1)  # (num_patches_h, num_patches_w, embedding_dim)
                patch_embeddings.append(single_image_embeddings)
            
            # جمع آوری امبدینگ ها برای کل بچ
            patch_embeddings = torch.stack(patch_embeddings, dim=0)  # (batch_size, num_patches_h, num_patches_w, embedding_dim)
            all_embeddings.append(patch_embeddings.cpu())
            all_labels.append(batch_labels)

    # Concatenate all embeddings and labels
    all_embeddings = torch.cat(all_embeddings, dim=0)  # (total_images, num_patches_h, num_patches_w, embedding_dim)
    all_labels = torch.cat(all_labels, dim=0)  # (total_images)
    return all_embeddings, all_labels

# Initialize models
# if device:
#patch_extractor = PatchExtractor(patch_size=14).cuda()
patch_extractor = PatchExtractor(patch_size=14)   
# Load your data
train_data = torchvision.datasets.ImageFolder(root=path_data + '/train/', transform=get_val_transforms())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
  
siamese_model = torch.load("C:/Users/Mey/Documents/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/siamese_model_EmbeddingGEnerator.h5",map_location=torch.device('cuda'))
siamese_model.eval()
# Generate embeddings
train_embeddings, train_labels = generate_embeddings(train_loader, patch_extractor, siamese_model)

# Now train_embeddings is a tensor of shape (total_images, num_patches_h, num_patches_w, embedding_dim)

 
print("siamese_model")
   
''' 
class RefinedViT(nn.Module):
    def __init__(self, original_vit_model,embedding_dim,  num_classes):
        super(RefinedViT, self).__init__()
        # حفظ لایه های اصلی ViT
        self.num_patches = (224 // 16) * (224 // 16)  # برای تصویر 224x224 با پچ 16x16
        self.embed_dim = 768  # سایز استاندارد برای vit-base
        
        # تبدیل embedding به پچ ها
        self.embedding_to_patch = nn.Linear(1792, self.num_patches * self.embed_dim)
       # self.embedding_to_patch = nn.Linear(1280, self.num_patches * self.embed_dim)
        #self.embedding_to_patch = nn.Linear(embedding_dim,  self.num_patches * self.embed_dim)
        
        # Position Embedding
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches + 1, self.embed_dim))
        
        # CLS Token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim))
        
        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=12,  # تعداد heads در self-attention
            dim_feedforward=3072,
            dropout=0.1,
            activation='gelu',
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=12)
        
        # لایه های نهایی
        self.norm = nn.LayerNorm(self.embed_dim)
        self.fc = nn.Linear(self.embed_dim, num_classes)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        batch_size = x.size(0)
        
        # تبدیل embedding به پچ ها
        x = self.embedding_to_patch(x)  # (batch_size, num_patches * embed_dim)
        x = x.view(batch_size, self.num_patches, self.embed_dim)  # (batch_size, num_patches, embed_dim)
       
        # اضافه کردن CLS token
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)  # (batch_size, num_patches + 1, embed_dim)
        
        # اضافه کردن position embedding
        x = x + self.pos_embed
        
        # Transformer Encoder
        x = self.transformer_encoder(x)
        
        # استفاده از CLS token برای طبقه بندی
        x = x[:, 0]  # فقط CLS token
        x = self.norm(x)
        x = self.dropout(x)
        x = self.fc(x)
        
        return x  
    def get_embedding(self, x):
        """
        دریافت embedding برای استفاده در t-SNE
        """
        batch_size = x.size(0)
        
        # تغییر شکل ورودی
        if len(x.shape) == 4:
            x = x.view(batch_size, -1)  # تبدیل به بردار یک بعدی
        
        # تغییر ابعاد برای تطابق با عملیات ضرب ماتریسی
        x = x.reshape(batch_size, 1792, -1)  # تقسیم 150528 به 1792 و باقیمانده
        x = x.mean(dim=-1)  # میانگین گیری روی بعد آخر
        
        # حالا x دارای ابعاد (batch_size, 1792) است
        x = self.embedding_to_patch(x)  # عملیات خطی
        x = x.view(batch_size, self.num_patches, self.embed_dim)
        
        # اضافه کردن CLS token
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        
        # اضافه کردن position embedding
        x = x + self.pos_embed
        
        # Transformer Encoder
        x = self.transformer_encoder(x)
        
        # استفاده از CLS token
        x = x[:, 0]
        x = self.norm(x)
        
        return x
'''
 

class RefinedViT(nn.Module):
    def __init__(self, original_vit_model,embedding_dim,  num_classes):
        super(RefinedViT, self).__init__()    
        # تعداد پچ‌ها برای تصویر 224x224 با پچ 14x14
        self.num_patches = (224 // 14) * (224 // 14)  # 256 پچ
        self.embed_dim = embedding_dim  # ابعاد امبدینگ هر پچ
        
        # Position Embedding
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches + 1, self.embed_dim))
        
        # CLS Token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim))
        
        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=12,  # تعداد heads در self-attention
            dim_feedforward=3072,
            dropout=0.1,
            activation='gelu',
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=12)
        
        # لایه‌های نهایی
        self.norm = nn.LayerNorm(self.embed_dim)
        self.fc = nn.Linear(self.embed_dim, num_classes)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """
        ورودی: x با ابعاد (batch_size, num_patches_h, num_patches_w, embed_dim)
        """
        batch_size, num_patches_h, num_patches_w, embed_dim = x.shape
        
        # تغییر شکل x به (batch_size, num_patches, embed_dim)
        x = x.view(batch_size, -1, embed_dim)  # (batch_size, num_patches_h * num_patches_w, embed_dim)
        
        # اضافه کردن CLS token
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, embed_dim)
        x = torch.cat((cls_tokens, x), dim=1)  # (batch_size, num_patches + 1, embed_dim)
        
        # اضافه کردن position embedding
        x = x + self.pos_embed
        
        # Transformer Encoder
        x = self.transformer_encoder(x)
        
        # استفاده از CLS token برای طبقه‌بندی
        x = x[:, 0]  # فقط CLS token
        x = self.norm(x)
        x = self.dropout(x)
        x = self.fc(x)
        
        return x
    

train_data = torchvision.datasets.ImageFolder(root=path_data + '/train/', transform=get_val_transforms())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32)

test_data = torchvision.datasets.ImageFolder(root=path_data + '/test/', transform=get_val_transforms())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

# بارگذاری مدل ViT از Hugging Face
vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
vit_model.classifier = torch.nn.Linear(vit_model.config.hidden_size, 15)  # تعداد کلاس ها

embedding_dim = vit_model.config.hidden_size  # ابعاد embedding
#model = RefinedViT(vit_model, embedding_dim, num_classes=15)
model = RefinedViT(vit_model,embedding_dim , num_classes=15).cuda()
print("model")


outputs = model(train_embeddings.to(device))

# استخراج ویژگی ها از داده های آموزش
train_embeddings, train_labels = generate_embeddings(train_loader, siamese_model )
# استخراج ویژگی ها از داده های تست
test_embeddings, test_labels = generate_embeddings(test_loader, siamese_model )
 
# تبدیل داده ها به تنسور
X_train, y_train = train_embeddings, train_labels
X_val, y_val = test_embeddings, test_labels

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# انتقال مدل به دستگاه CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.cuda.is_available():
    model.cuda()

# تنظیمات loss function و optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)  # استفاده از AdamW
# انتقال به GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# تنظیمات آموزش
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000)
print("train")
# آموزش
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(X_train_tensor.to(device))
    loss = criterion(outputs, y_train_tensor.to(device))
    loss.backward()
    optimizer.step()
    scheduler.step()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/100], Loss: {loss.item():.4f}')
 
# ارزیابی مدل
model.eval()
with torch.no_grad():
    val_outputs = model(X_val_tensor.to(device))
    _, predicted = torch.max(val_outputs.data, 1)
 

# محاسبه دقت و امتیاز F1
accuracy = accuracy_score(y_val_tensor.cpu(), predicted.cpu())
f1 = f1_score(y_val_tensor.cpu(), predicted.cpu(), average='weighted')
precision = precision_score(y_val_tensor.cpu(), predicted.cpu(), average='macro') 
recall = recall_score(y_val_tensor.cpu(), predicted.cpu(), average='macro')

# نمایش نتایج
print(f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}')
print(f'Precision Score: {precision}')
print(f'Recall Score: {recall}')
  

  siamese_model = torch.load("C:/Users/Mey/Documents/PlantDiseaseDiagnosisFewShotLearning/siamese_triplet_net/siamese_model_EmbeddingGEnerator.h5",map_location=torch.device('cuda'))


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.