<a href="https://colab.research.google.com/github/nannthd/project_AIEngineer/blob/main/TestCLIPModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab runtime so that the paths under
# /content/drive used by the following cells can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# IPython shell escape: extract the dataset archive stored on Drive into the
# current working directory.
# NOTE(review): a later cell reads "/content/data 50 class add_augment" while
# this archive is named "...add_augment3.zip" - confirm the extracted folder name.
!unzip '/content/drive/MyDrive/drug/data 50 class add_augment3.zip'

# Image + text vs. image: compare a new image against the stored image and text embeddings

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained CLIP checkpoint and its matching preprocessor.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
model = model.to(device)

# Precomputed vectors: a 'TextLabel' column plus ImageEmbed_* / TextEmbed_* columns.
csv_file_path = '/content/drive/MyDrive/drug/FineTune/CLIPModel/class_vectors.csv'
df = pd.read_csv(csv_file_path)

# One label per CSV row.
text_labels = df['TextLabel'].tolist()

# Embedding columns are picked by the name fragment they contain.
image_columns = [name for name in df.columns if 'ImageEmbed_' in name]
text_columns = [name for name in df.columns if 'TextEmbed_' in name]

# 2-D numpy arrays, one row per CSV row.
image_embeddings = np.array(df[image_columns].values)
text_embeddings = np.array(df[text_columns].values)

def compare_image_with_stored_vectors(new_image_path):
    """Predict the label of an image from the stored image and text vectors.

    The image is embedded with the CLIP vision encoder and compared, by
    cosine similarity, with every row of ``image_embeddings`` and of
    ``text_embeddings``. The two similarity rows are averaged and the label
    of the best-scoring row is returned.

    Args:
        new_image_path: Path of the image file to classify.

    Returns:
        The entry of ``text_labels`` at the row with the highest combined
        similarity.
    """
    # Read the pixels eagerly so the file handle is released immediately.
    with Image.open(new_image_path) as img:
        image = img.convert("RGB")

    # Only the image embedding is needed below, so the text labels are not
    # tokenised or encoded again for every query image.
    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)

    # Vision encoder + projection head: the same path CLIPModel.forward uses
    # to build `image_embeds` (before its L2 normalisation).
    with torch.no_grad():
        vision_outputs = model.vision_model(pixel_values=pixel_values)
        projected = model.visual_projection(vision_outputs.pooler_output)

    # Cosine similarity is scale-invariant, so the missing normalisation does
    # not change any score. reshape(1, -1) gives the 2-D array sklearn expects.
    new_image_embedding = projected.cpu().numpy().reshape(1, -1)

    # Similarity of the new image to every stored image / text vector
    image_similarities = cosine_similarity(new_image_embedding, image_embeddings)
    text_similarities = cosine_similarity(new_image_embedding, text_embeddings)

    # Equal-weight average of both views (weights can be tuned if needed)
    combined_similarities = (image_similarities + text_similarities) / 2

    # Row with the highest combined score
    most_similar_index = np.argmax(combined_similarities, axis=1)[0]

    return text_labels[most_similar_index]

# Folder whose image files are classified one by one
base_folder = '/content/drive/MyDrive/drug/CorpTest'

# Parallel result lists: entry i of both lists belongs to the same image
predicted_labels = []
true_labels = []

for image_file in os.listdir(base_folder):
    image_path = os.path.join(base_folder, image_file)

    # Anything that is not a PNG/JPEG file is ignored
    if not image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    # The ground-truth label is the file name without its extension
    # (e.g. 'Caduet5_10mg.jpg' -> 'Caduet5_10mg'); no suffix is stripped.
    true_label = os.path.splitext(image_file)[0]

    print(f"\nProcessing image: {image_path}")
    predicted_label = compare_image_with_stored_vectors(image_path)

    predicted_labels.append(predicted_label)
    true_labels.append(true_label)

# Every label that occurs as a ground truth or as a prediction, sorted
# alphabetically; this fixes the row/column order of the matrix.
labels = sorted(set(true_labels) | set(predicted_labels))

# Resolve each label to its row/column once instead of scanning the list
# with list.index() twice per sample.
label_to_index = {label: position for position, label in enumerate(labels)}

# confusion_matrix[i, j] counts images of true class i predicted as class j
confusion_matrix = np.zeros((len(labels), len(labels)), dtype=int)

for true, pred in zip(true_labels, predicted_labels):
    confusion_matrix[label_to_index[true], label_to_index[pred]] += 1

# Render the confusion matrix as an annotated heatmap
# (y axis: true label, x axis: predicted label).
heatmap_options = dict(
    annot=True,          # write the count inside every cell
    fmt='d',             # counts are integers
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
)

plt.figure(figsize=(12, 10))
sns.heatmap(confusion_matrix, **heatmap_options)

plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')

# Vertical x tick labels keep long class names readable
plt.yticks(rotation=0)
plt.xticks(rotation=90)

plt.tight_layout()
plt.show()

# Image vs. text: zero-shot comparison of an image against the text labels

In [None]:
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

import torch  # used for device placement and no_grad in this cell

# Load the CLIP model and processor, on the GPU when one is available
# (same device handling as the embedding cells of this notebook)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Path to your local image file
image_path = "/content/drive/MyDrive/drug/CorpTest/Caduet5_10mg.jpg"

# Read the pixels eagerly so the file handle is released immediately
with Image.open(image_path) as img:
    image = img.convert("RGB")

# Candidate text labels the image is scored against
text_labels = [
    'Amlopine10mg', 'Amlopine5mg', 'Anapril5mg', 'Betalol10mg', 'Betalol40mg',
    'Blopress16mg', 'Blopress8mg', 'BlopressPlus16mg', 'BlopressPlus8mg', 'Caduet10_10mg',
    'Caduet5_10mg', 'Daonil5mg', 'DiamicronMR60mg', 'Diovan160mg', 'Diovan80mg',
    'Forxiga10mg', 'Galvus50mg', 'GalvusMet50_1000mg', 'Gliclazide80mg', 'Gliparil2mg',
    'Glucophage500mg', 'Glucophage850mg', 'GlucophageXR1000mg', 'GlucophageXR750mg', 'Glyxambi25_5mg',
    'Janumet50_1000mg', 'Januvia100mg', 'Jardiance10mg', 'Jardiance25mg', 'JardianceDuo12.5_1000mg',
    'Lanzaar100mg', 'Lercadip20mg', 'Madiplot10mg', 'Madiplot20mg', 'MetoprololStada100mg',
    'Micardis40mg', 'MicardisPlus80_12.5mg', 'Minidiab5mg', 'Novonorm1mg', 'Novonorm2mg',
    'Oseni25_15mg', 'Poli_uretic', 'Prenolol100mg', 'Prenolol25mg', 'Prenolol50mg',
    'Tanzaril50mg', 'Utmos15mg', 'Utmos30mg', 'XigduoXR10_1000mg', 'Zanidip10mg'
]

# Prepare the inputs for the CLIP model and move them next to the weights
inputs = processor(text=text_labels, images=image, return_tensors="pt", padding=True)
inputs = {k: v.to(device) for k, v in inputs.items()}

# Inference only: no autograd graph is needed
with torch.no_grad():
    outputs = model(**inputs)

# Image-text similarity logits, shape (1, number of labels)
logits_per_image = outputs.logits_per_image

# Softmax over the labels turns the logits into probabilities
probs = logits_per_image.softmax(dim=1)

# Top entries (at most 5, never more than there are labels)
top_probs, top_indices = probs.topk(min(5, len(text_labels)), dim=1)

# Row 0 is the single image; indexing (rather than squeeze) always yields lists
top_probs = top_probs[0].tolist()
top_indices = top_indices[0].tolist()

# Print the top entries in descending order of probability
print("Top 5 similarity scores and corresponding text labels:")
for idx, prob in zip(top_indices, top_probs):
    print(f"{text_labels[idx]}: {prob:.4f}")

Top 5 similarity scores and corresponding text labels:
Caduet5_10mg: 0.2959
GalvusMet50_1000mg: 0.2251
Caduet10_10mg: 0.1914
Madiplot10mg: 0.1615
Madiplot20mg: 0.0378


In [None]:
import os
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

import torch  # used for device placement and no_grad in this cell

# Load the CLIP model and processor, on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Path to your local image folder
image_folder_path = "/content/drive/MyDrive/drug/CorpTest"

# Candidate text labels every image is scored against
text_labels = [
    'Amlopine10mg', 'Amlopine5mg', 'Anapril5mg', 'Betalol10mg', 'Betalol40mg',
    'Blopress16mg', 'Blopress8mg', 'BlopressPlus16mg', 'BlopressPlus8mg', 'Caduet10_10mg',
    'Caduet5_10mg', 'Daonil5mg', 'DiamicronMR60mg', 'Diovan160mg', 'Diovan80mg',
    'Forxiga10mg', 'Galvus50mg', 'GalvusMet50_1000mg', 'Gliclazide80mg', 'Gliparil2mg',
    'Glucophage500mg', 'Glucophage850mg', 'GlucophageXR1000mg', 'GlucophageXR750mg', 'Glyxambi25_5mg',
    'Janumet50_1000mg', 'Januvia100mg', 'Jardiance10mg', 'Jardiance25mg', 'JardianceDuo12.5_1000mg',
    'Lanzaar100mg', 'Lercadip20mg', 'Madiplot10mg', 'Madiplot20mg', 'MetoprololStada100mg',
    'Micardis40mg', 'MicardisPlus80_12.5mg', 'Minidiab5mg', 'Novonorm1mg', 'Novonorm2mg',
    'Oseni25_15mg', 'Poli_uretic', 'Prenolol100mg', 'Prenolol25mg', 'Prenolol50mg',
    'Tanzaril50mg', 'Utmos15mg', 'Utmos30mg', 'XigduoXR10_1000mg', 'Zanidip10mg'
]

# The labels are the same for every image, so tokenise them once up front
text_inputs = processor(text=text_labels, return_tensors="pt", padding=True)
text_inputs = {k: v.to(device) for k, v in text_inputs.items()}

# Report at most 5 entries, never more than there are labels
top_k = min(5, len(text_labels))

# Loop through all image files in the folder
for filename in os.listdir(image_folder_path):
    # Case-insensitive check so files such as 'X.JPG' are not skipped
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
        continue

    image_path = os.path.join(image_folder_path, filename)

    # Read the pixels eagerly so the file handle is released immediately
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        outputs = model(pixel_values=pixel_values, **text_inputs)

    # Softmax over the labels turns the similarity logits into probabilities
    probs = outputs.logits_per_image.softmax(dim=1)

    # Row 0 is the single image; indexing (rather than squeeze) always yields lists
    top_probs, top_indices = probs.topk(top_k, dim=1)
    top_probs = top_probs[0].tolist()
    top_indices = top_indices[0].tolist()

    # Print the results for this image
    print(f"\nResults for {filename}:")
    for idx, prob in zip(top_indices, top_probs):
        print(f"{text_labels[idx]}: {prob:.4f}")

In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

import torch  # used for device placement and no_grad in this cell

# Load the CLIP model and processor, on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Path to your image folder
image_folder = "/content/drive/MyDrive/drug/CorpTest"

# Candidate text labels; their order defines the rows/columns of the matrix
text_labels = [
    'Amlopine10mg', 'Amlopine5mg', 'Anapril5mg', 'Betalol10mg', 'Betalol40mg',
    'Blopress16mg', 'Blopress8mg', 'BlopressPlus16mg', 'BlopressPlus8mg', 'Caduet10_10mg',
    'Caduet5_10mg', 'Daonil5mg', 'DiamicronMR60mg', 'Diovan160mg', 'Diovan80mg',
    'Forxiga10mg', 'Galvus50mg', 'GalvusMet50_1000mg', 'Gliclazide80mg', 'Gliparil2mg',
    'Glucophage500mg', 'Glucophage850mg', 'GlucophageXR1000mg', 'GlucophageXR750mg', 'Glyxambi25_5mg',
    'Janumet50_1000mg', 'Januvia100mg', 'Jardiance10mg', 'Jardiance25mg', 'JardianceDuo12.5_1000mg',
    'Lanzaar100mg', 'Lercadip20mg', 'Madiplot10mg', 'Madiplot20mg', 'MetoprololStada100mg',
    'Micardis40mg', 'MicardisPlus80_12.5mg', 'Minidiab5mg', 'Novonorm1mg', 'Novonorm2mg',
    'Oseni25_15mg', 'Poli_uretic', 'Prenolol100mg', 'Prenolol25mg', 'Prenolol50mg',
    'Tanzaril50mg', 'Utmos15mg', 'Utmos30mg', 'XigduoXR10_1000mg', 'Zanidip10mg'
]

# Parallel result lists: entry i of both lists belongs to the same image
predicted_labels = []
true_labels = []

# confusion_matrix[i, j] counts images of true class i predicted as class j
num_classes = len(text_labels)
confusion_matrix = np.zeros((num_classes, num_classes), dtype=int)

# Label -> position lookup built once; the first occurrence wins, which
# matches what list.index() would return.
label_to_index = {}
for position, label in enumerate(text_labels):
    label_to_index.setdefault(label, position)


def get_label_index(label):
    """Return the position of ``label`` in ``text_labels``.

    Raises:
        ValueError: If ``label`` is not one of the known labels.
    """
    try:
        return label_to_index[label]
    except KeyError:
        raise ValueError(f"{label!r} is not in list") from None


# The labels are the same for every image, so tokenise them once up front
text_inputs = processor(text=text_labels, return_tensors="pt", padding=True)
text_inputs = {k: v.to(device) for k, v in text_inputs.items()}

# Loop through all images in the folder
for image_filename in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_filename)
    if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        continue

    # Read the pixels eagerly so the file handle is released immediately
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        outputs = model(pixel_values=pixel_values, **text_inputs)

    # Softmax is monotonic, so the arg-max of the logits is the arg-max of
    # the probabilities.
    predicted_index = int(outputs.logits_per_image.argmax(dim=1).item())
    predicted_label = text_labels[predicted_index]

    # The ground-truth label is the file name without its extension
    true_label = os.path.splitext(image_filename)[0]

    # Only files whose name is a known label can be placed in the matrix;
    # say so explicitly instead of dropping the others silently.
    if true_label in label_to_index:
        confusion_matrix[get_label_index(true_label), predicted_index] += 1
    else:
        print(f"Not counted in confusion matrix: {image_filename} (unknown label '{true_label}')")

    # Store results
    predicted_labels.append(predicted_label)
    true_labels.append(true_label)

# Render the confusion matrix as an annotated heatmap; both axes are
# labelled with the entries of text_labels.
heatmap_options = dict(
    annot=True,          # write the count inside every cell
    fmt='d',             # counts are integers
    cmap='Blues',
    xticklabels=text_labels,
    yticklabels=text_labels,
)

plt.figure(figsize=(12, 10))
sns.heatmap(confusion_matrix, **heatmap_options)

plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")

# Vertical x tick labels keep long class names readable
plt.yticks(rotation=0)
plt.xticks(rotation=90)

plt.tight_layout()
plt.show()

In [None]:
import os
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

import torch  # used for device placement and no_grad in this cell

# Load the CLIP model and processor, on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Path to your local image folder
image_folder_path = "/content/drive/MyDrive/drug/CorpTest"

# Candidate text labels every image is scored against
text_labels = [
    'Amlopine10mg', 'Amlopine5mg', 'Anapril5mg', 'Betalol10mg', 'Betalol40mg',
    'Blopress16mg', 'Blopress8mg', 'BlopressPlus16mg', 'BlopressPlus8mg', 'Caduet10_10mg',
    'Caduet5_10mg', 'Daonil5mg', 'DiamicronMR60mg', 'Diovan160mg', 'Diovan80mg',
    'Forxiga10mg', 'Galvus50mg', 'GalvusMet50_1000mg', 'Gliclazide80mg', 'Gliparil2mg',
    'Glucophage500mg', 'Glucophage850mg', 'GlucophageXR1000mg', 'GlucophageXR750mg', 'Glyxambi25_5mg',
    'Janumet50_1000mg', 'Januvia100mg', 'Jardiance10mg', 'Jardiance25mg', 'JardianceDuo12.5_1000mg',
    'Lanzaar100mg', 'Lercadip20mg', 'Madiplot10mg', 'Madiplot20mg', 'MetoprololStada100mg',
    'Micardis40mg', 'MicardisPlus80_12.5mg', 'Minidiab5mg', 'Novonorm1mg', 'Novonorm2mg',
    'Oseni25_15mg', 'Poli_uretic', 'Prenolol100mg', 'Prenolol25mg', 'Prenolol50mg',
    'Tanzaril50mg', 'Utmos15mg', 'Utmos30mg', 'XigduoXR10_1000mg', 'Zanidip10mg'
]

# The labels are the same for every image, so tokenise them once up front
text_inputs = processor(text=text_labels, return_tensors="pt", padding=True)
text_inputs = {k: v.to(device) for k, v in text_inputs.items()}

# Counters for correct and incorrect top-1 predictions
correct_predictions = 0
incorrect_predictions = 0

# Loop through all image files in the folder
for filename in os.listdir(image_folder_path):
    # Case-insensitive check so files such as 'X.JPG' are not skipped
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
        continue

    # The expected label is the file name without its extension
    correct_label = os.path.splitext(filename)[0]

    image_path = os.path.join(image_folder_path, filename)

    # Read the pixels eagerly so the file handle is released immediately
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        outputs = model(pixel_values=pixel_values, **text_inputs)

    # Softmax over the labels turns the similarity logits into probabilities
    probs = outputs.logits_per_image.softmax(dim=1)

    # Top-1 predicted label
    top_prob, top_index = probs.topk(1, dim=1)
    predicted_label = text_labels[top_index.item()]

    # Compare the predicted label with the expected one
    if predicted_label == correct_label:
        correct_predictions += 1
    else:
        incorrect_predictions += 1
        print(f"Incorrect prediction for {filename}: Predicted {predicted_label} instead of {correct_label}")

# Print the total number of correct and incorrect predictions
print(f"\nTotal correct predictions: {correct_predictions}")
print(f"Total incorrect predictions: {incorrect_predictions}")

# Image vs. image: compare a new image against the stored image embeddings

In [None]:
import os
import torch
import pandas as pd
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from datetime import datetime

# Check if GPU is available and use it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Path to the main directory; every sub-folder is treated as one class and
# its name is used as the label.
main_directory = "/content/data 50 class add_augment"

# One CSV row per image: [label, embedding values...]
rows = []

# Start timer for the whole process
start_time = datetime.now()

# Collect images and convert them to vectors
for folder_name in os.listdir(main_directory):
    folder_path = os.path.join(main_directory, folder_name)
    if not os.path.isdir(folder_path):
        continue

    # Start timer for the current folder
    folder_start_time = datetime.now()

    for image_name in os.listdir(folder_path):
        image_path = os.path.join(folder_path, image_name)

        # A stray non-image file or sub-directory must not abort the whole
        # export: PIL reports both as OSError subclasses, so skip and report.
        try:
            with Image.open(image_path) as img:
                image = img.convert("RGB")
        except OSError as err:
            print(f"Skipping unreadable file {image_path}: {err}")
            continue

        # Prepare the inputs for the CLIP model
        inputs = processor(text=[folder_name], images=image, return_tensors="pt", padding=True)

        # Move inputs to the same device as the model
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Inference only: no autograd graph is needed
        with torch.no_grad():
            outputs = model(**inputs)

        # Image embedding as a flat list of floats
        image_embedding = outputs.image_embeds.squeeze().cpu().tolist()

        rows.append([folder_name] + image_embedding)

    # End timer for the current folder
    folder_end_time = datetime.now()
    folder_duration = folder_end_time - folder_start_time
    print(f"Converted vectors for folder: {folder_name} in {folder_duration}")

# End timer for the whole process
end_time = datetime.now()
total_duration = end_time - start_time

# Embedding width comes from the collected rows (minus the label column); with
# no rows at all, fall back to the model's projection size instead of failing
# on an undefined loop variable.
embedding_dim = len(rows[0]) - 1 if rows else model.config.projection_dim

# Define the header for the CSV file
header = ['TextLabel'] + [f'ImageEmbed_{i}' for i in range(embedding_dim)]

# Write the data to a CSV file
csv_file_path = '/content/drive/MyDrive/drug/FineTune/CLIPModel_new.csv'
df = pd.DataFrame(rows, columns=header)
df.to_csv(csv_file_path, index=False)

print(f"Vectors have been successfully saved to: {csv_file_path}")
print(f"Total time for processing all folders: {total_duration}")

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained CLIP checkpoint and its matching preprocessor.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
model = model.to(device)

# Precomputed vectors: a 'TextLabel' column plus ImageEmbed_* columns.
csv_file_path = '/content/drive/MyDrive/drug/FineTune/CLIPModel_new.csv'
df = pd.read_csv(csv_file_path)

# One label per CSV row.
text_labels = df['TextLabel'].tolist()

# Embedding columns are picked by the name fragment they contain.
image_columns = [name for name in df.columns if 'ImageEmbed_' in name]

# 2-D numpy array, one row per CSV row.
image_embeddings = np.array(df[image_columns].values)

def compare_image_with_stored_vectors(new_image_path):
    """Predict the label of an image from its nearest stored image vector.

    The image is embedded with the CLIP vision encoder and compared, by
    cosine similarity, with every row of ``image_embeddings``; the label of
    the most similar row is returned.

    Args:
        new_image_path: Path of the image file to classify.

    Returns:
        The entry of ``text_labels`` at the row with the highest similarity.
    """
    # Read the pixels eagerly so the file handle is released immediately.
    with Image.open(new_image_path) as img:
        image = img.convert("RGB")

    # Only the image embedding is needed. text_labels holds one entry per
    # CSV row, so encoding it for every query image would be pure overhead.
    pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)

    # Vision encoder + projection head: the same path CLIPModel.forward uses
    # to build `image_embeds` (before its L2 normalisation).
    with torch.no_grad():
        vision_outputs = model.vision_model(pixel_values=pixel_values)
        projected = model.visual_projection(vision_outputs.pooler_output)

    # Cosine similarity is scale-invariant, so the missing normalisation does
    # not change any score. reshape(1, -1) gives the 2-D array sklearn expects.
    new_image_embedding = projected.cpu().numpy().reshape(1, -1)

    # Similarity of the new image to every stored image vector
    image_similarities = cosine_similarity(new_image_embedding, image_embeddings)

    # Row with the highest score
    most_similar_index = np.argmax(image_similarities, axis=1)[0]

    return text_labels[most_similar_index]

# Folder whose image files are classified one by one
base_folder = '/content/drive/MyDrive/drug/CorpTest'

# Parallel result lists: entry i of both lists belongs to the same image
predicted_labels = []
true_labels = []

for image_file in os.listdir(base_folder):
    image_path = os.path.join(base_folder, image_file)

    # Anything that is not a PNG/JPEG file is ignored
    if not image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    # The ground-truth label is the file name without its extension
    # (e.g. 'Caduet5_10mg.jpg' -> 'Caduet5_10mg'); no suffix is stripped.
    true_label = os.path.splitext(image_file)[0]

    print(f"\nProcessing image: {image_path}")
    predicted_label = compare_image_with_stored_vectors(image_path)

    predicted_labels.append(predicted_label)
    true_labels.append(true_label)

# Every label that occurs as a ground truth or as a prediction, sorted
# alphabetically; this fixes the row/column order of the matrix.
labels = sorted(set(true_labels) | set(predicted_labels))

# Resolve each label to its row/column once instead of scanning the list
# with list.index() twice per sample.
label_to_index = {label: position for position, label in enumerate(labels)}

# confusion_matrix[i, j] counts images of true class i predicted as class j
confusion_matrix = np.zeros((len(labels), len(labels)), dtype=int)

for true, pred in zip(true_labels, predicted_labels):
    confusion_matrix[label_to_index[true], label_to_index[pred]] += 1

# Render the confusion matrix as an annotated heatmap
# (y axis: true label, x axis: predicted label).
heatmap_options = dict(
    annot=True,          # write the count inside every cell
    fmt='d',             # counts are integers
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
)

plt.figure(figsize=(12, 10))
sns.heatmap(confusion_matrix, **heatmap_options)

plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')

# Vertical x tick labels keep long class names readable
plt.yticks(rotation=0)
plt.xticks(rotation=90)

plt.tight_layout()
plt.show()