In [1]:
import clip
import matplotlib.pyplot as plt
import torch
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from torchvision import datasets, transforms

# Define constants
DATA_DIR = "your_data_folder"  # ImageFolder root: one sub-directory per class
IMAGE_SIZE = 224  # input resolution of CLIP ViT-B/32
NUM_CLASSES = 2  # Cats and Dogs
TOTAL_IMAGES = 1000
SEED = 42  # fixes the train/test split so a Restart & Run All is reproducible

# Define data subsets
TRAIN_SIZE = 800
TEST_SIZE = 200

# CLIP was trained with its own normalisation statistics (not ImageNet's), so
# the encoder only sees in-distribution inputs when these values are used.
CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# Data augmentation (training split only)
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CLIP_MEAN, CLIP_STD),
])
# Deterministic preprocessing for evaluation: no random crop / flips.
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(CLIP_MEAN, CLIP_STD),
])

# Efficient data loading
# Two views of the same folder: they list identical files in identical order,
# and differ only in the transform applied when an image is read.
dataset = datasets.ImageFolder(DATA_DIR, transform=train_transforms)
eval_dataset = datasets.ImageFolder(DATA_DIR, transform=test_transforms)

if len(dataset) < TRAIN_SIZE + TEST_SIZE:
    raise ValueError(
        f"{DATA_DIR!r} holds {len(dataset)} images, but TRAIN_SIZE + TEST_SIZE "
        f"= {TRAIN_SIZE + TEST_SIZE} are required"
    )

# Seeded shuffle, then disjoint index ranges: the test subset reads through
# `eval_dataset` so it is never augmented, and folders holding more images than
# TRAIN_SIZE + TEST_SIZE no longer make the split fail.
split_generator = torch.Generator().manual_seed(SEED)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
train_data = torch.utils.data.Subset(dataset, shuffled_indices[:TRAIN_SIZE])
test_data = torch.utils.data.Subset(
    eval_dataset, shuffled_indices[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
)

# Load CLIP model
# clip.load returns (model, preprocess); valid names come from
# clip.available_models(), e.g. "ViT-B/32". `clip_preprocess` is CLIP's own
# PIL -> tensor pipeline, kept available as an alternative to test_transforms.
model, clip_preprocess = clip.load("ViT-B/32", device="cuda")

# Extract CLIP embeddings
def extract_embeddings(data, model, batch_size=64):
    """Encode every image in ``data`` with ``model.encode_image``.

    Args:
        data: Map-style dataset (or list) of ``(image_tensor, label)`` pairs;
            all image tensors must share one shape so they can be batched.
        model: Object exposing ``encode_image(batch)``. Inputs are moved to the
            device of its first parameter (CPU if it has no parameters).
        batch_size: Number of images encoded per forward pass.

    Returns:
        A CPU tensor with one embedding row per item of ``data``, in the same
        order and in whatever dtype the model produces. An empty ``data``
        yields an empty ``(0, 0)`` tensor instead of raising.
    """
    # Follow the model's device instead of hard-coding CUDA, so the function
    # also works for CPU-resident models and on hosts without a GPU.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # shuffle=False keeps embeddings aligned with the order of `data`.
    loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False)

    embeddings = []
    with torch.no_grad():  # inference only: no autograd graph is built
        for images, _ in loader:
            embeddings.append(model.encode_image(images.to(device)).cpu())

    if not embeddings:
        # torch.cat refuses an empty list; return an explicit empty result.
        return torch.empty((0, 0))
    return torch.cat(embeddings, dim=0)

train_embeddings = extract_embeddings(train_data, model)
test_embeddings = extract_embeddings(test_data, model)

# A Subset (what random_split returns) has no `.targets` attribute; the labels
# live on the wrapped ImageFolder and must be picked out through the subset's
# indices so they line up row-for-row with the embeddings.
train_labels = [train_data.dataset.targets[i] for i in train_data.indices]
test_labels = [test_data.dataset.targets[i] for i in test_data.indices]

# scikit-learn works on NumPy arrays; cast to float32 first because CLIP may
# emit half-precision embeddings.
train_features = train_embeddings.float().numpy()
test_features = test_embeddings.float().numpy()

# Train Fisher algorithm
lda = LinearDiscriminantAnalysis()
lda.fit(train_features, train_labels)

# Predict
predictions = lda.predict(test_features)

# Calculate metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions, average="weighted")
recall = recall_score(test_labels, predictions, average="weighted")
f1 = f1_score(test_labels, predictions, average="weighted")

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Visualize confusion matrix
class_names = test_data.dataset.classes  # folder names, ordered by class index
class_ids = list(range(len(class_names)))
# Passing `labels` pins the matrix shape even if a class is absent from the
# test split or the predictions.
cm = confusion_matrix(test_labels, predictions, labels=class_ids)

fig, ax = plt.subplots()
heatmap = ax.imshow(cm, cmap="Blues")
fig.colorbar(heatmap, ax=ax)
ax.set_xticks(class_ids)
ax.set_xticklabels(class_names)
ax.set_yticks(class_ids)
ax.set_yticklabels(class_names)
# Write the raw count into each cell so the figure is readable on its own.
for row in class_ids:
    for col in class_ids:
        ax.text(col, row, str(cm[row, col]), ha="center", va="center")
ax.set_xlabel("Predicted Class")
ax.set_ylabel("True Class")
ax.set_title("Confusion Matrix")
plt.show()


100%|███████████████████████████████████████| 338M/338M [04:42<00:00, 1.25MiB/s]


FileNotFoundError: [Errno 2] No such file or directory: 'dog.138.jpg'