In [1]:
import torch
import clip
from PIL import Image
import os
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [10]:
def load_dataset(dataset_path):
    features = []
    labels = []
    class_names = ['my_wallet', 'friend_wallet', 'purse', 'rao_wallet', 'haris_wallet', 'talha_wallet']  
    
    for label, class_name in enumerate(class_names): 
        class_path = os.path.join(dataset_path, class_name)
        for img_file in os.listdir(class_path):
            image_path = os.path.join(class_path, img_file)
            image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
            with torch.no_grad():
                image_features = model.encode_image(image).cpu().numpy()
            features.append(image_features)
            labels.append(label)
            print(label, image_features)  
        print(labels)
    
    return np.vstack(features), np.array(labels)


In [11]:
dataset_path = "dataset"
features, labels = load_dataset(dataset_path)

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

0 [[-2.45598540e-01  6.93276525e-02  2.34535355e-02  1.61882892e-01
  -4.72270668e-01  2.37853080e-01 -1.41398191e-01  4.92222130e-01
   1.19735673e-01  3.51587385e-01  1.72037140e-01 -2.04262465e-01
   2.62827873e-01 -5.11537313e-01  4.85804290e-01 -3.08016390e-01
   1.36781931e-01 -2.87849665e-01  1.35227069e-01 -2.14656174e-01
   4.98556614e-01  3.69825512e-01  1.02377608e-01 -1.53810084e-01
   1.01756260e-01  5.90859801e-02 -6.87343776e-01 -1.01022810e-01
   8.86802226e-02  2.76106656e-01  1.37097523e-01 -7.42965937e-03
   1.28477991e-01 -3.40087324e-01  2.12163925e-01  1.02125928e-01
  -3.90837610e-01 -1.77277043e-01  1.14384450e-01  8.79205287e-01
  -6.76383913e-01 -3.24727982e-01 -6.18025362e-02 -1.45381287e-01
   1.96637183e-01  8.14697504e-01  8.04401875e-01  1.92989051e-01
   1.35609314e-01  3.98137212e-01  5.12670219e-01 -3.55686337e-01
   5.71641862e-01 -3.13936532e-01  4.36142087e-02  3.54911387e-01
   5.72075009e-01  3.30522656e-02 -1.11467928e-01 -1.31011203e-01
  -1.115

In [12]:
clf = LogisticRegression(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 100.00%


In [13]:
def classify(image_path):
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image).cpu().numpy()
    
    prediction = clf.predict(image_features)
    probabilities = clf.predict_proba(image_features)[0]
    # predicted_class = "my_wallet" if prediction[0] == 0 else "friend_wallet" if prediction[0] == 1 else "purse"
    # predicted_class = "my_wallet" if prediction[0] == 0 else "friend_wallet" if prediction[0] == 1 else "purse" if prediction[0] == 2 else "rao_wallet"
    predicted_class = "my_wallet" if prediction[0] == 0 else "friend_wallet" if prediction[0] == 1 else "purse" if prediction[0] == 2 else "rao_wallet" if prediction[0] == 3 else "haris_wallet" if prediction[0] == 4 else "talha_wallet"
    return predicted_class, probabilities


In [16]:
test_image_path = "WhatsApp Image 2025-02-18 at 14.01.34_1e94cb9a.jpg"


prediction = classify(test_image_path)
print(f"Predicted class: {prediction}")

Predicted class: ('haris_wallet', array([0.0557821 , 0.02584508, 0.01360371, 0.11498404, 0.69678059,
       0.09300449]))
