In [14]:
from fastai.vision.all import *
import os
import clip
import torch

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from torch.utils.data import DataLoader
from tqdm import tqdm

from helper import *


# Load the CLIP-Model

In [16]:
# Load the model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device)



In [17]:
# setting path variables 
Path.BASE_PATH = Path('../../MelvinWevers#9512/sample_images')
Path.BASE_PATH.ls()
path = Path.BASE_PATH

In [18]:
data = get_dls(128, 224, augment=False)

In [9]:
# Calculate the image features
train_features, train_labels = get_features(data.train)
test_features, test_labels = get_features(data.valid)


100%|██████████| 49/49 [00:09<00:00,  5.37it/s]
100%|██████████| 13/13 [00:04<00:00,  2.78it/s]


In [10]:
# Perform logistic regression
classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1, n_jobs=-1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.
print(f"Accuracy = {accuracy:.3f}")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.


Accuracy = 71.877


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  2.6min finished


In [10]:
# Param Sweep

C_param_range = [0.5, 0.55, 0.575, 0.6, 0.65, 0.7]

acc_Table = pd.DataFrame(columns = ['C_parameter','Accuracy'])
acc_Table['C_parameter'] = C_param_range

In [11]:
j = 0 
for i in C_param_range:
    print(i)
    classifier = LogisticRegression(random_state=0, C = i, max_iter=1000, n_jobs=-1)
    classifier.fit(train_features, train_labels)
   
    predictions = classifier.predict(test_features)
    
    acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.
    
    j += 1


0.5


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


0.55


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


0.575


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


0.6


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


0.65


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


0.7


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  acc_Table.iloc[j, 1] = np.mean((test_labels == predictions).astype(np.float)) * 100.


In [12]:
acc_Table

Unnamed: 0,C_parameter,Accuracy
0,0.5,70.558376
1,0.55,70.621827
2,0.575,70.621827
3,0.6,70.558376
4,0.65,70.431472
5,0.7,70.431472


In [35]:
# Perform logistic regression
classifier = LogisticRegression(random_state=0, C=0.575, max_iter=1000, verbose=0, n_jobs=-1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.
print(f"Accuracy = {accuracy:.3f}")

Accuracy = 70.622


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.


In [None]:
# Save Model

In [36]:
filename = 'linear_probe_model.sav'
pickle.dump(classifier, open(filename, 'wb'))
 
# load the model from disk
# loaded_model = pickle.load(open(filename, 'rb'))
# result = loaded_model.score(X_test, Y_test)
# print(result)

In [38]:
test_labels_ = [data.vocab[x] for x in test_labels]
predictions_ = [data.vocab[x] for x in predictions]

In [None]:
print(classification_report(test_labels_, predictions_))

In [45]:
## Calculate top-5 accuracy
correct = []
pred_5 = [] 

top5 = 0.0 

probs = classifier.predict_proba(test_features)
best_n = np.argsort(probs, axis=1)[:,-5:]

for i, preds in enumerate(best_n):
    if test_labels[i] in best_n[i]:
        top5 += 1.0

    
print("top5 acc", top5/len(best_n))   

top5 acc 0.9302030456852792


## Predict a single image

In [23]:
img_features = get_single_img_features('../../MelvinWevers#9512/DeBoer_Train/auto_ongeluk/NL-HlmNHA_1478_08130_G.jpg')

In [24]:
predictions = classifier.predict(img_features)

In [107]:
probs = classifier.predict_proba(img_features)
best_n = np.argsort(probs, axis=1)[:,-5:]
probabilities = np.round(np.sort(probs, axis=1)[:,-5:], 4)
#test_labels[i] in best_n[i]:

In [110]:
probabilities = np.round(np.sort(probs, axis=1)[:,-5:], 3)

In [108]:
labels = [data.vocab[i] for i in best_n]

In [136]:
output = {}
for i, _ in enumerate(probabilities[0]):
    print(i)
    output[labels[0][i]] = probabilities[0][i]

0
1
2
3
4
