### Object Material Type (OMT) Classifier, based on OpenAI's CLIP Model
Source: https://openai.com/research/clip

In [1]:
from model_functions import *

# Load dataset
df_dataset = load_from_pickle(dataset_file)

# Take 10% of stratified samples for zero-shot classification testing
_x_train, zs_x, _y_train, zs_y = train_test_split(df_dataset['File'], df_dataset['Material Class'], test_size=0.01, stratify=df_dataset['Material Class'], random_state=9876)
_x_train, zs_x_test, _y_train, zs_y_test = train_test_split(df_dataset['File'], df_dataset['Material Class'], test_size=0.01, stratify=df_dataset['Material Class'], random_state=5678)

# 80-20 Train-Test split
x_train, x_test, y_train, y_test = train_test_split(df_dataset['File'], df_dataset['Material Class'], test_size=0.2, stratify=df_dataset['Material Class'], random_state=1234)

In [2]:
train_features, train_classes = get_clip_features(zs_x, zs_y)
test_features, test_classes = get_clip_features(zs_x_test, zs_y_test)

100%|██████████| 1/1 [00:54<00:00, 54.42s/it]
100%|██████████| 1/1 [00:55<00:00, 55.35s/it]


In [6]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1, multi_class='multinomial')
classifier.fit(train_features, zs_y)

predictions = classifier.predict(test_features)
accuracy = np.mean((zs_y_test == predictions).astype(float)) * 100
print(f"Accuracy = {accuracy:.3f}")

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =         3845     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.60944D+00    |proj g|=  1.59483D+00

At iterate   50    f=  3.65345D-01    |proj g|=  9.89559D-03

At iterate  100    f=  3.63552D-01    |proj g|=  8.57894D-04


 This problem is unconstrained.



At iterate  150    f=  3.62902D-01    |proj g|=  1.37224D-02

At iterate  200    f=  3.62159D-01    |proj g|=  6.30518D-04

At iterate  250    f=  3.62138D-01    |proj g|=  6.99656D-04

At iterate  300    f=  3.62112D-01    |proj g|=  2.24169D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
 3845    304    325      1     0     0   9.460D-05   3.621D-01
  F =  0.36211051056947430     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
Accuracy = 20.576


In [None]:
""" Initial Performance """
# Get model specifications
input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

print("Model parameters:", f"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size, end='\n\n')

# Get model performance
results = multi_class_metrics(list(zs_y), zs_y_pred)
accuracy = results['accuracy']
precision = results['precision']
recall = results['recall']
f1 = results['f1']
mcc = results['mcc']
kappa = results['kappa']
hamming_loss_val = results['hamming_loss_val']
cm = results['cm']
class_report = results['class_report']

# Print results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Matthews Correlation Coefficient (MCC):", mcc)
print("Cohen's Kappa:", kappa)
print("Hamming Loss:", hamming_loss_val, end='\n\n')
print("Confusion Matrix:\n", cm, end="\n\n")
print("Classification Report:\n", class_report, end="\n\n\n")