In this notebook, we are loading an existing model and features saved from unlabeled data from a single folder. Using these features we let the model make predictions for each image. Finally, we save the predictions to csv and json files.

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame as df
import tensorflow as tf

2024-06-11 15:12:11.274518: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-11 15:12:11.274577: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-11 15:12:11.275584: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-11 15:12:11.280637: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Load validation set in tensorflow format

In [2]:
def load_dataset(path):
    data = pd.read_pickle(path)
    n_features = data['clip_features'][0].shape[-1]
    class_names = data['y'].unique()
    y = np.array([np.where(class_names == e)[0][0] for e in data['y']])
    x = np.concatenate(data['clip_features'].to_numpy())
    print(f'Loaded {len(x)} instances from {path}.')
    print(f'X shape {x.shape} and y shape {y.shape} with labels:\n{data["y"].value_counts()}.')
    train_ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(16)
    return train_ds, class_names, n_features, data['paths']

path = '../new_data_2023/ai/predictions.pk'
validation_ds, class_names, n_features, paths = load_dataset(path)

Loaded 22464 instances from ../new_data_2023/ai/predictions.pk.
X shape (22464, 512) and y shape (22464,) with labels:
y
Complex AI    22464
Name: count, dtype: int64.


2024-06-11 15:12:14.596387: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-11 15:12:14.634468: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-11 15:12:14.634543: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-11 15:12:14.640124: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-11 15:12:14.640190: I external/local_xla/xla/stream_executor

In [3]:
pd.read_pickle(path)

Unnamed: 0,paths,y,clip_features
0,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.0018835, 0.001931, 0.001597, 0.001449, 0.0..."
1,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.001838, 0.002176, 0.001371, 0.001315, 0.00..."
2,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.00225, 0.001859, 0.0011425, 0.00102, 0.002..."
3,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.002016, 0.001717, 0.001439, 0.00218, 0.001..."
4,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.00253, 0.00294, 0.002, 0.002037, 0.00188, ..."
...,...,...,...
22459,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.00161, 0.0015, 0.001351, 0.001371, 0.00240..."
22460,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.002205, 0.002024, 0.001784, 0.00219, 0.002..."
22461,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.001947, 0.001683, 0.001744, 0.002134, 0.00..."
22462,../new_data_2023/ai/images_by_class/validation...,Complex AI,"[[0.002428, 0.001525, 0.002174, 0.002693, 0.00..."


In [4]:
from pathlib import Path
p = Path(r"../robot_or_brain_enriched_and_combined_data/images_by_class/validation")
class_list = [d.name for d in p.iterdir()]

In [5]:
class_list

['Acting or Performing machine',
 'Collaborative or Interactive AI',
 'Complex AI',
 'Learning or recognition machine',
 'Mysterious AI',
 'None of the above',
 'Superior human',
 'Thinking machine']

In [6]:
trues = [class_list[int(y)] for _x, y in validation_ds.unbatch()]

### Print class counts for each split

In [7]:
df([y for y in trues]).value_counts()

Acting or Performing machine    22464
Name: count, dtype: int64

Some of these counts are horrible. The number of examples in 'Learning or recognition machine', 'Superior human' and 'Mysterious AI' are really too small. Also, the 'None of the above' class is very large, although that's not really a problem.

### Load our trained model

In [8]:
import tensorflow as tf

# model = tf.keras.models.load_model('./fine_tuned_model_3m6herki')
# model = tf.keras.models.load_model('./fine_tuned_model_3f0vzk68')
# model = tf.keras.models.load_model('./fine_tuned_model_qdesgan9')
# model = tf.keras.models.load_model('./clip_features_model_kuw3ehqp')
model = tf.keras.models.load_model('./clip_features_model_kf5cnvvi')

### Make predictions

In [9]:
logits = model.predict(validation_ds)
predicted = [class_list[v] for v in np.argmax(logits, 1)]
from pandas import DataFrame as df

print(df(predicted).value_counts())
print(df(trues).value_counts())

  83/1404 [>.............................] - ETA: 1s

2024-06-11 15:12:18.633616: W external/local_xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc:504] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  /usr/local/cuda-12.2
  /usr/local/cuda
  /home/christiaan/anaconda3/envs/robot311/lib/python3.11/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /home/christiaan/anaconda3/envs/robot311/lib/python3.11/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  .
You can choose the search directory by setting xla_gpu_cuda_data_dir in HloModule's DebugOptions.  For most apps, setting the environment variable XLA_FLAGS=--xla_gpu_cuda_data_dir=/path/to/cuda will work.


None of the above                  8558
Acting or Performing machine       4414
Thinking machine                   3845
Complex AI                         2819
Collaborative or Interactive AI    1734
Superior human                      829
Mysterious AI                       265
Name: count, dtype: int64
Acting or Performing machine    22464
Name: count, dtype: int64


In [10]:
predicted[:10]

['Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Collaborative or Interactive AI',
 'None of the above',
 'Thinking machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine']

In [11]:
trues[:10]

['Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine',
 'Acting or Performing machine']

In [12]:
paths[:10]

0    ../new_data_2023/ai/images_by_class/validation...
1    ../new_data_2023/ai/images_by_class/validation...
2    ../new_data_2023/ai/images_by_class/validation...
3    ../new_data_2023/ai/images_by_class/validation...
4    ../new_data_2023/ai/images_by_class/validation...
5    ../new_data_2023/ai/images_by_class/validation...
6    ../new_data_2023/ai/images_by_class/validation...
7    ../new_data_2023/ai/images_by_class/validation...
8    ../new_data_2023/ai/images_by_class/validation...
9    ../new_data_2023/ai/images_by_class/validation...
Name: paths, dtype: object

In [13]:
df_predicted = df(predicted).reset_index()
df_trues = df(trues).reset_index()

In [14]:
df_predicted = df_predicted.rename(columns={0:'predictions'})
df_trues = df_trues.rename(columns={0:'folder_validation'})

In [15]:
results = []

In [16]:
for prediction, path in zip(predicted, paths):
    r = {}
    r['prediction'] = prediction
    r['path'] = path
    r['filename'] = path.name
    results.append(r)

In [17]:
results = pd.DataFrame(results)

In [18]:
from IPython.display import display
from ipywidgets import widgets, HBox, VBox, Layout


hboxes = []
for class_name in class_list:    
    label = widgets.Label(class_name, layout=Layout(width='220px'))
    
    image_paths = [str(r[1]['path']) for r in list(results.iterrows()) if r[1]['prediction'] == class_name]
    images = [widgets.Image(value=open(p, 'rb').read(), layout=Layout(width='120px')) for p in image_paths[:5]]
    # images=[widgets.Label(t,layout=Layout(width='220px')) for t in image_paths[:10]]
    hbox = HBox([label] + images, layout=Layout(height='90px'))
    hboxes += [hbox]
vbox = VBox(hboxes)
display(vbox)

VBox(children=(HBox(children=(Label(value='Acting or Performing machine', layout=Layout(width='220px')), Image…

In [19]:
results.to_csv('predictions_imago_chris.csv.new', index=False)
results.to_pickle('predictions_imago_chris.pkl.new')
