In [63]:
%load_ext autoreload
%autoreload 2
import os

import pandas as pd
import tensorflow as tf

from nn.utils import load_labels_and_features

# Project root and the sub-directory that holds the large data artefacts.
project_dir = '/home/rp218/luke-for-roko'
large_data_dir = f'{project_dir}/Thesis_Data'

# The feature-vector directory is named after the feature extractor in use.
FEATURE_EXTRACTOR = 'Resnet50V2'
features_dir = f'{large_data_dir}/Feature_vectors_{FEATURE_EXTRACTOR}'

# Load the pickled Codex output and keep only the three fields needed to
# build the labels frame.
# NOTE(review): pd.read_pickle executes arbitrary code on load; only use it
# on trusted files.
path = os.path.join(project_dir, "Extracted_Concepts/final_dict_new_codex.pkl")
codex_output = pd.read_pickle(path)
labels_keys = ['id', 'label', 'concepts']
labels_dict = dict((key, codex_output[key]) for key in labels_keys)
labels_df_filtered = pd.DataFrame.from_dict(labels_dict)

print(features_dir)

# Load the features together with the matching labels, then keep every row
# from index 1700 onwards as the slice inspected later in the notebook.
X, labels = load_labels_and_features(labels_df_filtered, features_dir)
X_test0 = X[1700:, ...]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/home/rp218/luke-for-roko/Thesis_Data/Feature_vectors_Resnet50V2
(1919, 360, 2048)
                id label                                           concepts
0     SP7Y6KCFF2TD   out  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1     LMH26GKJFGQW  play  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2     HST5K3C5L9WS  ball  [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3     ZJ5T4M8F9USB  ball  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4     257MNU1H3O56  foul  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
...            ...   ...                                                ...
1914  ZDEUXDLTP1TL  play  [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...
1915  863JKGRGLKMG  play  [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...
1916  619MYTG7OTT0   out  [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1917  BWJ683S12AE4  play  [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 

In [64]:
from keras.models import load_model

# Number of concepts after pruning; also part of the checkpoint file name below.
n_concepts = 78
# Model which trained the best solution
model_id_new = 1656151443
# Solution file containing the best solution
sol_file = 'solution.txt'
# Background knowledge file handed to `predict` together with the solution file.
background_file = '../../ilasp/classification/baseball_model_attn/background.lp'
model_path_new = f"../../Thesis_Data/Models/best_concept_Conv_attn_{n_concepts}_{model_id_new}.h5"
model = load_model(model_path_new)

# Model.summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line after the summary.
model.summary()

Model: "Video_concepts"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_1 (InputLayer)           [(None, 360, 2048)]  0           []                               
                                                                                                  
 Conv_1 (Conv1D)                (None, 360, 64)      393280      ['Input_1[0][0]']                
                                                                                                  
 Max_pool_1 (MaxPooling1D)      (None, 90, 64)       0           ['Conv_1[0][0]']                 
                                                                                                  
 Bn_1 (BatchNormalization)      (None, 90, 64)       256         ['Max_pool_1[0][0]']             
                                                                                     

In [65]:
import numpy as np

# "explanations" entries of the current and of the older Codex pickle.
concepts_text = np.array(codex_output["explanations"])
old_concepts_text = np.array(
    pd.read_pickle(os.path.join(project_dir, "Extracted_Concepts/final_dict_old_codex.pkl"))["explanations"]
)

# Class name -> integer id, plus the reverse lookup used when printing labels.
class_dict = dict(strike=0, ball=1, play=2, foul=3, out=4)
categories = class_dict.keys()

inv_class_dict = dict(zip(class_dict.values(), class_dict))

n_concepts = 78

# Stack the per-row concept vectors into one 2-D array, drop every column that
# is zero for all rows, then keep only the first `n_concepts` columns.
concept_matrix = np.stack(labels['concepts'].values, axis=0)
idx = np.argwhere((concept_matrix == 0).all(axis=0))
concept_matrix = np.delete(concept_matrix, idx, axis=1)[:, :n_concepts]

# Integer class ids for every row, followed by the slices from row 1700 on.
y = np.array([class_dict[name] for name in labels['label']])
y_test, concept_test = y[1700:], concept_matrix[1700:]
id_test0 = np.array(labels["id"].values)[1700:]

In [66]:
from concept_processing.io import lookup_explanations
from IPython.display import Video

# Position, within the test slice, of the example to inspect.
visualise_point = 1

X_visualize = X_test0[visualise_point]
y_true = y_test[visualise_point]
curr_id = id_test0[visualise_point]
print(f"True label is {inv_class_dict[y_true]}")
print(f"The video id we are observing is {curr_id}")

print("Human-written explanation:")
print(lookup_explanations(curr_id, f"{project_dir}/full_dataset"))

# Show the clip that belongs to `curr_id` rather than a fixed file, so the
# video follows `visualise_point`.
# NOTE(review): assumes demo clips are stored as "<video id>.mp4" in cmd/demo,
# as the previously hard-coded file name suggests. Confirm.
demo_dir = os.path.join(project_dir, "cmd", "demo")
video_path = os.path.join(demo_dir, f"{curr_id}.mp4")
if not os.path.isfile(video_path):
    # No clip for this id: fall back to the clip this cell always showed
    # before, but say so, so the mismatch with `curr_id` is visible.
    print(f"No demo clip found for {curr_id}; showing 4RSRO1YFRUCG instead.")
    video_path = os.path.join(demo_dir, "4RSRO1YFRUCG.mp4")
Video(video_path, embed=True)

True label is play
The video id we are observing is TJ9WTG3OYT97
Human-written explanation:
the batter hit a ground ball to the third baseman.  It led to an out rather than a hit because the third baseman threw the ball to the first baseman to complete the out.


In [67]:
from concept_processing.classification.example_generator import get_concept_bottleneck_context, predict
from nn.utils import attn_prediction

# Add a leading axis of size one to the selected example before passing it to
# the model, then squeeze the result again.
# NOTE(review): presumably the leading axis is the batch dimension; confirm
# against nn.utils.attn_prediction.
concept_preds = attn_prediction(
    model,
    tf.expand_dims(X_visualize, axis=0),
).squeeze()
# Turn the predictions into the textual context handed to `predict` later on.
ctx = get_concept_bottleneck_context(categories, concept_preds, concepts_text=concepts_text)
print("The model predicted that the following concepts are present:", ctx, sep="\n")

The model predicted that the following concepts are present:
concept("It was caught.").
concept("The batter hit the ball in the air.").
concept("The batter hit the ball on the ground.").
concept("It hit the ground.").
concept("The batter hit a ground ball.").


In [68]:
print("Using the learned solution file:")
# Read through a context manager so the file handle is closed right away
# instead of being left for the garbage collector.
with open(sol_file) as sol_fh:
    print(sol_fh.read())

# Classify the predicted concept context with the learned rules and the
# background file.
label = predict(ctx, sol_file, background_file)
print(f"The model predicted the label: {label}")

Using the learned solution file:

conds(strike) :- concept("The fielder caught the ball."), concept("It was outside the strike zone.").
conds(strike) :- concept("It was.").
conds(strike) :- concept("The batter made contact."), concept("The batter missed.").
conds(play) :- concept("It was caught.").
conds(out) :- concept("The batter hit a fly ball."), concept("The fielder caught the ball.").
conds(foul) :- concept("The batter hit the ball for a foul ball.").

#show selected/1.

The model predicted the label: play
