In [1]:
import json
import time
import subprocess
import math
import regex
import sys
import os

import spacy
from tqdm import tqdm
from nltk.corpus import wordnet as wn
import numpy as np
from sklearn.model_selection import KFold

from json_parser import Parser
from common import BBT_PEOPLE, BoundingBox
from image_processing import ObjectDetector
import language_processing as lp
import utils
from utils import split_data_set
import inference as inf

In [2]:
# Lines of hold_base.lp; one_iter_od copies them verbatim into base.lp
# before appending the rule(s) learned by ILASP.
with open('hold_base.lp') as base_file:
    background_knowledge = base_file.read().splitlines()

# Shared parser; one_iter_od uses it to generate positive examples.
parser = Parser()

# With Object Detector

In [3]:
def _extract_hypothesis(ilasp_output):
    """Return the learned rule lines that precede ILASP's statistics block."""
    rules = []
    for line in ilasp_output.splitlines():
        stripped = line.strip()
        # The hypothesis is printed first; a blank line or a '%' comment line
        # marks the start of the timing/statistics section.
        if not stripped or stripped.startswith('%'):
            break
        rules.append(line.rstrip())
    return rules


def one_iter_od(train_set, test_set):
    """Run one learn-then-evaluate round and report Jaccard scores.

    Steps:
      1. Write the positive examples generated from ``train_set`` (via
         ``parser.get_pos_example``) to the file ``pos_eg_od``.
      2. Run ILASP on ``hold_bk.las``, ``pos_eg_od`` and ``ovr.las`` and write
         ``background_knowledge`` plus the learned rule(s) to ``base.lp``.
      3. Call ``inf.inference`` on every item of ``test_set`` and score it
         with ``inf.get_jaccard_score``.
      4. Print a summary of the scores and the problematic question ids.

    Args:
        train_set: iterable of questions used to generate training examples.
        test_set: iterable of question dicts; each must have a ``'qid'`` key.

    Returns:
        Tuple ``(jacc_score, num_questions)``: the summed Jaccard score and
        the number of scored test questions. Questions for which inference
        returns ``[-1]`` are counted as parsing errors and excluded from both.

    Side effects:
        Overwrites ``pos_eg_od`` and ``base.lp`` in the working directory and
        prints progress/results to stdout.
    """

    # 1. Generate examples based on train_set
    total_examples = 0

    with open('pos_eg_od', 'w') as out:
        for t in train_set:
            examples = parser.get_pos_example(t)
            total_examples += len(examples)
            for e in examples:
                print(e.gen_example(), file=out)
    print(f'Total examples: {total_examples}')

    ##########################################################

    # 2. Learn with ILASP and write new rules to base.lp
    cp = subprocess.run(['./ILASP', '--version=4',
                         '--override-default-sm',
                         'hold_bk.las', 'pos_eg_od', 'ovr.las'],
                        capture_output=True)

    # Decode once and reuse for both the log and the rule extraction.
    ilasp_output = cp.stdout.decode('utf-8')
    print(ilasp_output)

    # stderr is captured by capture_output=True; surface it instead of
    # silently discarding it so ILASP failures are visible.
    if cp.stderr:
        print(cp.stderr.decode('utf-8', errors='replace'), file=sys.stderr)

    with open('base.lp', 'w') as f:
        for line in background_knowledge:
            print(line, file=f)
        # Write every learned rule, not just the first output line, so a
        # multi-rule hypothesis is not truncated.
        for rule in _extract_hypothesis(ilasp_output):
            print(rule, file=f)

    ##########################################################

    # 3. Test on test set
    jacc_score = 0
    num_questions = 0
    parsing_error = []
    not_full_score = []
    zero_score = []

    for test in test_set:
        pred_ans_idx = inf.inference(test)
        # [-1] is the sentinel inference returns for a question it could not
        # handle; such questions are reported but not scored.
        if pred_ans_idx == [-1]:
            parsing_error.append(test['qid'])
            continue

        score = inf.get_jaccard_score(pred_ans_idx, test)

        if 0 < score < 1:
            not_full_score.append(test['qid'])
        elif score == 0:
            zero_score.append(test['qid'])

        jacc_score += score
        num_questions += 1

    ##########################################################

    # 4. Print out result
    # Avoid ZeroDivisionError when nothing was scored (empty test set or
    # every question was a parsing error).
    norm_jacc_score = jacc_score / num_questions if num_questions else float('nan')

    print(f'# tests:     {num_questions}')
    print(f'jacc score:   {jacc_score}')
    print(f'norm jacc score: {norm_jacc_score}')
    print()

    print('PARSING ERROR')
    print(parsing_error)
    print()

    print('NOT FULL SCORE')
    print(not_full_score)
    print()

    print('ZERO SCORE')
    print(zero_score)

    return jacc_score, num_questions

In [4]:
# Load the questions that the cross-validation below is run on.
with open('train_hold.json') as question_file:
    attempted_question = json.load(question_file)

print(F'# total tests: {len(attempted_question)}')

# Index every question by its 'qid' so single questions can be looked up later.
a_q_dict = {question['qid']: question for question in attempted_question}

# 5-fold splitter; the final expression displays the number of splits.
kf = KFold(n_splits=5)
kf.get_n_splits(attempted_question)

# total tests: 91


5

In [5]:
# Cross-validate: for every fold, learn on the training part and score the
# held-out part with one_iter_od, accumulating scores over all folds.
start_time = time.time()

total_jacc_score = 0
total_questions = 0

for run_id, (train_index, test_index) in enumerate(kf.split(attempted_question)):
    print(F'--------Run {run_id}--------')
    train_set = [attempted_question[i] for i in train_index]
    test_set = [attempted_question[i] for i in test_index]

    jacc, n_q = one_iter_od(train_set, test_set)
    total_jacc_score += jacc
    total_questions += n_q

    print('----------------------------')
    print()

print(F'Total runtime: {time.time() - start_time}')

# The reported figure is pooled over all scored questions (sum of scores /
# number of scored questions), not a mean of the per-fold ratios.
# Report nan instead of raising ZeroDivisionError when nothing was scored.
avg_norm_jacc = total_jacc_score / total_questions if total_questions else float('nan')
print(F'Avg norm jacc score across folds: {avg_norm_jacc}')

--------Run 0--------
Total examples: 224
initiatedAt(holding(V2,V1),V3) :- happensAt(close(V2,V1,71),V3).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Pre-processing                          : 0.007s
%% Hypothesis Space Generation             : 3.418s
%% Conflict analysis                       : 81.301s
%%   - Positive Examples                   : 81.301s
%% Counterexample search                   : 1.222s
%%   - CDOEs                               : 0.082s
%%   - CDPIs                               : 1.137s
%% Hypothesis Search                       : 37.614s
%% Propagation                             : 125.832s
%%   - CDPIs                               : 125.832s
%% Total                                   : 249.576s
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%





# tests:     19
jacc score:   13.5
norm jacc score: 0.7105263157894737

PARSING ERROR
[]

NOT FULL SCORE
[87060]

ZERO SCORE
[72563, 25720, 38557, 20686, 48275]
----------------------------

--------Run 1--------
Total examples: 230
initiatedAt(holding(V1,V2),V3) :- happensAt(close(V1,V2,75),V3).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Pre-processing                          : 0.008s
%% Hypothesis Space Generation             : 3.461s
%% Conflict analysis                       : 52.262s
%%   - Positive Examples                   : 52.262s
%% Counterexample search                   : 0.714s
%%   - CDOEs                               : 0.049s
%%   - CDPIs                               : 0.664s
%% Hypothesis Search                       : 22.208s
%% Propagation                             : 105.213s
%%   - CDPIs                               : 105.213s
%% Total                                   : 184.003s
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



# tests:     18
jacc score:   13.033333333333331
norm jacc score: 0.724074074074074

PARSING ERROR
[]

NOT FULL SCORE
[71186, 33374, 61212, 45051, 92313]

ZERO SCORE
[46741, 3120]
----------------------------

--------Run 2--------
Total examples: 243
initiatedAt(holding(V3,V1),V2) :- happensAt(close(V3,V1,72),V2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Pre-processing                          : 0.008s
%% Hypothesis Space Generation             : 3.496s
%% Conflict analysis                       : 90.127s
%%   - Positive Examples                   : 90.127s
%% Counterexample search                   : 1.485s
%%   - CDOEs                               : 0.157s
%%   - CDPIs                               : 1.325s
%% Hypothesis Search                       : 42.046s
%% Propagation                             : 128.02s
%%   - CDPIs                               : 128.02s
%% Total                                   : 265.371s
%%%%%%%%%%%%%%%%%%%%%%



# tests:     18
jacc score:   14.0
norm jacc score: 0.7777777777777778

PARSING ERROR
[]

NOT FULL SCORE
[5164, 90225]

ZERO SCORE
[72501, 121553, 119537]
----------------------------

Total runtime: 1827.2125840187073
Avg norm jacc score across folds: 0.7073260073260074


In [None]:
from image_processing import draw_bounding_box
from PIL import Image
from IPython.display import display


# Visual check of one question: draw the detected objects' bounding boxes on
# a single frame of its video.
# NOTE(review): `frame_folder` and `od` are not defined in any cell visible
# here; they must be created before this cell is run (od is presumably an
# ObjectDetector instance — confirm).
test = a_q_dict[25720]

vid_folder = frame_folder + test['vid_name'] + '/'

# Named frame_time rather than `time` so the imported `time` module is not
# shadowed (rebinding it breaks time.time() in the cross-validation cell).
frame_time = 30
# NOTE(review): 0.7 is presumably a detection threshold — confirm against
# ObjectDetector.get_frame_qa_objects.
qa_objects = od.get_frame_qa_objects(vid_folder, 0.7, frame_time)
bboxes = [o.bbox for o in qa_objects]

# Zero-pad to five digits (e.g. 30 -> '00030.jpg') instead of hard-coding the
# '000' prefix, which is only correct for two-digit values.
frame_path = vid_folder + f'{frame_time:05d}.jpg'
display(Image.fromarray(draw_bounding_box(frame_path, bboxes)))