# Image generation

In [5]:
# Render the scenes: from inside image_generation/, run render_images.py under the
# Blender binary at ../blender/blender in background (no UI) mode. Everything after
# the bare `--` is passed to the script: 100 images, GPU enabled, 2-6 objects each.
# Afterwards, return to the parent directory so later cells keep their relative paths.
%cd image_generation
!../blender/blender --background \
    --python render_images.py -- \
    --num_images 100 --use_gpu 1 --min_objects 2 --max_objects 6
%cd ..

/home/kevin/clevr-dataset-gen/image_generation
found bundled python: /home/kevin/clevr-dataset-gen/blender/2.78/python
read blend: data/base_scene.blend
convertViewVec: called in an invalid context
convertViewVec: called in an invalid context
convertViewVec: called in an invalid context
0.3408721748853005 0.4 front
BROKEN MARGIN!
convertViewVec: called in an invalid context
0.341003207378054 0.4 right
BROKEN MARGIN!
0.23837811886526605 0.4 right
BROKEN MARGIN!
convertViewVec: called in an invalid context
0.15744848870422068 0.4 left
BROKEN MARGIN!
convertViewVec: called in an invalid context
Fra:1 Mem:17.53M (0.00M, Peak 17.53M) | Time:00:00.00 | Preparing Scene data
Fra:1 Mem:27.59M (0.00M, Peak 29.06M) | Time:00:00.02 | Preparing Scene data
Fra:1 Mem:27.59M (0.00M, Peak 29.06M) | Time:00:00.02 | Creating Shadowbuffers
Fra:1 Mem:27.59M (0.00M, Peak 29.06M) | Time:00:00.02 | Raytree.. preparing
Fra:1 Mem:38.32M (0.00M, Peak 38.32M) | Time:00:00.03 | Raytree.. building
Fra:1 Mem:37.73M 

# Question template generation

In [4]:
# Use the %pip magic so the package is installed into the environment of the running
# kernel (a `!pip` shell call may target a different Python), and pin the version
# that this notebook was last run with so re-runs are reproducible.
%pip install pyjson5==1.5.2

Collecting pyjson5
  Downloading pyjson5-1.5.2-cp37-cp37m-manylinux2014_x86_64.whl (232 kB)
[K     |████████████████████████████████| 232 kB 6.6 MB/s eta 0:00:01
[?25hInstalling collected packages: pyjson5
Successfully installed pyjson5-1.5.2


In [5]:
# Run generate_relations.py from inside relation_generator/ (the recorded output shows
# it saving templates to ../question_generation/babyarc), then return to the parent
# directory so later cells keep their relative paths.
%cd relation_generator
!python generate_relations.py
%cd ..

/dfs/user/liuk/reasoning/clevr_dataset_gen/relation_generator
Generating relations
Saving templates to ../question_generation/babyarc
Generating relations for same_shape and same_shape
Generating relations for same_shape and same_material
Generating relations for same_shape and same_size
Generating relations for same_shape and same_color
Generating relations for same_material and same_shape
Generating relations for same_material and same_material
Generating relations for same_material and same_size
Generating relations for same_material and same_color
Generating relations for same_size and same_shape
Generating relations for same_size and same_material
Generating relations for same_size and same_size
Generating relations for same_size and same_color
Generating relations for same_color and same_shape
Generating relations for same_color and same_material
Generating relations for same_color and same_size
Generating relations for same_color and same_color
/dfs/user/liuk/reasoning/clevr_dat

# Question generation

In [7]:
# Generate questions from inside question_generation/: read the scenes from
# ../output/CLEVR_scenes.json, use the templates in the babyarc directory, and write
# the result to questions.json (relative to question_generation/). Then return to the
# parent directory.
%cd question_generation/
!python generate_questions.py --input_scene_file ../output/CLEVR_scenes.json --output_questions_file questions.json --template_dir babyarc --max-num-objects 6
%cd ..

/dfs/user/liuk/reasoning/clevr_dataset_gen/question_generation
Read 32 templates from disk
starting image CLEVR_new_000000.png (1 / 1)
resetting counts
Writing output to questions.json
/dfs/user/liuk/reasoning/clevr_dataset_gen


# Question analysis

In [33]:
from collections import defaultdict
import json
from typing import List

from relation_generator.generate_relations import RELATIONS

def get_unique_task_string(program: List[dict]) -> str:
    """
    Parses the program for a given question and returns a unique string that identifies the
    babyARC task that it embodies.

    This function is somewhat hacky in that it doesn't deal with the AST directly, but it
    works for the generated babyARC template programs: the node list is scanned in order,
    and every "scene" node is taken as the start of a new object description.

    Args:
        program: The program nodes in order. Each node is a dict with a "type" key;
            nodes whose type starts with "filter_" must also carry the filtered value
            as the first element of "value_inputs".

    Returns:
        A string of the form "<relations>-<objects>". <relations> is the sorted list of
        node types found in RELATIONS, joined by "+". <objects> is one entry per object,
        joined by ";", where each entry is a comma-joined list of "<property>=<value>"
        pairs taken from that object's filter nodes.
    """
    filter_prefix = "filter_"

    inputs = []
    object_str = []
    for node in program:
        node_type = node["type"]

        # Generate a new object str every time we see a new "scene" (which implies
        # a new object). Nothing is flushed if no filter has been collected yet.
        if node_type == "scene":
            if object_str:
                inputs.append(",".join(object_str))
                object_str = []
            continue

        # If we're not at a scene, then we're in the middle of an object.
        if node_type.startswith(filter_prefix):
            # This node filters some property of the input; record "<property>=<value>".
            filtered_property = node_type[len(filter_prefix):]
            object_str.append(filtered_property + "=" + node["value_inputs"][0])

    # Flush the object that was still being collected when the program ended.
    inputs.append(",".join(object_str))

    # Sort the relations so the task string does not depend on their order in the program.
    relations = sorted(node["type"] for node in program if node["type"] in RELATIONS)

    return "+".join(relations) + "-" + ";".join(inputs)


# Load the question data
# NOTE(review): the question-generation cell writes questions.json, while this cell
# reads test.json -- confirm that this is the intended file.
file = "question_generation/test.json"
with open(file) as f:
    data = json.load(f)
question_list = data["questions"]

# Group the questions by task string. For each task we keep how often it occurs,
# the matching question records, and the image each of those questions refers to.
observed_question_types = dict()
for question in question_list:
    task_str = get_unique_task_string(question["program"])

    task_entry = observed_question_types.setdefault(
        task_str, {"count": 0, "questions": [], "images": []}
    )
    task_entry["count"] += 1
    task_entry["questions"].append(question)
    task_entry["images"].append(question["image"])

# Print one "<task string> - <count>" line per task. The loop variable is deliberately
# not called `data`, so the loaded JSON payload stays available to later cells.
for task_str, task_entry in observed_question_types.items():
    print("{} - {}".format(task_str, task_entry["count"]))

same_material+same_material-size=large,shape=cylinder;color=gray,material=metal - 1
same_shape+same_size-color=red,material=metal;color=green,material=rubber - 1
same_material+same_size-size=small,color=green;color=brown,material=rubber - 1
same_shape-color=brown - 2
same_shape+same_size-color=red,material=metal;color=gray - 1
same_material+same_size-color=brown;size=small,color=red,material=rubber,shape=cube - 1
same_color-size=large,material=metal - 1
same_color+same_size-size=large,material=metal;size=small,color=red,material=metal,shape=cube - 1
same_shape-color=brown,material=rubber - 1
same_shape+same_shape-color=gray,material=metal;color=red,material=rubber,shape=cube - 1
same_size-size=large,material=rubber - 1
same_size-color=purple - 2
same_size-material=metal - 4
same_color-size=small,material=rubber,shape=sphere - 1
same_color+same_shape-shape=cube;size=large,color=red,shape=sphere - 1
same_material+same_shape-color=red,shape=sphere;size=small,shape=sphere - 1
same_color+sa

In [34]:
import pandas as pd

# One row per task string (the dict keys become the index); the keys of each per-task
# dict ("count", "questions", "images") become the columns.
df = pd.DataFrame.from_dict(observed_question_types, orient='index')

In [42]:
# Images belonging to the most frequent task. The frame is indexed by task string, so
# the first row is selected by position with `.iloc[0]`; a plain `[0]` on a
# string-labelled Series relies on a deprecated positional fallback.
df.sort_values(by=["count"], ascending=False)["images"].iloc[0]

['CLEVR_new_000001',
 'CLEVR_new_000002',
 'CLEVR_new_000002',
 'CLEVR_new_000003']

In [20]:
# Inspect the full record (count, questions, images) stored for one specific task string.
observed_question_types["same_color-size=large,shape=cylinder"]

{'count': 1,
 'questions': [{'split': 'new',
   'image_filename': 'CLEVR_new_000009.png',
   'image_index': 9,
   'image': 'CLEVR_new_000009',
   'question': 'What is the object with the same_color as the big cylinder?',
   'program': [{'type': 'scene',
     'inputs': [],
     '_output': [0, 1, 2, 3],
     'value_inputs': []},
    {'type': 'filter_size',
     'inputs': [0],
     '_output': [0, 1, 2, 3],
     'value_inputs': ['large']},
    {'type': 'filter_shape',
     'inputs': [1],
     '_output': [3],
     'value_inputs': ['cylinder']},
    {'type': 'unique', 'inputs': [2], '_output': 3, 'value_inputs': []},
    {'type': 'same_color', 'inputs': [3], '_output': [0], 'value_inputs': []},
    {'type': 'unique', 'inputs': [4], '_output': 0, 'value_inputs': []}],
   'answer': 0,
   'template_filename': 'same_color-same_shape.json',
   'question_family_index': 0,
   'question_index': 88}]}

# Dataset

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dataset import ClevrRelationDataset

In [19]:
# Instantiate the dataset, pointing it at the image directory and the question directory.
dataset = ClevrRelationDataset(image_dir="output/images", question_dir="question_generation/")

In [22]:
# Display the first sample of the dataset as a sanity check.
dataset[0]

{'count': 1,
 'inputs': [{'image': tensor([[[0.4118, 0.4157, 0.4118,  ..., 0.4000, 0.4039, 0.4000],
            [0.4118, 0.4157, 0.4118,  ..., 0.4039, 0.4078, 0.3961],
            [0.4118, 0.4118, 0.4118,  ..., 0.4000, 0.4000, 0.4039],
            ...,
            [0.4667, 0.4627, 0.4627,  ..., 0.6235, 0.6235, 0.6235],
            [0.4627, 0.4627, 0.4667,  ..., 0.6275, 0.6235, 0.6235],
            [0.4627, 0.4667, 0.4627,  ..., 0.6275, 0.6275, 0.6235]],
   
           [[0.4118, 0.4157, 0.4118,  ..., 0.4000, 0.4039, 0.4000],
            [0.4078, 0.4118, 0.4118,  ..., 0.4039, 0.4078, 0.3961],
            [0.4078, 0.4118, 0.4118,  ..., 0.4000, 0.4000, 0.4039],
            ...,
            [0.4627, 0.4627, 0.4588,  ..., 0.6157, 0.6118, 0.6118],
            [0.4588, 0.4588, 0.4627,  ..., 0.6118, 0.6157, 0.6118],
            [0.4588, 0.4627, 0.4588,  ..., 0.6157, 0.6157, 0.6118]],
   
           [[0.4078, 0.4118, 0.4118,  ..., 0.4000, 0.4039, 0.4000],
            [0.4078, 0.4118, 0.4078,  ..