#### Notebook to create simulated CLEVR / CLEVR-CoGenT data

In [20]:
import json
import os
from pprint import pprint

import numpy as np
import pandas as pd
import scipy.stats as stats


In [21]:
# Lookup tables translating the symbolic CLEVR labels of a scene object into
# nominal numeric attribute values. transform_data indexes them with the
# object's "size", "color", "material" and "shape" fields respectively.

# Nominal area per size label.
area_dict = dict(small=30, large=70)

# Nominal [r, g, b] channel values per colour name.
color_dict = dict(
    gray=[87, 87, 87],
    red=[173, 34, 35],
    blue=[44, 76, 215],
    green=[29, 105, 20],
    brown=[126, 72, 25],
    purple=[130, 39, 192],
    cyan=[40, 208, 208],
    yellow=[255, 238, 51],
)

# Nominal roughness per material label.
roughness_dict = dict(metal=8, rubber=2)

# Number of sides per shape label.
sides_dict = dict(cube=6, sphere=1, cylinder=3)

# Number of corners per shape label.
corners_dict = dict(cube=8, sphere=0, cylinder=2)

# Nominal width/height ratio per shape label.
whratio_dict = dict(cube=1.0, sphere=1.0, cylinder=0.5)

In [22]:
def add_variance(mu, sigma, min_bound, max_bound, rng=None):
    """Draw one Gaussian sample around ``mu`` and clamp it to the given bounds.

    Parameters
    ----------
    mu : float
        Mean of the normal distribution (the nominal attribute value).
    sigma : float
        Standard deviation of the normal distribution; must be >= 0.
    min_bound, max_bound : float
        Inclusive limits. A sample below ``min_bound`` is replaced by
        ``min_bound``; a sample above ``max_bound`` by ``max_bound``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness exposing a ``normal(loc=..., scale=...)`` method.
        When omitted, NumPy's global ``np.random`` state is used, exactly as
        before this parameter existed. Pass e.g. ``np.random.default_rng(seed)``
        to make the draw reproducible without touching global state.

    Returns
    -------
    float
        The clamped sample (or the bound itself when the sample was clamped).
    """
    sampler = np.random if rng is None else rng
    el = sampler.normal(loc=mu, scale=sigma)
    # Clamp from below first, then from above.
    return min(max_bound, max(min_bound, el))

In [23]:
def normalise_val_in_range(x, a, b):
    """Linearly rescale ``x`` so that ``a`` maps to 0.0 and ``b`` maps to 1.0.

    No clamping is applied: values outside ``[a, b]`` map outside ``[0, 1]``.
    ``a`` and ``b`` must differ, otherwise the division raises
    ``ZeroDivisionError``.
    """
    return (x - a) / (b - a)


# (lower, upper) bounds used to rescale each supported attribute.
_ATTRIBUTE_RANGES = {
    "area": (0, 100),
    "wh-ratio": (0, 1),
    "nr-of-corners": (0, 8),
    "nr-of-sides": (1, 6),
    "roughness": (0, 10),
    "r": (0, 255),
    "g": (0, 255),
    "b": (0, 255),
    "xpos": (0, 480),
    "ypos": (0, 320),
    "zpos": (6, 16),
    "xpos-3d": (-3, 3),
    "ypos-3d": (-3, 3),
    "zpos-3d": (0, 1),
    "rotation": (0, 360),
}


def normalise_val(attr, val):
    """Rescale ``val`` using the fixed range registered for attribute ``attr``.

    Parameters
    ----------
    attr : str
        Attribute name; must be one of the keys of ``_ATTRIBUTE_RANGES``.
    val : float
        Raw attribute value.

    Returns
    -------
    float
        ``(val - lower) / (upper - lower)`` for the attribute's range.

    Raises
    ------
    ValueError
        If ``attr`` is not a known attribute. (Previously an unknown name
        fell through every ``case`` and surfaced as ``UnboundLocalError``.)
    """
    try:
        lower, upper = _ATTRIBUTE_RANGES[attr]
    except (KeyError, TypeError):
        known = ", ".join(sorted(_ATTRIBUTE_RANGES))
        raise ValueError(
            f"unknown attribute {attr!r}; expected one of: {known}"
        ) from None
    return normalise_val_in_range(val, lower, upper)

def transform_data(scene):
    """Convert a CLEVR scene dict into the simulated-attribute format.

    Every entry of ``scene["objects"]`` is replaced by a dict with two keys:

    * ``"attributes"``: numeric features. Colour channels, area, roughness and
      width/height ratio are the nominal values from the module-level lookup
      tables with Gaussian noise added by ``add_variance``; the number of sides
      and corners and the positions/rotation are taken over unchanged. All
      values are then rescaled with ``normalise_val`` and rounded to 5 decimals.
    * ``"description"``: the original symbolic labels (color, size, material,
      shape).

    The ``"relationships"``, ``"directions"`` and ``"split"`` entries are
    removed when present, and ``"CLEVR"`` is replaced by ``"clevr"`` in
    ``scene["image_filename"]``.

    Parameters
    ----------
    scene : dict
        Scene description with at least ``"objects"`` and ``"image_filename"``.
        Each object needs ``"color"``, ``"size"``, ``"material"``, ``"shape"``,
        ``"pixel_coords"``, ``"3d_coords"`` and ``"rotation"``.

    Returns
    -------
    dict
        The same ``scene`` object, modified in place.

    Raises
    ------
    KeyError
        If a required key is missing or a label is not in the lookup tables.
    """
    new_objects = []
    # `obj` rather than `object`, which would shadow the builtin type.
    for obj in scene["objects"]:
        r, g, b = color_dict[obj["color"]]
        xpos, ypos, zpos = obj["pixel_coords"]
        xpos_3d, ypos_3d, zpos_3d = obj["3d_coords"]

        # Every sigma below equals 1% of the width of its clamping range.
        # Dict literals evaluate left to right, so the random draws happen in
        # the order r, g, b, area, roughness, wh-ratio.
        raw_attributes = {
            "r": add_variance(r, 2.55, 0, 255),
            "g": add_variance(g, 2.55, 0, 255),
            "b": add_variance(b, 2.55, 0, 255),
            "area": add_variance(area_dict[obj["size"]], 1, 0, 100),
            "roughness": add_variance(roughness_dict[obj["material"]], 0.1, 0, 10),
            "nr-of-sides": sides_dict[obj["shape"]],
            "nr-of-corners": corners_dict[obj["shape"]],
            "wh-ratio": add_variance(whratio_dict[obj["shape"]], 0.01, 0, 1.0),
            "xpos": xpos,
            "ypos": ypos,
            "zpos": zpos,
            "xpos-3d": xpos_3d,
            "ypos-3d": ypos_3d,
            "zpos-3d": zpos_3d,
            "rotation": obj["rotation"],
        }

        # Build a fresh dict instead of rewriting the one being iterated.
        attributes = {
            key: round(normalise_val(key, val), 5)
            for key, val in raw_attributes.items()
        }

        new_objects.append({
            "attributes": attributes,
            "description": {
                "color": obj["color"],
                "size": obj["size"],
                "material": obj["material"],
                "shape": obj["shape"],
            },
        })

    scene["objects"] = new_objects
    # Drop scene-level entries that are not part of the output format.
    # pop(..., None) tolerates scenes that lack one of them.
    for unused_key in ("relationships", "directions", "split"):
        scene.pop(unused_key, None)
    scene["image_filename"] = scene["image_filename"].replace("CLEVR", "clevr")
    return scene

In [19]:
# Root folder of the scene files -- adjust to your local setup.
# expanduser("~") is used rather than os.getenv('HOME'): the latter returns
# None when HOME is unset, which makes os.path.join raise TypeError.
base_dir = os.path.join(os.path.expanduser("~"), "Corpora/CLEVR-simulated/scenes/")
input_dir = os.path.join(base_dir, "val")
output_dir = os.path.join(base_dir, "t-val")
os.makedirs(output_dir, exist_ok=True)

# Transform every JSON scene found under input_dir and write the result, with a
# lower-cased "clevr" prefix, flat into output_dir.
for root, dirs, files in os.walk(input_dir):
    dirs.sort()  # in-place sort makes os.walk descend in a deterministic order
    for fname in sorted(files):
        # Skip stray non-scene files (e.g. .DS_Store) that json.load would reject.
        if not fname.endswith(".json"):
            continue

        with open(os.path.join(root, fname)) as f:
            scene = transform_data(json.load(f))

        with open(os.path.join(output_dir, fname.replace("CLEVR", "clevr")), "w") as f:
            json.dump(scene, f)