# summary of this notebook
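- Three helpers for reading scene graphs: `get_scene_graph` (the objects of one scene), `get_object_names`, and `get_object_count`
- Interactive frequency plot of the number of objects per scene, filterable by object count and by how often a single object name may repeat within a scene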

In [1]:
import json
import matplotlib.pyplot as plt
from collections import Counter
from ipywidgets import interact, IntSlider
import numpy as np

In [2]:
def read_json(split):
    """Load the scene-graph JSON file for one dataset split.

    Args:
        split: Split name interpolated into the file name, e.g. ``"train"``
            reads ``../data/sceneGraphs/train_sceneGraphs.json``.

    Returns:
        The parsed JSON content, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If no file exists for ``split`` (raised by ``open``).
    """
    path = f"../data/sceneGraphs/{split}_sceneGraphs.json"
    # Decode as UTF-8 explicitly instead of relying on the platform's
    # locale-dependent default encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

# scene_id must be a str: JSON object keys are always loaded as strings
def get_scene_graph(scene_id):
    """Return the ``'objects'`` entry of scene ``scene_id``.

    Looks the scene up in the notebook-level dictionary ``d``.
    """
    scene_entry = d[scene_id]
    return scene_entry['objects']
def get_object_names(scenegraph):
    """Collect the ``'name'`` of every object in ``scenegraph``, in iteration order."""
    return [obj['name'] for obj in scenegraph.values()]
def get_object_count(scenegraph):
    """Return how many objects ``scenegraph`` holds."""
    n_objects = len(scenegraph)
    return n_objects

# Define a function to update the plot dynamically
def update_plot(min_value, max_value, max_duplicates):
    """Redraw the frequency plot of objects-per-scene for the selected filters.

    Reads the notebook globals ``counts`` (scene id -> object count) and
    ``quantities`` (all object counts), and calls ``get_max_duplicates`` for
    every scene whose object count lies inside the selected range.

    Args:
        min_value: Smallest object count a scene may have to be kept.
        max_value: Largest object count a scene may have to be kept.
        max_duplicates: Largest allowed repetition count of a single object
            name within a kept scene.
    """
    # The cheap range test comes first so the per-scene duplicate scan only
    # runs for scenes that are already inside the selected range.
    filtered_quantities = [
        val
        for key, val in counts.items()
        if min_value <= val <= max_value and get_max_duplicates(key) <= max_duplicates
    ]

    # Count the frequency of each object count
    value_counts = Counter(filtered_quantities)
    x_values = list(value_counts.keys())
    y_values = list(value_counts.values())

    # Share of scenes that survive the filters, plus summary statistics
    total_quantities = len(quantities)
    filtered_total = len(filtered_quantities)
    # Guard against an empty dataset instead of raising ZeroDivisionError.
    remaining_percentage = (filtered_total / total_quantities) * 100 if total_quantities else 0.0
    mean_value = np.mean(filtered_quantities) if filtered_quantities else 0
    median_value = np.median(filtered_quantities) if filtered_quantities else 0

    # A fresh figure is created on every call, so there is nothing to clear.
    # Calling plt.clf() first would itself create an extra, empty figure
    # whenever no figure is current.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x_values, y_values, color='blue')

    # Annotate with remaining percentage, number of kept scenes, mean and median
    ax.text(0.95, 0.95, f'Remaining: {remaining_percentage:.2f}%\nTotal Scenes: {filtered_total}\nMean Value: {mean_value:.2f}\nMedian Value: {median_value:.2f}',
            ha='right', va='top', transform=ax.transAxes, fontsize=12, color='red')

    ax.set_xlabel('Number of objects')
    ax.set_ylabel('Frequency')
    ax.set_title(f'Frequency Plot of Object Quantities ({min_value} <= values <= {max_value})')
    plt.show()
    
# Highest number of times a single object name is repeated within a scene
def get_max_duplicates(key):
    """Return the highest occurrence count of any object name in scene ``key``.

    A scene without objects yields 0.
    """
    object_names = get_object_names(get_scene_graph(key))
    # Tally how often each name occurs, then take the largest tally.
    name_frequencies = Counter(object_names)
    return max(name_frequencies.values(), default=0)

In [3]:
# Split names used to build the scene-graph file names.
train_split, val_split = "train", "val"

In [4]:
# Load the training scene graphs and record the object count of every scene.
d = read_json(train_split)
counts = {
    str(scene_id): get_object_count(get_scene_graph(scene_id))
    for scene_id in d
}
print(len(d))

74942


Val_scenes: 10696
Train_scenes: 74942
Total_scenes: 85638

Total images in image: 148854

In [5]:
# Object counts of all scenes, plus a view of the scene ids they belong to.
quantities = [*counts.values()]
keys = counts.keys()

In [6]:
# Create sliders for the dynamic range selection
max_objects = max(quantities)  # largest object count of any scene
interact(update_plot,
         min_value=IntSlider(min=0, max=max_objects, step=1, value=0),
         max_value=IntSlider(min=0, max=max_objects, step=1, value=max_objects),
         # A name cannot repeat more often than a scene has objects, so the
         # largest object count (not the number of scenes) bounds this slider.
         # NOTE(review): the initial value 0 keeps only scenes without any
         # objects until the slider is moved - confirm this default is intended.
         max_duplicates=IntSlider(min=0, max=max_objects, step=1, value=0));

interactive(children=(IntSlider(value=0, description='min_value', max=126), IntSlider(value=126, description='…