In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| default_exp helpers

In [None]:
#| export
import functools
import json
import math
import time
from collections import Counter
from typing import Any, Dict, List, Tuple

import numpy as np


## ListChecker

In [None]:
#| export
class ListChecker:
    """Static helpers that report whether a sequence holds one repeated value."""

    @staticmethod
    def check_list_unchanged(float_list, rel_tol=1e-6, abs_tol=0.0):
        """Return True when every float in ``float_list`` is close to the first one.

        Each later element is compared against element 0 with ``math.isclose``,
        forwarding ``rel_tol`` and ``abs_tol``. A falsy (e.g. empty) input is
        reported as unchanged.
        """
        if not float_list:
            return True
        reference = float_list[0]
        return all(
            math.isclose(candidate, reference, rel_tol=rel_tol, abs_tol=abs_tol)
            for candidate in float_list[1:]
        )

    @staticmethod
    def check_integer_list_unchanged(int_list):
        """Return True when no item of ``int_list`` differs (``!=``) from the first.

        A falsy (e.g. empty) input is reported as unchanged.
        """
        if not int_list:
            return True
        reference = int_list[0]
        return not any(candidate != reference for candidate in int_list[1:])


In [None]:

# Demo: both checkers should report True for the sample lists below.
float_list = [1.00000001, 1.00000002, 1.00000003]  # values differ by far less than rel_tol=1e-6
int_list = [1, 1, 1]

floats_unchanged = ListChecker.check_list_unchanged(float_list)
ints_unchanged = ListChecker.check_integer_list_unchanged(int_list)

print(floats_unchanged)  # expected: True
print(ints_unchanged)  # expected: True


True
True


In [None]:
#| export


class JSONDataManager:
    """Load a JSON document from disk and keep the parsed result in ``self.data``.

    Subclasses can wrap their methods with ``JSONDataManager.timing_decorator``
    to optionally print how long each call took.
    """

    def __init__(self, path: str, show_timing: bool = False):
        """Read and parse the JSON file at ``path``.

        Args:
            path: Location of the JSON file to load.
            show_timing: When True, methods wrapped with ``timing_decorator``
                print their execution time after each call.
        """
        # Assign the flag before loading so it already exists if a subclass
        # overrides ``load_json`` with a timed method.
        self.show_timing = show_timing
        self.data = self.load_json(path)

    def load_json(self, path: str) -> Dict:
        """Return the parsed content of the JSON file at ``path``.

        The file is decoded as UTF-8 explicitly instead of relying on the
        platform's default text encoding.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the content is not valid JSON.
        """
        with open(path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def timing_decorator(method):
        """Wrap ``method`` so it can report its own execution time.

        The wrapper always runs ``method`` and returns its result. When the
        instance's ``show_timing`` attribute is truthy it also prints the
        elapsed time. ``functools.wraps`` keeps the wrapped method's name and
        docstring visible on the wrapper.
        """
        @functools.wraps(method)
        def timed_method(self, *args, **kwargs):
            # perf_counter is a monotonic, high-resolution clock meant for
            # measuring short durations.
            start_time = time.perf_counter()
            result = method(self, *args, **kwargs)
            elapsed = time.perf_counter() - start_time
            if self.show_timing:
                print(f"Execution time of {method.__name__}: {elapsed:.4f} seconds")
            return result
        return timed_method


class ChallengesDataManager(JSONDataManager):
    """Queries over a challenges JSON document held in ``self.data``.

    The methods read ``self.data`` as a mapping from key to an entry whose
    ``'train'`` value is a sequence of ``{'input': ..., 'output': ...}`` pairs.
    "Size" means ``len()`` of an input/output (its number of top-level
    elements) unless a method says otherwise.
    """

    @JSONDataManager.timing_decorator
    def get_all_keys(self) -> List[str]:
        """Return every top-level key of the loaded data as a list."""
        return list(self.data.keys())

    @JSONDataManager.timing_decorator
    def count_all_keys(self) -> int:
        """Return the number of top-level keys in the loaded data."""
        return len(self.data)

    @JSONDataManager.timing_decorator
    def get_keys_with_equal_size_input_output(self) -> Tuple[List[str], int]:
        """Return keys whose every train pair has ``len(input) == len(output)``.

        Only the top-level length is compared, not the full shape. Entries with
        no train pairs are included (the condition holds vacuously).

        Returns:
            ``(matching_keys, number_of_matching_keys)``.
        """
        equal_keys = [
            key for key, value in self.data.items()
            if all(len(pair['input']) == len(pair['output']) for pair in value['train'])
        ]
        return equal_keys, len(equal_keys)

    @JSONDataManager.timing_decorator
    def get_keys_with_inconsistent_input_output_sizes(self) -> Tuple[List[str], int]:
        """Return keys whose inputs are consistently shorter than their outputs.

        Despite the method name, a key is selected only when all train inputs
        share one length, all train outputs share one length, and the input
        length is strictly smaller than the output length.

        Returns:
            ``(matching_keys, number_of_matching_keys)``.
        """
        inconsistent_keys = []
        for key, value in self.data.items():
            input_sizes = [len(pair['input']) for pair in value['train']]
            output_sizes = [len(pair['output']) for pair in value['train']]
            # The set checks fail for an empty train list, so index 0 is only
            # read when at least one pair exists.
            if len(set(input_sizes)) == 1 and len(set(output_sizes)) == 1 and input_sizes[0] < output_sizes[0]:
                inconsistent_keys.append(key)
        return inconsistent_keys, len(inconsistent_keys)

    @JSONDataManager.timing_decorator
    def get_keys_with_variable_input_sizes(self) -> Tuple[List[str], int]:
        """Return keys whose train inputs do not all have the same length.

        Returns:
            ``(matching_keys, number_of_matching_keys)``.
        """
        variable_keys = [
            key for key, value in self.data.items()
            if len({len(pair['input']) for pair in value['train']}) > 1
        ]
        return variable_keys, len(variable_keys)

    @JSONDataManager.timing_decorator
    def get_input_array_histogram(self) -> Dict[int, int]:
        """Return ``{number_of_train_pairs: number_of_keys_with_that_many}``."""
        counts = Counter(len(value['train']) for value in self.data.values())
        return dict(counts)

    @JSONDataManager.timing_decorator
    def get_data_for_key(self, key: str) -> Dict[str, Any]:
        """Return the entry stored under ``key``, or ``{}`` if the key is absent."""
        return self.data.get(key, {})

    @JSONDataManager.timing_decorator
    def get_arrays_for_key(self, key: str, array_type: str) -> List:
        """Return the ``array_type`` arrays of the train data stored under ``key``.

        Two layouts of ``entry['train']`` are supported:

        * a sequence of pairs (the layout every other method of this class
          reads): the ``array_type`` value of each pair that has it is
          collected, in order;
        * a mapping keyed by array type: ``train[array_type]`` is returned.

        Args:
            key: Top-level key of the entry to read.
            array_type: Name of the array to extract, e.g. ``'input'`` or
                ``'output'``.

        Returns:
            The matching arrays, or ``[]`` when the key, its ``'train'`` part or
            the requested array type is missing.
        """
        entry = self.data.get(key)
        if not isinstance(entry, dict) or 'train' not in entry:
            return []
        train = entry['train']
        if isinstance(train, dict):
            return train.get(array_type, [])
        # List-of-pairs layout: a membership test on the list itself can never
        # match a string, so look inside each pair instead.
        return [
            pair[array_type]
            for pair in train
            if isinstance(pair, dict) and array_type in pair
        ]

    @JSONDataManager.timing_decorator
    def get_largest_array_size(self) -> Tuple[str, int]:
        """Return the key holding the largest train array and that array's size.

        Size here is ``np.array(...).size``, i.e. the total element count of an
        input or output. On ties the first key reaching the maximum wins.

        Returns:
            ``(key, size)``; ``('', 0)`` when no array has a positive size.
        """
        max_size = 0
        max_key = ''
        for key, value in self.data.items():
            input_sizes = [np.array(pair['input']).size for pair in value['train']]
            output_sizes = [np.array(pair['output']).size for pair in value['train']]

            # default=0 keeps entries without train pairs from raising.
            largest = max(max(input_sizes, default=0), max(output_sizes, default=0))
            if largest > max_size:
                max_size = largest
                max_key = key
        return max_key, max_size

    @JSONDataManager.timing_decorator
    def analyze_arrays(self) -> Dict[str, Any]:
        """Classify every key by how its train input/output sizes behave.

        Size here is ``np.array(...).size`` (total element count). Buckets:

        * ``"equal_input_output"``: all inputs share one size, all outputs share
          one size, and the two sizes are equal. Entries with no train pairs
          land here too, matching ``get_keys_with_equal_size_input_output``.
        * ``"consistent_but_different_sizes"``: sizes are constant across pairs
          but the input size differs from the output size.
        * ``"variable_output_sizes"``: everything else, i.e. the input sizes
          and/or the output sizes vary between pairs.

        Returns:
            ``{"analysis": {bucket: [keys]}, "counts": {bucket: len(keys)}}``.
        """
        analysis = {
            "equal_input_output": [],
            "consistent_but_different_sizes": [],
            "variable_output_sizes": []
        }
        for key, value in self.data.items():
            input_sizes = [np.array(pair['input']).size for pair in value['train']]
            output_sizes = [np.array(pair['output']).size for pair in value['train']]
            sizes_are_constant = len(set(input_sizes)) <= 1 and len(set(output_sizes)) <= 1
            if not sizes_are_constant:
                analysis["variable_output_sizes"].append(key)
            # Comparing one-element slices instead of index 0 avoids an
            # IndexError when an entry has no train pairs.
            elif input_sizes[:1] == output_sizes[:1]:
                analysis["equal_input_output"].append(key)
            else:
                analysis["consistent_but_different_sizes"].append(key)

        return {
            "analysis": analysis,
            "counts": {k: len(v) for k, v in analysis.items()}
        }


class SolutionsDataManager(JSONDataManager):
    """Queries over a solutions JSON document held in ``self.data``."""

    @JSONDataManager.timing_decorator
    def get_all_keys(self) -> List[str]:
        """Return every top-level key of the loaded data as a list."""
        return list(self.data.keys())

    @JSONDataManager.timing_decorator
    def count_all_keys(self) -> int:
        """Return the number of top-level keys in the loaded data."""
        return len(self.data)

    @JSONDataManager.timing_decorator
    def get_data_for_key(self, key: str) -> Dict[str, Any]:
        """Return the first element stored under ``key``.

        Returns ``{}`` when the key is absent or its value is empty.
        NOTE(review): the element is returned as-is, so the ``Dict`` annotation
        only holds if the stored elements are dicts -- confirm against the data.
        """
        data = self.data.get(key, [])
        return data[0] if data else {}

    @JSONDataManager.timing_decorator
    def get_arrays_for_key(self, key: str, array_type: str) -> List:
        """Return ``self.data[key][array_type]`` when the entry is a mapping.

        Returns ``[]`` when the key is absent, the entry is not a mapping, or
        the mapping has no ``array_type`` item. Checking the entry type first
        avoids string-indexing a non-mapping entry, which raises ``TypeError``.

        NOTE(review): ``get_data_for_key`` indexes the same entries with ``[0]``,
        i.e. treats them as sequences; for such entries this method returns
        ``[]``. Confirm whether the sequence itself should be returned instead.
        """
        entry = self.data.get(key)
        if not isinstance(entry, dict) or array_type not in entry:
            return []
        return entry[array_type]




In [None]:

# class JSONDataManager:
#     def __init__(self, path: str):
#         self.data = self.load_json(path)
    
#     def load_json(self, path: str) -> Dict:
#         with open(path, 'r') as file:
#             return json.load(file)
    
#     def timing_decorator(method):
#         def timed_method(*args, **kwargs):
#             start_time = time.time()
#             result = method(*args, **kwargs)
#             end_time = time.time()
#             print(f"Execution time of {method.__name__}: {end_time - start_time:.4f} seconds")
#             return result
#         return timed_method



In [None]:
#| export

# class ChallengesDataManager(JSONDataManager):
    
#     @JSONDataManager.timing_decorator
#     def get_all_keys(self) -> List[str]:
#         return list(self.data.keys())
    
#     @JSONDataManager.timing_decorator
#     def count_all_keys(self) -> int:
#         return len(self.data)
    
#     @JSONDataManager.timing_decorator
#     def get_keys_with_equal_size_input_output(self) -> Tuple[List[str], int]:
#         equal_keys = [
#             key for key, value in self.data.items()
#             if all(len(value['train'][iter]['input']) == len(value['train'][iter]['output']) for iter in range(len(value['train'])))
#         ]
#         return equal_keys, len(equal_keys)
    
#     @JSONDataManager.timing_decorator
#     def get_keys_with_inconsistent_input_output_sizes(self) -> Tuple[List[str], int]:
#         inconsistent_keys = []
#         for key, value in self.data.items():
#             input_sizes = [len(value['train'][iter]['input']) for iter in range(len(value['train']))]
#             output_sizes = [len(value['train'][iter]['output']) for iter in range(len(value['train']))]
#             if len(set(input_sizes)) == 1 and len(set(output_sizes)) == 1 and input_sizes[0] < output_sizes[0]:
#                 inconsistent_keys.append(key)
#         return inconsistent_keys, len(inconsistent_keys)
    
#     @JSONDataManager.timing_decorator
#     def get_keys_with_variable_input_sizes(self) -> Tuple[List[str], int]:
#         variable_keys = [
#             key for key, value in self.data.items()
#             if len(set(len(value['train'][iter]['input']) for iter in range(len(value['train'])))) > 1
#             # if len(set(len(arr) for arr in value['train']['input'])) > 1
#         ]
#         return variable_keys, len(variable_keys)
    
#     @JSONDataManager.timing_decorator
#     def get_input_array_histogram(self) -> Dict[int, int]:
#         counts = Counter(len(value['train']) for value in self.data.values())
#         return dict(counts)
    
#     @JSONDataManager.timing_decorator
#     def get_data_for_key(self, key: str) -> Dict[str, Any]:
#         return self.data.get(key, {})
    
#     @JSONDataManager.timing_decorator
#     def get_arrays_for_key(self, key: str, array_type: str) -> List:
#         if key not in self.data or 'train' not in self.data[key] or array_type not in self.data[key]['train']:
#             return []
#         return self.data[key]['train'][array_type]
    



In [None]:
# #| export
# class SolutionsDataManager(JSONDataManager):
    
#     @JSONDataManager.timing_decorator
#     def get_all_keys(self) -> List[str]:
#         return list(self.data.keys())
    
#     @JSONDataManager.timing_decorator
#     def count_all_keys(self) -> int:
#         return len(self.data)
    
#     @JSONDataManager.timing_decorator
#     def get_data_for_key(self, key: str) -> Dict[str, Any]:
#         rtn = self.data.get(key, {})
#         return rtn[0]
    
#     @JSONDataManager.timing_decorator
#     def get_arrays_for_key(self, key: str, array_type: str) -> List:
#         if key not in self.data or array_type not in self.data[key]:
#             return []
#         return self.data[key][array_type]




In [None]:
#| gui
# Example usage
import os

# Folder holding the ARC JSON files. Set the ARC_DATA_DIR environment variable
# to point somewhere else; the default is the original local path.
data_dir = os.environ.get("ARC_DATA_DIR", "C:\\packages\\arc-prize-2024")

start_time = time.time()
challenges_manager = ChallengesDataManager(os.path.join(data_dir, "arc-agi_training_challenges.json"))
end_time = time.time()
print(f"Execution time of challenges load: {end_time - start_time:.4f} seconds")

start_time = time.time()
solutions_manager = SolutionsDataManager(os.path.join(data_dir, "arc-agi_training_solutions.json"))
end_time = time.time()
print(f"Execution time of solutions load: {end_time - start_time:.4f} seconds")

print(challenges_manager.get_all_keys())
print(challenges_manager.count_all_keys())
# Query once and reuse the (keys, count) tuple for all three prints.
gkweqsio = challenges_manager.get_keys_with_equal_size_input_output()
print(gkweqsio)
print(gkweqsio[0])
print(gkweqsio[1])
gkwiios = challenges_manager.get_keys_with_inconsistent_input_output_sizes()
print(gkwiios[0])
print(gkwiios[1])
gkwvts = challenges_manager.get_keys_with_variable_input_sizes()
print(gkwvts[0])
print(gkwvts[1])
hist = challenges_manager.get_input_array_histogram()
print(hist)
# The histogram values add up to the number of keys counted.
count = sum(hist.values())
print(count)
print('get_data_for_key',challenges_manager.get_data_for_key('007bbfb7'))
print(challenges_manager.get_arrays_for_key('007bbfb7', 'train'))
print(challenges_manager.get_arrays_for_key('007bbfb7', 'test'))

print('get_largest_array_size', challenges_manager.get_largest_array_size())
print('analyze_arrays', challenges_manager.analyze_arrays())


print('get_all_keys', solutions_manager.get_all_keys())
print(solutions_manager.count_all_keys())
get_data_for_key = solutions_manager.get_data_for_key('007bbfb7')
print('get_data_for_key', get_data_for_key)
print(solutions_manager.get_arrays_for_key('007bbfb7', 'output'))

Execution time of challenges load: 0.0970 seconds
Execution time of solutions load: 0.0040 seconds
['007bbfb7', '00d62c1b', '017c7c7b', '025d127b', '045e512c', '0520fde7', '05269061', '05f2a901', '06df4c85', '08ed6ac7', '09629e4f', '0962bcdd', '0a938d79', '0b148d64', '0ca9ddb6', '0d3d703e', '0dfd9992', '0e206a2e', '10fcaaa3', '11852cab', '1190e5a7', '137eaa0f', '150deff5', '178fcbfb', '1a07d186', '1b2d62fb', '1b60fb0c', '1bfc4729', '1c786137', '1caeab9d', '1cf80156', '1e0a9b12', '1e32b0e9', '1f0c79e5', '1f642eb9', '1f85a75f', '1f876c06', '1fad071e', '2013d3e2', '2204b7a8', '22168020', '22233c11', '2281f1f4', '228f6490', '22eb0ac0', '234bbc79', '23581191', '239be575', '23b5c85d', '253bf280', '25d487eb', '25d8a9c8', '25ff71a9', '264363fd', '272f95fa', '27a28665', '28bf18c6', '28e73c20', '29623171', '29c11459', '29ec7d0e', '2bcee788', '2bee17df', '2c608aff', '2dc579da', '2dd70a9a', '2dee498d', '31aa019c', '321b1fc6', '32597951', '3345333e', '3428a4f5', '3618c87e', '3631a71a', '363442ee', 

In [None]:
#| hide
# Run nbdev's export step for this notebook.
import nbdev

nbdev.nbdev_export()