In [5]:
from glob import glob 
file_locs = "../datacrawl/crawled_data/*/*.json"
import json

def read_json_file(file_path):
    """Load a JSON file and return the decoded value.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default text encoding, which is not UTF-8 everywhere.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return data
    except FileNotFoundError as e:
        # Chain the original error so the underlying OS details stay visible.
        raise FileNotFoundError(f"The file {file_path} was not found") from e
    except json.JSONDecodeError as e:
        raise json.JSONDecodeError(f"Error parsing JSON file: {str(e)}", e.doc, e.pos) from e


In [101]:
# Show the first path matched by the file_locs pattern; the [0] index raises
# IndexError when nothing matches.
glob(file_locs)[0]

'../datacrawl/crawled_data/61723086/data.json'

In [102]:
# Top-level keys of the loaded replay dict.
# NOTE(review): no assignment to data_1 is visible above this cell (the first
# visible one is in a later cell), so this presumably relies on leftover kernel
# state — confirm it survives Restart & Run All.
data_1.keys()

dict_keys(['configuration', 'description', 'id', 'info', 'name', 'rewards', 'schema_version', 'specification', 'statuses', 'steps', 'title', 'version'])

In [103]:
def get_json_schema(data):
    """Extract a structural schema from decoded JSON data.

    Args:
        data: Any value produced by ``json.load`` (dict, list or scalar).

    Returns:
        - dict input: a dict with the same keys whose values are sub-schemas.
        - list input: a list of sub-schemas. An empty list yields
          ``["array (empty)"]``. A list whose items all have exactly the same
          type yields a single-element list holding the first item's schema.
          A mixed-type list yields one entry per distinct item schema, in
          order of first appearance.
        - anything else: the type name as a string (e.g. ``"int"``).
    """
    if isinstance(data, dict):
        return {key: get_json_schema(value) for key, value in data.items()}

    if isinstance(data, list):
        if not data:
            return ["array (empty)"]

        # Compare exact types: isinstance() treats bool as int, which made the
        # result depend on item order ([1, True] vs [True, 1]).
        first_type = type(data[0])
        if all(type(item) is first_type for item in data):
            # Homogeneous list: the first item stands in for all of them.
            return [get_json_schema(data[0])]

        # Mixed list: keep each distinct schema once. Schemas are dicts/lists,
        # hence unhashable, so deduplicate by equality instead of with a set.
        unique_schemas = []
        for item in data:
            item_schema = get_json_schema(item)
            if item_schema not in unique_schemas:
                unique_schemas.append(item_schema)
        return unique_schemas

    return type(data).__name__

def print_schema(schema, indent=0):
    """Format a schema as an indented, JSON-like string and return it.

    Args:
        schema: A dict, list or other value describing a structure.
        indent: Number of spaces the current nesting level is indented by.

    Returns:
        The formatted text. Dict entries and multi-item list entries are
        written one per line, each followed by a comma; a single-item list is
        written inline as ``[item]``; any other value is wrapped in double
        quotes.
    """
    if isinstance(schema, dict):
        entry_pad = " " * (indent + 2)
        entries = [
            f'{entry_pad}"{key}": {print_schema(value, indent + 2)},\n'
            for key, value in schema.items()
        ]
        return "{\n" + "".join(entries) + " " * indent + "}"

    if isinstance(schema, list):
        if len(schema) == 1:
            # Single-element lists stay on one line at the current indent.
            return "[" + print_schema(schema[0], indent) + "]"
        row_pad = " " * (indent + 2)
        rows = [f"{row_pad}{print_schema(item, indent + 2)},\n" for item in schema]
        return "[\n" + "".join(rows) + " " * indent + "]"

    return f'"{schema}"'



# Derive the structural schema of data_1 and print its formatted text form.
schema = get_json_schema(data_1)
print(print_schema(schema))

{
  "configuration": {
    "actTimeout": "int",
    "env_cfg": {
      "map_height": "int",
      "map_width": "int",
      "match_count_per_episode": "int",
      "max_steps_in_match": "int",
      "max_units": "int",
      "num_teams": "int",
      "unit_move_cost": "int",
      "unit_sap_cost": "int",
      "unit_sap_range": "int",
      "unit_sensor_range": "int",
    },
    "episodeSteps": "int",
    "runTimeout": "int",
    "seed": "int",
  },
  "description": "str",
  "id": "str",
  "info": {
    "EpisodeId": "int",
    "LiveVideoPath": "NoneType",
    "TeamNames": ["str"],
  },
  "name": "str",
  "rewards": ["int"],
  "schema_version": "int",
  "specification": {
    "action": {
      "default": "int",
      "description": "str",
      "type": "str",
    },
    "agents": ["int"],
    "configuration": {
      "actTimeout": {
        "default": "int",
        "description": "str",
        "minimum": "int",
        "type": "str",
      },
      "env_cfg": {
        "description": 

In [107]:
# Top-level keys of data_1 (repeats the inspection done in an earlier cell).
data_1.keys()

dict_keys(['configuration', 'description', 'id', 'info', 'name', 'rewards', 'schema_version', 'specification', 'statuses', 'steps', 'title', 'version'])

In [123]:
# Load one crawled replay file and keep its 'steps' entry for the cells below.
data_1 = read_json_file('../datacrawl/crawled_data/58304511/data.json')
steps = data_1["steps"]

In [124]:
# Number of entries in steps.
len(steps)

506

In [None]:
# Number of entries inside the second element of steps.
len(steps[1])

2

In [134]:
# Keys of the first entry of steps[1].
steps[1][0].keys()

dict_keys(['action', 'info', 'observation', 'reward', 'status'])

In [140]:
# Replay payload stored under 'info' of the first entry of steps[1].
replay = steps[1][0]["info"]["replay"]

In [148]:
# Print each key of the first entry in replay["observations"], one per line.
for i in replay["observations"][0].keys():
    print(i)

energy_nodes
map_features
match_steps
relic_node_configs
relic_nodes
steps
team_points
team_wins
units
units_mask
vision_power_map


In [149]:
# First entry of replay["observations"]; the inspection cells that follow read
# from it. Despite the plural name this is a single entry, not the whole list.
observations = replay["observations"][0]

In [153]:
# Inspect the 'energy_nodes' entry of the observation.
observations["energy_nodes"]

[[7, 10], [4, 14], [13, 16], [9, 19]]

In [155]:
# Keys available under 'map_features'.
observations["map_features"].keys()

dict_keys(['energy', 'tile_type'])

In [159]:
# Pull the two 'map_features' entries into their own names.
# NOTE(review): `energy` is also assigned by another cell in this notebook (from
# the 'units' energy data), so the name refers to different data depending on
# which cell ran last.
energy = observations["map_features"]["energy"]
tile_type = observations["map_features"]["tile_type"]

In [167]:
# Keys available under 'units'.
observations["units"].keys()

dict_keys(['energy', 'position'])

In [172]:
# Inspect the 'vision_power_map' entry.
# NOTE(review): this displays the entire nested list; showing a shape or a small
# slice instead would keep the saved output short.
observations["vision_power_map"]

[[[3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 0, 0, 0, -3, 0, 0, 0, 0],
  [1,
   1,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   -3,
   -3,
   -3,
   -3,
   0,
   0,
   0,
   0,
   -3,
   0,
   0,
   0,
   0],
  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0],
  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0,
   0,
   -3,
   -3,
   -3,
   -3,
   -3,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  [0,
   0,
   -3,
   -3,
   -3,
   -3,
   -3,
   -3,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  [0,
   0,
   0,
   -3,
   -3,
   -3,
   -3,
   0,
   0,
   0,
   0,
   0,
   0,
  

In [170]:
# Inspect the 'units_mask' entry (the recorded output is two lists of booleans).
observations["units_mask"]

[[True,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False],
 [True,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False]]

In [180]:
# Display sample_input.
# NOTE(review): sample_input is not assigned in any visible cell, so this depends
# on kernel state from elsewhere — confirm where it comes from.
sample_input

{'step': 2,
 'obs': {'units': {'position': [[[-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1]],
    [[22, 23],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1],
     [-1, -1]]],
   'energy': [[-1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0],
    [99.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0,
     -1.0]]},
  'units_mask': [[False,
    False,
    False,
    False,
    False,
    False,
    False,
    False,
    False

In [184]:
# Print each key of sample_input["obs"], one per line.
for i in sample_input["obs"].keys():
    print(i)

units
units_mask
sensor_mask
map_features
relic_nodes
relic_nodes_mask
team_points
team_wins
steps
match_steps


In [50]:
# Keys under 'info' of steps[0].
# NOTE(review): this indexes steps[0] with a string key, whereas other cells
# index steps[i][j] first; the recorded output presumably came from a different
# binding of `steps` (note the lower execution count) — confirm.
steps[0]["info"].keys()

dict_keys(['replay'])

In [51]:
# Keys of steps[1].
# NOTE(review): calls .keys() directly on steps[1], while other cells treat
# steps[1] as a sequence (steps[1][0]); presumably run against a different
# binding of `steps` — confirm.
steps[1].keys()

dict_keys(['action', 'info', 'observation', 'reward', 'status'])

In [None]:
# Number of entries in steps[0].
len(steps[0])

2

In [53]:
# NOTE(review): this import sits mid-notebook; it would normally live in the
# imports cell at the top.
import numpy as np
# Convert the 'units' energy data of the first replay observation to an array
# and show its shape.
# NOTE(review): steps[0] is indexed with a string key here, unlike the
# steps[i][j] access used elsewhere, and `energy` reuses a name that another
# cell binds to the map_features energy — confirm both.
energy = steps[0]["info"]["replay"]["observations"][0]["units"]["energy"] 
energy = np.array(energy)
energy.shape

(2, 16, 1)

In [46]:
# Length of the first dimension of `energy`.
len(energy)

2

In [10]:
# Inspect the top-level 'rewards' entry of the replay.
data_1["rewards"]

[0, 5]

In [None]:
import torch 
def process_game_json(self, game_data):
    """
    Process a complete game JSON into observation tensors and metrics.

    Args:
        self: Object providing ``_process_observation`` and
            ``_calculate_metrics``. This function is written as a method even
            though it is defined at module level here.
        game_data: Mapping with a 'steps' entry. Each step is a sequence; steps
            with fewer than two entries are skipped. The first two entries of
            every other step must have 'observation' and 'info' keys.

    Returns:
        A ``(team_0_obs, team_1_obs, metrics)`` tuple:
        - team_0_obs: per-step results for entry 0, combined with ``torch.stack``
        - team_1_obs: per-step results for entry 1, combined with ``torch.stack``
        - metrics: whatever ``self._calculate_metrics(game_data)`` returns

    Raises:
        Whatever ``torch.stack`` raises when no step was processed (empty list)
        or when the per-step results cannot be stacked.
    """
    team_0_steps = []
    team_1_steps = []
    for step_data in game_data['steps']:
        if len(step_data) < 2:  # Sometimes steps can be empty
            continue

        # NOTE(review): arguments are passed as (observation, info). The
        # module-level _process_observation in this notebook is declared as
        # (info_data, obs_data) — confirm which order self's method expects.
        team_0_steps.append(
            self._process_observation(step_data[0]['observation'], step_data[0]['info'])
        )
        team_1_steps.append(
            self._process_observation(step_data[1]['observation'], step_data[1]['info'])
        )

    # Stack all observations into a single tensor per team
    team_0_obs = torch.stack(team_0_steps)
    team_1_obs = torch.stack(team_1_steps)

    # Calculate metrics
    metrics = self._calculate_metrics(game_data)

    # The original ended with a bare `return`, so callers always received None.
    return team_0_obs, team_1_obs, metrics

In [76]:
# Use the whole loaded replay dict as the game to process.
game_data = data_1

In [81]:
# game_data#[0]["replay"]

In [85]:
def _process_observation(info_data,obs_data):
    """Convert a single observation into tensor format"""
    replay_data = info_data['replay']['observations'][0]
    
    # Process map features
    map_features = {
        'energy': torch.tensor(replay_data['map_features']['energy'], dtype=torch.float32),
        'tile_type': torch.tensor(replay_data['map_features']['tile_type'], dtype=torch.float32)
    }
    
    # Process vision power map
    vision_power = torch.tensor(replay_data['vision_power_map'], dtype=torch.float32)
    
    # Process units mask
    units_mask = torch.tensor(replay_data['units_mask'], dtype=torch.float32)
    
    # Process units
    units = {
        'position': torch.tensor(replay_data['units']['position'], dtype=torch.float32),
        'energy': torch.tensor(replay_data['units']['energy'], dtype=torch.float32)
    }
    
    # Process nodes
    energy_nodes = torch.tensor(replay_data['energy_nodes'], dtype=torch.float32)
    relic_nodes = torch.tensor(replay_data['relic_nodes'], dtype=torch.float32)
    relic_configs = torch.tensor(replay_data['relic_node_configs'], dtype=torch.float32)
    
    return {
        'map_features': map_features,
        'vision_power_map': vision_power,
        'units_mask': units_mask,
        'units': units,
        'energy_nodes': energy_nodes,
        'relic_nodes': relic_nodes,
        'relic_node_configs': relic_configs
    }

In [99]:
# Decode the JSON text stored under observation['obs'] of the second entry of
# step_data.
# NOTE(review): step_data is presumably the loop variable left over from the
# processing-loop cell — confirm.
json.loads(step_data[1]["observation"]["obs"])

{'units': {'position': [[[-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1]],
   [[-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1],
    [-1, -1]]],
  'energy': [[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]]},
 'units_mask': [[False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False],
  [False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False,
   False]],
 'sensor_mask': [[False,
   False,
   Fal

In [86]:
def _stack_nested(items):
    """Stack a list of tensors, or of equally-keyed (nested) dicts of tensors, along a new dim 0."""
    first = items[0]
    if isinstance(first, dict):
        return {key: _stack_nested([item[key] for item in items]) for key in first}
    return torch.stack(items)


match_observations = []
for step_data in game_data['steps']:
    if len(step_data) < 2:  # Sometimes steps can be empty
        continue

    # _process_observation is declared as (info_data, obs_data) and looks up
    # 'replay' in info_data. Passing (observation, info) positionally is what
    # produced the KeyError: 'replay', so bind the arguments by keyword.
    # NOTE(review): this notebook only shows 'replay' under entry 0's info;
    # confirm entry 1's info carries it too.
    obs_0 = _process_observation(info_data=step_data[0]['info'], obs_data=step_data[0]['observation'])
    obs_1 = _process_observation(info_data=step_data[1]['info'], obs_data=step_data[1]['observation'])
    match_observations.append((obs_0, obs_1))

if not match_observations:
    raise ValueError("game_data['steps'] contains no step with two entries to process")

# _process_observation returns (nested) dicts of tensors, which torch.stack
# cannot take directly, so stack key by key into one tensor per field per team.
# Assumes each field has the same shape in every step — TODO confirm.
team_0_obs = _stack_nested([obs[0] for obs in match_observations])
team_1_obs = _stack_nested([obs[1] for obs in match_observations])

# Calculate metrics
metrics = self._ca

KeyError: 'replay'