In [None]:
import json
import numpy as np

# Scan the synthetic "move right" training challenges and report the largest
# number of train pairs in any task plus the largest input-grid height and width.
# Only the "input" grid of each train pair is measured; "output" grids and
# test pairs are not inspected here.
with open("data/move_right/arc-synth_move_right_training_challenges.json", "r") as f:
  tasks = json.load(f)

# json.load has already read the whole file, so the scan runs after the handle
# is closed instead of keeping it open for the duration of the loop.
max_height = 0
max_width = 0
max_count = 0
for task in tasks.values():
  train_pairs = task["train"]
  max_count = max(max_count, len(train_pairs))
  for pair in train_pairs:
    # Named `input_grid` rather than `input`: a cell-level `input` variable
    # would replace the builtin `input()` for every later cell in the kernel.
    input_grid = np.array(pair["input"])
    # The two-way unpack assumes each grid is a rectangular list of rows (2-D).
    height, width = input_grid.shape
    max_height = max(max_height, height)
    max_width = max(max_width, width)

# Printed order: max train pairs per task, max input height, max input width.
print(max_count, max_height, max_width)
      


In [1]:
from arc_prize.env import modal_app
from arc_prize.model import ARCTransformer
from arc_prize.train import train_arc_transformer
from arc_prize.vis import  visualize_tensors
import torch
from torch.utils.data import DataLoader
from arc_prize.data import ARCDataset, ARCDatasetConfig, collate_arc_fn

# Hyperparameters
d_model = 32
num_layers = 3
dim_feedforward = 128
max_grid_size = 10 # 30
num_heads = 4
max_context_pairs = 4 # 10
batch_size = 20
num_epochs = 5
num_colors = 10
learning_rate = 1e-4
dropout = 0.1

# One dataset config is shared by the training and evaluation splits.
synth_arc_dataset_config = ARCDatasetConfig(max_grid_size=max_grid_size, max_train_grids=max_context_pairs, color_offset=1)

train_dataset = ARCDataset("data/move_right/arc-synth_move_right_training_challenges.json", "data/move_right/arc-synth_move_right_training_solutions.json", config=synth_arc_dataset_config)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_arc_fn, num_workers=0)

val_dataset = ARCDataset("data/move_right/arc-synth_move_right_evaluation_challenges.json", "data/move_right/arc-synth_move_right_evaluation_solutions.json", config=synth_arc_dataset_config)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_arc_fn, num_workers=0)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ARCTransformer(d_model=d_model, num_heads=num_heads, num_layers=num_layers, d_ff=dim_feedforward, grid_dim=max_grid_size, num_colors=num_colors, num_train_pairs=max_context_pairs, dropout=dropout).to(device)

model_file_name = "synth_transformer_2.pth"
# To resume from a previously saved checkpoint, uncomment:
# if model_file_name is not None:
#     state_dict = torch.load(model_file_name)
#     model.load_state_dict(state_dict)

# Train the model
# `modal_app.run()` only opens the app context; the remote function is invoked
# directly with `.remote(...)` inside it. The result of `.remote(...)` must not
# be passed to a second, nested `modal_app.run(...)` call.
# NOTE(review): the recorded run of this cell ended in a DeserializationError
# while the result was loaded locally. If the remote function returns tensors
# on a device this machine lacks, it may need to move the model to CPU before
# returning -- confirm in arc_prize.train.
with modal_app.run():
    trained_model, history = train_arc_transformer.remote(model, train_loader, val_loader, num_epochs, learning_rate)

# Local (non-Modal) alternative:
# train_arc_transformer(model=model, train_loader=train_loader, val_loader=val_loader, num_epochs=num_epochs, learning_rate=learning_rate)

# Save the trained model
torch.save(trained_model.state_dict(), model_file_name)

# The remote call trains a serialized copy, so the local `model` still holds
# its initial weights. Load the trained weights before evaluating; otherwise
# the visualizations below would show the untrained network.
model.load_state_dict(trained_model.state_dict())
model.eval()

# Evaluate one task at a time so each visualization shows a single sample.
eval_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=collate_arc_fn, num_workers=0)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for batch in eval_loader:
        grids, grid_masks, output_grid = [item.to(device) for item in batch]

        predictions = model(grids, grid_masks)

        # argmax over the last dimension picks the predicted class per cell;
        # the clamp keeps the indices within [0, num_colors].
        clamped_predictions = torch.clamp(torch.argmax(predictions, dim=-1), min=0, max=num_colors)

        visualize_tensors(grids.squeeze(0), output_grid.squeeze(0), clamped_predictions.squeeze(0))

print("Training completed and model saved.")

<modal.app.App object at 0x1092b7c20>


/opt/homebrew/Caskroom/miniconda/base/lib/python3.12/contextlib.py:210: DeprecationError: 2024-07-18: 
Note that output will soon not be be printed with `app.run`.

If you want to print output, use `modal.enable_output()`:

```python
with modal.enable_output():
    with app.run():
        ...
```

use `app.run(..., show_progress=False)`.

  return await anext(self.gen)


Output()

Output()

Output()

DeserializationError: Encountered an error when deserializing an object in the local environment (see above for details).

In [None]:

# Disabled variant of the training cell, pointed at the data/arc-agi_* challenge
# and solution files instead of the synthetic move_right set.
# It reuses the hyperparameters and imports defined in the training cell. Unlike
# that cell it calls train_arc_transformer directly (no `.remote`), passes
# `device=device`, omits `dropout` from the ARCTransformer call, and saves to
# "arc_transformer_model.pth".
# arc_dataset_config = ARCDatasetConfig(max_grid_size=max_grid_size, max_train_grids=max_context_pairs, color_offset=1)

# train_dataset = ARCDataset("data/arc-agi_training_challenges.json", "data/arc-agi_training_solutions.json", config=arc_dataset_config)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_arc_fn, num_workers=0)

# val_dataset = ARCDataset("data/arc-agi_evaluation_challenges.json", "data/arc-agi_evaluation_solutions.json", config=arc_dataset_config)
# val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_arc_fn, num_workers=0)

# # Initialize model
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = ARCTransformer(d_model=d_model, num_heads=num_heads, num_layers=num_layers, d_ff=dim_feedforward, grid_dim=max_grid_size, num_colors=num_colors, num_train_pairs=max_context_pairs).to(device)

# # Train the model
# train_arc_transformer(model=model, train_loader=train_loader, val_loader=val_loader, num_epochs=num_epochs, learning_rate=learning_rate, device=device)

# model_file_name = "arc_transformer_model.pth"
# # Save the trained model
# torch.save(model.state_dict(), model_file_name)

# print("Training completed and model saved.")