<a href="https://colab.research.google.com/github/tridung682000/thinkable/blob/main/thinkable.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Utils
import random
import matplotlib.pyplot as plt
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from torchsummary import summary

def start_train(train_func, *args, **kwargs):
    for epoch in range(50):
        running_loss = 0.0
        print("epoch", epoch+1)
        i = 0
        train_func(*args, **kwargs)


In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

arc_prize_2025_path = kagglehub.competition_download('arc-prize-2025')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import json
from pathlib import Path

DATA_DIR = '/kaggle/input/arc-prize-2025/'
data = {}
with open(Path(DATA_DIR) / 'arc-agi_training_challenges.json') as f:
    train_challenges = json.load(f)
with open(Path(DATA_DIR) / 'arc-agi_training_solutions.json') as f:
    train_solutions = json.load(f)

with open(Path(DATA_DIR) / 'arc-agi_evaluation_challenges.json') as f:
    eval_challenges = json.load(f)
with open(Path(DATA_DIR) / 'arc-agi_evaluation_solutions.json') as f:
    eval_solutions = json.load(f)

with open(Path(DATA_DIR) / 'arc-agi_test_challenges.json') as f:
    test_challenges = json.load(f)

print(f"Training tasks: {len(train_challenges)}")
print(f"Training solutions: {len(train_solutions)}")
print(f"Evaluation tasks: {len(eval_challenges)}")
print(f"Evaluation solutions: {len(eval_solutions)}")
print(f"Test tasks: {len(test_challenges)}")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Training tasks: 1000
Training solutions: 1000
Evaluation tasks: 120
Evaluation solutions: 120
Test tasks: 240


In [None]:
# load data
import pandas as pd
for i in range(0, 1):
    key = list(train_challenges.keys())[i]
    print(train_challenges[key])
    print("----train----")
    print(train_solutions[key])
    print("----train----")

{'train': [{'input': [[7, 9], [4, 3]], 'output': [[7, 9, 7, 9, 7, 9], [4, 3, 4, 3, 4, 3], [9, 7, 9, 7, 9, 7], [3, 4, 3, 4, 3, 4], [7, 9, 7, 9, 7, 9], [4, 3, 4, 3, 4, 3]]}, {'input': [[8, 6], [6, 4]], 'output': [[8, 6, 8, 6, 8, 6], [6, 4, 6, 4, 6, 4], [6, 8, 6, 8, 6, 8], [4, 6, 4, 6, 4, 6], [8, 6, 8, 6, 8, 6], [6, 4, 6, 4, 6, 4]]}], 'test': [{'input': [[3, 2], [7, 8]]}]}
----train----
[[[3, 2, 3, 2, 3, 2], [7, 8, 7, 8, 7, 8], [2, 3, 2, 3, 2, 3], [8, 7, 8, 7, 8, 7], [3, 2, 3, 2, 3, 2], [7, 8, 7, 8, 7, 8]]]
----train----


# Import libs and utils

In [None]:
#
class ARCData(Dataset):
    def __init__(self, dataframe):
        self.task_name = dataframe["task_name"]
        self.input_values = dataframe["task_input"]
        self.task_output = databframe["task_output"]
        self.labels = dataframe["labels"]

    def __len__(self):
        return len(self.text)

    def __getitem__(self, idx):
        # train_challenges
        df = (
            pd.DataFrame(x)
            .transpose()
            .reset_index()
            .rename(columns={'index': 'task_id'})
            .explode('train')
            .reset_index(drop=True)
        )
        df_train = df.join(pd.json_normalize(df['train']))
        df_test = df.join(pd.json_normalize(df['test']))
        df_train = df_train.drop(columns="train")
        df_test = df_test.drop(columns="test")
        labels = self.labels.iloc[idx]

In [None]:
# custom ConvBlock
class ParallelConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ParallelConvBlock, self).__init__()

        self.conv1x3_1 = nn.Conv2d(in_channels, out_channels, kernel_size=(3, 1), padding=1)
        self.conv1x3_2 = nn.Conv2d(out_channels, out_channels, kernel_size=(3, 1), padding=1)
        self.conv3x1_1 = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 3), padding=1)
        self.conv3x1_2 = nn.Conv2d(out_channels, out_channels, kernel_size=(1, 3), padding=1)

        self.pool(x).view(-1)

    def forward(self, x):
        horizontal = self.conv1x3_1(x)
        horizontal = self.conv1x3_2(horizontal)

        vertical = self.conv3x1_1(x)
        vertical = self.conv3x1_2(vertical)

        out = torch.cat([horizontal, vertical], dim=1)
        out = self.bn(out)
        return out

# Discriminator

## data

In [None]:
## prepare train data
inputs = []
labels = []
discriminator_train_data = []
discriminator_test_data = []
# get the inputs; data is a list of [inputs, labels]
for task_name, task_data in train_challenges.items():
    for temp in task_data["train"]:
        for k, v in temp.items():
            # i_input = torch.nn.functional.normalize(torch.tensor([v]).float())
            i_input = torch.tensor([v]).float()/10
            i_label = torch.tensor([int(k == "output")])
            discriminator_train_data.append((i_input, i_label, task_name))
            # discriminator_train_data.append(i_label)
    for temp in task_data["test"]:
        for k, v in temp.items():
            i_input = torch.tensor([v])
            i_label = torch.tensor([int(k == "output")])
            discriminator_test_data.append(())
print(len(discriminator_train_data))
discriminator_train_data[0]



6464


(tensor([[[0.7000, 0.9000],
          [0.4000, 0.3000]]]),
 tensor([0]),
 '00576224')

## model

In [None]:
# create a pretrain model for discriminator
class Discriminator(nn.Module):
    """
        Discriminate if an image is a task input or a task output
        Must be fast train for each task
        NO LABEL
    """
    def __init__(self, img_channels):
        super(Discriminator, self).__init__()
        # self.block1 = ParallelConvBlock(img_channels, 32)
        # self.block2 = ParallelConvBlock(32, 64)

        self.cnn_block_s = nn.Sequential(
            nn.Conv2d(img_channels, 128, kernel_size=(3, 3), padding=1),
            nn.Conv2d(128, 256, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(256),
            nn.AdaptiveAvgPool2d((1))
        )
        self.cnn_block_w = nn.Sequential(
            nn.Conv2d(img_channels, 128, kernel_size=(1, 3), padding=1),
            nn.Conv2d(128, 256, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(256),
            nn.AdaptiveAvgPool2d((1))

        )
        self.cnn_block_h = nn.Sequential(
            nn.Conv2d(img_channels, 128, kernel_size=(3, 1), padding=1),
            nn.Conv2d(128, 256, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(256),
            nn.AdaptiveAvgPool2d((1))
        )

        self.mh_attn = nn.MultiheadAttention(256, 64, dropout=0, add_zero_attn=True)
        self.mh_attn_norm = nn.LayerNorm(256)


        self.ffn_block = nn.Sequential(
            nn.Linear(256, 512, bias=False),
            nn.ReLU(),
            nn.Linear(512, 256, bias=False),
            nn.LayerNorm(256),
        )

        self.fc = nn.Linear(256, 2)
        self.softmax = nn.Softmax()


    def forward(self, x):
        # x = self.block1(x)
        # # x = self.batch_norm(x)
        # x = self.block2(x)
        # x = self.pool(x).view(1, 128)
        # x = self.fc(x)
        # x = self.fc2(x)
        # x = self.fc3(x)
        x = x.unsqueeze(0)
        x_s = self.cnn_block_s(x).squeeze(2, 3)
        x_h = self.cnn_block_h(x).squeeze(2, 3)
        x_v = self.cnn_block_w(x).squeeze(2, 3)
        x, _ = self.mh_attn(x_s, x_h, x_v)
        x = self.mh_attn_norm(x)
        x = self.ffn_block(x)
        x = self.fc(x)
        return self.softmax(x)

discriminator = Discriminator(1)
try:
    # discriminator.load_state_dict(torch.load(f'/kaggle/working/{discriminator.__class__.__name__}.pth', weights_only=True))
    discriminator.load_state_dict(torch.load(f'/kaggle/working/discriminator.pth', weights_only=True))
except FileNotFoundError as e:
    display(e)
except Exception as e:
    display(e)

discriminator.eval()


RuntimeError('Error(s) in loading state_dict for Discriminator:\n\tMissing key(s) in state_dict: "mh_attn.in_proj_weight", "mh_attn.in_proj_bias", "mh_attn.out_proj.weight", "mh_attn.out_proj.bias", "mh_attn_norm.weight", "mh_attn_norm.bias". \n\tUnexpected key(s) in state_dict: "mh_attn_block.0.in_proj_weight", "mh_attn_block.0.in_proj_bias", "mh_attn_block.0.out_proj.weight", "mh_attn_block.0.out_proj.bias", "mh_attn_block.1.weight", "mh_attn_block.1.bias", "ffn_block.0.bias", "ffn_block.2.bias". ')

Discriminator(
  (cnn_block_1): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): AdaptiveAvgPool2d(output_size=1)
  )
  (cnn_block_w): Sequential(
    (0): Conv2d(1, 128, kernel_size=(1, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): AdaptiveAvgPool2d(output_size=1)
  )
  (cnn_block_h): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): AdaptiveAvgPool2d(output_size=1)
  )
  (mh_attn): MultiheadAttention(
 

## train

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(discriminator.parameters(), lr=0.0001)
def train_discriminator(epoch=1, batch_size = 500):
    for epoch in range(epoch):
        running_loss = 0.0
        print("epoch", epoch+1)
        i = 0
        for i_input, i_label, task_name in discriminator_train_data:
            for sub_epoch in range(50):
                # zero the parameter gradients
                optimizer.zero_grad()
                # get output
                outputs = discriminator(i_input)
                # calculate loss
                loss = criterion(outputs, i_label)
                loss.backward()

                optimizer.step()

                # print statistics
                running_loss += loss.item()
                i += 1

                if (i % batch_size) == (batch_size - 1):    # print every 2000 mini-batches
                    print(outputs, i_label)
                    print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / batch_size:.3f}')
                    running_loss = 0.0
        torch.save(discriminator.state_dict(), f'/kaggle/working/{discriminator.__class__.__name__}.pth')

train_discriminator()




epoch 1
tensor([[0.3843, 0.6157]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,   500] loss: 0.668
tensor([[0.4226, 0.5774]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  1000] loss: 0.700
tensor([[0.4544, 0.5456]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  1500] loss: 0.721
tensor([[0.4653, 0.5347]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  2000] loss: 0.714
tensor([[0.4707, 0.5293]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  2500] loss: 0.711
tensor([[0.4736, 0.5264]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  3000] loss: 0.709
tensor([[0.4726, 0.5274]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  3500] loss: 0.707
tensor([[0.7141, 0.2859]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  4000] loss: 0.638
tensor([[0.4907, 0.5093]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  4500] loss: 0.715
tensor([[0.3166, 0.6834]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  5000] loss: 0.687
tensor([[0.4592, 0.5408]], grad_fn=<SoftmaxBackward0>) tensor([1])
[1,  5500] loss: 0.766
te

KeyboardInterrupt: 

In [None]:
# real training
train_discriminator(50)

# Operator

## Data for OperatorEncoder

In [None]:
# testing
random_size = tuple(torch.randint(2, 10, (2,)))
print(torch.randint(
    1, 10,
    random_size
    )
)
print("-----")
print(torch.randint(
    1, 10,
    random_size
    )[:, :random_size[1] - 1]
)


tensor([[6, 7, 8, 7, 7, 1, 9],
        [1, 8, 8, 7, 5, 5, 3],
        [3, 8, 3, 2, 2, 1, 4],
        [6, 2, 1, 1, 7, 1, 8],
        [3, 7, 9, 7, 4, 7, 8],
        [5, 6, 4, 1, 8, 8, 7]])
-----
tensor([[6, 1, 1, 7, 5, 1],
        [1, 8, 8, 4, 9, 8],
        [1, 7, 7, 7, 5, 8],
        [2, 1, 2, 1, 9, 5],
        [8, 3, 7, 9, 2, 9],
        [9, 3, 9, 3, 1, 6]])
Original tensor:
 tensor([[1, 2],
        [3, 4]])
Tensor after adding 1:
 tensor([[2, 3],
        [4, 5]])


In [3]:
# create data for training OperatorEncoder
class OperatorEncoderDataset(Dataset):
    def __init__(self, sample_size=100):
        # operator learning
        self.op_label = []
        self.op_input = []
        self.op_output = []
        # w_add
        for i in range(0, sample_size):
            random_size = tuple(torch.randint(1, 10, (2,)))
            seed = torch.randint(0, 10, random_size)

            self.op_input.append(seed[:, :random_size[1] - (i % 9) + 1])
            if (i / 10) % 2 == 0:
                self.op_output.append(seed)
            self.op_label.append("w_add")
        # w_substract
        for i in range(0, sample_size):
            """"""
            random_size = tuple(torch.randint(1, 10, (2,)))
            seed = torch.randint(0, 10, random_size)

            self.op_input.append(seed)
            if (i / 10) % 2 == 0:
                self.op_output.append(seed[:, :random_size[1] - (i % 9) + 1])
            self.op_label.append("w_subtract")
        # h_add
        for i, j in zip(range(0, sample_size), range(0, 100)):
            random_size = tuple(torch.randint(1, 10, (2,)))
            seed = torch.randint(0, 10, random_size)

            self.op_input.append(seed)
            if (i / 10) % 2 == 0:
                self.op_output.append(seed[:random_size[1] - (i % 9) + 1, :])
            self.op_label.append("h_add")
        # h_substract
        for i, j in zip(range(0, sample_size), range(0, 100)):
            random_size = tuple(torch.randint(1, 10, (2,)))
            seed = torch.randint(0, 10, random_size)

            self.op_input.append(seed[:random_size[1] - (i % 9) + 1, :] )
            if (i / 10) % 2 == 0:
                self.op_output.append(seed)
            self.op_label.append("h_subtract")
        # # transform
        # for i, j in zip(range(0, sample_size), range(0, 100)):
        #     torch.randint(0, 10, (3,))
        #     self.op_input.append()
        #     self.op_output.append()
        #     self.op_label.append("transform")

    def __len__(self):
        return len(self.text)

    def __getitem__(self, idx):
        """
            Get item
        """
        op_input = self.op_input[idx]
        op_output = self.op_output[idx]
        op_label = self.op_label[idx]
        return op_input, op_output, op_label

op_dataset = OperatorEncoderDataset()

TypeError: list.append() takes exactly one argument (0 given)

In [None]:
i1_dim = 3
i2_dim = 3
m = nn.Bilinear(i1_dim, i2_dim, 2)
m.weight.data.fill_(0)
input1 = torch.randn(2, i1_dim).fill_(0)
print(input1)
input2 = torch.randn(2, i2_dim).fill_(1)
print(input2)
output = m(input1, input2)
print(output)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.1775, 0.1738],
        [0.1775, 0.1738]], grad_fn=<AddBackward0>)


In [None]:
# create a pretrain model for discriminator
class OperatorLearningBlock(nn.Module):
    def __init__(self, ):
        """
            The inputs: task_input, concate(task_input, task_output)
            condition = embedding(task_input)
            rule = embedding(task_input, task_output)
            Bilinear(condition, rule, task_output(h x w))
        """
        # create a block of operator
        # use bilinear_transform to change color
        self.bilinear_1 = nn.Sequetial([
            nn.Bilinear(2, 3, 15)
        ])
        self.op_layers = nn.ModuleList([nn.Linear(10, 10) for _ in range(num_layers)])
        self.scalar.weight.data.fill_(0.01)

    def forward(self, task_input, task_output):
        """
            Bilinear
        """
        self.bilinear_1(task_input)

            for layer in self.layers:
                x = layer(x)
            return x


class OperatorEncoder(nn.Module):
    """
        Encode operator
    """
    def __init__(self, discriminator:nn.Module, embed_dim=2707, num_heads=32, ff_dim=2511, dropout=0.1):
        super(Recoginzer, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask=None):
        src2 = self.self_attn(src, src, src, attn_mask=src_mask)[0]
        src = self.norm1(src + self.dropout(src2))
        src2 = self.ffn(src)
        src = self.norm2(src + self.dropout(src2))
        return src


operator_encoder = OperatorEncoder()
try:
    operator_encoder.load_state_dict(torch.load(f'/kaggle/working/{OperatorEncoder.__class__.__name__}.pth', weights_only=True))
except FileNotFoundError as e:
    display(e)

operator_encoder.eval()

TypeError: OperatorEncoder.__init__() missing 3 required positional arguments: 'embed_dim', 'num_heads', and 'ff_dim'

In [None]:
# prepare to count predictions for each class
labels = ["output", "input"]
correct_pred = {classname: 0 for classname in labels}
total_pred = {classname: 0 for classname in labels}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = discriminator(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

NameError: name 'testloader' is not defined

In [None]:

class Solver(nn.Module):
    def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.transformer = nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers)
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src).permute(1, 0, 2)  # (S, N, E)
        tgt = self.embedding(tgt).permute(1, 0, 2)
        output = self.transformer(src, tgt)
        return self.fc_out(output).permute(1, 0, 2)

class TransformerDecoderBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(TransformerDecoderBlock, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.cross_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.norm3 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask=None, memory_mask=None):
        tgt2 = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask)[0]
        tgt = self.norm1(tgt + self.dropout(tgt2))
        tgt2 = self.cross_attn(tgt, memory, memory, attn_mask=memory_mask)[0]
        tgt = self.norm2(tgt + self.dropout(tgt2))
        tgt2 = self.ffn(tgt)
        tgt = self.norm3(tgt + self.dropout(tgt2))
        return tgt


class Thinkable(nn.Module):
    """
        This is the main class
    """
    def __init__(self, epochs=10):
        super().__init__()
        self.fc1 = nn.Linear(256, 256)
        self.pixel_allocator = nn.Bilinear(256, 256, 10)
        self.softmax = nn.Softmax(256)

    def _selector(self, x):
        """
            activator
        """
        return (x>0.5).float()


    def separate_channel(self, x, color_channel):
        """
            Separated image into range 0-9 channels
        """
        return [float(data == target_number) for target_number in color_channel]

    def allocate_pixel(self, task_input, task_output, proposed_solution, color_channel=range(0, 10)):
        """
            set pixel color
        """
        # pick the color

        # choose the pixel position
        pixel_allocator(task_input, task_output)
        self.softmax
        return

    def reshape(self, task_input:torch.Tensor, task_output:torch.Tensor):
        """
            predict the output shape from task examples
        """
        # task input shape
        task_input.shape
        # task output shape
        task_output.shape
        # task input - task out shape translate
        width = nn.Linear(256, 256)()
        height = nn.Linear(256, 256)
        return

    def learn_to_solve(self, task_input, task_output):
        """

        """

    def learn_to_know(self, model, task_input, task_output):
        """
            generate the solver for task
        """
        # prepare train data
        # generalize

        tgt_input = task_input
        tgt_output = task_output

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        print('Finished Training')

        for epoch in range(self.epochs):
            for src, tgt in dataloader:
                optimizer.zero_grad()
                output = model(src, tgt_input)
                loss = criterion(output.reshape(-1, vocab_size), tgt_output.reshape(-1))
                loss.backward()
                optimizer.step()

    def think(self, model, task_input, task_output):
        """
            train the model based on few task examples
        """
        return model


    def forward(self, task_input, task_output, target_tensor):
        """
            Operator layer
        """
        return solution



In [None]:
def train_model(model):
    """
        Training ground
    """
    # Prepare train data

    # Train Solver

    # Train Analyzer

    #
    for epoch in range(epochs):
        for src, tgt in dataloader:
            tgt_input = tgt[:, :-1]
            tgt_output = tgt[:, 1:]

            optimizer.zero_grad()
            output = model(src, tgt_input)
            loss = criterion(output.reshape(-1, vocab_size), tgt_output.reshape(-1))
            loss.backward()
            optimizer.step()


In [None]:
import torch
data = np.random.random_integers(5, size=(3,2))
print(data, data.shape)
for target_number in range(0, 9):
    print(torch.where(data == target_number*torch.ones(data.shape, -1)))

# GAN model

In [None]:
import torch
import torch.nn as nn


class Generator(nn.Module):
    def __init__(self, noise_dim, label_dim, img_channels):
        super(Generator, self).__init__()
        self.label_embed = nn.Embedding(label_dim, label_dim)
        self.fc = nn.Linear(noise_dim + label_dim, 128 * 7 * 7)

        self.block1 = ParallelConvBlock(128, 64)
        self.block2 = ParallelConvBlock(64, 32)
        self.block3 = nn.Conv2d()
        self.upsample = nn.Upsample(scale_factor=2)
        self.final = nn.Conv2d(64, img_channels, kernel_size=3, padding=1)
        self.tanh = nn.Tanh()

    def forward(self, noise, labels):
        label_embed = self.label_embed(labels)
        x = torch.cat([noise, label_embed], dim=1)
        x = self.fc(x).view(-1, 128, 7, 7)
        x = self.upsample(self.block1(x))
        x = self.upsample(self.block2(x))
        return self.tanh(self.final(x))




# Transformer model ( next operation)

In [None]:
import torch
import torch.nn as nn


class TransformerEncoderBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(TransformerEncoderBlock, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask=None):
        src2 = self.self_attn(src, src, src, attn_mask=src_mask)[0]
        src = self.norm1(src + self.dropout(src2))
        src2 = self.ffn(src)
        src = self.norm2(src + self.dropout(src2))
        return src

class TransformerDecoderBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(TransformerDecoderBlock, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.cross_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.norm3 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask=None, memory_mask=None):
        tgt2 = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask)[0]
        tgt = self.norm1(tgt + self.dropout(tgt2))
        tgt2 = self.cross_attn(tgt, memory, memory, attn_mask=memory_mask)[0]
        tgt = self.norm2(tgt + self.dropout(tgt2))
        tgt2 = self.ffn(tgt)
        tgt = self.norm3(tgt + self.dropout(tgt2))
        return tgt



## loss function

# Train model

In [None]:
import torch

# Train your model
model = Thinkable()
torch.save(model.state_dict(), '/kaggle/working/my_model.pth')
