In [1]:
import torch
from torch import nn
import torch.utils.data as data_utils
import numpy as np
from tqdm import tqdm
import pytorch_lightning as pl

from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint, LearningRateFinder
from os import cpu_count
import wandb

torch.set_float32_matmul_precision("medium")


In [3]:
from competition import game
from c4 import AgentRandom, AgentMC

In [2]:
with open('./data/games3.txt', 'r') as f:
    data=f.readlines()

# task 1

In [24]:
def encode_state(seq):
    M=np.zeros((3,6,7))
    col_height=np.zeros(7, dtype=np.uint8)
    for i, s in enumerate(seq):
        s=int(s)
        M[i%2, col_height[s], s] = 1
        M[2, col_height[s], s] = 1 - 2*(i%2)
        col_height[s]+=1
    return M

In [12]:
def no_twos(seq):
    M=np.zeros((3,6,7))
    col_height=np.zeros(7, dtype=np.uint8)
    for i, s in enumerate(seq):
        s=int(s)
        M[i%2, col_height[s], s] = 1
        M[2, col_height[s], s] = 1 - 2*(i%2)
        col_height[s]+=1

    return M

In [17]:


class C4_Dataset(data_utils.Dataset):
    def __init__(self, input_seqs):
        super().__init__()
        labels=[]
        self.label_dict={'A':0,'B':1,'D':2}
        self.inputs=[]
        for seq in tqdm(input_seqs):
            seq=seq.strip()[1:]
            label=seq[-1]
            for i in range(1, len(seq)):
                labels.append(self.label_dict[label])
                self.inputs.append(seq[:i])
        self.labels=np.array(labels)

    def __len__(self):
        return self.labels.shape[0]
    
    def longest_seq(self):
        return max(map(lambda x: len(x), self.inputs))

    def __getitem__(self, indexes):
        if isinstance(indexes, int):
            indexes = [indexes]
        states=[]
        lens=[]
        for i in indexes:
            states.append(encode_state(self.inputs[i]))
            lens.append(len(self.inputs[i]))
        return torch.tensor(np.array(states), dtype=torch.float32), torch.tensor(lens), torch.tensor(self.labels[indexes])

In [61]:
def adjacent_ones(m, axis=0):
    m=m.clone()
    if axis==0:
        for i in range(m.shape[0]):
            seq=[]
            connected=0
            j=0
            while j<m.shape[1]:
                if m[i,j]==0:
                    m[i,seq]=connected
                    seq=[]
                    connected=0
                else:
                    connected+=1
                    seq.append(j)
                j+=1
            if connected:
                m[i, seq]=connected
  
    elif axis==1:
        for i in range(m.shape[1]):
            seq=[]
            connected=0
            j=0
            while j<m.shape[0]:
                if m[j, i]==0:
                    m[seq, i]=connected
                    seq=[]
                    connected=0
                else:
                    connected+=1
                    seq.append(j)
                j+=1
            if connected:
                m[seq, i]=connected
    
    elif axis==-1:
        #direction=(1,1)
        for i, j in [(4,0),(3,0),(2,0),(1,0),(0,0),(0,1),(0,2),(0,3),(0,4),(0,5)]:
            seq=[]
            connected=0
            while i<6 and j<7:
                if m[i,j]==0:
                    for u,v in seq:
                        m[u,v]=connected
                    seq=[]
                    connected=0
                else:
                    connected+=1
                    seq.append((i,j))
                i+=1
                j+=1
            if connected:
                for u,v in seq:
                    m[u,v]=connected
    
    elif axis==-2:
        #direction=(1,-1)
        for i, j in [(4,6),(3,6),(2,6),(1,6),(0,1),(0,2),(0,3),(0,4),(0,5), (0,6)]:
            seq=[]
            connected=0
            while i<6 and j>-1:
                if m[i,j]==0:
                    for u,v in seq:
                        m[u,v]=connected
                    seq=[]
                    connected=0
                else:
                    connected+=1
                    seq.append((i,j))
                i+=1
                j-=1
            if connected:
                for u,v in seq:
                    m[u,v]=connected
    return m

def encode_state_torch(seq, device='cpu'):
    M=torch.zeros((3,6,7), device=device)
    col_height=torch.zeros(7, dtype=torch.int8)
    i=0
    #while seq[i]!=-1:
     #   s=seq[i]
    for s in seq:
      ##  print(s, col_height)
        M[i%2, col_height[s], s] = 1
        M[2, col_height[s], s] = 1 - 2*(i%2)
        col_height[s]=min(col_height[s]+1, 5)
        i+=1

    Ms=[]
    for axis in [0,1,-1,-2]:
        Ms.append(adjacent_ones(M[0], axis))
        Ms.append(adjacent_ones(M[1], axis))
    Ms=torch.stack(Ms)
    M=torch.cat([M, Ms])
    return M



In [7]:
label_dict={'A':0,'B':1,'D':2}

def encode_multiple_inputs(seqs, lock, labels, states, lens):
    seq_labels=[]
    seq_states=[]
    seq_lens=[]
    for seq in seqs:
        seq=seq.strip()[1:]
        label=seq[-1]



        for i in range(1, len(seq)):
            input_list=[int(x) for x in seq[:i]]
            state=encode_state_torch(input_list)
        
            seq_labels.append(label_dict[label])
            seq_states.append(state)#torch.tensor(input_list, dtype=torch.int8)))
            seq_lens.append(i)

    with lock:
        labels.extend(seq_labels)
        states.extend(seq_states)
        lens.extend(seq_lens)
        

In [49]:
from math import ceil
import threading

In [16]:
class C4_CUDA_Dataset(data_utils.Dataset):
    def __init__(self, input_seqs):
        super().__init__()
        labels=[]
        self.label_dict={'A':0,'B':1,'D':2}
        inputs=[]

        max_len=max(map(lambda x: len(x)-2, input_seqs))

        for seq in tqdm(input_seqs):
            seq=seq.strip()[1:]
            label=seq[-1]
            for i in range(1, len(seq)):
                labels.append(self.label_dict[label])
                input_list=[int(x) for x in seq[:i]]
                input_list.extend([-1]*(max_len-i))
                inputs.append(torch.tensor(input_list, dtype=torch.int8))

        self.labels=torch.tensor(labels)
        self.inputs=torch.vstack(inputs)
        self.device='cpu'

    def __len__(self):
        return self.labels.shape[0]
    
    def to(self, device='cpu'):
        self.labels=self.labels.to(device)
        self.inputs=self.inputs.to(device)
        self.device=device

    def longest_seq(self):
        return max(map(lambda x: len(x), self.inputs))

    def __getitem__(self, indexes):
        if isinstance(indexes, int):
            indexes = [indexes]
        states=[]
        lens=[]
        for i in indexes:
            try:
                states.append(encode_state_torch(self.inputs[i], device=self.device))
            except IndexError as err:
                print(i)
                print(indexes)
                raise IndexError(err)
            lens.append(torch.sum(self.inputs[i]!=-1))
        return torch.stack(states), torch.tensor(lens, device=self.device), self.labels[indexes]

In [28]:
class C4_StateModel(pl.LightningModule):
    def __init__(self, lr=1e-3, l2=1e-5, lr_dc_step=2, lr_dc=0.1):
        super().__init__()
        self.save_hyperparameters()

        c=self.model=nn.Sequential(
            nn.Conv2d(11,32, kernel_size=3, padding=2, padding_mode='zeros'),
           # nn.MaxPool2d(2),
            nn.LeakyReLU(),
            nn.Conv2d(32,64, kernel_size=3, padding=2, padding_mode='zeros'),
            nn.LeakyReLU(),
            nn.Flatten(),
            nn.Linear(10*11*64, 256),
            nn.Tanh(),
            nn.Linear(256, 16),
            nn.Tanh(),
            nn.Linear(16,3)
        )
        c='''self.model=nn.Sequential(
            nn.Conv2d(11, 64, kernel_size=3, padding=2, padding_mode='zeros'),
            nn.MaxPool2d(2),
            nn.LeakyReLU(),
            nn.Conv2d(8, 32, kernel_size=2, padding=1),
            nn.MaxPool2d(2),
            nn.LeakyReLU(),
            nn.Flatten(),
            nn.Linear(2*2*32, 16),
            nn.Tanh(),
            nn.Linear(16,3)
        )'''
        self.loss=nn.CrossEntropyLoss(reduction='none')
        self.lr=lr


    def forward(self, x):
        return self.model(x.squeeze())
    
    def training_step(self, batch, batch_idx):
        x, lens,y=batch
        logits=self(x)
        y=y.squeeze()
        lens=lens.squeeze()
        loss=self.loss(logits, y)
     #   print(loss, loss.shape)

        lens=lens/max(lens)
        loss=torch.mean(lens*loss)
      #  print(lens.shape)
       # print(lens)

        self.log('train_loss', loss, prog_bar=True)
     #   print(loss.shape)
        return loss
    
    def evaluate(self, batch, mode=None):
        x, lens,y=batch
        y=y.squeeze()
        logits=self(x)
       # print(logits)
        #print(y)
        loss=self.loss(logits, y)
        loss=torch.mean(loss)

        preds=torch.argmax(logits, axis=1)
        acc=torch.sum(preds==y)/y.shape[0]

        if mode:
            self.log(mode+'_loss', loss,  prog_bar=True)
            self.log(mode+'_acc', acc,  prog_bar=True)

    def validation_step(self, batch, *args, **kwargs):
        return self.evaluate(batch, "val")
    def test_step(self, batch, *args, **kwargs):
        return self.evaluate(batch, "test")

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            self.parameters(), lr=self.lr, weight_decay=self.hparams.l2
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=self.hparams.lr_dc_step,
            factor=self.hparams.lr_dc,
            cooldown=1,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_loss",
                "strict": False,
                "interval": "epoch",
                "frequency": 1,
                "name": "scheduler_lr",
            },
        }

In [30]:
no_seqs=len(data)
train_s=int(0.8*no_seqs)

## train model

In [32]:
train_dataset=C4_CUDA_Dataset(data[:train_s])
val_dataset=C4_CUDA_Dataset(data[train_s:])

#train_dataset.to('cuda')
#val_dataset.to('cuda'              )

100%|██████████| 16000/16000 [00:03<00:00, 4229.46it/s]
100%|██████████| 4000/4000 [00:00<00:00, 4323.02it/s]


In [33]:

train_dataloader=data_utils.DataLoader(train_dataset, batch_size=128, num_workers=cpu_count(),
                                       shuffle=True, drop_last=True)
val_dataloader=data_utils.DataLoader(val_dataset, batch_size=128, num_workers=cpu_count(),)

In [34]:
c4_model=C4_StateModel(lr=1e-2)

In [35]:
wandb_logger = pl.loggers.WandbLogger(
        project="NN24", entity="kpuchalskixiv", log_model=True
    )

In [36]:
trainer=pl.Trainer(
        max_epochs=20,
        callbacks=[
            EarlyStopping(
                monitor="val_loss", patience=4, mode="min", check_finite=True, check_on_train_epoch_end=False
            ),
            LearningRateMonitor(),
            ModelCheckpoint(monitor="val_loss", mode="min"),
         #   LearningRateFinder(min_lr=1e-4, num_training_steps=1000)
        ],
        num_sanity_val_steps=100,
        logger=wandb_logger,
       # limit_train_batches=2137
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [37]:
trainer.fit(
        model=c4_model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader
    )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkpuchalskixiv[0m. Use [1m`wandb login --relogin`[0m to force relogin


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | model | Sequential       | 1.8 M 
1 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
1.8 M     Trainable params
0         Non-trainable params
1.8 M     Total params
7.313     Total estimated model params size (MB)


Epoch 0:   2%|▏         | 106/4372 [00:12<08:29,  8.38it/s, v_num=vbrq, train_loss=0.444]

/home/kacper/anaconda3/envs/gpu_torch/lib/python3.12/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [None]:
wandb.finish()

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███
scheduler_lr,██████▂▂▂▂▂▁
train_loss,▇▇█▅▆▆▆▆▇▆▆▆▇▇▇▇▇▇▇█▆▆▇▆▅▃▃▂▃▂▃▃▃▂▂▃▂▂▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▂▂▂▁▁▂▆█████
val_loss,▇█▇██▇▄▁▁▂▃▃

0,1
epoch,11.0
scheduler_lr,0.0001
train_loss,0.1697
trainer/global_step,39287.0
val_acc,0.60827
val_loss,0.89683


# create agent

In [16]:
class NeuralAgent():
    def __init__(self, second=1) -> None:
        self.name = 'NN_conv'
        self.model=c4_model
        self.second=second

    def best_move(self, b):
        seq=[x-1 for x in b.last_moves]
        possible_moves=[]
        for i in range(7):
          #  print(seq+[i,-1])
            possible_moves.append(encode_state_torch(torch.tensor(seq+[i, -1])))
       # print(possible_moves[0].shape)
        possible_moves=torch.stack(possible_moves)
       # print(possible_moves.shape)
        logits=self.model(possible_moves)

        best_moves=torch.sort(logits[:, self.second], descending=True).indices
      #  print(best_moves)
        for move in best_moves:
            if b.hs[move]<6:
              return move.item()
        
    def name(self):
        return 'Conv'

In [18]:
def my_duel(agent_a, agent_neural, N):
    score = {1:0, -1:0, 0:0}
    
    for i in range(N):
        agent_neural.second=1
        r1 = game(agent_a, agent_neural)
        score[r1] += 1
        agent_neural.second=0
        r2 = game(agent_neural, agent_a)
        score[-r2] += 1
    
    s = sum(score.values())
    
    for k in score:
        score[k] /= s    
    print (f'{agent_a.name}: {score[+1]}, {agent_neural.name}: {score[-1]}, Draw: {score[0]}')     

In [19]:
A = AgentRandom()
#A = AgentMC(50)    
#B = AgentMC(10)
B = NeuralAgent()

my_duel(A, B, 100)

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [31m⬤ [31m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [34m⬤ [31m⬤ 
[31m⬤ [30m⬤ [34m⬤ [34m⬤ [30m⬤ [34m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [22m5 [22m6 [1m7 

1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30

In [20]:
#A = AgentRandom()
A = AgentMC(50)    
#B = AgentMC(10)
B = NeuralAgent()

my_duel(A, B, 100)

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [22m5 [1m6 [22m7 

1

[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [34m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ 
[31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤ [30m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [34m⬤ [30m⬤ [30m⬤ [30

In [178]:
def game(agent_a, agent_b):
    b = Board()
    agents = [agent_a, agent_b]
    moves = []
   # print(agent_b.best_move(b))
    #return
    who = 0
    
    while not b.end():
        m = agents[who].best_move(b)
        b.apply_move(m) 
                          
        who = 1-who
    
    b.print() 
    print (b.result)
    print ()
    
    return b.result

# Task 2

## utils

In [None]:
def no_ones_twos_threes(m, axis=0):
    twos=0
    threes=0
    if axis==0:
        for i in range(m.shape[0]):
            connected=0
            j=0
            while j<m.shape[1]:
                if m[i,j]==0:
                    if connected==2: 
                        twos+=1
                    elif connected==3:
                        twos+=2
                        threes+=1
                    connected=0
                else:
                    connected+=1
                j+=1
            if connected:
                if connected==2: 
                    twos+=1
                elif connected==3:
                    twos+=2
                    threes+=1
  
    elif axis==1:
        for i in range(m.shape[1]):
            connected=0
            j=0
            while j<m.shape[0]:
                if m[j, i]==0:
                    if connected==2: 
                        twos+=1
                    elif connected==3:
                        twos+=2
                        threes+=1
                    connected=0
                else:
                    connected+=1
                j+=1
            if connected:
                if connected==2: 
                    twos+=1
                elif connected==3:
                    twos+=2
                    threes+=1
    
    elif axis==-1:
        #direction=(1,1)
        for i, j in [(4,0),(3,0),(2,0),(1,0),(0,0),(0,1),(0,2),(0,3),(0,4),(0,5)]:
            seq=[]
            connected=0
            while i<6 and j<7:
                if m[i,j]==0:
                    if connected==2: 
                        twos+=1
                    elif connected==3:
                        twos+=2
                        threes+=1
                    connected=0
                else:
                    connected+=1
                j+=1
                i+=1
            if connected:
                if connected==2: 
                    twos+=1
                elif connected==3:
                    twos+=2
                    threes+=1
    
    elif axis==-2:
        #direction=(1,-1)
        for i, j in [(4,6),(3,6),(2,6),(1,6),(0,1),(0,2),(0,3),(0,4),(0,5), (0,6)]:
            seq=[]
            connected=0
            while i<6 and j>-1:
                if m[i,j]==0:
                    if connected==2: 
                        twos+=1
                    elif connected==3:
                        twos+=2
                        threes+=1
                    connected=0
                else:
                    connected+=1
                i+=1
                j-=1
            if connected:
                if connected==2: 
                    twos+=1
                elif connected==3:
                    twos+=2
                    threes+=1
    return torch.tensor([twos, threes])



In [4]:
double_conv=nn.Conv2d(2,2,2, bias=False)
double_conv.requires_grad_(False)
a=torch.ones_like(double_conv.weight)
a[0,1,:,:]=0
a[1,0,:,:]=0
double_conv.weight = torch.nn.Parameter(a, requires_grad=False)
double_conv.weight.shape

def no_squares(m):
    return (double_conv(m)==4).sum(axis=(2,3))

kernels=[
    torch.tensor([[1,1,1],
                  [0,0,0],
                  [0,0,0]]),
    torch.tensor([[0,0,0],
                  [1,1,1],
                  [0,0,0]]),
    torch.tensor([[0,0,0],
                  [0,0,0],
                  [1,1,1]]),
    torch.tensor([[1,0,0],
                  [1,0,0],
                  [1,0,0]]),
    torch.tensor([[0,1,0],
                  [0,1,0],
                  [0,1,0]]),
    torch.tensor([[0,0,1],
                  [0,0,1],
                  [0,0,1]]),
    torch.tensor([[0,0,1],
                  [0,1,0],
                  [1,0,0]]),
    torch.tensor([[1,0,0],
                  [0,1,0],
                  [0,0,1]]),
]

doubles_dict={
    0:0,
    1:0,
    2:1,
    3:2,
    4:6
}

def doubles(m):
    out=double_conv(m)#.flatten()
    out.apply_(lambda x: doubles_dict[x])
    return out#.flatten()


triplet_conv=nn.Conv2d(2,16, 3, bias=False)
triplet_conv.requires_grad_(False)
a=torch.zeros_like(triplet_conv.weight)
a[:8,0]=torch.stack(kernels)
a[8:,1]=torch.stack(kernels)
triplet_conv.weight = torch.nn.Parameter(a, requires_grad=False)

def triplets(m):
    return triplet_conv(m)

def encode_state_fc_torch(seq, device='cpu'):
    M=torch.zeros((2,6,7), device=device)
    col_height=torch.zeros(7, dtype=torch.int8)
    i=0
    #while seq[i]!=-1:
     #   s=seq[i]
    for s in seq:
        if s<0: break
      ##  print(s, col_height)
        M[i%2, col_height[s], s] = 1
        col_height[s]=min(col_height[s]+1, 5)
        i+=1

    i=sum(seq!=-1)%2
    res=[]

  #  for axis in [0,1,-1,-2]:
   #     res.append(no_ones_twos_threes(M[i], axis))
    #    res.append(no_ones_twos_threes(M[i-1], axis))
   # print(M[i])
    res=torch.tensor([no_squares(M[i]), 
                             no_squares(M[i-1]),
                             sum(doubles(M[i])),
                             sum(doubles(M[i-1])),
                             sum(triplets(M[i]).flatten()==3),
                             sum(triplets(M[i-1]).flatten()==3)])
    res=torch.cat([res, M.sum(axis=(1,0))]).type(torch.float32)
  #  print(res)
    #res=nn.functional.normalize(res.view(1,-1))
    return res

In [5]:
def encode_matrix(M, i):

  res=torch.tensor([no_squares(M[i]), 
                            no_squares(M[i-1]),
                            sum(doubles(M[i])),
                            sum(doubles(M[i-1])),
                            sum(triplets(M[i]).flatten()==3),
                            sum(triplets(M[i-1]).flatten()==3)])
  res=torch.cat([res, M.sum(axis=(1,0))]).type(torch.float32)
  #  print(res)
  #res=nn.functional.normalize(res.view(1,-1))
  return res

In [6]:
class C4_FC_Dataset(data_utils.Dataset):
    def __init__(self, input_seqs):
        super().__init__()
        labels=[]
        self.label_dict={'A':0,'B':1,'D':2}
        inputs=[]

        max_len=max(map(lambda x: len(x)-2, input_seqs))

        for seq in tqdm(input_seqs):
            seq=seq.strip()[1:]
            label=seq[-1]
            for i in range(1, len(seq)):
                labels.append(self.label_dict[label])
                input_list=[int(x) for x in seq[:i]]
                input_list.extend([-1]*(max_len-i))
                inputs.append(torch.tensor(input_list, dtype=torch.int8))

        self.labels=torch.tensor(labels)
        self.inputs=torch.vstack(inputs)
        self.device='cpu'

    def __len__(self):
        return self.labels.shape[0]
    
    def to(self, device='cpu'):
        self.labels=self.labels.to(device)
        self.inputs=self.inputs.to(device)
        self.device=device

    def longest_seq(self):
        return max(map(lambda x: len(x), self.inputs))

    def __getitem__(self, indexes):
        if isinstance(indexes, int):
            indexes = [indexes]
        states=[]
        lens=[]
        for i in indexes:
            try:
                states.append(encode_state_fc_torch(self.inputs[i], device=self.device).flatten())
            except IndexError as err:
                print(i)
                print(indexes)
                raise IndexError(err)
            lens.append(torch.sum(self.inputs[i]!=-1))
        return torch.stack(states), torch.tensor(lens, device=self.device), self.labels[indexes]

In [6]:
double_conv=nn.Conv2d(2,2,2, bias=False)
double_conv.requires_grad_(False)
a=torch.ones_like(double_conv.weight)
a[0,1,:,:]=0
a[1,0,:,:]=0
double_conv.weight = torch.nn.Parameter(a, requires_grad=False)
double_conv.weight.shape

triplet_conv=nn.Conv2d(2,16, 3, bias=False)
triplet_conv.requires_grad_(False)
a=torch.zeros_like(triplet_conv.weight)
a[:8,0]=torch.stack(kernels)
a[8:,1]=torch.stack(kernels)
triplet_conv.weight = torch.nn.Parameter(a, requires_grad=False)

class C4_FC_precalculated_Dataset(data_utils.Dataset):
    def __init__(self, input_seqs):
        super().__init__()
        labels=[]
        self.label_dict={'A':0,'B':1,'D':2}
        matrices=[]
        lens=[]

        for seq in tqdm(input_seqs):
            seq=seq.strip()[1:]
            label=seq[-1]
            input_list=[int(x) for x in seq[:-1]]

            M=torch.zeros((2,6,7))
            col_height=torch.zeros(7, dtype=torch.int8)
            for i, s in enumerate(input_list):
                labels.append(self.label_dict[label])
                lens.append(i+1)
                M[i%2, col_height[s], s] = 1
                col_height[s]=min(col_height[s]+1, 5)

                matrices.append(M)

        self.labels=torch.tensor(labels)
        self.lens=torch.tensor(lens)
        self.matrices=torch.stack(matrices)
        self.device='cpu'

       # states=[]
        #for m,l in tqdm(zip(self.matrices, self.lens), total=self.lens.shape[0]):
         #   states.append(encode_matrix(m, l%2))
        #self.states=torch.stack(states)


    def __len__(self):
        return self.labels.shape[0]
    
    def to(self, device='cpu'):
        self.labels=self.labels.to(device)
        self.matrices=self.matrices.to(device)
        self.states=self.states.to(device)
        self.device=device

    def longest_seq(self):
        return max(map(lambda x: len(x), self.inputs))

    def __getitem__(self, indexes):
        if isinstance(indexes, int):
            indexes = [indexes]

     #   states=[]
      #  for idx in indexes:
      #      states.append(encode_matrix(self.matrices[idx], self.lens[idx]%2))

       # print(self.matrices[indexes].shape)
        squares=no_squares(self.matrices[indexes])
        doubles=double_conv(self.matrices[indexes]).apply_(lambda x: doubles_dict[x]).sum(axis=(2,3))
        triples=(triplet_conv(self.matrices[indexes])==3).sum(axis=(2,3))

        

        return torch.cat([squares, doubles, triples], dim=1), self.lens[indexes], self.labels[indexes]

In [54]:
class C4_FC_Model(pl.LightningModule):
    def __init__(self, input_dim, lr=1e-3, l2=1e-5, lr_dc_step=2, lr_dc=0.1):
        super().__init__()
        self.save_hyperparameters()

        self.model=nn.Sequential(
            nn.BatchNorm1d(input_dim),#, track_running_stats=False, affine=False),
            nn.Linear(input_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 16),
            nn.Tanh(),
            nn.Linear(16,3)
        )

        self.loss=nn.CrossEntropyLoss(reduction='none')
        self.lr=lr


    def forward(self, x):
        return self.model(x.squeeze())
    
    def training_step(self, batch, batch_idx):
        x, lens,y=batch
        logits=self(x)
        y=y.squeeze()
        lens=lens.squeeze()
        loss=self.loss(logits, y)
     #   print(loss, loss.shape)

#        lens=lens/max(lens)
 #       loss=torch.mean(lens*loss)
        loss=self.loss(logits, y)
        loss=torch.mean(loss)
      #  print(lens.shape)
       # print(lens)

        self.log('train_loss', loss, prog_bar=True)
     #   print(loss.shape)
        return loss
    
    def evaluate(self, batch, mode=None):
        x, lens,y=batch
        y=y.squeeze()
        logits=self(x)
      #  print(logits.shape)
       # print(y.shape)
        loss=self.loss(logits, y)
        loss=torch.mean(loss)

        preds=torch.argmax(logits, axis=1)
        acc=torch.sum(preds==y)/y.shape[0]

        if mode:
            self.log(mode+'_loss', loss,  prog_bar=True)
            self.log(mode+'_acc', acc,  prog_bar=True)

    def validation_step(self, batch, *args, **kwargs):
        return self.evaluate(batch, "val")
    def test_step(self, batch, *args, **kwargs):
        return self.evaluate(batch, "test")

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            self.parameters(), lr=self.lr, weight_decay=self.hparams.l2
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=self.hparams.lr_dc_step,
            factor=self.hparams.lr_dc,
            cooldown=1,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_loss",
                "strict": False,
                "interval": "epoch",
                "frequency": 1,
                "name": "scheduler_lr",
            },
        }

## train model

In [8]:
no_seqs=len(data)
train_s=int(0.8*no_seqs)

In [9]:
train_matrix_dataset=C4_FC_precalculated_Dataset(data[:train_s])
val_matrix_dataset=C4_FC_precalculated_Dataset(data[train_s:])

100%|██████████| 16000/16000 [00:12<00:00, 1287.60it/s]
100%|██████████| 4000/4000 [00:02<00:00, 1346.31it/s]


In [39]:
train_dataset=C4_FC_precalculated_Dataset(data[:train_s])
val_dataset=C4_FC_precalculated_Dataset(data[train_s:])

#train_dataset.to('cuda')
#val_dataset.to('cuda'              )

100%|██████████| 16000/16000 [00:12<00:00, 1310.43it/s]
100%|██████████| 4000/4000 [00:03<00:00, 1312.80it/s]


In [10]:

train_dataloader=data_utils.DataLoader(train_matrix_dataset, batch_size=32, num_workers=cpu_count(),
                                       shuffle=True, drop_last=True)
val_dataloader=data_utils.DataLoader(val_matrix_dataset, batch_size=32, num_workers=cpu_count(),)

In [55]:
fc_model=C4_FC_Model(input_dim=20, lr=1e-2)

In [26]:
wandb_logger = pl.loggers.WandbLogger(
        project="NN24", entity="kpuchalskixiv", log_model=True
    )


In [56]:

trainer=pl.Trainer(
        max_epochs=20,
        callbacks=[
            EarlyStopping(
                monitor="val_loss", patience=4, mode="min", check_finite=True, check_on_train_epoch_end=False
            ),
            LearningRateMonitor(),
            ModelCheckpoint(monitor="val_loss", mode="min"),
         #   LearningRateFinder(min_lr=1e-4, num_training_steps=1000)
        ],
        num_sanity_val_steps=100,
      #  logger=wandb_logger,
       # limit_train_batches=5000
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [61]:
trainer.fit(model=fc_model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

/home/kacper/anaconda3/envs/gpu_torch/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory /home/kacper/NN24/lightning_logs/version_33/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | model | Sequential       | 21.4 K
1 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
21.4 K    Trainable params
0         Non-trainable params
21.4 K    Total params
0.085     Total estimated model params size (MB)


Epoch 2:   3%|▎         | 560/17488 [01:24<42:36,  6.62it/s, v_num=33, train_loss=0.331, val_loss=0.376, val_acc=0.844] 
Epoch 6:  81%|████████  | 14203/17488 [01:17<00:18, 182.20it/s, v_num=33, train_loss=0.288, val_loss=0.343, val_acc=0.868]

In [30]:
wandb.finish()

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████████
scheduler_lr,▁▁
train_loss,▇▇▃▁▇▅▄▃▆▇▃▅▁▇▄▄█▆▅▅▂▄▁▅▅▂▅▂▇▅▃▄▄▃▁▆▄▆▃▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁
val_loss,▁

0,1
epoch,1.0
scheduler_lr,0.001
train_loss,0.43771
trainer/global_step,6749.0
val_acc,0.45823
val_loss,0.98259


## train agent

In [15]:
def get_matrix(seq):
    M=torch.zeros((2,6,7))
    col_height=torch.zeros(7, dtype=torch.int8)
    for i, s in enumerate(seq):
        M[i%2, col_height[s], s] = 1
        col_height[s]=min(col_height[s]+1, 5)
    return M


def get_model_input(batch_M):
    squares=no_squares(batch_M)
    doubles=double_conv(batch_M).apply_(lambda x: doubles_dict[x]).sum(axis=(2,3))
    triples=(triplet_conv(batch_M)==3).sum(axis=(2,3))
    return torch.cat([squares, doubles, triples], dim=1)

In [69]:
B.name

'NN_conv'

In [70]:
class NeuralFCAgent():
    def __init__(self, model, second=1) -> None:
        self.name = 'FullyConnected'
        self.model=model
        self.second=second

    def best_move(self, b):
        seq=[x-1 for x in b.last_moves]

        possible_moves=[]
        for i in range(7):
          #  print(seq+[i,-1])
            possible_moves.append(get_matrix(seq+[i, -1]))
        
       # print(possible_moves[0].shape)
        possible_moves=torch.stack(possible_moves)
        possible_moves=get_model_input(possible_moves)
       # print(possible_moves.shape)
        logits=self.model(possible_moves)

        best_moves=torch.sort(logits[:, self.second], descending=True).indices
      #  print(best_moves)
        for move in best_moves:
            if b.hs[move]<6:
              return move.item()
        
    def name(self):
        return 'FullyConnected'

In [16]:
def my_duel(agent_a, agent_neural, N):
    score = {1:0, -1:0, 0:0}
    
    for i in range(N):
        agent_neural.second=1
        r1 = game(agent_a, agent_neural)
        score[r1] += 1
        agent_neural.second=0
        r2 = game(agent_neural, agent_a)
        score[-r2] += 1
    
    s = sum(score.values())
    
    for k in score:
        score[k] /= s    
    print (f'{agent_a.name}: {score[+1]}, {agent_neural.name}: {score[-1]}, Draw: {score[0]}')     

In [67]:
fc_model.to('cpu')

C4_FC_Model(
  (model): Sequential(
    (0): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=20, out_features=128, bias=True)
    (2): Tanh()
    (3): Linear(in_features=128, out_features=128, bias=True)
    (4): Tanh()
    (5): Linear(in_features=128, out_features=16, bias=True)
    (6): Tanh()
    (7): Linear(in_features=16, out_features=3, bias=True)
  )
  (loss): CrossEntropyLoss()
)

In [71]:
A = AgentRandom()
#A = AgentMC(50)    
#B = AgentMC(10)
B = NeuralFCAgent(fc_model)

my_duel(A, B, 200)

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [31m⬤ [30m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [22m5 [1m6 [22m7 

1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [34m⬤ [30m⬤ [34m⬤ [34m⬤ [30m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m

In [60]:
#A = AgentMC(50)    
A = AgentMC(10)
B = NeuralFCAgent(fc_model)

my_duel(A, B, 200)

[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤ [30m⬤ [30m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

1

[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤ [30m⬤ [30m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ 
[34m⬤ [30m⬤ [30m⬤ [30m⬤ [30m⬤ [30