## Polygon Transformer using Pytorch Transformer Encoder Module

Note that mask is different here, only (batch_size, seq_len) mask where True stands for invalid (mask) attention queries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
USE_GPU = True if torch.cuda.is_available() else "cpu"

In [28]:
class Pot(nn.Module):
    def __init__(self, d_model=7, nhead=1, num_layers=3, max_seq_len=64, dim_feedforward=64, dropout=0.1, num_types=10):
        super().__init__()

        self.class_embedding = nn.Parameter(torch.randn(1, 1, d_model))
        self.pos_embedding = nn.Parameter(torch.randn(1, 1 + max_seq_len, d_model))
        self.dropout = nn.Dropout(dropout)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
                                                dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.mlp_head = nn.Sequential(nn.Linear(d_model, dim_feedforward),
                                      nn.ReLU(),
                                      nn.Linear(dim_feedforward, num_types))

    def forward(self, x, mask=None):
                # token_mask = (tokens != 0).unsqueeze(1).unsqueeze(2)
        batch_size, seq_len, emb_dim = x.shape
        class_embedding = self.class_embedding.repeat(batch_size, 1, 1)
        x = torch.cat([class_embedding, x], dim=1)
        # print(x.shape, self.pos_embedding[:, :seq_len+1].shape)
        x = x + self.pos_embedding[:, :seq_len+1]
        x = self.dropout(x)

        # Create a new tensor with True values in the first column (for cls token)
        if mask is not None:
            cls_mask = torch.ones((batch_size, 1), dtype=torch.bool)
            if USE_GPU:
                cls_mask = cls_mask.to(device)
            mask = torch.cat((cls_mask, mask), dim=1)
        
        x = self.transformer_encoder(x, src_key_padding_mask=mask)
        x = x[:, 0, :] # grab the class embedding
        x = self.mlp_head(x)
        
        return x

In [7]:
import pandas as pd
import numpy as np
from deep_geometry import vectorizer as gv
from deep_geometry import GeomScaler


max_seq_len = 64
batch_size = 32


gs = GeomScaler()
types_dict = {'PK':0, 'MR': 1, 'KL':2, 'NV':3, 'WA':4, 'LG':5, 'HO':6, 'GR':7, 'REC':8, 'PGK':9}
df = pd.read_csv("archaeology.csv")
df['type'] = df['Aardspoor'].map(types_dict)
df = df.dropna().reset_index(drop=True)

def count_points(wkt):
    try:
        num_points = gv.num_points_from_wkt(wkt)
        # gv.vectorize_wkt(wkt)
        return num_points
    except:
        print("Invalid wkt string, skip it")
        return np.inf

filtered_df = df[df['WKT'].apply(lambda x: count_points(x) <= max_seq_len)]
df = filtered_df

df = df[:1000]

Invalid wkt string, skip it


In [8]:
def dataset_split(df, val_split_ratio, test_split_ratio):

    data, labels = np.array(df['WKT'].tolist()), np.array(df['type'].tolist())

    num_val = int(val_split_ratio * len(df))
    num_test = int(test_split_ratio * len(df))

    indices = np.arange(len(df))
    np.random.shuffle(indices)

    train_indices, val_indices, test_indices = indices[num_val+num_test:], indices[:num_val], indices[num_val:num_val+num_test]

    train_data, train_labels = data[train_indices], labels[train_indices]
    val_data, val_labels = data[val_indices], labels[val_indices]
    test_data, test_labels = data[test_indices], labels[test_indices]

    return train_data, train_labels, val_data, val_labels, test_data, test_labels

ori_train_data, ori_train_labels, ori_val_data, ori_val_labels, ori_test_data, ori_test_labels = dataset_split(df, 0.1, 0.1)

In [31]:
def prepare_polygon_dataset(wkts, types, max_seq_len, train=True): # TODO - 1. split into train, validate, test. 2. randomly sample
    geoms, labels, start_points = [], [], []
    for i, wkt in enumerate(wkts):
        num_point = gv.num_points_from_wkt(wkt)
        if  num_point > max_seq_len:
             continue
        geom = gv.vectorize_wkt(wkt, max_points=max_seq_len, fixed_size=True)
        geoms.append(geom)
        labels.append(types[i])
        start_points.append(num_point)

    start_points = torch.tensor(start_points).unsqueeze(1)
    indices = torch.arange(max_seq_len).unsqueeze(0)
    mask = indices >= start_points
    tokens = np.stack(geoms, axis=0)
    if train:
        gs.fit(tokens)
    tokens = gs.transform(tokens)
    tokens = torch.tensor(tokens, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.long)
    
    return tokens, labels, mask

In [32]:
train_tokens, train_labels, train_mask = prepare_polygon_dataset(ori_train_data, ori_train_labels, max_seq_len)
val_tokens, val_labels, val_mask = prepare_polygon_dataset(ori_val_data, ori_val_labels, max_seq_len, train=False)
test_tokens, test_labels, test_mask = prepare_polygon_dataset(ori_test_data, ori_test_labels, max_seq_len, train=False)

In [33]:
train_loader = DataLoader(TensorDataset(train_tokens, train_labels, train_mask), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tokens, val_labels, val_mask))
test_loader = DataLoader(TensorDataset(test_tokens, test_labels, test_mask))

In [36]:
pot = Pot(d_model=7, nhead=1, num_layers=3, max_seq_len=64, dim_feedforward=64, dropout=0.1, num_types=10)

if USE_GPU:
    pot = pot.to(device)

criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(pot.parameters(), lr=0.004)
optimizer = optim.Adam(pot.parameters(), lr=0.004, betas=(0.9, 0.98), eps=1e-9)

num_epochs = 100

def train(model, loader):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for batch_x, batch_y, batch_mask in loader:
        if USE_GPU:
            batch_x, batch_y, batch_mask = batch_x.to(device), batch_y.to(device), batch_mask.to(device)
        optimizer.zero_grad()
        outputs = pot(batch_x, batch_mask)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += batch_y.size(0)
        correct += (predicted == batch_y).sum().item()
    train_loss /= len(loader)
    train_acc = correct / total
    return train_loss, train_acc

def evaluate(model, loader):
    model.eval()
    eval_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_x, batch_y, batch_mask in loader:
            if USE_GPU:
                batch_x, batch_y, batch_mask = batch_x.to(device), batch_y.to(device), batch_mask.to(device)
            outputs = pot(batch_x, batch_mask)
            loss = criterion(outputs, batch_y)
            eval_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    eval_loss /= len(val_loader)
    eval_acc = correct / total
    return eval_loss, eval_acc

for epoch in range(num_epochs):
    train_loss, train_acc = train(pot, train_loader)
    val_loss, val_acc = evaluate(pot, val_loader)
    print(f"Epoch: {epoch+1}, Train Loss: {train_loss}, Train Acc {train_acc}, Val Loss: {val_loss}, Val Acc: {val_acc}")


# Test
test_loss, test_acc = evaluate(pot, test_loader)
print(f"Test Loss: {test_loss}, Test Acc: {test_acc}")

    

Epoch: 1, Train Loss: 1.5517314338684083, Train Acc 0.50375, Val Loss: 1.3070946848392486, Val Acc: 0.53
Epoch: 2, Train Loss: 1.367800521850586, Train Acc 0.52625, Val Loss: 1.2718898367881775, Val Acc: 0.53
Epoch: 3, Train Loss: 1.341175582408905, Train Acc 0.525, Val Loss: 1.2165230885148048, Val Acc: 0.54
Epoch: 4, Train Loss: 1.2440640807151795, Train Acc 0.57, Val Loss: 1.134313284754753, Val Acc: 0.63
Epoch: 5, Train Loss: 1.192763397693634, Train Acc 0.5675, Val Loss: 1.170486791729927, Val Acc: 0.57
Epoch: 6, Train Loss: 1.1788130807876587, Train Acc 0.5675, Val Loss: 1.1226997366547584, Val Acc: 0.57
Epoch: 7, Train Loss: 1.1652851653099061, Train Acc 0.59, Val Loss: 1.1382062074542045, Val Acc: 0.57
Epoch: 8, Train Loss: 1.1698787546157836, Train Acc 0.5925, Val Loss: 1.1251955422759057, Val Acc: 0.59
Epoch: 9, Train Loss: 1.1512342691421509, Train Acc 0.6, Val Loss: 1.1300003844499589, Val Acc: 0.57
Epoch: 10, Train Loss: 1.1560446310043335, Train Acc 0.58875, Val Loss: 1.1