# Naive approach
I will only use images and image based methods to create a baseline. Later, more sophisticated negative mining strategies (based on heuristics or arguequery) will be compared to this.
Inspired by [DeepFindr](https://colab.research.google.com/drive/1oO-Raqge8oGXGNkZQOYTH-je4Xi1SFVI?usp=sharing)

## Load Dataset

In [1]:
from glob import glob
from datasets import load_dataset

dataset = load_dataset("imagefolder", data_dir="../data/kialo-graphnli-images/")
dataset


Resolving data files:   0%|          | 0/1560 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['image'],
        num_rows: 1560
    })
})

In [5]:
dataset = dataset.map(lambda x: {"pixel_values": model.processor(x["image"].convert("RGB"), return_tensors="pt", padding=True)["pixel_values"].squeeze()}, remove_columns=["image"])



Map:   0%|          | 0/1560 [00:00<?, ? examples/s]

In [188]:
# dataset.push_to_hub("kblw/kialo-graphnli-images")

Uploading the dataset shards:   0%|          | 0/2 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

## Data Preparation

Color Jitter should probably not be applied, because the colors actually carry important meaning. RandomCrop could also be a problem, as the structure of a graph drastically changes with its size. It should however be rotation invariant, so flipping should be a good idea.

In [6]:
from torch.utils.data import DataLoader
from torchvision.transforms import v2

transforms = v2.Compose([
    # v2.RandomResizedCrop(224),
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    # v2.RandomRotation(45),
    # v2.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    v2.ToImage()
])

In [7]:
import arguebuf as ab
from evaluate_cbr.visualize import export_graph
import uuid
from PIL import Image
import math
def heuristic_distance(graph1: ab.Graph, graph2: ab.Graph) -> float:
    no_edges = len(graph1.edges)
    no_i_nodes = len(graph1.atom_nodes)
    no_s_nodes = len(graph1.scheme_nodes)

    path = f"/tmp/{uuid.uuid4()}.png"
    export_graph(graph1, path)
    image = Image.open(path)
    # get image height
    height = image.height
    depth = height // 28

    # calculate statistics for graph 2
    no_edges2 = len(graph2.edges)
    no_i_nodes2 = len(graph2.atom_nodes)
    no_s_nodes2 = len(graph2.scheme_nodes)

    path = f"/tmp/{uuid.uuid4()}.png"
    export_graph(graph2, path)
    image = Image.open(path)
    # get image height
    height2 = image.height
    depth2 = height2 // 28

    # calculate normalized deltas
    delta_edges = (no_edges - no_edges2)
    delta_i_nodes = (no_i_nodes - no_i_nodes2)
    delta_s_nodes = (no_s_nodes - no_s_nodes2)
    delta_depth = (depth - depth2)


    # calculate euclidean distance
    return math.sqrt(delta_edges ** 2 + delta_i_nodes ** 2 + delta_s_nodes ** 2 + delta_depth ** 2)





    


### Visualize transforms

In [8]:
# # get first dataset image
# import matplotlib.pyplot as plt

# img = dataset["train"][0]["image"]
# plt.imshow(img)

KeyError: 'image'

In [179]:
# transformed = transforms(img)
# plt.imshow(transformed)

## Model
In the first experiment, the model will projection head will be Linear layer and transform the embedding down to 32-d. When the loss is applied to this, it prevents the curse of dimensionality and should provide better results.

In [3]:
import torch
from torch.nn import Linear
from transformers import Swinv2Model, AutoImageProcessor

class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.processor = AutoImageProcessor.from_pretrained("../pretrained_model")
        self.model = Swinv2Model.from_pretrained("../pretrained_model")
        self.projection_head = Linear(768, 32)

    def forward(self, x, train = True):
        if train:
            augm_1 = transforms(x)
            augm_2 = transforms(x)
            x1 = self.model(augm_1).pooler_output
            x1 = self.projection_head(x1)
            x2 = self.model(augm_2).pooler_output
            x2 = self.projection_head(x2)
            return x1, x2
        else:
            # x = self.processor(x, return_tensors="pt", padding=True)
            x = self.model(**x).pooler_output
            return x

In [4]:
from pytorch_metric_learning.losses import NTXentLoss
loss_func = NTXentLoss(temperature=0.1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

In [182]:
# model.model(torch.Tensor(dataset["train"][0]["pixel_values"]).unsqueeze(0).to(device))

In [9]:
torch.Tensor(dataset["train"][0]["pixel_values"]).squeeze().shape

torch.Size([3, 224, 224])

In [10]:
import torch
def collate_fn(batch):
    images = [torch.Tensor(example["pixel_values"]) for example in batch]
    images = torch.stack(images)
    return images

train_loader = DataLoader(dataset["train"], collate_fn=collate_fn, batch_size=32, shuffle=True, num_workers=4)

In [186]:
for _, data in enumerate(train_loader):
    data = data.to(device)
    optimizer.zero_grad()
    # Get data representations
    emb_view1, emb_view2 = model(data)
    # Prepare for loss
    embeddings = torch.cat((emb_view1, emb_view2))
    # The same index corresponds to a positive pair
    indices = torch.arange(0, emb_view1.size(0), device=emb_view2.device)
    print(embeddings.shape)
    print(indices)
    break



torch.Size([64, 32])
tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])


In [11]:
import tqdm

def train():
    model.train()
    total_loss = 0
    for _, data in enumerate(tqdm.tqdm(train_loader)):
        data = data.to(device)
        optimizer.zero_grad()
        # Get data representations
        emb_view1, emb_view2 = model(data)
        # Prepare for loss
        embeddings = torch.cat((emb_view1, emb_view2))
        # The same index corresponds to a positive pair
        indices = torch.arange(0, emb_view1.size(0), device=emb_view2.device)
        labels = torch.cat((indices, indices))
        loss = loss_func(embeddings, labels)
        loss.backward()
        total_loss += loss.item() * data.shape[0]
        optimizer.step()
    return total_loss / len(dataset)

for epoch in range(1, 10):
    loss = train()
    print(f'Epoch {epoch:03d}, Loss: {loss:.4f}')
    scheduler.step()

100%|██████████| 49/49 [06:53<00:00,  8.45s/it]


Epoch 001, Loss: 6433.9808


100%|██████████| 49/49 [06:59<00:00,  8.56s/it]


Epoch 002, Loss: 6456.2163


100%|██████████| 49/49 [06:56<00:00,  8.51s/it]


Epoch 003, Loss: 6456.8168


  4%|▍         | 2/49 [00:26<10:17, 13.15s/it]


KeyboardInterrupt: 

In [2]:
path = "/home/kilian/arguebase-public/microtexts/format=aif,lang=en/nodeset6361.json"
import arguebuf as ab

graph = ab.load.file(path)
ab.render.graphviz(ab.dump.graphviz(graph), "graph.png")

In [3]:
from evaluate_cbr.visualize import export_graph

export_graph(graph, "graph_2.png")

In [5]:
from transformers import Swinv2Model
model = Swinv2Model.from_pretrained("../pretrained_model")
model.push_to_hub("kblw/pretrained_argimg")

model.safetensors:   0%|          | 0.00/110M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/kblw/pretrained_argimg/commit/1b9f2ac5e20c11a3f622b3f36c6de9f69de9452b', commit_message='Upload model', commit_description='', oid='1b9f2ac5e20c11a3f622b3f36c6de9f69de9452b', pr_url=None, pr_revision=None, pr_num=None)