In [1]:
from dataset import read_annotations_file, tokenize_sentences
import numpy as np
from model import ImageCaptioningSystem
import pytorch_lightning as pl
from pathlib import Path
from dataset import NWPU_Captions
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from evaluation import eval_validation
import matplotlib.pyplot as plt
import torch
from transformers import GPT2TokenizerFast, BertTokenizer, BertModel
import pandas as pd
import seaborn as sns
import tqdm
import umap

from sklearn.cluster import KMeans

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to /home/fe/hufe/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/fe/hufe/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
%cd ..

/home/fe/hufe/Documents/semester11/activelearning_ic


In [3]:
annotations = read_annotations_file("NWPU-Captions/NWPU_images/", "NWPU-Captions/dataset_nwpu.json")
tk = annotations['test']['sentences']
ids = np.array(annotations['test']['img_id'])
order = ids.argsort()
ordered_sentences = np.array(tk)[order]

In [4]:
for i in range(5):
    with open(f'evaluation/test_references_{i}.txt', 'w') as f:
        for x in ordered_sentences[:,i]:
            x = x.encode('ascii', 'ignore').decode('ascii')
            f.write(x)
            f.write('\n')

In [5]:
data_path = "NWPU-Captions/"
images_path = Path(data_path, "NWPU_images")
annotations_path = Path(data_path, "dataset_nwpu.json")

test_set = NWPU_Captions(
    root=images_path,
    annotations_file=annotations_path,
    split="test",
    transform=ToTensor(),
)

test_loader = DataLoader(
    test_set, batch_size=12, shuffle=False, num_workers=4
)



In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:

model = ImageCaptioningSystem.load_from_checkpoint("test/full.ckpt", lr=0.001, device_type=device, sampling_method='cluster')


In [10]:
confidences = []
ids = []
model.model = model.model.to(device)

with tqdm.tqdm(total=len(test_loader)) as pbar:
    for batch in test_loader:
        pixel_values, sentences_token, img_ids, sentences_ids = batch
        
        pixel_values = pixel_values.squeeze(dim=1)
        bs = pixel_values.shape[0]
        label = sentences_token[:, 0, :].long().contiguous()

        pixel_values = pixel_values.to(device)
        label = label.to(device)

        with torch.no_grad():
            out = model.model(
                    pixel_values=pixel_values, labels=label, output_hidden_states=True
                )

        logits = out.logits
        logits_softmax = torch.nn.functional.softmax(logits, dim=2)
        word_conf, _ = torch.max(logits_softmax, dim=2)
        
        ids.append(img_ids.cpu().numpy())
        confidences.append(word_conf.cpu().numpy())
        
        pbar.update(1)


100%|██████████| 263/263 [00:55<00:00,  4.71it/s]


In [None]:
confidences = np.concatenate(confidences)
ids = np.concatenate(ids)

In [31]:
conf_array, ids_array = np.row_stack(confidences), np.row_stack(ids).flatten()
conf_array.shape , ids_array.shape  

((3150, 56), (3150,))

In [36]:
order = np.argsort(ids_array)

conf_array = conf_array[order]

In [8]:
open('evaluation/full_model_on_testset.txt').read().splitlines().__len__()

3150

In [73]:
data = pd.DataFrame({
    'classes': open('test/test_classes.txt').read().splitlines(),
    'text': open('test/test_hypothesis.txt').read().splitlines(),
    'conf': list(conf_array)
    })

In [62]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert = BertModel.from_pretrained("bert-base-uncased").to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
masked_hits = np.where(confidences != 1, 1, 0)
masked_confidences = np.where(confidences != 1, confidences, 0)
hist = masked_confidences.sum(axis=0) / masked_hits.sum(axis=0)

In [64]:
with torch.no_grad():
    tokens = bert_tokenizer(list(data.text.values), padding="longest").input_ids
    tokens = torch.tensor(tokens).to(device)
    embeddings = []
    for i in tqdm.tqdm(range(len(tokens))):
        emb = bert(tokens[i].view(1,38)).pooler_output.cpu().detach()
        embeddings.append(emb)

100%|██████████| 3150/3150 [00:39<00:00, 80.57it/s]


In [65]:
embeddings = list(map(lambda x: x[0].numpy(), embeddings))

In [74]:
data['text_embeddings'] = embeddings
data

Unnamed: 0,classes,text,conf,text_embeddings
0,airport,An airport with a runway on the farmland and s...,"[0.40354893, 0.9988651, 0.92300886, 0.42026994...","[-0.44611844, -0.5037827, -0.9770528, 0.629920..."
1,airport,The airport with some staggered runways and a ...,"[0.25058964, 0.9982071, 0.9722014, 0.6550449, ...","[-0.49807495, -0.3656745, -0.86610484, 0.51463..."
2,airport,An airport with two parallel runways perpendic...,"[0.483505, 0.99916255, 0.9385024, 0.4119709, 0...","[-0.43180054, -0.5027431, -0.96019113, 0.59383..."
3,airport,There airport with some parallelways perpendic...,"[0.25636074, 0.99622107, 0.9557086, 0.32363772...","[-0.14300999, -0.4275721, -0.9775194, 0.501682..."
4,airport,An airport with some staggeredways perpendicul...,"[0.3787551, 0.9991002, 0.95848227, 0.45996037,...","[-0.4592382, -0.52649605, -0.97654355, 0.63209..."
...,...,...,...,...
3145,airplane,Two planes are next the open and a lawn beside.,"[0.700559, 0.4066743, 0.5457334, 0.42717522, 0...","[-0.42181855, -0.5287245, -0.980238, 0.5691961..."
3146,airplane,A purple plane parked next to a boarding bridg...,"[0.40402263, 0.4289053, 0.91317993, 0.6797357,...","[-0.26916754, -0.35601264, -0.93533576, 0.4809..."
3147,airplane,Two planes planes parked different sizes are i...,"[0.31834695, 0.4757649, 0.7869386, 0.40645906,...","[-0.39184868, -0.40650916, -0.9101948, 0.43862..."
3148,airplane,Two planes are on a line on the open while a b...,"[0.6247576, 0.5185052, 0.59193176, 0.3818173, ...","[-0.55357575, -0.45693985, -0.9415538, 0.64116..."


In [88]:
reducer = umap.UMAP()
txt_embeddings = np.stack(data.text_embeddings.values, axis=0)
umap_text_embedding = reducer.fit_transform(txt_embeddings)

In [92]:
data['x'] = list(umap_text_embedding[:,0])
data['y'] = list(umap_text_embedding[:,1])

In [94]:
cluster = KMeans(n_clusters=157*4, random_state=0).fit(txt_embeddings)

In [95]:
data['cluster'] = cluster.labels_

In [96]:
data

Unnamed: 0,classes,text,conf,text_embeddings,x,y,cluster
0,airport,An airport with a runway on the farmland and s...,"[0.40354893, 0.9988651, 0.92300886, 0.42026994...","[-0.44611844, -0.5037827, -0.9770528, 0.629920...",9.838443,4.786550,531
1,airport,The airport with some staggered runways and a ...,"[0.25058964, 0.9982071, 0.9722014, 0.6550449, ...","[-0.49807495, -0.3656745, -0.86610484, 0.51463...",6.767348,13.849251,66
2,airport,An airport with two parallel runways perpendic...,"[0.483505, 0.99916255, 0.9385024, 0.4119709, 0...","[-0.43180054, -0.5027431, -0.96019113, 0.59383...",10.117051,5.246923,201
3,airport,There airport with some parallelways perpendic...,"[0.25636074, 0.99622107, 0.9557086, 0.32363772...","[-0.14300999, -0.4275721, -0.9775194, 0.501682...",1.606305,8.746367,164
4,airport,An airport with some staggeredways perpendicul...,"[0.3787551, 0.9991002, 0.95848227, 0.45996037,...","[-0.4592382, -0.52649605, -0.97654355, 0.63209...",9.575499,4.669265,540
...,...,...,...,...,...,...,...
3145,airplane,Two planes are next the open and a lawn beside.,"[0.700559, 0.4066743, 0.5457334, 0.42717522, 0...","[-0.42181855, -0.5287245, -0.980238, 0.5691961...",5.644590,5.192215,161
3146,airplane,A purple plane parked next to a boarding bridg...,"[0.40402263, 0.4289053, 0.91317993, 0.6797357,...","[-0.26916754, -0.35601264, -0.93533576, 0.4809...",4.185515,9.850986,374
3147,airplane,Two planes planes parked different sizes are i...,"[0.31834695, 0.4757649, 0.7869386, 0.40645906,...","[-0.39184868, -0.40650916, -0.9101948, 0.43862...",5.642641,10.905988,267
3148,airplane,Two planes are on a line on the open while a b...,"[0.6247576, 0.5185052, 0.59193176, 0.3818173, ...","[-0.55357575, -0.45693985, -0.9415538, 0.64116...",10.486596,9.688638,360


In [None]:
plt.figure(figsize=(10,10))
# data.groupby
filtered = data[data.classes.isin(['airport'])]
size = pd.Series(filtered.conf.values)
size -= size.mean()
# size = size.map(lambda x: x.min()*1000)
# size *= -1
# size = size.map(lambda x: x if x > 0 else 0)
# sns.scatterplot(data=data, x='x', y='y', hue='classes', palette='tab20', s=size)
size

In [135]:
plt.figure(figsize=(10, 5))
sns.set_theme(style="whitegrid")

sns.barplot(x=list(range(25)), y=hist[:25], color='cyan')

plt.xticks(ticks=[0, 5, 10, 15, 20, 25], labels=[0, 5, 10, 15, 20, 25])
plt.xlabel('Word index')
plt.ylabel('Confidence')