<a href="https://colab.research.google.com/github/larissasantesso/IA025A_FinalProject_ImageCaptioning/blob/main/notebooks/run05_cityscapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mounting Google Drive in Colab backend

In [None]:
# Mount Google Drive under /content/gdrive so files stored there (dataset archive,
# project notebooks) can be read from this Colab session.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Extract the Cityscapes images into /content; CustomDataset reads them from
# /content/leftImg8bit (presumably the archive's top-level folder).
# Absolute paths keep this cell working whatever the current directory is,
# -o overwrites on re-run and -q suppresses the per-file listing.
!unzip -o -q "/content/gdrive/My Drive/Datasets/Semantic_Segmentation/Cityscapes/leftImg8bit_trainvaltest.zip" -d /content

# Importing variables and functions from other notebooks

## Installing library

In [None]:
!pip install import-ipynb
import import_ipynb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.4


## Changing to the directory where the notebooks are located

In [None]:
# Work from the project folder: the notebook imports below and the relative
# checkpoint path used later are resolved from here.
# Explicit %cd does not depend on IPython's automagic setting.
%cd /content/gdrive/MyDrive/Colab\ Notebooks/Projeto/

In [None]:
# Sanity check: list the current folder; the run00/run01/run02 notebooks imported
# in the next cell are expected to be here.
!ls

## Importing variables/functions from other notebooks

In [None]:
# NOTE(review): the star import presumably provides every name this notebook uses
# without importing it itself (Image, transforms, ViTFeatureExtractor, config_exp,
# tokenizer, torch, DataLoader, tqdm, device, unorm, os) — confirm against
# run00_dataset and prefer explicit imports.
from run00_dataset import *
# calculate_metrics, Encoder and Decoder are not referenced in the cells visible here.
from run01_metrics import calculate_metrics
from run02_models import Encoder, Decoder, EncoderDecoder

# Cityscapes Dataset

In [None]:
import glob

In [None]:
class CustomDataset():
    """Unlabeled Cityscapes ``leftImg8bit`` images of one split.

    Every file matching ``<root_dir>/<type_dataset>/<subfolder>/<file>`` is one
    sample. ``__getitem__`` returns only the transformed image (no target).

    Args:
        transform_x: Callable applied to the resized RGB ``PIL.Image``; whatever
            it returns is what ``__getitem__`` returns.
        type_dataset: Name of the split folder under ``root_dir``.
        root_dir: Folder that holds the split folders. The default is the
            location that used to be hard-coded.
        image_size: ``(width, height)`` handed to ``Image.resize``.
    """

    def __init__(self, transform_x, type_dataset="train",
                 root_dir="/content/leftImg8bit", image_size=(224, 224)):
        print(f"{type_dataset} dataset")

        # glob yields paths in arbitrary, filesystem-dependent order. Sorting
        # keeps sample indices stable between runs, so an index always refers
        # to the same image.
        pattern = f"{root_dir}/{type_dataset}/*/*"
        self.imgsPath_list = sorted(glob.glob(pattern))

        self.transform_x = transform_x
        self.image_size = tuple(image_size)

    def __len__(self):
        """Return the number of image files found for the split."""
        return len(self.imgsPath_list)

    def __getitem__(self, idx):
        """Load image ``idx``, convert to RGB, resize and apply ``transform_x``."""
        path_image = self.imgsPath_list[idx]

        # The context manager releases the file handle deterministically;
        # convert() returns a separate image object that is used afterwards.
        with Image.open(path_image) as raw_image:
            img = raw_image.convert("RGB")

        img = img.resize(self.image_size, resample=Image.BILINEAR)
        return self.transform_x(img)

In [None]:
# Normalise images with the mean/std stored in the feature extractor of the
# pretrained encoder named in the experiment config.
feature_extractor = ViTFeatureExtractor.from_pretrained(
    config_exp["encoder_pretrained_model"]
)

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=feature_extractor.image_mean,
            std=feature_extractor.image_std,
        ),
    ]
)

# One dataset per split; each constructor call prints "<split> dataset".
train_dataset = CustomDataset(transform, type_dataset="train")
val_dataset = CustomDataset(transform, type_dataset="val")

train dataset
val dataset


In [None]:
# Number of images found for the train and val splits, one per line.
print(len(train_dataset), len(val_dataset), sep="\n")

2975
500


# Installing, Importing and Connecting to W&B

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install wandb -qqq

In [None]:
# Log in to your W&B account before any run is created in this notebook.
import wandb
wandb.login()

In [None]:
# Load the complete model object stored in the checkpoint (path is relative to
# the project folder). The checkpoint replaces the model entirely, so a
# pretrained EncoderDecoder is no longer built first only to be discarded.
# map_location places the model on the device the evaluation inputs are moved to
# and lets a checkpoint saved on GPU load in a CPU-only session.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load trusted
# files. Recent PyTorch releases default to weights_only=True; if loading fails
# there, pass weights_only=False (not added here because older releases reject
# that argument).
model = torch.load("modelos/model_exp008_run02.pt", map_location=device)

# Qualitative Evaluation

In [None]:
# Suffix of the W&B run display name (used as "experiment_<run_name>").
run_name = "008_evaluation"
# Id of the W&B run that the wandb.init() call in the next cell resumes.
run_id = '32mzuqkb'

In [None]:
MAX_CAPTION_TOKENS = 32  # upper bound on tokens generated per image


def _greedy_decode(model, images, max_new_tokens=MAX_CAPTION_TOKENS):
    """Greedily generate caption token ids for a batch of images.

    Every sequence starts with ``model.decoder_start_token_id``. At each step
    the arg-max token of the last position is appended. Decoding stops once
    every sequence in the batch has just produced ``model.eos_token_id`` or
    after ``max_new_tokens`` tokens.

    Args:
        model: Callable as ``model(images=..., decoder_ids=...)`` returning an
            object with a ``logits`` attribute indexed as (batch, position, vocab).
        images: Image batch already on the target device.
        max_new_tokens: Maximum number of tokens appended after the start token.

    Returns:
        Long tensor of shape (batch, 1 + generated_tokens), start token included.
    """
    decoded_ids = torch.full((images.shape[0], 1),
                             model.decoder_start_token_id,
                             dtype=torch.long,
                             device=images.device)

    for _ in range(max_new_tokens):
        logits = model(images=images, decoder_ids=decoded_ids).logits
        next_token_id = logits[:, -1, :].argmax(1).unsqueeze(-1)
        decoded_ids = torch.cat([decoded_ids, next_token_id], dim=-1)

        # Stop as soon as every sequence in the batch emitted end-of-sequence.
        if torch.eq(next_token_id[:, -1], model.eos_token_id).all():
            break

    return decoded_ids


# `id` selects the run to continue; `resume` takes a resume mode, not a run id.
# "allow" continues the run if it exists and otherwise starts it under that id.
run = wandb.init(
    project="ImageCaptioning_Project",
    name=f"experiment_{run_name}",
    config=config_exp,
    id=run_id,
    resume="allow",
    settings=wandb.Settings(start_method="thread"),
    reinit=True,
    dir=os.getenv("WANDB_DIR", config_exp["path_save_checkpoints"]),
)

val_loader = DataLoader(val_dataset, batch_size=1)
columns = ["id", "image", "predicted"]
mytable = wandb.Table(columns=columns)

# Not used in the cells visible here; kept in case a later cell references it.
list_id_preds = []

# Inference mode only needs to be set once, not on every batch.
model.eval()

with torch.no_grad():
    for idx, inputs in enumerate(tqdm(val_loader)):
        inputs = inputs.to(device)

        decoded_ids = _greedy_decode(model, inputs)
        eval_preds_sentences = tokenizer.batch_decode(decoded_ids, skip_special_tokens=True)

        # batch_size is 1, so element 0 is the only image/caption of the batch.
        # The image is passed through unorm and reordered to channels-last for wandb.Image.
        image_hwc = unorm(inputs[0].squeeze().cpu()).permute(1, 2, 0).numpy()
        mytable.add_data(idx, wandb.Image(image_hwc), eval_preds_sentences[0])

wandb.log({"Table_cityscapes_dataset": mytable})

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

  0%|          | 0/500 [00:00<?, ?it/s]

In [None]:
# Close the W&B run; the saved output below shows the pending data being uploaded.
wandb.finish()

VBox(children=(Label(value='35.046 MB of 35.046 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…