In [3]:
%%bash
export PYTHONPATH=.
export AOKVQA_DIR=./datasets/aokvqa/
export COCO_DIR=./datasets/coco/
export FEATURES_DIR=./features/
export LOG_DIR=./logs/
export PREDS_DIR=./predictions/
export PT_MODEL_DIR=./pretrained_models/

In [12]:
import argparse
import json
import os
import pathlib
import sys

import clip
import numpy as np
import PIL.Image
import skimage.io as io
import torch
import torch.nn.functional as F
from IPython.display import Image
from tqdm import tqdm
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup

from ClipCap.data import prompt_text, load_data
from ClipCap.train import load_config, load_model
from evaluation.remap_predictions import map_to_choices
from load_aokvqa import load_aokvqa

In [10]:
# Pick the compute device at notebook level so this cell runs on a fresh kernel;
# otherwise `device` only exists as a local variable inside main().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# clip.load returns the CLIP model together with its image preprocessing transform.
clip_model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
# GPT-2 tokenizer; main() reads this module-level name when generating text.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [7]:
def main(argv=None):
    """Generate predictions for one A-OKVQA split with a trained ClipCap model.

    Steps: parse the command-line options, merge them with the
    ``model_config.json`` stored in ``--log-dir``, load the checkpoint
    ``checkpoints/ckpt-<epoch>.pt``, generate one text answer per dataset item
    (beam search when ``--beam-search`` is given), optionally remap the answers
    with ``map_to_choices``, and write the ``{question_id: answer}`` mapping as
    JSON to ``--out``.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]`` as before. Pass an explicit list to
            call this from a notebook, where ``sys.argv`` holds the kernel's
            own arguments.

    Raises:
        SystemExit: Raised by argparse when required options are missing or invalid.

    NOTE(review): ``dataset``, ``entry_length``, ``tokenizer``, ``generate`` and
    ``generate_beam`` are not defined in this function; they are looked up as
    globals and must be provided by other cells - confirm they exist before running.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--log-dir', type=pathlib.Path, required=True, dest='log_dir')
    parser.add_argument('--epoch', type=int, required=True)
    parser.add_argument('--aokvqa-dir', type=pathlib.Path, required=True, dest='aokvqa_dir')
    parser.add_argument('--split', type=str, choices=['train', 'val', 'test'], required=True)
    parser.add_argument('--eval-features', type=pathlib.Path, required=True, dest='eval_features')
    parser.add_argument('--map-to-choices', action='store_true', dest='map_to_choices')
    parser.add_argument('--beam-search', action='store_true', dest='beam_search')
    parser.add_argument('--out', type=argparse.FileType('w'), required=True, dest='output_file')
    cfg = parser.parse_args(argv)

    # Merge CLI options, the saved model config and inference-time overrides into
    # one namespace. Keyword unpacking raises TypeError if a key occurs twice.
    cfg = argparse.Namespace(
        **vars(cfg),
        **vars(load_config(os.path.join(cfg.log_dir, 'model_config.json'))),
        **{
            'finetune_gpt': False,
            f"{cfg.split}_features": cfg.eval_features
        }
    )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = load_model(
        cfg,
        os.path.join(cfg.log_dir, 'checkpoints', f"ckpt-{cfg.epoch:03d}.pt")
    )
    model = model.to(device)
    # Inference only: switch off dropout and similar train-time behaviour.
    # Harmless if load_model already did this.
    model.eval()


    ## Run inference
    predictions = {}

    with torch.no_grad():
        for i in range(len(dataset)):
            q = dataset.question_ids[i]
            prefix, prompt_tokens, prompt_len = dataset[i]

            # unsqueeze(0) adds a leading batch axis of size one.
            prefix = prefix.unsqueeze(0).to(device)
            prompt_tokens = prompt_tokens.unsqueeze(0).to(device)
            embedding_text = model.gpt.transformer.wte(prompt_tokens)
            prefix_projections = model.clip_project(prefix).view(-1, model.prefix_length, model.gpt_embedding_size)
            # Projected prefix first, then the prompt token embeddings, joined along dim 1.
            embed = torch.cat(( prefix_projections, embedding_text ), dim=1)

            if cfg.beam_search:
                generated_text = generate_beam(model, tokenizer, embed, device, beam_size=5, return_top_pred=True, entry_length=entry_length, stop_token_index=tokenizer.eos_token_id)
            else:
                generated_text = generate(model, tokenizer, embed=embed, entry_length=entry_length, stop_token_index=tokenizer.eos_token_id)

            predictions[q] = generated_text

    if cfg.map_to_choices:
        aokvqa_set = load_aokvqa(cfg.aokvqa_dir, cfg.split)
        # Re-key the loaded examples by question_id before remapping.
        aokvqa_set = { aokvqa_set[i]['question_id'] : aokvqa_set[i] for i in range(len(aokvqa_set)) }
        predictions = map_to_choices(aokvqa_set, predictions)

    try:
        json.dump(predictions, cfg.output_file)
    finally:
        # argparse.FileType opened the handle; close it so the JSON is flushed to
        # disk. "--out -" maps to sys.stdout, which must stay open.
        if cfg.output_file is not sys.stdout:
            cfg.output_file.close()


TypeError: load_model() missing 1 required positional argument: 'cfg'

In [None]:
# Read one image and turn it into a batch of one for the CLIP model.
# Expects `preprocess` (returned by clip.load) and `device` from an earlier cell.
url = " " # linkimage
if not url.strip():
    # Fail with a clear message instead of an obscure error from inside imread.
    raise ValueError("Set `url` to an image URL or file path before running this cell.")

image_array = io.imread(url)
pil_image = PIL.Image.fromarray(image_array)

# unsqueeze(0) adds the leading batch axis; the tensor is then moved to `device`.
image = preprocess(pil_image).unsqueeze(0).to(device)
