- This notebook is just a quick inspection of the COCON model.

In [2]:
from logging import getLogger, basicConfig
import sys
from typing import List

from IPython.display import clear_output
import torch
from ptutils import count_params
from transformers import (
    GPT2Config, 
    GPT2Tokenizer,
    GPT2LMHeadModel,
    CoconBlock
)
from utils.utils import set_seed, fix_state_dict_naming

from args import get_args

In [3]:
# Emulate a command-line invocation inside the notebook: the options are written
# into sys.argv right before get_args() is called (get_args lives in args.py and
# is assumed to parse sys.argv — not shown here).
# Each entry is either a bare flag or an (option, value) pair; order is preserved.
_CLI_OPTIONS = (
    ("--do_cocon_compute",),
    ("--output_dir", "models/COCON"),
    ("--cocon_output_filename", "computers_cocon_output.txt"),
    ("--cocon_output_jsonl_filename", "computers_cocon_output.jsonl"),
    ("--model_type", "gpt2"),
    ("--model_name_or_path", "gpt2-medium"),
    ("--output_hidden_for_cocon_after_block_ind", "6"),
    ("--per_gpu_eval_batch_size", "1"),
    ("--prepend_bos_token_to_line",),
    ("--gen_cs_len", "5"),
    ("--generate_length", "80"),
    ("--line_by_line_cs",),
    ("--line_by_line_hs",),
    ("--enumerate_all_cs_for_each_hs",),
    ("--seed", "42"),
)
sys.argv = ["this_file.py", *(token for option in _CLI_OPTIONS for token in option)]


args = get_args()
# Record how many CUDA devices are visible on the parsed options object.
args.n_gpu = torch.cuda.device_count()

In [4]:
# Seed the run from the parsed options (--seed 42 in the argv cell above).
# set_seed comes from utils.utils; presumably it seeds the RNGs used for
# sampling — TODO confirm which libraries it covers.
set_seed(args)

In [5]:
# Configure the root logger at INFO level so that library log records
# (e.g. the weight-loading messages from transformers) show up in cell output.
basicConfig(level="INFO")

In [6]:
# Logger named after this module (the notebook's __name__).
logger = getLogger(__name__)

In [7]:
# Name of the pretrained checkpoint used for the config, tokenizer and model loads.
model_name = "gpt2-medium"

# Run on the first CUDA device when one is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu:0"

In [8]:
# Load the pretrained configuration for `model_name`; the same `config` object
# is later handed to both the GPT-2 model and the CoconBlock constructor.
config = GPT2Config.from_pretrained(model_name)

# Load the tokenizer that matches the same checkpoint name.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Clear whatever the two loads wrote to the cell output (e.g. INFO log lines).
clear_output()

In [9]:
# Load the pretrained GPT-2 LM-head model for `model_name` and move it to `device`.
# NOTE(review): `output_meanvars` and `compute_meanvars_before_layernorm` are not
# stock GPT-2 options — presumably they are consumed by the project's modified
# transformers package; confirm their meaning there.
model = GPT2LMHeadModel.from_pretrained(
    model_name,
    from_tf=False,  # weights are a PyTorch checkpoint, not a TensorFlow one
    config=config,
    cache_dir=None,  # no explicit cache directory is given
    output_meanvars=True,
    compute_meanvars_before_layernorm=False
)
model = model.to(device)

INFO:transformers.modeling_utils:loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-pytorch_model.bin from cache at /home/nptdat/.cache/torch/transformers/4b337a4f3b7d3e1518f799e238af607498c02938a3390152aaec7d4dabca5a02.8769029be4f66a5ae1055eefdd1d11621b901d510654266b8681719fff492d6e


In [10]:
# Load CoconBlock: build the block from the GPT-2 config, then restore its
# trained weights from the local checkpoint.
cocon_block = CoconBlock(config.n_ctx, config, scale=True)

# map_location="cpu" deserializes every tensor onto the CPU no matter which device
# the checkpoint was saved from. Without it, a checkpoint written on a GPU cannot
# be loaded on a machine without CUDA, which would defeat the CPU fallback chosen
# for `device`. The weights are moved to the target device after loading.
cocon_state_dict = torch.load(
    "models/COCON/cocon_block_pytorch_model.bin",
    map_location="cpu",
)

# fix_state_dict_naming (utils.utils) produces the dict that is actually loaded;
# presumably it renames checkpoint keys to match CoconBlock — TODO confirm.
new_cocon_state_dict = fix_state_dict_naming(cocon_state_dict)
cocon_block.load_state_dict(new_cocon_state_dict)

# A freshly constructed nn.Module starts in training mode. This notebook only
# generates text, so switch to eval mode to disable any dropout inside the block.
# (Kept as an assignment so the cell does not echo the module repr.)
cocon_block = cocon_block.to(device).eval()

INFO:transformers.modeling_gpt2:CoconBlock initialized


In [11]:
# Report the parameter count of the base LM and of the CoconBlock, one line each.
for _label, _module in (
    (model.__class__.__name__, model),
    ("CoconBlock", cocon_block),
):
    print(f"Num params of {_label}: {count_params(_module):,}")

Num params of GPT2LMHeadModel: 354,823,168
Num params of CoconBlock: 14,697,472


In [12]:
def generate_topic(prompt_text: str, context: str, model, cocon_block, tokenizer, args, device,
                   context_attn_bias=-5) -> str:
    """Sample a continuation of ``prompt_text``, optionally conditioned on ``context``.

    The prompt is prefixed with the tokenizer's BOS token before encoding.
    When ``context`` is non-empty, generation goes through ``cocon_block`` with the
    prompt passed as the CoCon history and ``context`` as the CoCon content input;
    otherwise plain ``model.generate`` sampling on the prompt is used.

    Args:
        prompt_text: Text the generation starts from.
        context: Conditioning text; an empty string (or any falsy value) selects
            the GPT-2-only path.
        model: Language model exposing ``generate(...)``.
        cocon_block: Block forwarded to ``model.generate`` on the CoCon path.
        tokenizer: Tokenizer providing ``bos_token``, ``encode`` and ``decode``.
        args: Options object; reads ``generate_length``, ``temperature``, ``k``,
            ``p``, ``repetition_penalty``, ``num_return_sequences``,
            ``output_hidden_for_cocon_after_block_ind`` and
            ``use_only_last_cocon_output_for_ar``.
        device: Device the encoded token tensors are moved to.
        context_attn_bias: Value forwarded as ``context_attn_bias`` on the CoCon
            path. Defaults to -5, the value previously hard-coded here.

    Returns:
        The decoded text of the first returned sequence (prompt included).
    """
    prompt_seq = tokenizer.encode(
        tokenizer.bos_token + prompt_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(device)

    # Sampling options common to both generation paths.
    sampling_kwargs = dict(
        max_length=args.generate_length,
        temperature=args.temperature,
        top_k=args.k,
        top_p=args.p,
        repetition_penalty=args.repetition_penalty,
        do_sample=True,
        num_return_sequences=args.num_return_sequences,
    )

    if not context:
        print("--- Generate with GPT-2 only")
        output_sequences = model.generate(input_ids=prompt_seq, **sampling_kwargs)
        return tokenizer.decode(output_sequences[0])

    print("--- Generate with CoconBlock based on GPT-2")
    context_seq = tokenizer.encode(
        context, add_special_tokens=False, return_tensors="pt"
    ).to(device)
    output_sequences = model.generate(
        # Zero-length input: the prompt is supplied via cocon_history_inputs instead.
        input_ids=prompt_seq[:, 0:0],
        do_cocon=True,
        cocon_block=cocon_block,
        cocon_context_inputs=context_seq,
        cocon_history_inputs=prompt_seq,
        cocon_after_block_ind=args.output_hidden_for_cocon_after_block_ind,
        transform_h_after_layernorm=False,
        use_only_last_cocon_output_for_ar=args.use_only_last_cocon_output_for_ar,
        context_attn_bias=context_attn_bias,
        **sampling_kwargs,
    )
    # The returned ids are concatenated after the prompt. The prompt has batch
    # size 1, so broadcast it to the number of returned rows first; otherwise
    # torch.cat fails whenever more than one sequence comes back.
    prompt_rows = prompt_seq.expand(output_sequences.size(0), -1)
    output_sequences = torch.cat([prompt_rows, output_sequences], dim=1)

    return tokenizer.decode(output_sequences[0])

# The best & simplest generation

In [13]:
# Empty context: takes the plain GPT-2 sampling path (no CoconBlock).
generate_topic(
    prompt_text="It is listed on the Tokyo Stock Exchange, where",
    context="",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with GPT-2 only


'<|endoftext|>It is listed on the Tokyo Stock Exchange, where it has been available in thin sheet form for five years. When I asked Mr Mair whether he sold the 6mg (18mg, 21mg, 26mg, and 32mg units in the United States, he smiled and said: "I have gotten to about 50 out of 52 right now, and I\'ll continue to get them to'

In [14]:
# Same prompt family, conditioned on the content word "stock".
generate_topic(
    prompt_text="Toshiba Corporation is a Japanese",
    context="stock",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


'<|endoftext|>Toshiba Corporation is a Japanese consumer electronics company based in Japan, established in 1993, in the Western region of Japan.\n\nThe company was founded as a professional electronics company, setting its commercial objective to produce and market high-end electronic devices with reliable performance and affordable prices for Japanese consumers.\n\nIn 2001, it was renamed Toshiba Corporation, creating its modern manufacturing business models.\n\nSince then, Toshiba Corporation'

In [15]:
# Conditioned on the content word "finance".
generate_topic(
    prompt_text="Toshiba Corporation is a Japanese",
    context="finance",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


"<|endoftext|>Toshiba Corporation is a Japanese electronics company headquartered in Taipei. It's main business is in manufacturing and selling portable electronic equipment and solutions. The company's main activities include the production, assembly, and sale of products for commercial and industrial use. In the 1990s the company's main business was to develop and produce the entry level electronics that have traditionally been sold on the traditional electronics market. It became the world's largest electronics company"

In [16]:
# Conditioned on the content word "computer".
generate_topic(
    prompt_text="Toshiba Corporation is a Japanese",
    context="computer",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


'<|endoftext|>Toshiba Corporation is a Japanese conglomerate that makes PC software products and media players, including numerous multimedia software titles, and similar products such as video game consoles, video game controllers, and several personal computers. For more information about Toshiba Corporation, please visit our website at http://www.toshiba.co.jp/.\n\nUPDATES\n\n(8/28/2018) — Toshiba has announced that it'

In [17]:
# Generic prompt, conditioned on the content word "scandal".
generate_topic(
    prompt_text="In summary",
    context="scandal",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


"<|endoftext|>In summary, the Senate scandal is nearing its end, with reports that senators are suffering from hypothermia and questioning whether it is still an issue for the nation's flagship university.\n\nMultiple senators told CNN that Sen. Al Franken and Sen. Orrin Hatch were taken out of bed early Friday morning for a routine fitness test.\n\nA source told CNN that Hatch, now speaking to reporters in his"

In [18]:
# Generic prompt, conditioned on the content word "computers".
generate_topic(
    prompt_text="In summary",
    context="computers",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


"<|endoftext|>In summary, you can actually build websites. Any computers can do this. There are even online forums dedicated to computing where you can talk about it.\n\nAll of this can be done with a few simple, inexpensive devices that do just that. The most common ones are very simple, and most people don't want to make money on the hardware they develop.\n\nThere are also devices that can do"

In [19]:
# Generic prompt, conditioned on the content word "legal".
generate_topic(
    prompt_text="In summary",
    context="legal",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


'<|endoftext|>In summary:\n\nOn 28th April 2017 a craft helicopter was used by E-Bomb to bomb the largest oil field in Iraq, Hashd al-Shaabi (Infantry Brigade), located on the Khanaqin-El-Khadiba (ISIS-controlled) oil field, a distance of 800km.\n\nAs the craft was acting as a military helicopter, it was able'

In [20]:
# Generic prompt, conditioned on the content word "science".
generate_topic(
    prompt_text="In summary",
    context="science",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


'<|endoftext|>In summary, this article illustrates the ideal situation where an adult player or "player" is trading a certain amount of time for a certain amount of money. A player could be doing something good or doing something bad and the amount of time the person is involved can be used as a proxy for the value that the player gets from that transaction.\n\nThis post discusses the ideal situation in which an adult player or'

In [21]:
# Different prompt, conditioned on the content word "computers".
generate_topic(
    prompt_text="Prior to this",
    context="computers",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


'<|endoftext|>Prior to this video, machines were equipped with hardware that was capable of processing data on a visual and/or auditory level. For example, computer systems can perform mathematical calculations that allow information to be obtained and stored in multiple locations simultaneously.\n\nHowever, for the past few years, the field of audio/visual processing has been dominated by scientific devices. These devices include audio/visual analyzers that can perform calculations'

In [22]:
# Another prompt, conditioned on the content word "computers".
generate_topic(
    prompt_text="The key aspect",
    context="computers",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


"<|endoftext|>The key aspect of human cognition is the electric connections between neurons. This brain wiring is closely linked to the brain's chemical excitation of nerve cells and acts as a control system that regulates the behaviour of those nerve cells. The brain also has a sensor that senses the neurotransmitter, serotonin, in the body.\n\nSerotonin is a chemical that helps the brain regulate the activity of nerve cells and is known as"

In [23]:
# Multi-word context: conditioned on the phrase "japanese game industry".
generate_topic(
    prompt_text="The key aspect",
    context="japanese game industry",
    model=model,
    cocon_block=cocon_block,
    tokenizer=tokenizer,
    args=args,
    device=device,
)

--- Generate with CoconBlock based on GPT-2


"<|endoftext|>The key aspect of life in video game culture is playing Minecraft. Since its release in 2009, Minecraft has spawned a series of other games and services that have become very popular. While many of these games are based on the Minecraft server, there are also many others based on its lore, and they can be found all over the internet. Today we're going to talk about some of them. The Minecraft series has grown"