<a href="https://colab.research.google.com/github/player1537/Train-Bloom-560m/blob/main/Master_Apprentice_Experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install Dependencies
%%script bash
# Pick a scratch directory: $TMPDIR if set, else $TMP, else /tmp.
TMPDIR=${TMPDIR:-${TMP:-/tmp}}
# pip's combined stdout/stderr is redirected to this file so that a
# successful install produces no cell output.
TMPOUT=${TMPDIR:?}/pip.text
# Packages to install or upgrade; no versions are pinned.
INSTALL=(
  transformers
  torch
  datasets
  tqdm
  accelerate
  peft
  huggingface_hub
  guidance
)

# On failure, replay the captured pip log on stderr and exit non-zero.
if ! pip install --upgrade "${INSTALL[@]}" &>"${TMPOUT:?}"; then
  cat "${TMPOUT:?}" >&2
  exit 1
fi

In [None]:
import huggingface_hub
# Show the Hugging Face login widget.  Later cells call Hub APIs
# (create_branch, push_to_hub) that presumably need these credentials.
huggingface_hub.notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
#@title Utilities & Imports
from dataclasses import dataclass, field
from pathlib import Path
from functools import lru_cache
import itertools
import inspect

import torch
import transformers
from transformers.utils import cached_property
import datasets
import peft
import guidance
from tqdm.notebook import tqdm
from IPython.display import clear_output
import huggingface_hub

def doctest(func=None, /, verbose=False, sterile=False):
  """Decorator that runs a function's docstring examples when it is defined.

  Usable bare (``@doctest``) or with options (``@doctest(verbose=True)``).

  Args:
    func: The function to check; ``None`` when only keyword options are given.
    verbose: Forwarded to the doctest finder and runner.
    sterile: When true, the examples see only the decorated function;
      otherwise they see a shallow copy of this module's globals as well.

  Returns:
    The function itself, unchanged (or a decorator when ``func`` is ``None``).

  Raises:
    AssertionError: If any docstring example fails.
  """
  def wrapper(func):
    # Thanks https://stackoverflow.com/a/49659927
    # Imported locally: inside this scope `doctest` is the stdlib module, not
    # the decorator being defined here.
    import doctest
    import copy

    # I need this to error out on failure; the default one doesn't.
    def run_docstring_examples(f, globs, verbose=False, name="NoName", compileflags=None, optionflags=0):
      finder = doctest.DocTestFinder(verbose=verbose, recurse=False)
      runner = doctest.DocTestRunner(verbose=verbose, optionflags=optionflags)
      # Examine the object that was passed in, not the enclosing `func`.
      for test in finder.find(f, name, globs=globs):
        runner.run(test, compileflags=compileflags)
      assert runner.failures == 0, f'{runner.failures} doctest failure(s) in {name}'

    name = func.__name__

    if sterile:
      globs = {}
    else:
      globs = copy.copy(globals())
    # The examples must be able to call the function under test by name.
    globs[name] = func
    run_docstring_examples(func, globs, verbose=verbose, name=name)
    return func

  if func is not None:
    return wrapper(func)
  else:
    return wrapper

# Registry of named results that `run` reads its inputs from and writes its
# outputs to.  Created only when absent, so re-running this cell keeps
# whatever earlier cells already stored.
if 'g' not in globals():
  g = {}

def run(func=None, /, name=None, cond=True, splat=False):
  """Decorator that calls a function immediately and files its result in ``g``.

  Each positional-only parameter of the decorated function is looked up by
  name in ``g`` and passed in; other parameters are not supplied.  Usable
  bare (``@run``) or with options (``@run(cond=..., splat=True)``).

  Args:
    func: The function to execute; ``None`` when only keyword options are given.
    name: Key under which the result is stored; defaults to ``func.__name__``.
    cond: A value, or a zero-argument callable returning one; when it is
      falsy the function is not called and nothing is stored.
    splat: When true, the result must be a mapping and each of its items is
      stored in ``g`` individually instead of storing the result under ``name``.

  Returns:
    Always ``None`` (so the decorated name is bound to ``None``), or a
    decorator when ``func`` is ``None``.

  Raises:
    KeyError: If a positional-only parameter has no entry in ``g``.
  """
  def wrapper(func, /, *, name=name, cond=cond):
    if callable(cond):
      cond = cond()

    if not cond:
      return None

    if name is None:
      name = func.__name__

    args = []
    for key, parameter in inspect.signature(func).parameters.items():
      if parameter.kind != inspect.Parameter.POSITIONAL_ONLY:
        continue
      if key not in g:
        # Same exception type as a plain lookup, but says which step is stuck.
        raise KeyError(
          f'{func.__name__}() needs {key!r}, but no earlier step has produced it'
        )
      args.append(g[key])

    ret = func(*args)

    if splat:
      for key, value in ret.items():
        g[key] = value
    else:
      g[name] = ret

    return None

  if func is not None:
    return wrapper(func)
  else:
    return wrapper

In [None]:
#@title Configuration
# Stage switches.  Every pipeline cell is decorated with `@run(cond=DO_<STAGE>)`
# and is skipped when its switch is False.  Flip a switch by moving the `#`
# between the True/False lines.
DO_EXPLODE = (
  # True
  False
)
DO_TOKENIZE = (
  # True
  False
)
DO_COMPLETE = (
  # True
  False
)
DO_IMPLODE = (
  # True
  False
)
DO_TRAIN = (
  # True
  False
)
DO_TEST = (
  True
  # False
)

# Base causal LM loaded by the complete, train and test stages.
MODEL_NAME = (
  'bigscience/bloom-560m'
)
# Adapter applied on top of the base model in the complete stage.
ADAPTER_NAME = (
  'player1537/Bloom-560m-trained-on-Wizard-Vicuna-Uncensored'
)
# The tokenizer is loaded from the same checkpoint as the base model.
TOKENIZER_NAME = (
  MODEL_NAME
)
# Source conversations consumed by the explode stage.
DATASET_NAME = (
  'ehartford/wizard_vicuna_70k_unfiltered'
)

# Seed for the shuffle applied before sharding in the explode stage.
EXPLODE_SHUFFLE_SEED = (
  1337
)
# Number of shards the shuffled dataset is split into.
EXPLODE_SHARDS = (
  100
)
# Zero-based shard index passed to `Dataset.shard`; the number inside the
# parentheses is the one-based shard, and BRANCH adds the 1 back.
EXPLODE_SHARD = -1 + (
  1
)

# Number of copies of the training text concatenated before chunking.
TRAIN_REPLICAS = (
  2
)
# Number of tokens per training example.
TRAIN_CONTEXT_SIZE = (
  1024
)
# Learning rate handed to TrainingArguments.
TRAIN_LEARNING_RATE = (
  1e-5
)

# Builds the Hub branch name for a stage, e.g. BRANCH('explode') gives
# 'explode-shuffle1337-1of100' with the values above.
BRANCH = lambda prefix: \
  f'{prefix}-shuffle{EXPLODE_SHUFFLE_SEED}-{1+EXPLODE_SHARD}of{EXPLODE_SHARDS}'

# Hub repo id used for both the intermediate datasets and the trained adapter.
REPOSITORY = (
  'player1537/Master-Apprentice-Experiment'
)

# Explode

In [None]:
@run(cond=DO_EXPLODE)
def tokenizer():
  """Load the TOKENIZER_NAME tokenizer (with add_prefix_space=True) for the explode stage."""
  return transformers.AutoTokenizer.from_pretrained(
    TOKENIZER_NAME,
    add_prefix_space=True,
  )

In [None]:
@run(cond=DO_EXPLODE)
def dataset():
  """Load DATASET_NAME and keep only its 'train' split."""
  splits = datasets.load_dataset(DATASET_NAME)
  return splits['train']

In [None]:
@run(cond=DO_EXPLODE, splat=True)
def dataset(dataset, /):
  """Shuffle with EXPLODE_SHUFFLE_SEED, then keep shard EXPLODE_SHARD of EXPLODE_SHARDS.

  The result is returned as a one-item mapping because this cell runs with
  splat=True; it is stored back under 'dataset'.
  """
  shuffled = dataset.shuffle(seed=EXPLODE_SHUFFLE_SEED)
  return {'dataset': shuffled.shard(EXPLODE_SHARDS, EXPLODE_SHARD)}

In [None]:
@run(cond=DO_EXPLODE)
def dataset(dataset, tokenizer, /):
  """Flatten every conversation into one row per message.

  A 'human' message becomes a 'user' row.  A 'gpt' message becomes two rows:
  an 'apprentice' row whose content is the transcript so far followed by
  'ASSISTANT:' (flagged needs_completion), and a 'master' row holding the
  original reply.  'index' numbers the rows within one conversation from 0.

  Raises:
    NotImplementedError: For a message whose 'from' is neither 'human' nor 'gpt'.
  """
  COLUMNS = ('id', 'index', 'role', 'content', 'needs_completion')

  def each(inps):
    rows = []

    for conversation_id, messages in zip(inps['id'], inps['conversations']):
      position = 0
      transcript = [tokenizer.bos_token]  # '<s>'

      for message in messages:
        speaker = message['from']
        body = message['value']

        if speaker == 'human':
          pending = [('user', body, False)]
        elif speaker == 'gpt':
          pending = [
            ('apprentice', ''.join(transcript + ['ASSISTANT:']), True),
            ('master', body, False),
          ]
        else:
          raise NotImplementedError()

        for role, content, needs_completion in pending:
          rows.append((conversation_id, position, role, content, needs_completion))
          position += 1

        # The transcript grows only after the rows for this message exist, so
        # an apprentice prompt never contains the reply it stands in for.
        if speaker == 'human':
          transcript += ['USER: ', body]
        else:
          transcript += ['ASSISTANT: ', body, tokenizer.eos_token]  # '</s>'

    # Transpose the rows into the column-oriented mapping `map` expects.
    outs = {}
    for row in rows:
      for column, value in zip(COLUMNS, row):
        outs.setdefault(column, []).append(value)
    return outs

  return dataset.map(each, batched=True, batch_size=1, remove_columns=['id', 'conversations'])

In [None]:
@run(cond=DO_EXPLODE, splat=True)
def dataset(dataset, /):
  """Create the explode branch in the Hub dataset repo.

  The dataset itself passes through unchanged; it is returned as a one-item
  mapping because this cell runs with splat=True.
  """
  # NOTE(review): 'orphan' is passed as the starting revision; presumably the
  # repo has a branch of that name -- confirm.
  huggingface_hub.create_branch(
    repo_id=REPOSITORY,
    branch=BRANCH('explode'),
    revision='orphan',
    repo_type='dataset',
  )
  return {'dataset': dataset}

In [None]:
@run(cond=DO_EXPLODE)
def dataset(dataset, /):
  """Upload the exploded rows to the explode branch of REPOSITORY.

  Nothing is returned, so the 'dataset' entry in `g` becomes None afterwards.
  """
  dataset.push_to_hub(repo_id=REPOSITORY, branch=BRANCH('explode'))
  return None

# Tokenize

In [None]:
@run(cond=DO_TOKENIZE)
def tokenizer():
  """Load the TOKENIZER_NAME tokenizer (with add_prefix_space=True) for the tokenize stage."""
  loaded = transformers.AutoTokenizer.from_pretrained(TOKENIZER_NAME, add_prefix_space=True)
  return loaded

In [None]:
@run(cond=DO_TOKENIZE)
def dataset():
  """Load the 'train' split of the explode branch of REPOSITORY."""
  splits = datasets.load_dataset(REPOSITORY, revision=BRANCH('explode'))
  return splits['train']

In [None]:
@run(cond=DO_TOKENIZE)
def dataset(dataset, /):
  """Keep only the rows whose 'needs_completion' flag is set."""
  return dataset.filter(lambda row: row['needs_completion'])

In [None]:
@run(cond=DO_TOKENIZE)
def dataset(dataset, tokenizer, /):
  """Add an 'n' column holding the token count of each row's 'content'."""
  def each(inps):
    counts = [len(tokenizer.tokenize(content)) for content in inps['content']]
    # A batch without rows contributes no columns at all.
    return {'n': counts} if counts else {}

  return dataset.map(each, batched=True)

In [None]:
@run(cond=DO_TOKENIZE, splat=True)
def dataset(dataset, /):
  """Create the tokenize branch in the Hub dataset repo.

  The dataset itself passes through unchanged; it is returned as a one-item
  mapping because this cell runs with splat=True.
  """
  # NOTE(review): 'orphan' is passed as the starting revision; presumably the
  # repo has a branch of that name -- confirm.
  huggingface_hub.create_branch(
    repo_id=REPOSITORY,
    branch=BRANCH('tokenize'),
    revision='orphan',
    repo_type='dataset',
  )
  return {'dataset': dataset}

In [None]:
@run(cond=DO_TOKENIZE)
def dataset(dataset, /):
  """Upload the token-counted prompts to the tokenize branch of REPOSITORY.

  Nothing is returned, so the 'dataset' entry in `g` becomes None afterwards.
  """
  dataset.push_to_hub(repo_id=REPOSITORY, branch=BRANCH('tokenize'))
  return None

# Complete

In [None]:
@run(cond=DO_COMPLETE)
def tokenizer():
  """Load the TOKENIZER_NAME tokenizer (with add_prefix_space=True) for the complete stage."""
  loaded = transformers.AutoTokenizer.from_pretrained(TOKENIZER_NAME, add_prefix_space=True)
  return loaded

In [None]:
@run(cond=DO_COMPLETE)
def model():
  """Load MODEL_NAME as a causal LM: float16 when CUDA is available, else float32."""
  if torch.cuda.is_available():
    dtype = torch.float16
  else:
    dtype = torch.float32
  return transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype)

@run(cond=DO_COMPLETE)
def model(model, /):
  """Wrap the base model with the adapter published as ADAPTER_NAME."""
  return peft.PeftModel.from_pretrained(model, ADAPTER_NAME)

In [None]:
@run(cond=DO_COMPLETE)
def pipeline(model, tokenizer, /):
  """Build a text-generation pipeline on GPU 0 when CUDA is available, else on the CPU."""
  if torch.cuda.is_available():
    device = 0   # GPU
  else:
    device = -1  # CPU
  return transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    device=device,
  )

In [None]:
@run(cond=DO_COMPLETE)
def dataset():
  """Load the 'train' split of the tokenize branch of REPOSITORY."""
  splits = datasets.load_dataset(REPOSITORY, revision=BRANCH('tokenize'))
  return splits['train']

# @run(cond=DO_COMPLETE)
# def dataset(SHARDS, SHARD, dataset, /):
#   dataset = dataset.sort(column_names=['n'])
#   dataset = dataset.shard(SHARDS, SHARD, contiguous=True)
#   dataset = dataset.sort(column_names=['n'])  # paranoid
#   return dataset

# @run(cond=DO_COMPLETE)
# def dataset(MINISHARDS, MINISHARD, dataset, /):
#   dataset = dataset.sort(column_names=['n'], reverse=True)
#   dataset = dataset.shard(MINISHARDS, MINISHARD, contiguous=True)
#   dataset = dataset.sort(column_names=['n'], reverse=True)  # paranoid
#   return dataset

In [None]:
@run(cond=DO_COMPLETE)
def dataset(dataset, pipeline, /):
  """Generate one sampled completion for every prompt row.

  Rows are ordered by descending token count `n`, cut into CHUNKS contiguous
  slices, and each slice is fed through the text-generation pipeline with a
  batch size that grows with the chunk number.

  Returns a new Dataset with columns 'id', 'index' and 'content', where
  'content' is the generated text only (`return_full_text=False`).
  """
  CHUNKS = (
    10
  )

  # Column-oriented accumulator for the returned dataset.
  outs = {}

  def emit(*, id, index, content):
    # Show a shortened preview of the latest completion on the batch bar.
    # `progress` is bound by the walrus assignment in the loop below, which
    # always runs before emit() is first called.
    summary = content
    if len(summary) > 21:
      summary = f'{summary[:10]}..{summary[-10:]}'
    progress.set_postfix_str(f'{id}-{index}: {summary!r}')
    outs.setdefault('id', []).append(id)
    outs.setdefault('index', []).append(index)
    outs.setdefault('content', []).append(content)

  # Longest prompts first, so the earliest chunks hold the longest inputs.
  dataset = dataset.sort(column_names=['n'], reverse=True)

  # Keep the full dataset; `dataset` is rebound to one slice per iteration.
  the_dataset = dataset
  for chunk in tqdm(range(CHUNKS), unit='chunk', leave=False):
    dataset = the_dataset.shard(CHUNKS, chunk, contiguous=True)

    # 4, 8, ..., 40: later chunks hold shorter prompts and get bigger batches
    # (presumably to keep memory use roughly level -- confirm).
    BATCH_SIZE = (
      4 + 4 * chunk
    )
    it = dataset
    # it = datasets.Dataset(it)
    # Feed the pipeline only the 'content' column.
    it = transformers.pipelines.pt_utils.KeyDataset(it, 'content')
    it = pipeline(
      it,
      batch_size=(
        BATCH_SIZE
      ),
      max_new_tokens=256,
      do_sample=True,
      temperature=1.3,
      # return_text=True,
      return_full_text=False,
    )
    # Take the first generated sequence of each result.
    it = (x[0]['generated_text'] for x in it)
    # Pair every completion with the id/index of the row it came from.
    it = zip(dataset['id'], dataset['index'], it)
    # Regroup into tuples of BATCH_SIZE items (the last one padded with None)
    # so that the inner progress bar advances once per batch.
    it = itertools.zip_longest(*( [iter(it)] * BATCH_SIZE ))
    it = (progress := tqdm(it, total=(len(dataset)+BATCH_SIZE-1)//BATCH_SIZE, unit='batch', leave=False))
    # Flatten back to single items and drop the None padding.
    it = itertools.chain.from_iterable(it)
    it = (x for x in it if x is not None)
    for id, index, content in tqdm(it, total=len(dataset)):
      # print(f'\n{content}\n')

      emit(
        id=id,
        index=index,
        content=content,
      )

  return datasets.Dataset.from_dict(outs)

In [None]:
@run(cond=DO_COMPLETE, splat=True)
def dataset(dataset, /):
  """Create the complete branch in the Hub dataset repo.

  The dataset itself passes through unchanged; it is returned as a one-item
  mapping because this cell runs with splat=True.
  """
  # NOTE(review): 'orphan' is passed as the starting revision; presumably the
  # repo has a branch of that name -- confirm.
  huggingface_hub.create_branch(
    repo_id=REPOSITORY,
    branch=BRANCH('complete'),
    revision='orphan',
    repo_type='dataset',
  )
  return {'dataset': dataset}

In [None]:
@run(cond=DO_COMPLETE)
def dataset(dataset, /):
  """Upload the generated completions to the complete branch of REPOSITORY.

  The branch must be the one created for this stage, because the implode
  stage loads its completions from BRANCH('complete').  Nothing is returned,
  so the 'dataset' entry in `g` becomes None afterwards.
  """
  dataset.push_to_hub(
    REPOSITORY,
    branch=BRANCH('complete'),
  )

# Implode

In [None]:
@run(cond=DO_IMPLODE)
def dataset():
  """Load every split of the complete branch, merge them, and sort by (index, id)."""
  splits = datasets.load_dataset(REPOSITORY, revision=BRANCH('complete'))
  merged = datasets.concatenate_datasets(dsets=list(splits.values()))
  return merged.sort(column_names=['index', 'id'])

In [None]:
@run(cond=DO_IMPLODE, splat=True)
def lookup(dataset, /):
  """Index the completions by (id, index).

  Because this cell runs with splat=True, two entries are stored: 'seen',
  the set of ids that have at least one completion, and 'lookup', a dict
  mapping (id, index) to the completion text.
  """
  completions = {}
  ids = set()

  for row in dataset:
    key = (row['id'], row['index'])
    ids.add(key[0])
    completions[key] = row['content']

  return {'seen': ids, 'lookup': completions}

In [None]:
@run(cond=DO_IMPLODE)
def dataset():
  """Load the 'train' split of the explode branch of REPOSITORY."""
  splits = datasets.load_dataset(REPOSITORY, revision=BRANCH('explode'))
  return splits['train']

In [None]:
@run(cond=DO_IMPLODE)
def dataset(dataset, lookup, /):
  """Substitute the generated completions into the exploded rows.

  A row flagged needs_completion gets the text stored in `lookup` under its
  (id, index), or None when no completion exists; every other row keeps its
  content.  The 'needs_completion' column is dropped.
  """
  SOURCE_COLUMNS = ('id', 'index', 'role', 'content', 'needs_completion')

  def each(inps):
    columns = [inps[key] for key in SOURCE_COLUMNS]

    outs = {}
    for id_, index, role, content, pending in zip(*columns):
      resolved = lookup.get((id_, index), None) if pending else content
      for key, value in (('id', id_), ('index', index), ('role', role), ('content', resolved)):
        outs.setdefault(key, []).append(value)
    return outs

  return dataset.map(each, batched=True, batch_size=None, remove_columns=['needs_completion'])

In [None]:
@run(cond=DO_IMPLODE)
def dataset(dataset, /):
  """Order rows so each conversation is contiguous and in message order.

  The id must be the primary sort key: the next step groups consecutive rows
  by id, and sorting by index first would interleave the conversations.
  """
  dataset = dataset.sort(column_names=['id', 'index'])
  return dataset

In [None]:
@run(cond=DO_IMPLODE)
def dataset(dataset, /):
  """Fold the per-message rows back into one row per conversation.

  Produces the columns 'id' and 'conversation', the latter a list of
  {'role', 'content'} dicts in ascending 'index' order.  Rows are grouped by
  id whether or not they are adjacent, so the result does not depend on how
  the input happens to be sorted.
  """
  def each(inps):
    # Collect messages per id; dicts keep first-seen order of the ids.
    grouped = {}
    for id, index, role, content in zip(inps['id'], inps['index'], inps['role'], inps['content']):
      grouped.setdefault(id, []).append((index, role, content))

    outs = {}
    for id, messages in grouped.items():
      # Stable sort: restores message order inside each conversation.
      messages.sort(key=lambda message: message[0])
      conversation = [
        {'role': role, 'content': content}
        for _, role, content in messages
      ]
      outs.setdefault('id', []).append(id)
      outs.setdefault('conversation', []).append(conversation)

    return outs

  # batch_size=None hands the whole dataset to `each` in a single batch, so
  # the grouping spans every row.
  dataset = dataset.map(each, batched=True, batch_size=None, remove_columns=['index', 'role', 'content', 'id'])
  return dataset

In [None]:
@run(cond=DO_IMPLODE)
def __create_branch(dataset, /):
  """Create the implode branch in the Hub dataset repo; the dataset argument is unused."""
  # NOTE(review): 'orphan' is passed as the starting revision; presumably the
  # repo has a branch of that name -- confirm.
  huggingface_hub.create_branch(
    repo_id=REPOSITORY,
    branch=BRANCH('implode'),
    revision='orphan',
    repo_type='dataset',
  )

In [None]:
@run(cond=DO_IMPLODE)
def __push_dataset(dataset, /):
  """Upload the regrouped conversations to the implode branch of REPOSITORY."""
  dataset.push_to_hub(REPOSITORY, branch=BRANCH('implode'))

# Train

In [None]:
@run(cond=DO_TRAIN)
def model():
  """Load MODEL_NAME as a causal LM: float16 when CUDA is available, else float32."""
  dtype = torch.float16 if torch.cuda.is_available() else torch.float32
  base = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype)
  return base

In [None]:
@run(cond=DO_TRAIN)
def model(model, /):
  """Attach a new LoRA adapter to the base model and print its trainable-parameter summary."""
  lora = peft.LoraConfig(
    task_type='CAUSAL_LM',
    lora_alpha=32,
    lora_dropout=0.1,
  )
  wrapped = peft.get_peft_model(model, lora)
  wrapped.print_trainable_parameters()
  return wrapped

In [None]:
@run(cond=DO_TRAIN)
def tokenizer():
  """Load the TOKENIZER_NAME tokenizer (with add_prefix_space=True) for the train stage."""
  loaded = transformers.AutoTokenizer.from_pretrained(TOKENIZER_NAME, add_prefix_space=True)
  return loaded

In [None]:
@run(cond=DO_TRAIN)
def dataset():
  """Load the 'train' split of the implode branch of REPOSITORY."""
  splits = datasets.load_dataset(REPOSITORY, revision=BRANCH('implode'))
  return splits['train']

In [None]:
@run(cond=DO_TRAIN)
def dataset(dataset, /):
  """Drop every conversation that contains a message whose content is None."""
  def cond(inp):
    return all(message['content'] is not None for message in inp['conversation'])

  return dataset.filter(cond)

In [None]:
@run(cond=DO_TRAIN)
def dataset(dataset, tokenizer, /):
  """Render each conversation as a single training string.

  The text opens with the BOS token; every message then contributes
  BOS + '<ROLE>: ' + content + EOS, with the role label taken from PREFIXES.

  Raises:
    ValueError: For a message whose role is not user, apprentice or master.
  """
  PREFIXES = {
    'user': 'USER: ',
    'apprentice': 'APPRENTICE: ',
    'master': 'MASTER: ',
  }

  def each(inps):
    outs = {}

    for id, conversation in zip(inps['id'], inps['conversation']):
      pieces = [tokenizer.bos_token]

      for message in conversation:
        role = message['role']
        content = message['content']

        if role not in PREFIXES:
          raise ValueError(f'Unexpected role: {role!r}')

        pieces.extend((
          tokenizer.bos_token,
          PREFIXES[role],
          content,
          tokenizer.eos_token,
        ))

      outs.setdefault('id', []).append(id)
      outs.setdefault('text', []).append(''.join(pieces))

    return outs

  return dataset.map(each, batched=True, batch_size=1, remove_columns=['id', 'conversation'])

In [None]:
@run(cond=DO_TRAIN)
def dataset(dataset, /):
  """Repeat the dataset TRAIN_REPLICAS times and shuffle the result."""
  dataset = datasets.concatenate_datasets(
    dsets=[dataset] * TRAIN_REPLICAS,
  )

  # `shuffle` returns a new dataset rather than reordering in place, so the
  # result has to be kept.
  dataset = dataset.shuffle(
    seed=1337,
  )

  return dataset

In [None]:
@run(cond=DO_TRAIN)
def dataset(dataset, /):
  """Concatenate every 'text' row into one long string.

  The result is a dataset with a single row and a single 'text' column.
  An empty batch produces one row holding the empty string.
  """
  def each(inps):
    return {'text': [''.join(inps['text'])]}

  # batch_size=None passes the whole dataset to `each` as one batch, which is
  # what makes the join span every row.
  return dataset.map(each, batched=True, batch_size=None, remove_columns=['id', 'text'])

In [None]:
@run(cond=DO_TRAIN)
def dataset(dataset, tokenizer, /):
  """Tokenize the text and cut it into windows of TRAIN_CONTEXT_SIZE tokens.

  Each output row holds one full window as 'input_ids' together with its
  decoded 'text'.  Trailing tokens that do not fill a window are dropped.
  """
  def each(inps):
    outs = {}

    for document in inps['text']:
      token_ids = tokenizer.encode(document)
      full_windows = len(token_ids) // TRAIN_CONTEXT_SIZE

      for window_number in range(full_windows):
        start = window_number * TRAIN_CONTEXT_SIZE
        window = token_ids[start:start + TRAIN_CONTEXT_SIZE]
        outs.setdefault('text', []).append(tokenizer.decode(window))
        outs.setdefault('input_ids', []).append(window)

    return outs

  return dataset.map(each, batched=True, batch_size=None, remove_columns=['text'])

In [None]:
@run(cond=DO_TRAIN)
def trainer(model, dataset, /):
  """Build the Trainer for one epoch of causal-LM fine-tuning.

  The loss is the model's own language-modelling loss with the inputs used as
  labels and an all-ones attention mask (every row is a full, unpadded window).
  """
  # Mixed precision needs a GPU; this mirrors the dtype choice made when the
  # model is loaded (float16 on CUDA, float32 otherwise).
  use_fp16 = torch.cuda.is_available()

  class Trainer(transformers.Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
      """Return the LM loss, plus the model outputs when return_outputs is set.

      Extra keyword arguments passed by the base Trainer are accepted and
      ignored.
      """
      outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=torch.ones_like(inputs["input_ids"]).bool(),
        labels=inputs["input_ids"],
      )
      if return_outputs:
        return outputs.loss, outputs
      return outputs.loss

  return Trainer(
    model=model,
    train_dataset=dataset,
    args=transformers.TrainingArguments(
      fp16=use_fp16,
      fp16_full_eval=use_fp16,
      output_dir='output',
      evaluation_strategy=(
        # 'steps'
        'no'
      ),
      eval_steps=1/10/2,
      save_strategy='steps',
      save_total_limit=3,
      # A float below 1 is read as a fraction of the total training steps.
      save_steps=1/5,
      gradient_accumulation_steps=1,
      per_device_train_batch_size=1,
      per_device_eval_batch_size=1,
      learning_rate = TRAIN_LEARNING_RATE,
      num_train_epochs=1,
      # logging_first_step=True,
      # logging_steps=2*1/1000,
      eval_accumulation_steps=1,
    ),
  )

In [None]:
@run(cond=DO_TRAIN)
def __train(trainer, /):
  """Run the training loop; the training output is not kept."""
  trainer.train()

In [None]:
@run(cond=DO_TRAIN)
def __push_model(model, /):
  """Upload the trained model to REPOSITORY (no branch is specified)."""
  model.push_to_hub(REPOSITORY)

In [None]:
@run(cond=DO_TRAIN)
def __create_branch():
  """Create the train branch in the Hub model repo.

  The test stage loads the adapter from this branch.
  """
  # NOTE(review): no starting revision is given, so this presumably branches
  # from the repo's default branch, which the preceding push updated -- confirm.
  huggingface_hub.create_branch(
    repo_id=REPOSITORY,
    branch=BRANCH('train'),
    repo_type='model',
  )

# Test

In [None]:
@run(cond=DO_TEST)
def model():
  """Load MODEL_NAME as a causal LM: float16 when CUDA is available, else float32."""
  dtype = torch.float16 if torch.cuda.is_available() else torch.float32
  base = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype)
  return base

Downloading (…)lve/main/config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

In [None]:
@run(cond=DO_TEST)
def model(model, /):
  """Wrap the base model with the adapter stored on the train branch of REPOSITORY."""
  adapted = peft.PeftModel.from_pretrained(model, REPOSITORY, revision=BRANCH('train'))
  return adapted

Downloading adapter_model.bin:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

In [None]:
@run(cond=DO_TEST)
def tokenizer():
  """Load the TOKENIZER_NAME tokenizer (with add_prefix_space=True) for the test stage."""
  loaded = transformers.AutoTokenizer.from_pretrained(TOKENIZER_NAME, add_prefix_space=True)
  return loaded

Downloading (…)okenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

In [None]:
@run(cond=DO_TEST)
def pipeline(model, tokenizer, /):
  """Build the text-generation pipeline used by the test prompt.

  The device is chosen the same way as in the complete stage, so that a
  model loaded in float16 (the CUDA case) also runs on the GPU.
  """
  return transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    device=(
      0   # GPU
      if torch.cuda.is_available() else
      -1  # CPU
    ),
  )

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'Pe

In [None]:
@run(cond=DO_TEST)
def __test(pipeline, /):
  """Run one prompt through the apprentice and then the master role.

  The apprentice answer is sampled first (temperature 1.3, with ' Be concise.'
  appended to the user turn).  The master answer is then sampled at
  temperature 0.7 from a transcript that contains the original prompt, the
  apprentice answer, and a fixed lead-in for the master turn.

  Returns:
    A dict with the 'prompt', 'apprentice' and 'master' strings; each is also
    pretty-printed as soon as it is available.
  """
  import pprint

  def complete(text, *, temperature):
    """Sample up to 256 new tokens after `text` and return only the new text."""
    completion = pipeline(
      text,
      temperature=temperature,
      return_full_text=False,
      max_new_tokens=256,
      do_sample=True,
    )
    return completion[0]['generated_text']

  def template(prompt, apprentice=None):
    """Format the transcript up to the apprentice turn, or up to the master turn when `apprentice` is given."""
    text = f'<s><s>USER: {prompt}</s><s>APPRENTICE:'
    if apprentice is not None:
      text = f'{text} {apprentice}</s><s>MASTER:'
    return text

  def show(label, value):
    pprint.pprint({label: value})

  prompt = (
    """In common usage, climate change describes global warming—the ongoing increase in global average temperature—and its effects on Earth's climate system. Climate change in a broader sense also includes previous long-term changes to Earth's climate. The current rise in global average temperature is more rapid than previous changes, and is primarily caused by humans burning fossil fuels.[2][3] Fossil fuel use, deforestation, and some agricultural and industrial practices increase greenhouse gases, notably carbon dioxide and methane.[4] Greenhouse gases absorb some of the heat that the Earth radiates after it warms from sunlight. Larger amounts of these gases trap more heat in Earth's lower atmosphere, causing global warming. """
    "What are the main causes and effects of climate change?"
  )
  show('prompt', prompt)

  # Only the apprentice is asked to be concise; the master sees the bare prompt.
  apprentice = complete(template(f'{prompt} Be concise.'), temperature=1.3)
  show('apprentice', apprentice)

  master = complete(
    template(prompt, apprentice) + " My apprentice's response isn't enough. The correct response is:",
    temperature=0.7,
  )
  show('master', master)

  return {
    'prompt': prompt,
    'apprentice': apprentice,
    'master': master,
  }


{'prompt': 'In common usage, climate change describes global warming—the '
           'ongoing increase in global average temperature—and its effects on '
           "Earth's climate system. Climate change in a broader sense also "
           "includes previous long-term changes to Earth's climate. The "
           'current rise in global average temperature is more rapid than '
           'previous changes, and is primarily caused by humans burning fossil '
           'fuels.[2][3] Fossil fuel use, deforestation, and some agricultural '
           'and industrial practices increase greenhouse gases, notably carbon '
           'dioxide and methane.[4] Greenhouse gases absorb some of the heat '
           'that the Earth radiates after it warms from sunlight. Larger '
           "amounts of these gases trap more heat in Earth's lower atmosphere, "
           'causing global warming. What are the main causes and effects of '
           'climate change?'}
{'apprentice': ' Question #1  Th

# (end)