# Mistral

In [6]:
import transformer_lens
from pathlib import Path
import pickle
from torch import nn
import json
from pprint import pp
from transformers import AutoConfig, AutoModelForCausalLM

from transformer_lens.loading_from_pretrained import STANFORD_CRFM_CHECKPOINTS 
from dotenv import load_dotenv
import torch

# Load variables from a .env file into the process environment before anything
# reads os.environ (retrieve_checkpoint looks up AWS_LANGUAGE_BUCKET_NAME there;
# AWS credentials are presumably supplied the same way — TODO confirm).
# The trailing semicolon keeps the call's return value out of the cell output.
load_dotenv();

In [19]:
import os
from typing import Literal, Union

# Full identifiers of the five GPT-2 medium runs. They follow the same
# "stanford-crfm/<run>-gpt2-<size>-x<N>" pattern that get_full_name() builds.
# NOTE(review): the trailing x<N> presumably identifies the run's seed — confirm.
gpt2_mediums = [
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
]

# Full identifiers of the five GPT-2 small runs (same pattern as above).
gpt2_smalls = [
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/expanse-gpt2-small-x777",
]

# Short run names; these are exactly the keys accepted by get_full_name().
GPT2SmallRunName = Literal["alias", "battlestar", "caprica", "darkmatter", "expanse"]
GPT2MediumRunName = Literal["arwen", "beren", "celebrimbor", "durin", "eowyn"]
# Any valid short run name, regardless of model size.
GPT2RunName = Union[GPT2SmallRunName, GPT2MediumRunName]
# The two model sizes that appear in the identifiers above.
GPT2Size = Literal["small", "medium"]

# Selected run and size. Neither constant is referenced by the code visible in
# this section of the notebook; presumably used by later cells — TODO confirm.
GPT2_NAME: GPT2RunName = 'alias'
GPT2_SIZE: GPT2Size = 'small'

def get_full_name(name):
    """Expand a short run name (e.g. ``'alias'``) into its full model identifier.

    The result has the form ``stanford-crfm/<name>-gpt2-<size>-x<N>``.

    Raises:
        KeyError: If ``name`` is not one of the ten known run names.
    """
    # One row per run: short name -> (model size, numeric suffix).
    run_table = {
        'arwen': ('medium', 21),
        'beren': ('medium', 49),
        'celebrimbor': ('medium', 81),
        'durin': ('medium', 343),
        'eowyn': ('medium', 777),
        'alias': ('small', 21),
        'battlestar': ('small', 49),
        'caprica': ('small', 81),
        'darkmatter': ('small', 343),
        'expanse': ('small', 777),
    }
    size, ext = run_table[name]
    return f"stanford-crfm/{name}-gpt2-{size}-x{ext}"

In [25]:
import tqdm

def retrieve_checkpoint(name, step=400_000):
    """Fetch one training checkpoint into ``../checkpoints/<full_name>/<step>``.

    The weights file ``pytorch_model.bin`` is first requested from the S3
    bucket named by the ``AWS_LANGUAGE_BUCKET_NAME`` environment variable.
    If S3 reports a 404, the model is instead loaded from HuggingFace
    (revision ``checkpoint-<step>``), saved to the local checkpoint
    directory, and every file in that directory is uploaded to the bucket
    under ``checkpoints/<full_name>/<step>/``.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        step: Training step of the checkpoint to fetch.

    Raises:
        KeyError: If ``name`` is unknown or ``AWS_LANGUAGE_BUCKET_NAME`` is unset.
        botocore.exceptions.ClientError: For any S3 download error other than a 404.
    """
    # Imported inside the function, so boto3/botocore are only imported when
    # this function is actually called. The submodule is imported explicitly
    # rather than relying on boto3 having loaded it as a side effect.
    import boto3
    import botocore.exceptions

    full_name = get_full_name(name)
    prefix = f"{full_name}/{step}"

    s3 = boto3.resource("s3")
    bucket = s3.Bucket(os.environ['AWS_LANGUAGE_BUCKET_NAME'])
    checkpoints_path = Path(f"../checkpoints/{prefix}")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    checkpoints_path.mkdir(parents=True, exist_ok=True)

    try:
        bucket.download_file(f"checkpoints/{prefix}/pytorch_model.bin", str(checkpoints_path / "pytorch_model.bin"))
        print("Done.")
    except botocore.exceptions.ClientError as e:
        # Only a missing object triggers the HuggingFace fallback; anything
        # else (permissions, throttling, ...) is propagated unchanged.
        if e.response['Error']['Code'] != "404":
            raise

        print(f"Checkpoint {prefix} not found on AWS. Retrieving from HuggingFace.")
        model = AutoModelForCausalLM.from_pretrained(full_name, revision=f'checkpoint-{step}', torch_dtype=torch.float32)
        print("Saving HF model to disk...")
        model.save_pretrained(checkpoints_path)

        # Drop the reference to the in-memory model before the uploads start.
        del model

        print("Uploading HF model to AWS...")
        for file in checkpoints_path.glob("*"):
            if file.is_file():
                print(f"Uploading {file}...")
                # upload_file documents Filename as a str; older boto3
                # releases reject pathlib.Path objects, so convert explicitly.
                bucket.upload_file(str(file), f"checkpoints/{prefix}/{file.name}")

        print("Done.")


def load_checkpoint(name, step=400_000):
    """Load one checkpoint as a HookedTransformer, fetching it first if needed.

    The checkpoint is expected under ``../checkpoints/<full_name>/<step>``.
    If its weights file is missing, ``retrieve_checkpoint`` is called to
    obtain it from AWS or HuggingFace.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        step: Training step of the checkpoint to load.

    Returns:
        A ``(model, hf_model)`` tuple: the ``transformer_lens.HookedTransformer``
        built from the checkpoint, and the underlying HuggingFace model.
    """
    full_name = get_full_name(name)
    checkpoint_path = Path(f"../checkpoints/{full_name}/{step}")

    # Test for the weights file rather than the directory: retrieve_checkpoint
    # creates the directory before downloading, so a failed earlier attempt
    # leaves an empty directory that must not be mistaken for a cached
    # checkpoint. This matches the check used by retrieve_checkpoints.
    if not (checkpoint_path / "pytorch_model.bin").exists():
        print("Retrieving checkpoint from AWS. This may take a while.")
        retrieve_checkpoint(name, step)

    # The config is resolved from the full model name, not the local directory
    # (a checkpoint downloaded from S3 contains only pytorch_model.bin).
    config = AutoConfig.from_pretrained(full_name, torch_dtype=torch.float32)

    print(f"Loading checkpoint from disk {checkpoint_path}...")
    hf_model = AutoModelForCausalLM.from_pretrained(
        checkpoint_path,
        revision=f'checkpoint-{step}',
        config=config,
        torch_dtype=torch.float32,
    )

    model = transformer_lens.HookedTransformer.from_pretrained(full_name, hf_model=hf_model)

    return model, hf_model


def retrieve_checkpoints(name, steps=None):
    """Retrieve multiple checkpoints from AWS/HuggingFace.

    Checkpoints whose ``pytorch_model.bin`` already exists locally are skipped.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        steps: Iterable of training steps to fetch. ``None`` (the default)
            selects every entry of ``STANFORD_CRFM_CHECKPOINTS`` except the
            first. An explicitly empty iterable fetches nothing.
    """
    full_name = get_full_name(name)

    # Compare against None instead of using `steps or ...`: truthiness would
    # turn an explicit empty list into "download everything" and raises for
    # numpy arrays.
    if steps is None:
        # Step 0 is not available. TODO: Figure out how to initialize this.
        steps = STANFORD_CRFM_CHECKPOINTS[1:]

    for step in tqdm.tqdm(steps, desc=f"Retrieving {name} checkpoints"):
        if os.path.exists(f"../checkpoints/{full_name}/{step}/pytorch_model.bin"):
            continue

        retrieve_checkpoint(name, step)


# Fetch every checkpoint of the 'alias' run (steps is left at its default).
# Checkpoints already present locally are skipped. The saved output below shows
# 608 checkpoints with an estimated total of several hours on a cold cache.
retrieve_checkpoints('alias')

Retrieving alias checkpoints:   0%|          | 0/608 [00:00<?, ?it/s]

Checkpoint stanford-crfm/alias-gpt2-small-x21/10 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10/pytorch_model.bin...


Retrieving alias checkpoints:   0%|          | 1/608 [00:21<3:40:35, 21.80s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/20 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/20/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/20/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/20/pytorch_model.bin...


Retrieving alias checkpoints:   0%|          | 2/608 [00:40<3:20:37, 19.86s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/30 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/30/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/30/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/30/pytorch_model.bin...


Retrieving alias checkpoints:   0%|          | 3/608 [00:54<2:53:26, 17.20s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/40 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/40/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/40/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/40/pytorch_model.bin...


Retrieving alias checkpoints:   1%|          | 4/608 [01:14<3:03:18, 18.21s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/50 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/50/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/50/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/50/pytorch_model.bin...


Retrieving alias checkpoints:   1%|          | 5/608 [01:27<2:46:34, 16.57s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/60 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/60/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/60/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/60/pytorch_model.bin...


Retrieving alias checkpoints:   1%|          | 6/608 [01:45<2:51:17, 17.07s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/70 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/70/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/70/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/70/pytorch_model.bin...


Retrieving alias checkpoints:   1%|          | 7/608 [01:58<2:35:32, 15.53s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/80 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/80/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/80/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/80/pytorch_model.bin...


Retrieving alias checkpoints:   1%|▏         | 8/608 [02:16<2:44:49, 16.48s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/90 not found on AWS. Retrieving from HuggingFace.
Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/90/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/90/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/90/pytorch_model.bin...


Retrieving alias checkpoints:   1%|▏         | 9/608 [02:37<2:57:27, 17.77s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/100 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/100/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/100/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/100/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 10/608 [03:04<3:25:29, 20.62s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/150 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/150/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/150/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/150/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 11/608 [03:23<3:19:44, 20.07s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/200 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/200/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/200/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/200/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 12/608 [03:52<3:46:33, 22.81s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/250 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/250/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/250/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/250/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 13/608 [04:21<4:06:52, 24.89s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/300/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 14/608 [04:47<4:07:46, 25.03s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/350 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/350/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/350/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/350/pytorch_model.bin...


Retrieving alias checkpoints:   2%|▏         | 15/608 [05:15<4:17:13, 26.03s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/400 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/400/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/400/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/400/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 16/608 [05:43<4:21:41, 26.52s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/450 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/450/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/450/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/450/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 17/608 [06:02<4:01:06, 24.48s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/500 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/500/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/500/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/500/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 18/608 [06:29<4:05:59, 25.02s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/550 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/550/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/550/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/550/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 19/608 [06:55<4:10:25, 25.51s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/600 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/600/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/600/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/600/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 20/608 [07:22<4:11:48, 25.70s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/650 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/650/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/650/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/650/pytorch_model.bin...


Retrieving alias checkpoints:   3%|▎         | 21/608 [07:48<4:12:55, 25.85s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/700 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/700/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/700/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/700/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▎         | 22/608 [08:13<4:09:51, 25.58s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/750 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/750/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/750/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/750/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▍         | 23/608 [08:40<4:13:56, 26.04s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/800 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/800/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/800/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/800/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▍         | 24/608 [09:07<4:15:52, 26.29s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/850 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/850/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/850/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/850/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▍         | 25/608 [09:35<4:20:22, 26.80s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/900 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/900/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/900/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/900/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▍         | 26/608 [10:01<4:18:22, 26.64s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/950 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/950/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/950/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/950/pytorch_model.bin...


Retrieving alias checkpoints:   4%|▍         | 27/608 [10:29<4:20:43, 26.92s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1000/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▍         | 28/608 [10:46<3:54:00, 24.21s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1050 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1050/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1050/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1050/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▍         | 29/608 [11:07<3:43:29, 23.16s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1100 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1100/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1100/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1100/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▍         | 30/608 [11:34<3:53:58, 24.29s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1150 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1150/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1150/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1150/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▌         | 31/608 [12:02<4:04:31, 25.43s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1200 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1200/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1200/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1200/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▌         | 32/608 [12:24<3:53:01, 24.27s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1250 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1250/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1250/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1250/pytorch_model.bin...


Retrieving alias checkpoints:   5%|▌         | 33/608 [12:52<4:05:21, 25.60s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1300/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▌         | 34/608 [13:20<4:11:30, 26.29s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1350 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1350/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1350/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1350/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▌         | 35/608 [13:42<3:59:02, 25.03s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1400 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1400/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1400/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1400/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▌         | 36/608 [14:08<4:00:59, 25.28s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1450 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1450/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1450/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1450/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▌         | 37/608 [14:33<3:58:24, 25.05s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1500 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1500/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1500/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1500/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▋         | 38/608 [14:56<3:52:04, 24.43s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1550 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1550/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1550/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1550/pytorch_model.bin...


Retrieving alias checkpoints:   6%|▋         | 39/608 [15:14<3:35:27, 22.72s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1600 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1600/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1600/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1600/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 40/608 [15:36<3:32:50, 22.48s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1650 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1650/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1650/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1650/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 41/608 [16:08<3:58:19, 25.22s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1700 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1700/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1700/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1700/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 42/608 [16:38<4:11:39, 26.68s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1750 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1750/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1750/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1750/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 43/608 [17:03<4:04:54, 26.01s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1800 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1800/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1800/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1800/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 44/608 [17:30<4:08:09, 26.40s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1850 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1850/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1850/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1850/pytorch_model.bin...


Retrieving alias checkpoints:   7%|▋         | 45/608 [17:48<3:45:31, 24.04s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1900 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1900/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1900/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1900/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 46/608 [18:13<3:46:54, 24.23s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/1950 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1950/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1950/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/1950/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 47/608 [18:31<3:29:15, 22.38s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2000/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 48/608 [18:58<3:42:48, 23.87s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2100 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2100/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2100/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2100/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 49/608 [19:24<3:46:03, 24.26s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2200 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2200/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2200/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2200/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 50/608 [19:49<3:50:02, 24.74s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2300/pytorch_model.bin...


Retrieving alias checkpoints:   8%|▊         | 51/608 [20:11<3:41:03, 23.81s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2400 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2400/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2400/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2400/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▊         | 52/608 [20:38<3:49:06, 24.72s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2500 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2500/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2500/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2500/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▊         | 53/608 [21:04<3:51:53, 25.07s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2600 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2600/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2600/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2600/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▉         | 54/608 [21:30<3:55:06, 25.46s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2700 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2700/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2700/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2700/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▉         | 55/608 [21:58<3:59:54, 26.03s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2800 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2800/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2800/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2800/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▉         | 56/608 [22:25<4:03:47, 26.50s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/2900 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2900/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2900/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/2900/pytorch_model.bin...


Retrieving alias checkpoints:   9%|▉         | 57/608 [22:53<4:05:53, 26.78s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3000/pytorch_model.bin...


Retrieving alias checkpoints:  10%|▉         | 58/608 [23:24<4:19:04, 28.26s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3100 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3100/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3100/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3100/pytorch_model.bin...


Retrieving alias checkpoints:  10%|▉         | 59/608 [23:50<4:10:15, 27.35s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3200 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3200/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3200/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3200/pytorch_model.bin...


Retrieving alias checkpoints:  10%|▉         | 60/608 [24:05<3:36:55, 23.75s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3300/pytorch_model.bin...


Retrieving alias checkpoints:  10%|█         | 61/608 [24:25<3:25:26, 22.53s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3400 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3400/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3400/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3400/pytorch_model.bin...


Retrieving alias checkpoints:  10%|█         | 62/608 [24:51<3:36:17, 23.77s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3500 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3500/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3500/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3500/pytorch_model.bin...


Retrieving alias checkpoints:  10%|█         | 63/608 [25:19<3:47:59, 25.10s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3600 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3600/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3600/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3600/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█         | 64/608 [25:46<3:50:21, 25.41s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3700 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3700/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3700/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3700/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█         | 65/608 [26:12<3:53:18, 25.78s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3800 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3800/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3800/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3800/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█         | 66/608 [26:42<4:02:52, 26.89s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/3900 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3900/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3900/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/3900/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█         | 67/608 [27:08<4:01:38, 26.80s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4000/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█         | 68/608 [27:36<4:04:40, 27.19s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4100 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4100/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4100/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4100/pytorch_model.bin...


Retrieving alias checkpoints:  11%|█▏        | 69/608 [28:02<3:58:48, 26.58s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4200 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4200/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4200/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4200/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 70/608 [28:28<3:56:50, 26.41s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4300/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 71/608 [28:53<3:52:37, 25.99s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4400 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4400/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4400/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4400/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 72/608 [29:15<3:41:21, 24.78s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4500 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4500/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4500/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4500/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 73/608 [29:42<3:47:40, 25.53s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4600 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4600/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4600/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4600/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 74/608 [30:00<3:28:52, 23.47s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4700 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4700/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4700/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/4700/pytorch_model.bin...


Retrieving alias checkpoints:  12%|█▏        | 75/608 [30:31<3:48:36, 25.73s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/4800 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

In [None]:
from datasets import load_dataset

# Load the `tasksource/mmlu` dataset through Hugging Face `datasets`.
# NOTE(review): this repo may be split into per-subject configs, in which case
# a config `name` has to be supplied as well — confirm before relying on this cell.
dataset = load_dataset(path="tasksource/mmlu")