# Mistral: Stanford CRFM GPT-2 checkpoints

In [6]:
import transformer_lens
from pathlib import Path
import pickle
from torch import nn
import json
from pprint import pp
from transformers import AutoConfig, AutoModelForCausalLM

from transformer_lens.loading_from_pretrained import STANFORD_CRFM_CHECKPOINTS 
from dotenv import load_dotenv
import torch

# Load variables from a .env file into the process environment. retrieve_checkpoint reads
# AWS_LANGUAGE_BUCKET_NAME from os.environ (presumably supplied this way; verify locally).
# The trailing ';' suppresses the call's return value in the cell output.
load_dotenv();

In [19]:
import os
from typing import Literal, Union

# Full model identifiers of the GPT-2 medium runs, in the form
# "stanford-crfm/<run>-gpt2-<size>-x<suffix>" (the same form get_full_name produces).
# NOTE(review): not referenced by the other visible cells.
gpt2_mediums = [
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
]

# Full model identifiers of the GPT-2 small runs; same numeric suffixes as the medium runs.
# NOTE(review): not referenced by the other visible cells.
gpt2_smalls = [
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/expanse-gpt2-small-x777",
]

# Static type aliases for the short run names and sizes (type-checker hints only).
GPT2SmallRunName = Literal["alias", "battlestar", "caprica", "darkmatter", "expanse"]
GPT2MediumRunName = Literal["arwen", "beren", "celebrimbor", "durin", "eowyn"]
GPT2RunName = Union[GPT2SmallRunName, GPT2MediumRunName]
GPT2Size = Literal["small", "medium"]

# Default run selection for this notebook.
# NOTE(review): the visible cells pass 'alias' literally instead of using these constants.
GPT2_NAME: GPT2RunName = 'alias'
GPT2_SIZE: GPT2Size = 'small'

# Short run name -> (model size, numeric suffix that appears after "-x" in the model id).
# Kept as a single table so a run's size and suffix cannot drift apart.
_GPT2_RUN_SPECS = {
    'arwen': ('medium', 21),
    'beren': ('medium', 49),
    'celebrimbor': ('medium', 81),
    'durin': ('medium', 343),
    'eowyn': ('medium', 777),
    'alias': ('small', 21),
    'battlestar': ('small', 49),
    'caprica': ('small', 81),
    'darkmatter': ('small', 343),
    'expanse': ('small', 777),
}


def get_full_name(name: str) -> str:
    """Expand a short run name into its full ``stanford-crfm/...`` model identifier.

    Args:
        name: Short run name, e.g. ``'alias'`` or ``'arwen'``.

    Returns:
        The identifier ``"stanford-crfm/<name>-gpt2-<size>-x<suffix>"``,
        e.g. ``"stanford-crfm/alias-gpt2-small-x21"``.

    Raises:
        KeyError: If ``name`` is not a known run. The message lists the valid names.
    """
    try:
        size, ext = _GPT2_RUN_SPECS[name]
    except KeyError:
        known = ", ".join(sorted(_GPT2_RUN_SPECS))
        # Still a KeyError (as before), but with a message that tells the caller what is valid.
        raise KeyError(f"Unknown GPT-2 run name {name!r}; expected one of: {known}") from None
    return f"stanford-crfm/{name}-gpt2-{size}-x{ext}"

In [26]:
import tqdm

def retrieve_checkpoint(name, step=400_000):
    """Fetch one checkpoint into ``../checkpoints/<full_name>/<step>``.

    The S3 bucket named by the ``AWS_LANGUAGE_BUCKET_NAME`` environment variable is tried
    first (only ``pytorch_model.bin`` is downloaded). If S3 reports the object as missing
    (error code ``"404"``), the model is loaded from HuggingFace at revision
    ``checkpoint-<step>``, saved to the local directory, and every file in that directory is
    uploaded to the bucket under ``checkpoints/<full_name>/<step>/``.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        step: Training step of the checkpoint to fetch.

    Raises:
        KeyError: If ``name`` is unknown or ``AWS_LANGUAGE_BUCKET_NAME`` is not set.
        botocore.exceptions.ClientError: For any S3 download error other than ``"404"``.
    """
    # Imported inside the function, so the AWS SDK is only required when this is called.
    import boto3
    import botocore

    full_name = get_full_name(name)
    prefix = f"{full_name}/{step}"

    s3 = boto3.resource("s3")
    bucket = s3.Bucket(os.environ['AWS_LANGUAGE_BUCKET_NAME'])
    checkpoints_path = Path(f"../checkpoints/{prefix}")

    # exist_ok=True replaces the separate exists() check and its check-then-create race.
    checkpoints_path.mkdir(parents=True, exist_ok=True)

    try:
        bucket.download_file(f"checkpoints/{prefix}/pytorch_model.bin", str(checkpoints_path / "pytorch_model.bin"))
        print("Done.")
    except botocore.exceptions.ClientError as e:
        # Only "object not found" triggers the HuggingFace fallback; anything else
        # (permissions, throttling, ...) is propagated to the caller unchanged.
        if e.response['Error']['Code'] != "404":
            raise

        print(f"Checkpoint {prefix} not found on AWS. Retrieving from HuggingFace.")
        model = AutoModelForCausalLM.from_pretrained(full_name, revision=f'checkpoint-{step}', torch_dtype=torch.float32)
        print("Saving HF model to disk...")
        model.save_pretrained(checkpoints_path)

        # Drop the reference to the model before the uploads start.
        del model

        print("Uploading HF model to AWS...")
        for file in checkpoints_path.glob("*"):
            if file.is_file():
                print(f"Uploading {file}...")
                # boto3 documents Filename as a str; convert explicitly, as download_file does above.
                bucket.upload_file(str(file), f"checkpoints/{prefix}/{file.name}")

        print("Done.")


def load_checkpoint(name, step=400_000):
    """Load one checkpoint from local storage, retrieving it first if it is missing.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        step: Training step of the checkpoint to load.

    Returns:
        A ``(model, hf_model)`` tuple: the ``transformer_lens.HookedTransformer`` built from
        the HuggingFace model, and the HuggingFace causal-LM model itself.
    """
    full_name = get_full_name(name)
    checkpoint_path = Path(f"../checkpoints/{full_name}/{step}")

    # Test for the weights file rather than the directory: retrieve_checkpoint creates the
    # directory before downloading, so an interrupted retrieval leaves an empty directory
    # that would otherwise be mistaken for a cached checkpoint. This matches the check in
    # retrieve_checkpoints and assumes weights are stored as pytorch_model.bin.
    if not (checkpoint_path / "pytorch_model.bin").exists():
        print("Retrieving checkpoint from AWS. This may take a while.")
        retrieve_checkpoint(name, step)

    # The config is resolved from the model id, not from the local directory
    # (the S3 download path only fetches pytorch_model.bin).
    config = AutoConfig.from_pretrained(full_name, torch_dtype=torch.float32)

    print(f"Loading checkpoint from disk {checkpoint_path}...")
    # NOTE(review): `revision` is presumably ignored when loading from a local directory;
    # confirm against the transformers docs and drop it if so.
    hf_model = AutoModelForCausalLM.from_pretrained(
        checkpoint_path,
        revision=f'checkpoint-{step}',
        config=config,
        torch_dtype=torch.float32,
    )

    model = transformer_lens.HookedTransformer.from_pretrained(full_name, hf_model=hf_model)

    return model, hf_model


def retrieve_checkpoints(name, steps=None):
    """Retrieve multiple checkpoints from AWS/HuggingFace.

    Steps whose ``pytorch_model.bin`` already exists locally are skipped.

    Args:
        name: Short run name accepted by ``get_full_name`` (e.g. ``'alias'``).
        steps: Iterable of training steps to retrieve. ``None`` (the default) means every
            entry of ``STANFORD_CRFM_CHECKPOINTS`` except the first. An explicitly empty
            iterable retrieves nothing.
    """
    full_name = get_full_name(name)

    # Compare against None instead of relying on truthiness, so that an empty list means
    # "nothing to do" and array-like inputs do not raise on boolean evaluation.
    if steps is None:
        # Step 0 is not available. TODO: Figure out how to initialize this.
        steps = STANFORD_CRFM_CHECKPOINTS[1:]

    run_dir = Path("../checkpoints") / full_name

    for step in tqdm.tqdm(steps, desc=f"Retrieving {name} checkpoints"):
        if (run_dir / str(step) / "pytorch_model.bin").exists():
            continue  # already cached locally

        retrieve_checkpoint(name, step)


# Retrieve every 10th entry of STANFORD_CRFM_CHECKPOINTS for the 'alias' run, starting at
# index 1 (index 0 is skipped). Steps already present locally are skipped by the function.
retrieve_checkpoints('alias', STANFORD_CRFM_CHECKPOINTS[1::10])

Retrieving alias checkpoints:   0%|          | 0/61 [00:00<?, ?it/s]

Checkpoint stanford-crfm/alias-gpt2-small-x21/6300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/6300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/6300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/6300/pytorch_model.bin...


Retrieving alias checkpoints:  16%|█▋        | 10/61 [00:27<02:21,  2.78s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/7300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/7300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/7300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/7300/pytorch_model.bin...


Retrieving alias checkpoints:  18%|█▊        | 11/61 [00:52<04:36,  5.53s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/8300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/8300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/8300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/8300/pytorch_model.bin...


Retrieving alias checkpoints:  20%|█▉        | 12/61 [01:16<06:45,  8.27s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/9300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/9300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/9300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/9300/pytorch_model.bin...


Retrieving alias checkpoints:  21%|██▏       | 13/61 [01:37<08:32, 10.67s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/10300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/10300/pytorch_model.bin...


Retrieving alias checkpoints:  23%|██▎       | 14/61 [02:02<10:35, 13.51s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/11300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/11300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/11300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/11300/pytorch_model.bin...


Retrieving alias checkpoints:  25%|██▍       | 15/61 [02:29<12:34, 16.41s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/12300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/12300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/12300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/12300/pytorch_model.bin...


Retrieving alias checkpoints:  26%|██▌       | 16/61 [02:56<14:23, 19.19s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/13300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/13300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/13300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/13300/pytorch_model.bin...


Retrieving alias checkpoints:  28%|██▊       | 17/61 [03:24<15:39, 21.35s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/14300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/14300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/14300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/14300/pytorch_model.bin...


Retrieving alias checkpoints:  30%|██▉       | 18/61 [03:52<16:37, 23.20s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/15300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/15300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/15300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/15300/pytorch_model.bin...


Retrieving alias checkpoints:  31%|███       | 19/61 [04:12<15:39, 22.36s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/16300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/16300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/16300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/16300/pytorch_model.bin...


Retrieving alias checkpoints:  33%|███▎      | 20/61 [04:39<16:03, 23.49s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/17300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/17300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/17300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/17300/pytorch_model.bin...


Retrieving alias checkpoints:  34%|███▍      | 21/61 [05:04<15:58, 23.97s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/18300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/18300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/18300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/18300/pytorch_model.bin...


Retrieving alias checkpoints:  36%|███▌      | 22/61 [05:21<14:14, 21.92s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/19300 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/19300/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/19300/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/19300/pytorch_model.bin...


Retrieving alias checkpoints:  38%|███▊      | 23/61 [05:49<15:03, 23.77s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/23000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/23000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/23000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/23000/pytorch_model.bin...


Retrieving alias checkpoints:  39%|███▉      | 24/61 [06:16<15:15, 24.75s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/33000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/33000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/33000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/33000/pytorch_model.bin...


Retrieving alias checkpoints:  41%|████      | 25/61 [06:39<14:33, 24.26s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/43000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/43000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/43000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/43000/pytorch_model.bin...


Retrieving alias checkpoints:  43%|████▎     | 26/61 [07:09<15:10, 26.00s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/53000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/53000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/53000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/53000/pytorch_model.bin...


Retrieving alias checkpoints:  44%|████▍     | 27/61 [07:27<13:17, 23.46s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/63000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/63000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/63000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/63000/pytorch_model.bin...


Retrieving alias checkpoints:  46%|████▌     | 28/61 [07:54<13:28, 24.50s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/73000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/73000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/73000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/73000/pytorch_model.bin...


Retrieving alias checkpoints:  48%|████▊     | 29/61 [08:20<13:19, 24.99s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/83000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/83000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/83000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/83000/pytorch_model.bin...


Retrieving alias checkpoints:  49%|████▉     | 30/61 [08:40<12:13, 23.66s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/93000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/93000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/93000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/93000/pytorch_model.bin...


Retrieving alias checkpoints:  51%|█████     | 31/61 [09:10<12:40, 25.34s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/103000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/103000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/103000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/103000/pytorch_model.bin...


Retrieving alias checkpoints:  52%|█████▏    | 32/61 [09:27<11:07, 23.01s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/113000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/113000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/113000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/113000/pytorch_model.bin...


Retrieving alias checkpoints:  54%|█████▍    | 33/61 [09:57<11:43, 25.11s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/123000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/123000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/123000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/123000/pytorch_model.bin...


Retrieving alias checkpoints:  56%|█████▌    | 34/61 [10:23<11:23, 25.33s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/133000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/133000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/133000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/133000/pytorch_model.bin...


Retrieving alias checkpoints:  57%|█████▋    | 35/61 [10:48<10:58, 25.33s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/143000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/143000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/143000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/143000/pytorch_model.bin...


Retrieving alias checkpoints:  59%|█████▉    | 36/61 [11:15<10:40, 25.62s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/153000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/153000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/153000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/153000/pytorch_model.bin...


Retrieving alias checkpoints:  61%|██████    | 37/61 [11:40<10:14, 25.60s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/163000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/163000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/163000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/163000/pytorch_model.bin...


Retrieving alias checkpoints:  62%|██████▏   | 38/61 [12:10<10:15, 26.76s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/173000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/173000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/173000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/173000/pytorch_model.bin...


Retrieving alias checkpoints:  64%|██████▍   | 39/61 [12:36<09:44, 26.58s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/183000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/183000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/183000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/183000/pytorch_model.bin...


Retrieving alias checkpoints:  66%|██████▌   | 40/61 [13:02<09:14, 26.41s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/193000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/193000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/193000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/193000/pytorch_model.bin...


Retrieving alias checkpoints:  67%|██████▋   | 41/61 [13:34<09:22, 28.13s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/203000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/203000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/203000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/203000/pytorch_model.bin...


Retrieving alias checkpoints:  69%|██████▉   | 42/61 [13:55<08:11, 25.88s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/213000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/213000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/213000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/213000/pytorch_model.bin...


Retrieving alias checkpoints:  70%|███████   | 43/61 [14:29<08:30, 28.36s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/223000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/223000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/223000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/223000/pytorch_model.bin...


Retrieving alias checkpoints:  72%|███████▏  | 44/61 [14:55<07:50, 27.67s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/233000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/233000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/233000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/233000/pytorch_model.bin...


Retrieving alias checkpoints:  74%|███████▍  | 45/61 [15:20<07:08, 26.79s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/243000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/243000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/243000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/243000/pytorch_model.bin...


Retrieving alias checkpoints:  75%|███████▌  | 46/61 [15:44<06:32, 26.15s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/253000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/253000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/253000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/253000/pytorch_model.bin...


Retrieving alias checkpoints:  77%|███████▋  | 47/61 [16:11<06:07, 26.28s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/263000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/263000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/263000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/263000/pytorch_model.bin...


Retrieving alias checkpoints:  79%|███████▊  | 48/61 [16:37<05:41, 26.26s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/273000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/273000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/273000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/273000/pytorch_model.bin...


Retrieving alias checkpoints:  80%|████████  | 49/61 [17:03<05:12, 26.06s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/283000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/283000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/283000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/283000/pytorch_model.bin...


Retrieving alias checkpoints:  82%|████████▏ | 50/61 [17:31<04:55, 26.83s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/293000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/293000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/293000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/293000/pytorch_model.bin...


Retrieving alias checkpoints:  84%|████████▎ | 51/61 [17:55<04:18, 25.81s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/303000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/303000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/303000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/303000/pytorch_model.bin...


Retrieving alias checkpoints:  85%|████████▌ | 52/61 [18:25<04:03, 27.02s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/313000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/313000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/313000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/313000/pytorch_model.bin...


Retrieving alias checkpoints:  87%|████████▋ | 53/61 [18:52<03:36, 27.12s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/323000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/323000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/323000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/323000/pytorch_model.bin...


Retrieving alias checkpoints:  89%|████████▊ | 54/61 [19:18<03:07, 26.74s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/333000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/333000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/333000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/333000/pytorch_model.bin...


Retrieving alias checkpoints:  90%|█████████ | 55/61 [19:43<02:36, 26.14s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/343000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/343000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/343000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/343000/pytorch_model.bin...


Retrieving alias checkpoints:  92%|█████████▏| 56/61 [20:08<02:09, 25.81s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/353000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/353000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/353000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/353000/pytorch_model.bin...


Retrieving alias checkpoints:  93%|█████████▎| 57/61 [20:32<01:41, 25.39s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/363000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/363000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/363000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/363000/pytorch_model.bin...


Retrieving alias checkpoints:  95%|█████████▌| 58/61 [21:03<01:20, 26.97s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/373000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/373000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/373000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/373000/pytorch_model.bin...


Retrieving alias checkpoints:  97%|█████████▋| 59/61 [21:21<00:48, 24.34s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/383000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/383000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/383000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/383000/pytorch_model.bin...


Retrieving alias checkpoints:  98%|█████████▊| 60/61 [21:46<00:24, 24.68s/it]

Done.
Checkpoint stanford-crfm/alias-gpt2-small-x21/393000 not found on AWS. Retrieving from HuggingFace.


Downloading pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Saving HF model to disk...
Uploading HF model to AWS...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/393000/config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/393000/generation_config.json...
Uploading ../checkpoints/stanford-crfm/alias-gpt2-small-x21/393000/pytorch_model.bin...


Retrieving alias checkpoints: 100%|██████████| 61/61 [22:12<00:00, 21.84s/it]

Done.





In [None]:
from datasets import load_dataset

# Load the "tasksource/mmlu" dataset via the `datasets` library.
# NOTE(review): this cell has no recorded execution; the dataset may require an explicit
# config/subset name as a second argument. Confirm before relying on it.
dataset = load_dataset("tasksource/mmlu")