In [1]:
import os
import random
import json
import pickle	

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from transformers import GPT2Tokenizer, GPT2LMHeadModel, utils, AutoTokenizer, GPTJForCausalLM

from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

# move workind directory to the root of the project
os.chdir("..")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_or_download_model(model_name="EleutherAI/gpt-j-6B", device = "cpu"):
    if not os.path.exists(f'./models/{model_name}'):
        os.makedirs(f'./models/{model_name}', exist_ok=True)

    TOKENIZER_PATH = f"./models/{model_name}/tokenizer.pt"
    MODEL_PATH = f"./models/{model_name}/model.pt"
    EMBEDDINGS_PATH = f"./models/{model_name}/embeddings.pt"

    # Load or Download Tokenizer
    if os.path.exists(TOKENIZER_PATH):
        print(f'Loading {model_name} tokenizer from local storage...')
        tokenizer = torch.load(TOKENIZER_PATH)
    else:
        print(f'Downloading {model_name} tokenizer...')
        tokenizer = AutoTokenizer.from_pretrained(f"{model_name}")
        torch.save(tokenizer, TOKENIZER_PATH)

    # Load or Download Model
    if os.path.exists(MODEL_PATH):
        print(f'Loading {model_name} model from local storage...')
        GPTmodel = torch.load(MODEL_PATH).to(device)
    else:
        print(f'Downloading {model_name} model...')
        GPTmodel = GPTJForCausalLM.from_pretrained(f"{model_name}").to(device)
        torch.save(GPTmodel, MODEL_PATH)
        
    GPTmodel.eval()

    # Save or Load Embeddings
    if os.path.exists(EMBEDDINGS_PATH):
        print(f'Loading {model_name} embeddings from local storage...')
        embeddings = torch.load(EMBEDDINGS_PATH).to(device)
    else:
        embeddings = GPTmodel.transformer.wte.weight.to(device)
        torch.save(embeddings, EMBEDDINGS_PATH)
        print(f"The {model_name} 'embeddings' tensor has been saved.")

    return tokenizer, GPTmodel, embeddings

# Call the function with desired model name
tokenizer, GPTmodel, embeddings = load_or_download_model(
    model_name="gpt2", device = "cpu")


Downloading gpt2 tokenizer...
Downloading gpt2 model...


You are using a model of type gpt2 to instantiate a model of type gptj. This is not supported for all configurations of models and can yield errors.
Some weights of GPTJForCausalLM were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.7.attn.k_proj.weight', 'h.8.attn.k_proj.weight', 'h.3.mlp.fc_in.bias', 'h.1.mlp.fc_in.bias', 'h.3.attn.k_proj.weight', 'h.6.mlp.fc_out.bias', 'h.11.attn.k_proj.weight', 'h.3.mlp.fc_in.weight', 'h.10.attn.q_proj.weight', 'h.4.attn.q_proj.weight', 'h.6.attn.k_proj.weight', 'h.0.attn.k_proj.weight', 'h.3.mlp.fc_out.weight', 'h.7.mlp.fc_in.bias', 'h.3.mlp.fc_out.bias', 'h.5.attn.out_proj.weight', 'h.1.mlp.fc_in.weight', 'h.7.attn.out_proj.weight', 'lm_head.bias', 'h.2.mlp.fc_in.weight', 'h.0.mlp.fc_in.weight', 'h.4.attn.k_proj.weight', 'h.5.attn.v_proj.weight', 'h.0.mlp.fc_out.bias', 'h.0.attn.v_proj.weight', 'h.2.attn.k_proj.weight', 'h.0.attn.out_proj.weight', 'h.5.attn.k_proj.weight', 'h.1.attn.q_proj.weight', 'h.8.attn.out_

The gpt2 'embeddings' tensor has been saved.
