In [17]:
from huggingface_hub import list_repo_files
import os
from dotenv import load_dotenv
from tqdm import tqdm
from huggingface_hub import CommitOperationCopy,CommitOperationDelete, create_commit

# Repository whose files are listed below.
repo_id = "microsoft/Dayhoff"
repo_type = "model"

# Read variables from a local .env file, then look up the Hub token
# (None when HF_TOKEN is not set).
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

# Every file path currently in the repository.
files = list_repo_files(repo_id, repo_type=repo_type, token=hf_token)



In [18]:
# Distinct first path components of every file whose path contains "gener".
{path.split('/')[0] for path in files if "gener" in path}

{'170m-GGR',
 '170m-UR50',
 '170m-UR50-BBR-n',
 '170m-UR50-BBR-s',
 '170m-UR50-BBR-u',
 '170m-UR90',
 '3b-GGR-MSA',
 '3b-UR90',
 '3b-cooled'}

In [13]:
# Old top-level folder name -> new folder name.
# Insertion order is kept as-is because later cells iterate over this dict.
model_old2new = {
    "jamba-170m-10mnofilter-36w": "170m-UR50-BBR-u",
    "jamba-170m-10mnovelty-36w": "170m-UR50-BBR-n",
    "jamba-170m-gigaclust-36w": "170m-GGR",
    "jamba-170m-seq-36w": "170m-UR50",
    "jamba-170m-seqsam-36w": "170m-UR90",
    "jamba-3b-cooldown7": "3b-cooled",
    "jamba-3b-indel-gigaclust-120k-2": "3b-GGR-MSA",
    "jamba-3b-seq-sam-biar-fsdp-tok90k": "3b-UR90",
    "jamba-170m-10mrmsd-36w": "170m-UR50-BBR-s",
}

In [4]:
# Build the commit operations that move one model folder to its new name:
# a copy to the new path plus a delete of the old path for every file.
copy_operations = []
delete_operations = []

# NOTE: this rebinds `files`, so every later cell sees only the filtered subset
# until the cell that lists the repository is run again.
files = [i for i in files if 'jamba-3b-seq-sam-biar-fsdp-tok90k' in i]
for file in tqdm(files):
    # The first path component is the model folder.
    model_name = file.split('/')[0]
    new_name = model_old2new.get(model_name)
    if not new_name:
        # Folder not in the mapping, or mapped to an empty name: leave the file alone.
        continue
    # Rename the leading folder only. `model_name` is the prefix of `file`, so
    # count=1 rewrites exactly that prefix and leaves any later occurrence of the
    # old name (for example inside a file name) untouched.
    new_file = file.replace(model_name, new_name, 1)
    copy_operations.append(CommitOperationCopy(
        src_path_in_repo=file,
        path_in_repo=new_file,
    ))
    delete_operations.append(CommitOperationDelete(
        path_in_repo=file,
    ))
        


100%|██████████| 6/6 [00:00<00:00, 55188.21it/s]


In [15]:
# One delete operation per listed file whose path contains "jamba".
# This replaces any delete list built by an earlier cell.
delete_operations = [
    CommitOperationDelete(path_in_repo=path)
    for path in tqdm(files)
    if "jamba" in path
]
        


100%|██████████| 72/72 [00:00<00:00, 480111.11it/s]


In [6]:

# Push all queued copy operations as a single commit; the returned CommitInfo
# is the cell output.
create_commit(
    repo_id=repo_id,
    repo_type=repo_type,
    token=os.getenv("HF_TOKEN"),
    commit_message="Copy files to new model names",
    operations=copy_operations,
)

CommitInfo(commit_url='https://huggingface.co/microsoft/dayhoff/commit/fe3732dcce88647d89c24953c1a58a8085f0992d', commit_message='Copy files to new model names', commit_description='', oid='fe3732dcce88647d89c24953c1a58a8085f0992d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/microsoft/dayhoff', endpoint='https://huggingface.co', repo_type='model', repo_id='microsoft/dayhoff'), pr_revision=None, pr_num=None)

In [16]:

# Push all queued delete operations as a single commit; the returned CommitInfo
# is the cell output.
create_commit(
    repo_id=repo_id,
    repo_type=repo_type,
    token=os.getenv("HF_TOKEN"),
    commit_message="Delete old model names",
    operations=delete_operations,
)

CommitInfo(commit_url='https://huggingface.co/microsoft/dayhoff/commit/7ce25a90d4b8991cdc7f1dba0fa6e84da58587e7', commit_message='Delete old model names', commit_description='', oid='7ce25a90d4b8991cdc7f1dba0fa6e84da58587e7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/microsoft/dayhoff', endpoint='https://huggingface.co', repo_type='model', repo_id='microsoft/dayhoff'), pr_revision=None, pr_num=None)

In [5]:
# Show the current old -> new folder mapping as this cell's output.
# NOTE(review): the saved output under this cell lists seven entries while the
# mapping literal in this notebook defines nine; the output presumably predates
# the last two additions. Re-run to refresh it.
model_old2new

{'jamba-170m-10mnofilter-36w': '170m-UR50-BBR-u',
 'jamba-170m-10mnovelty-36w': '170m-UR50-BBR-n',
 'jamba-170m-gigaclust-36w': '170m-GGR',
 'jamba-170m-seq-36w': '170m-UR50',
 'jamba-170m-seqsam-36w': '170m-UR90',
 'jamba-3b-cooldown7': '3b-cooled',
 'jamba-3b-indel-gigaclust-120k-2': '3b-GGR-MSA'}

In [7]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed


# Use the GPU when one is available; otherwise fall back to CPU so the check
# can still run on a machine without CUDA.
torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")

repo_id = "microsoft/dayhoff"
max_length = 10
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)


def sample_sequence(tokenizer, repo_id, subfolder, max_length, seed=0):
    """Load the model stored under `subfolder` and sample one sequence from it.

    The seed is set before the model is loaded, then generation starts from the
    tokenizer's BOS token with sampling enabled.

    Args:
        tokenizer: Tokenizer used to encode the BOS prompt and decode the output.
        repo_id: Hub repository that holds the model folders.
        subfolder: Folder inside the repository to load the model from.
        max_length: `max_length` passed to `generate`.
        seed: Value passed to `set_seed` before loading and sampling.

    Returns:
        The decoded output of `tokenizer.batch_decode`, special tokens skipped.
    """
    set_seed(seed)
    model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=subfolder)
    inputs = tokenizer(tokenizer.bos_token, return_tensors="pt", return_token_type_ids=False)
    outputs = model.generate(inputs["input_ids"], max_length=max_length, do_sample=True)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


# Print a sample from each old folder next to one from its renamed folder,
# both drawn with the same seed.
for old, new in model_old2new.items():
    # Only entries whose old name contains "170" are compared in this cell.
    if "170" not in old:
        continue

    sequence = sample_sequence(tokenizer, repo_id, old, max_length)
    print(f"{old} (old): {sequence}")

    sequence = sample_sequence(tokenizer, repo_id, new, max_length)
    print(f"{new} (new): {sequence}")

jamba-170m-10mnofilter-36w (old): ['MATITDQPN']
170m-UR50-BBR-u (new): ['MATITDQPN']
jamba-170m-10mnovelty-36w (old): ['MATPADQPN']
170m-UR50-BBR-n (new): ['MATPADQPN']
jamba-170m-gigaclust-36w (old): ['LATPADQPR']
170m-GGR (new): ['LATPADQPR']
jamba-170m-seq-36w (old): ['MATIADQPN']
170m-UR50 (new): ['MATIADQPN']
jamba-170m-seqsam-36w (old): ['MATPADQPN']
170m-UR90 (new): ['MATPADQPN']


In [8]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed


# For two selected models, sample from the old and the new folder with the same
# seed and print whether the decoded sequences are equal.
torch.set_default_device("cuda")

repo_id = "microsoft/dayhoff"
max_length = 100
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

for old, new in model_old2new.items():
    # Skip everything except the two folders selected for this comparison.
    if old not in ("jamba-170m-10mnovelty-36w", "jamba-170m-seqsam-36w"):
        continue

    # Sample from the model stored under the old folder name.
    set_seed(0)
    model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=old)
    inputs = tokenizer(
        tokenizer.bos_token,
        return_tensors="pt",
        return_token_type_ids=False,
    )
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        do_sample=True,
    )
    sequence_old = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(f"{old} (old): {sequence_old}")

    # Reseed, then sample from the model stored under the new folder name.
    set_seed(0)
    model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=new)
    inputs = tokenizer(
        tokenizer.bos_token,
        return_tensors="pt",
        return_token_type_ids=False,
    )
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        do_sample=True,
    )
    sequence_new = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(f"{new} (new): {sequence_new}")

    # True when both folders produced the same decoded output.
    print(sequence_new == sequence_old)

jamba-170m-10mnovelty-36w (old): ['MATPADQPNLDLSTTSTDMTESYEPSEHVVKKTPKQNRKRKLDPMPEIEKETNEKNVEVEVKVKVKTETQREEQIIEEPPPVDDKMEVEVKEESATECD']
170m-UR50-BBR-n (new): ['MATPADQPNLDLSTTSTDMTESYEPSEHVVKKTPKQNRKRKLDPMPEIEKETNEKNVEVEVKVKVKTETQREEQIIEEPPPVDDKMEVEVKEESATECD']
True
jamba-170m-seqsam-36w (old): ['MATPADQPNLDLSGTSTDMTESYEPSEHVVKKTPKQNRKGKLDPMPWILKSTNTKNVGVEVLDKLATETQREEQIIGCPPCRRDKCFKIRIEESATKCL']
170m-UR90 (new): ['MATPADQPNLDLSGTSTDMTESYEPSEHVVKKTPKQNRKGKLDPMPWILKSTNTKNVGVEVLDKLATETQREEQIIGCPPCRRDKCFKIRIEESATKCL']
True
