In [None]:
# %pip install sae-lens transformer-lens

In [None]:
# # Standard imports
# import os
# import torch
# import numpy as np
# from tqdm import tqdm
# import plotly.express as px
# import pandas as pd
# import einops
# from datasets import load_dataset
# from sae_lens import SAE, HookedSAETransformer
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# from sae_lens import LanguageModelSAERunnerConfig
# from sae_lens import ActivationsStore
# import os
# from dotenv import load_dotenv
# import typing
# from dataclasses import dataclass
# from tqdm import tqdm
# import logging

# # Save GPU memory: this notebook does not need gradient computation
# torch.set_grad_enabled(False)

In [None]:
from pathlib import Path

def get_device():
    """Pick the torch device to run on.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``cpu``.
    """
    # Imported locally: the notebook's top-level `import torch` is commented
    # out, so relying on a global `torch` fails on a fresh kernel.
    import torch

    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model config

In [None]:
# Select the model family to work with. Supported values: 'GEMMA', 'GPT2', 'MISTRAL'.
MODEL = 'MISTRAL'

# Each branch sets the identifiers used by the rest of the notebook:
# RELEASE, BASE_MODEL, FINETUNE_MODEL, DATASET_NAME, BASE_TOKENIZER_NAME,
# plus the residual-stream hook position (hook_part) and layer (layer_num).
if MODEL == 'GEMMA':
    RELEASE = 'gemma-2b-res-jb'
    BASE_MODEL = "google/gemma-2b"
    FINETUNE_MODEL = 'shahdishank/gemma-2b-it-finetune-python-codes'
    DATASET_NAME = "ctigges/openwebtext-gemma-1024-cl"
    BASE_TOKENIZER_NAME = BASE_MODEL

    hook_part = 'post'
    layer_num = 6
elif MODEL == 'GPT2':
    RELEASE = 'gpt2-small-res-jb'
    BASE_MODEL = "gpt2-small"
    FINETUNE_MODEL = 'pierreguillou/gpt2-small-portuguese'
    DATASET_NAME = "Skylion007/openwebtext"
    BASE_TOKENIZER_NAME = BASE_MODEL

    hook_part = 'pre'
    layer_num = 6
elif MODEL == 'MISTRAL':
    RELEASE = 'mistral-7b-res-wg'
    BASE_MODEL = "mistral-7b"
    FINETUNE_MODEL = 'meta-math/MetaMath-Mistral-7B' #DeepMount00/Mistral-Ita-7b
    DATASET_NAME = "monology/pile-uncopyrighted"
    BASE_TOKENIZER_NAME = 'mistralai/Mistral-7B-v0.1'

    hook_part = 'pre'
    layer_num = 8
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        f"Unsupported MODEL {MODEL!r}; expected one of 'GEMMA', 'GPT2', 'MISTRAL'"
    )

# Short names for saving: the last path component of each identifier
# (the part after the final "/", or the whole string when there is no "/").
saving_name_base = BASE_MODEL.split("/")[-1]
saving_name_ft = FINETUNE_MODEL.split("/")[-1]
saving_name_ds = DATASET_NAME.split("/")[-1]

# Hook point name assembled from the selected layer and hook position.
SAE_HOOK = f'blocks.{layer_num}.hook_resid_{hook_part}'

In [None]:
from google.colab import drive
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# Mount Google Drive at /content/drive so later cells can write under that path.
# Re-running with the drive already mounted just reports that it is mounted
# (see the cell output below).
drive.mount('/content/drive')

# Helper that copies a Hugging Face model and its tokenizer into a Drive folder
def save_hf_model_to_gdrive(model_name, drive_folder):
    """Load a Hugging Face model and tokenizer and save both into a folder.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the causal-LM model.
        drive_folder: Destination directory; created (with parents) if missing.

    Prints a confirmation message naming the destination once both are saved.
    """
    # exist_ok=True avoids the check-then-create race and makes re-runs safe
    os.makedirs(drive_folder, exist_ok=True)

    # Load the tokenizer and model from Hugging Face
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Save the tokenizer and model to the destination folder
    tokenizer.save_pretrained(drive_folder)
    model.save_pretrained(drive_folder)

    print(f"Model and tokenizer saved to {drive_folder}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Destination on the mounted drive for the fine-tuned checkpoint (edit to store it elsewhere)
gdrive_folder = f'/content/drive/My Drive/Finetunes/{saving_name_ft}'

# Download FINETUNE_MODEL and its tokenizer, then write both into that folder
save_hf_model_to_gdrive(model_name=FINETUNE_MODEL, drive_folder=gdrive_folder)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model and tokenizer saved to /content/drive/My Drive/Finetunes/MetaMath-Mistral-7B
