In [None]:
# Standard library first, then the Colab-specific helpers.
import os
import sys

from google.colab import drive, userdata

# Mount Google Drive so that files under /content/drive are reachable
# from the cells below.
drive.mount('/content/drive')

In [2]:
!pip install transformers>=4.21.0 --quiet
!pip install torch torchvision torchaudio --quiet
!pip install tqdm --quiet
!pip install datasets --quiet
!pip install datatrove --quiet

try:
    !pip install flash-attn --no-build-isolation --quiet
    print("Flash Attention がインストールされました")
except:
    print("Flash Attention のインストールに失敗 - 標準Attentionを使用します")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m91.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for flash-attn (setup.py) ... [?25l[?25hdone
Flash Attention がインストールされました


In [3]:
# Switch the working directory to the LightLM folder on Google Drive and put
# that folder on sys.path so the modules stored there can be imported.
lightlm_path = '/content/drive/MyDrive/LightLM'

os.chdir(lightlm_path)
print(f"作業ディレクトリ: {os.getcwd()}")

# Prepend only once so re-running the cell does not add duplicate entries.
already_importable = lightlm_path in sys.path
if not already_importable:
    sys.path.insert(0, lightlm_path)

作業ディレクトリ: /content/drive/MyDrive/LightLM


In [4]:
# Conversion / upload settings.
# Default checkpoint path (taken from train.py).
default_checkpoint = f"{lightlm_path}/model_testing-small/model.checkpoint.epoch3_step3278_global13115.pt"
# Directory that holds the HuggingFace-format export.
model_dir = f"{lightlm_path}/hf_model-small"
# Target repository on the HuggingFace Hub and its visibility.
repo_name = "asap-bb/mylightlm_small_sample"
private = False

In [5]:
def _strip_key_prefix(state_dict, prefix='_orig_mod.'):
    """Return a new dict in which a leading ``prefix`` is removed from every key.

    Keys that do not start with ``prefix`` are copied unchanged.
    """
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def convert_checkpoint_to_hf(checkpoint_path, output_dir="./hf_model-small"):
    """
    Convert a PyTorch training checkpoint into a HuggingFace-style model directory.

    The function loads the checkpoint, strips the ``_orig_mod.`` key prefix,
    rebuilds the model from a hard-coded ``ModelConfig``, loads the weights
    (non-strictly), and then writes the model, the tokenizer, a copy of
    ``model.py`` (if present in the current directory) and an extended
    ``config.json`` into ``output_dir``.

    Args:
        checkpoint_path: Path to the ``.pt`` checkpoint file. The loaded object
            must provide the model state dict under the ``'model'`` key.
        output_dir: Directory that receives the converted model files. It is
            created if it does not exist.

    Returns:
        ``output_dir``, unchanged.
    """
    print(f"Loading checkpoint from: {checkpoint_path}")

    # Load the checkpoint onto the CPU.
    # NOTE(security): torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source; consider passing
    # weights_only=True if the checkpoint contains nothing but tensors and
    # plain containers (TODO confirm against train.py).
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    state_dict = checkpoint['model']

    print(f"Checkpoint keys: {len(state_dict)}")
    print("Sample keys:", list(state_dict.keys())[:5])

    # Remove the "_orig_mod." prefix (presumably added because the model was
    # wrapped by torch.compile during training) so keys match a plain model.
    clean_state_dict = _strip_key_prefix(state_dict)

    print(f"Cleaned state dict keys: {len(clean_state_dict)}")
    print("Sample cleaned keys:", list(clean_state_dict.keys())[:5])

    # Tokenizer information (taken from train.py).
    tokenizer_id = "HuggingFaceTB/SmolLM-360M"
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

    # Build the ModelConfig (mirrors the settings used in train.py).
    config = ModelConfig(
        vocab_size=tokenizer.vocab_size,
        num_dims=512,
        num_heads=16,
        num_kv_heads=4,
        num_layers=12,
        ffn_hidden_dims=512 * 4,
        rmsnorm_eps=1e-6,
        rope_theta=1e5,
        context_len=1024,
        use_cache=False,
        use_flash=True,
        use_moe=True,
        moe_num_experts=3,
        moe_active_experts=1,
        moe_eps=1e-6,
        moe_aux_loss_coef=0.01,
        moe_shared_experts=1,
        use_lossfreebalance=False,
    )

    print(f"Model config: {config}")

    # Instantiate the model.
    model = Transformer(config)

    # Load the weights. strict=False keeps this best-effort: mismatched keys
    # are reported below instead of aborting the conversion.
    missing_keys, unexpected_keys = model.load_state_dict(clean_state_dict, strict=False)

    if missing_keys:
        print(f"Warning: Missing keys: {missing_keys}")
    if unexpected_keys:
        print(f"Warning: Unexpected keys: {unexpected_keys}")

    # Create the output directory.
    os.makedirs(output_dir, exist_ok=True)

    # Save in HuggingFace format (per the original note, save_pretrained comes
    # from PyTorchModelHubMixin on the Transformer class).
    print(f"Saving model to: {output_dir}")
    model.save_pretrained(output_dir)

    # Save the tokenizer alongside the model.
    tokenizer.save_pretrained(output_dir)

    # Copy model.py from the current working directory; it is referenced by the
    # auto_map entry written to config.json below.
    model_py_path = os.path.join(output_dir, 'model.py')
    if os.path.exists('model.py'):
        shutil.copy2('model.py', model_py_path)
        print(f"Copied model.py to: {model_py_path}")
    else:
        print("Warning: model.py not found in current directory")

    # Additional config information to persist.
    config_dict = {
        'model_type': 'llama',
        'vocab_size': config.vocab_size,
        'num_dims': config.num_dims,
        'num_heads': config.num_heads,
        'num_kv_heads': config.num_kv_heads,
        'num_layers': config.num_layers,
        'ffn_hidden_dims': config.ffn_hidden_dims,
        'context_len': config.context_len,
        'use_cache': config.use_cache,
        'use_flash': config.use_flash,
        'use_moe': config.use_moe,
        'moe_num_experts': config.moe_num_experts,
        'moe_active_experts': config.moe_active_experts,
        'moe_eps': config.moe_eps,
        'moe_aux_loss_coef': config.moe_aux_loss_coef,
        'moe_shared_experts': config.moe_shared_experts,
        'use_lossfreebalance': config.use_lossfreebalance,
        'rmsnorm_eps': config.rmsnorm_eps,
        'rope_theta': config.rope_theta,
        'tokenizer_id': tokenizer_id,
        'architecture': 'LightLM',
        'torch_dtype': 'float32',
        # HuggingFace auto_map setting (the standard loading mechanism).
        'auto_map': {
            'AutoModel': 'model.Transformer'
        },
        'trust_remote_code': True,
    }

    # Merge the extra information into config.json: entries written by
    # save_pretrained are kept and overridden by config_dict on key collisions.
    # The encoding is explicit so the result does not depend on the locale.
    config_path = os.path.join(output_dir, 'config.json')
    if os.path.exists(config_path):
        with open(config_path, 'r', encoding='utf-8') as f:
            existing_config = json.load(f)
        existing_config.update(config_dict)
    else:
        existing_config = config_dict

    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(existing_config, f, indent=2)

    print("✅ Conversion completed successfully!")
    print(f"   Model saved to: {output_dir}")
    print("   Files created:")
    for filename in os.listdir(output_dir):
        print(f"     - {filename}")

    return output_dir


In [6]:
# Run the checkpoint -> HuggingFace conversion.
import os
import torch
import json
import shutil
from transformers import AutoTokenizer
from model import Transformer, ModelConfig

if not os.path.exists(default_checkpoint):
    print(f"❌ Checkpoint not found: {default_checkpoint}")
    print("Available checkpoints:")
    # Look in the directory the configured checkpoint is expected to live in,
    # so the suggestions match the configured location.
    checkpoint_dir = os.path.dirname(default_checkpoint)
    if os.path.exists(checkpoint_dir):
        for file in os.listdir(checkpoint_dir):
            if file.endswith('.pt'):
                print(f"   - {os.path.join(checkpoint_dir, file)}")
    # Stop here: continuing would only fail later, inside torch.load, with a
    # less helpful traceback.
    raise FileNotFoundError(f"Checkpoint not found: {default_checkpoint}")


print("🚀 Converting checkpoint to HuggingFace format...")
# Write to model_dir so the export lands exactly where the upload cell reads
# from, independent of the current working directory.
output_dir = convert_checkpoint_to_hf(default_checkpoint, output_dir=model_dir)
print("🎉 Done! Model is ready for upload to HuggingFace Hub.")
print("    Next step: Run 'python upload_to_hub.py' to upload to HuggingFace")

🚀 Converting checkpoint to HuggingFace format...
Loading checkpoint from: /content/drive/MyDrive/LightLM/model_testing-small/model.checkpoint.epoch3_step3278_global13115.pt
Checkpoint keys: 231
Sample keys: ['_orig_mod.tokens_embedding.weight', '_orig_mod.blocks.0.attention.wq.weight', '_orig_mod.blocks.0.attention.wk.weight', '_orig_mod.blocks.0.attention.wv.weight', '_orig_mod.blocks.0.attention.wo.weight']
Cleaned state dict keys: 231
Sample cleaned keys: ['tokens_embedding.weight', 'blocks.0.attention.wq.weight', 'blocks.0.attention.wk.weight', 'blocks.0.attention.wv.weight', 'blocks.0.attention.wo.weight']


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/831 [00:00<?, ?B/s]

Model config: ModelConfig(vocab_size=49152, num_dims=512, num_heads=16, num_kv_heads=4, num_layers=12, ffn_hidden_dims=2048, context_len=1024, use_cache=False, use_flash=True, use_moe=True, moe_num_experts=3, moe_active_experts=1, moe_eps=1e-06, moe_aux_loss_coef=0.01, moe_shared_experts=1, use_lossfreebalance=False, rmsnorm_eps=1e-06, rope_theta=100000.0, ffn_dim_multiplier=None)
Saving model to: ./hf_model-small
Copied model.py to: ./hf_model-small/model.py
✅ Conversion completed successfully!
   Model saved to: ./hf_model-small
   Files created:
     - model.py
     - model.safetensors
     - tokenizer_config.json
     - special_tokens_map.json
     - vocab.json
     - merges.txt
     - tokenizer.json
     - config.json
     - README.md
🎉 Done! Model is ready for upload to HuggingFace Hub.
    Next step: Run 'python upload_to_hub.py' to upload to HuggingFace


In [7]:
# Upload the converted model to the HuggingFace Hub.

import os
import json
from transformers import AutoTokenizer
from huggingface_hub import HfApi
from upload_to_hub import upload_to_huggingface

# The access token is read from the Colab secret store instead of being
# hard-coded in the notebook.
token = userdata.get('HF_TOKEN')

# Arguments for the upload helper, collected in one place.
upload_kwargs = dict(
    model_dir=model_dir,
    repo_name=repo_name,
    private=private,
    token=token,
)

# Checklist shown to the user when the upload does not go through.
upload_requirements = (
    "Valid HuggingFace token",
    "Proper permissions",
    "Unique repository name",
)

try:
    url = upload_to_huggingface(**upload_kwargs)
    print(f"🎉 Success! Your model is now available at: {url}")

except Exception as err:
    # Best-effort: report the failure with troubleshooting hints rather than
    # propagating the exception.
    print(f"❌ Upload failed: {err}")
    print("   Make sure you have:")
    for number, requirement in enumerate(upload_requirements, start=1):
        print(f"   {number}. {requirement}")

🚀 Uploading model to HuggingFace Hub as: asap-bb/mylightlm_small_sample
✅ Model card created: /content/drive/MyDrive/LightLM/hf_model-small/README.md
🔧 Creating repository if it doesn't exist...


No files have been modified since last commit. Skipping to prevent empty commit.


✅ Repository already exists: asap-bb/mylightlm_small_sample
📤 Uploading files directly to hub...
📤 Uploading model.py...
✅ model.py uploaded successfully
📤 Uploading model.safetensors...


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...LM/hf_model-small/model.safetensors:   0%|          |  552kB /  736MB            

✅ model.safetensors uploaded successfully
📤 Uploading config.json...


No files have been modified since last commit. Skipping to prevent empty commit.


✅ config.json uploaded successfully
📤 Uploading tokenizer...


README.md: 0.00B [00:00, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


✅ Upload completed successfully!
   Model URL: https://huggingface.co/asap-bb/mylightlm_small_sample
   You can now use the model with:
     python hf_inference.py --model asap-bb/mylightlm_small_sample
🎉 Success! Your model is now available at: https://huggingface.co/asap-bb/mylightlm_small_sample


In [None]:

# Final step: hand the Colab runtime back once all work above has finished.
# NOTE(review): runtime.unassign() presumably disconnects and releases the
# current runtime, discarding any in-memory state — confirm before re-ordering cells.
from google.colab import runtime
runtime.unassign()