In [3]:
import torch

def compute_U_from_H(H: torch.Tensor):
    """
    Factor a symmetric positive-definite matrix into a unit-triangular part
    and a diagonal part (an LDL^T factorization obtained from Cholesky).

    Computes U and D such that

        H = (U + I).T @ D @ (U + I)

    where U is strictly upper-triangular and D is diagonal with positive
    entries. Equivalently, L = (U + I).T is unit lower-triangular and
    H = L @ D @ L.T.

    Args:
        H: tensor of shape (..., n, n). Every matrix must be symmetric
           positive-definite. Leading batch dimensions are supported.

    Returns:
        (U, D): two tensors with the same shape, dtype and device as H.

    Raises:
        RuntimeError: propagated from torch.linalg.cholesky when H is not
            positive-definite (torch.linalg.LinAlgError in recent PyTorch).
    """
    # NOTE: an earlier revision tried `torch.linalg.ldl` first. torch.linalg
    # only exposes `ldl_factor` / `ldl_solve` (pivoted, compact storage), so
    # that branch could never run; Cholesky is the path for SPD input.

    # H = C @ C.T with C lower-triangular and a positive diagonal.
    chol = torch.linalg.cholesky(H)                       # (..., n, n)
    pivots = torch.diagonal(chol, dim1=-2, dim2=-1)       # (..., n)

    # D = diag(pivots^2), built per batch element.
    D = torch.diag_embed(pivots * pivots)                 # (..., n, n)

    # Dividing column j of C by C[j, j] gives the unit lower-triangular factor.
    unit_lower = chol / pivots.unsqueeze(-2)

    # Its transpose is unit upper-triangular; keeping only the part above the
    # diagonal yields U with an exactly-zero diagonal.
    U = torch.triu(unit_lower.transpose(-2, -1), diagonal=1)
    return U, D


# Example usage: draw a random matrix, turn it into an SPD one, factor it.
H = torch.randn(128, 128)
H = torch.matmul(H, H.T) + 1e-3 * torch.eye(128)  # Gram matrix plus a small ridge => SPD
U, D = compute_U_from_H(H)
# Sanity check: rebuild H from the factors as (U + I).T @ D @ (U + I).
H_recon = torch.matmul(
    torch.matmul((U + torch.eye(128)).T, D),
    U + torch.eye(128),
)
print("reconstruction error:", (H - H_recon).norm())


reconstruction error: tensor(0.0003)


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Qwen2ForCausalLM, LlamaForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the Qwen2.5 tokenizer and causal-LM weights by Hub repo id.
model_id = "Qwen/Qwen2.5-7B" # swap for the 1.5B, 14B, 72B, ... variants as needed

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto", # dtype is picked automatically (BF16 / FP16)
    device_map="cpu" # places every weight on the CPU; "auto" is the setting that splits across several GPUs
    )

Downloading shards:  50%|█████     | 2/4 [00:43<00:43, 21.91s/it]

In [2]:
# Load tokenizer and causal-LM weights from the local Vicuna-7B v1.5 checkpoint directory.
model_id = "/workspace/Weight_compression/Wparam_dataset/hf_model/lmsys--vicuna-7b-v1.5"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto", # dtype is picked automatically (BF16 / FP16)
    device_map="cpu" # places every weight on the CPU; "auto" is the setting that splits across several GPUs
    )

Loading checkpoint shards: 100%|██████████| 6/6 [00:00<00:00,  6.60it/s]


In [3]:
# NOTE(review): there are no call parentheses, so this cell displays the bound
# method (whose repr embeds the module tree) rather than the state dict itself.
model.state_dict

<bound method Module.state_dict of LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): Llam

In [12]:
import torch

# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code; only use it on checkpoints you trust.
d = torch.load('/workspace/Weight_compression/hf_model_comp/comp_qtip/ckpt/meta-llama--Meta-Llama-3-8B/optim_code_frdelta/lmbda50/0_v.pt', weights_only=False)

In [None]:
import torch
from transformers import AutoModel, MllamaForConditionalGeneration

# Disabled variant kept for reference: load the CLIP checkpoint into `ori`.
# model_name="openai--clip-vit-large-patch14"
# lm_model_path=f"../Wparam_dataset/hf_model/{model_name}"
# ori = AutoModel.from_pretrained(lm_model_path)

# Load the local Llama-3.2-3B checkpoint as `q`; a later cell reads q.state_dict().
q = AutoModel.from_pretrained("/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Llama-3.2-3B")

In [None]:
import torch
from transformers import AutoModel, MllamaForConditionalGeneration

In [None]:
import torch

from transformers import AutoModelForCausalLM

# Local checkpoint used as the example model.
model_name = "/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Meta-Llama-3-8B"
model = AutoModelForCausalLM.from_pretrained(model_name)

# Accumulate the element count of every parameter tensor.
total_params = 0
for param in model.parameters():
    total_params += param.numel()

# Report the total.
print(f"Total number of parameters: {total_params}")

In [None]:
 # NOTE(review): presumably (compressed size in bytes) / (parameter count) * 8,
 # i.e. average bits per parameter -- confirm where the two constants come from.
 4084433813.00 / 8030261248 * 8

In [None]:
import os

def get_directory_size(directory_path):
    """Return the combined size, in bytes, of every file found under directory_path.

    The tree is traversed with os.walk and each file is measured with
    os.path.getsize; sub-directories are descended into recursively.
    """
    return sum(
        os.path.getsize(os.path.join(parent, name))
        for parent, _subdirs, names in os.walk(directory_path)
        for name in names
    )

# Directory to measure.
directory_path = '/workspace/Weight_compression/comp_lm_qtip/aqlm_cache/models--ISTA-DASLab--Meta-Llama-3-8B-AQLM-2Bit-1x16/snapshots/812d023a2163f2c04f7f1016e8b1810e877c5aea'

# Total size of all files under the directory, in bytes.
directory_size_bytes = get_directory_size(directory_path)

# Convert bytes to megabytes (1 MB = 1024 ** 2 bytes, the same convention as the
# model-file size cell). The conversion was previously missing, so the raw byte
# count was printed under an "MB" label; use directory_size_bytes for raw bytes.
directory_size_mb = directory_size_bytes / (1024 ** 2)

print(f"Directory size: {directory_size_mb:.2f} MB")

In [None]:
# Placeholder path to a single weights file.
model_path = './path_to_model_directory/pytorch_model.bin'

# Size on disk in bytes, read from the file's stat record.
file_size_bytes = os.stat(model_path).st_size

# Bytes -> megabytes (2**20 bytes each).
file_size_mb = file_size_bytes / (1 << 20)

print(f"Model file size: {file_size_mb:.2f} MB")

In [None]:
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Load Llama-3.2-11B-Vision by Hub repo id (model files are cached under
# cache_dir), then write model and processor into one local directory.
model_id = "meta-llama/Llama-3.2-11B-Vision"

model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    device_map="cpu",
    cache_dir='/workspace/Weight_compression/Wparam_dataset/hf_model/cache'
)
# Note: the processor call does not pass cache_dir.
processor = AutoProcessor.from_pretrained(model_id)

# Model weights and processor files are saved to the same target directory.
model.save_pretrained('/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Llama-3.2-11B-Vision')
processor.save_pretrained('/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Llama-3.2-11B-Vision')


In [None]:
# Reload the vision model from a local directory onto the CPU.
# NOTE(review): this path ends in "_" and therefore differs from the directory
# written by save_pretrained in the download cell -- confirm it is intentional.
model = MllamaForConditionalGeneration.from_pretrained(
    '/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Llama-3.2-11B-Vision_',
    device_map="cpu",
)

In [None]:
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Load the same repo id through AutoModel in bfloat16, letting device_map="auto"
# decide weight placement.
# NOTE(review): AutoModel is not imported in this cell; it relies on an earlier
# cell's `from transformers import AutoModel, ...` having been run.
model_id = "meta-llama/Llama-3.2-11B-Vision"

model = AutoModel.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir='/workspace/Weight_compression/Wparam_dataset/hf_model/cache'
)

In [None]:
# Load the locally saved Llama-3.2-11B-Vision checkpoint as `ori`, the reference model.
ori = MllamaForConditionalGeneration.from_pretrained("/workspace/Weight_compression/Wparam_dataset/hf_model/meta-llama--Llama-3.2-11B-Vision")

In [None]:
# Bare expression: the notebook displays the repr of `ori`.
ori

In [None]:
# Snapshot the state dicts of the reference model and of `q`.
ori_s = ori.state_dict()
q_s = q.state_dict()

# Print name, mean and shape for every entry of the reference state dict
# (an optional `'bias' in k` filter was left disabled).
for k in ori_s:
    v = ori_s[k]
    print(k, v.mean(), v.shape)

In [None]:
# Relative path to the 4-bit CLIP checkpoint directory; resolved against the
# notebook's working directory.
path = '../hf_model_comp/qtip/hf/clip-vit-large-patch14_4bit'

qtip = AutoModel.from_pretrained(path)

In [None]:
# Print every state-dict entry of `qtip`: the name followed by the full tensor
# (this can produce very large output).
for k, v in qtip.state_dict().items():
    print(k, v)

In [14]:
import json
# Read the result JSON of the lmbda100000 CLIP run into `r`.
# Note: this rebinds the global `path` used by the qtip loading cell.
path='/workspace/Weight_compression/hf_model_comp/comp_qtip/hf/clip_test/openai--clip-vit-large-patch14/lmbda100000_result.json'
with open(path, 'r') as f:
    r = json.load(f)

In [None]:
# Ratio of the 'bpp_loss' and 'num_pixels' fields of the loaded result JSON.
# NOTE(review): presumably an average bit cost per element -- confirm the units.
print(r['bpp_loss']/ r['num_pixels'])



In [None]:
# TODO: the loop body was never written; `pass` keeps the cell syntactically
# valid so the notebook can be run top to bottom.
for i in range(12):
    pass

In [None]:
# Show only the entries of the reference state dict whose name contains 'bias'.
for k in ori_s:
    if 'bias' not in k:
        continue
    print(ori_s[k])
    # Disabled experiment: per-tensor squared error against q_s (absolute, then relative).
    # print(k)
    # # print(f'{((ori_s[k] - q_s[k])**2).mean().item():.4f}')
    # print(f'{((ori_s[k] - q_s[k])**2).mean().item() / (ori_s[k]**2).mean().item()}')

In [None]:
# Membership test: is 'quip_params' a key of the model config's dict form?
# NOTE(review): `c` is not defined in any cell visible in this notebook -- it
# must come from a deleted cell or earlier kernel state.
'quip_params' in c['model_config'].to_dict()

In [None]:
from transformers import AutoModelForCausalLM

# Load the AQLM 2-bit Llama-2-7B checkpoint by Hub repo id and move it to the GPU.
# SECURITY: trust_remote_code=True allows Python code shipped in the model
# repository to run locally; only enable it for repositories you trust.
quantized_model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf",
    trust_remote_code=True, torch_dtype="auto"
).cuda()

In [None]:
# Bare expression: the notebook displays the repr of `quantized_model`.
quantized_model