## 下载模型和数据集

In [ ]:
import os
import subprocess

# Point the HF_ENDPOINT environment variable at the hf-mirror.com mirror.
# Child processes started later through subprocess inherit this value.
HF_MIRROR_ENDPOINT = 'https://hf-mirror.com'
os.environ['HF_ENDPOINT'] = HF_MIRROR_ENDPOINT

def download_model(model_name):
    """Download a model repository by invoking ``huggingface-cli download``.

    ``model_name`` is passed both as the repository id and as the
    ``--local-dir`` target, so the files land in a directory named after the
    repository. Failures are reported on stdout rather than raised, so the
    remaining notebook cells can still be run.

    Args:
        model_name: Repository id, e.g. ``'facebook/opt-2.7b'``.
    """
    command = [
        'huggingface-cli', 'download', '--resume-download',
        model_name, '--local-dir', model_name,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # CalledProcessError: the CLI ran but exited with a non-zero status.
        # FileNotFoundError: the huggingface-cli executable is not on PATH.
        print(f"Error downloading model '{model_name}': {e}")
    else:
        print(f"Model '{model_name}' downloaded successfully.")

def download_dataset(dataset_name):
    """Download a dataset repository by invoking ``huggingface-cli download``.

    The CLI is called with ``--repo-type dataset``; ``dataset_name`` is passed
    both as the repository id and as the ``--local-dir`` target. Failures are
    reported on stdout rather than raised, so the remaining notebook cells can
    still be run.

    Args:
        dataset_name: Dataset repository id.
    """
    command = [
        'huggingface-cli', 'download', '--resume-download',
        '--repo-type', 'dataset',
        dataset_name, '--local-dir', dataset_name,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # CalledProcessError: the CLI ran but exited with a non-zero status.
        # FileNotFoundError: the huggingface-cli executable is not on PATH.
        print(f"Error downloading dataset '{dataset_name}': {e}")
    else:
        print(f"Dataset '{dataset_name}' downloaded successfully.")

In [0]:
# Fetch the checkpoint that the notebook actually loads; this id must match
# the value assigned to model_path.
download_model('facebook/opt-2.7b')

## 加载分词器，配置量化超参数，加载模型

In [1]:
from transformers import GPTQConfig,AutoTokenizer,AutoModelForCausalLM

In [2]:
# Checkpoint to quantize; read by the tokenizer and model loading cells.
model_path = 'facebook/opt-2.7b'

# GPTQ quantization settings: 4 bits, group size 128, desc_act disabled.
# NOTE(review): per the GPTQConfig docs, `dataset` names the calibration
# data used during quantization — confirm "c4" is the intended choice.
config = GPTQConfig(bits=4, group_size=128, dataset="c4", desc_act=False)

In [ ]:
# (Intentionally empty cell. A stray, body-less SQL cell magic was here; it
# is unrelated to this notebook and fails unless an SQL extension is loaded.)


In [ ]:
# Load the tokenizer for the checkpoint identified by model_path.
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [ ]:
# Load the checkpoint at model_path with the GPTQ settings from `config`.
# The Auto* classes are factories and cannot be instantiated directly, so the
# model has to be built through from_pretrained().
quant_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=config,
    device_map='auto',
)

## 检查模型

In [ ]:
# Show the attribute dictionary of the first decoder layer's query projection
# so the module that replaced it after loading can be inspected.
first_layer_q_proj = quant_model.model.decoder.layers[0].self_attn.q_proj
vars(first_layer_q_proj)

## 调用模型

In [3]:
# Tokenize a prompt, generate up to 64 new tokens, and print the decoded text.
text = "hello,can you introduce yourself?"
# Named `inputs` (not `input`) so the builtin input() stays usable in the
# notebook namespace. `.to(0)` moves the encoded tensors to device index 0.
inputs = tokenizer(text, return_tensors='pt').to(0)
output = quant_model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))