In [8]:
import torch
import subprocess

def detailed_diagnostic():
    print("=== PyTorch GPU诊断 ===")
    
    # 1. PyTorch基本信息
    print(f"PyTorch版本: {torch.__version__}")
    print(f"CUDA是否可用: {torch.cuda.is_available()}")
    print(f"CUDA版本: {getattr(torch.version, 'cuda', 'None')}")
    
    # 2. 构建信息
    print(f"\n=== 构建信息 ===")
    print(f"使用CUDA构建: {torch.backends.cuda.is_built()}")
    print(f"cuDNN可用: {torch.backends.cudnn.is_available()}")
    print(f"cuDNN版本: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else 'N/A'}")
    
    # 3. 检查是否是CPU版本的PyTorch
    print(f"\n=== 包详细信息 ===")
    try:
        import pip
        packages = pip.get_installed_distributions()
        torch_pkg = [p for p in packages if 'torch' in p.key][0]
        print(f"Torch包名称: {torch_pkg}")
        print(f"Torch包位置: {torch_pkg.location}")
    except:
        pass
    
    # 4. 尝试直接与CUDA运行时交互
    print(f"\n=== CUDA运行时测试 ===")
    if hasattr(torch.cuda, 'is_available') and torch.cuda.is_available():
        print(f"GPU数量: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        print("PyTorch报告CUDA不可用")
        
    # 5. 检查系统环境
    print(f"\n=== 系统检查 ===")
    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, shell=True)
        if result.returncode == 0:
            print("nvidia-smi执行成功")
            # 提取关键信息
            lines = result.stdout.split('\n')
            for i, line in enumerate(lines):
                if i < 8:  # 只显示前8行关键信息
                    print(f"  {line}")
        else:
            print("nvidia-smi执行失败")
    except Exception as e:
        print(f"nvidia-smi错误: {e}")

if __name__ == "__main__":
    detailed_diagnostic()

=== PyTorch GPU诊断 ===
PyTorch版本: 2.8.0+cu129
CUDA是否可用: True
CUDA版本: 12.9

=== 构建信息 ===
使用CUDA构建: True
cuDNN可用: True
cuDNN版本: 91002

=== 包详细信息 ===

=== CUDA运行时测试 ===
GPU数量: 1
GPU 0: NVIDIA GeForce GTX 1650

=== 系统检查 ===
nvidia-smi执行成功
  Wed Dec  3 10:10:46 2025       
  +-----------------------------------------------------------------------------------------+
  | NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
  +-----------------------------------------+------------------------+----------------------+
  | GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
  | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
  |                                         |                        |               MIG M. |


In [9]:
!wget https://www.csie.ntu.edu.tw/~b10902031/gsm8k_train.jsonl # original dataset for fine-tuning
!wget https://www.csie.ntu.edu.tw/~b10902031/gsm8k_train_self-instruct.jsonl # part of fine-tuning dataset refined by llama-3.2-1b-instruct
!wget https://www.csie.ntu.edu.tw/~b10902031/gsm8k_test_public.jsonl # gsm8k public test dataset
!wget https://www.csie.ntu.edu.tw/~b10902031/gsm8k_test_private.jsonl # gsm8k private test dataset
!wget https://www.csie.ntu.edu.tw/~b10902031/ailuminate_test.csv # ailuminate test dataset (public + private)

'wget' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
'wget' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
'wget' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
'wget' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
'wget' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���


In [None]:
!pip install -U datasets trl bitsandbytes transformers accelerate peft

In [12]:
from huggingface_hub import login
import os
from dotenv import load_dotenv

load_dotenv()
hf_token = os.getenv('HF_Token')
login(token=hf_token)

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
