## 1.1 HF hub 이해하기

In [1]:
from dotenv import load_dotenv
load_dotenv()  # .env 로드

True

로그인

In [2]:
from huggingface_hub import login

login()

모델 검색 및 탐색

In [3]:
from huggingface_hub import HfApi

api = HfApi()

In [4]:
api

<huggingface_hub.hf_api.HfApi at 0x10d21dd90>

In [5]:
models = api.list_models(
    sort = "downloads",
    direction=-1,
    limit=5
) # 다운로드 수 기준 상위 5개


Sorting is always descending.


In [6]:
models

<generator object HfApi.list_models at 0x10d31d1c0>

In [7]:
for model in models:
    print(f"{model.id} - Downloads: {model.downloads:,}")

sentence-transformers/all-MiniLM-L6-v2 - Downloads: 141,838,267
Falconsai/nsfw_image_detection - Downloads: 66,227,470
google/electra-base-discriminator - Downloads: 48,107,630
dima806/fairface_age_image_detection - Downloads: 43,027,405
google-bert/bert-base-uncased - Downloads: 39,333,188


In [8]:
# 필터링 검색
# Task + Library + Dataset 복합 필터
models = api.list_models(
    pipeline_tag="image-classification",      # Task 필터
    filter="pytorch",                 # 프레임워크(라이브러리)
    trained_dataset="imagenet",        # 학습 데이터셋
    limit=10
)

for m in models:
    print(f"{m.id} | {m.pipeline_tag} | {m.library_name}")

facebook/deit-base-distilled-patch16-224 | image-classification | transformers
google/vit-large-patch16-384 | image-classification | transformers
WinKawaks/vit-small-patch16-224 | image-classification | transformers
WinKawaks/vit-tiny-patch16-224 | image-classification | transformers
deepmind/vision-perceiver-conv | image-classification | transformers
deepmind/vision-perceiver-fourier | image-classification | transformers
deepmind/vision-perceiver-learned | image-classification | transformers
facebook/deit-base-distilled-patch16-384 | image-classification | transformers
facebook/deit-small-distilled-patch16-224 | image-classification | transformers
facebook/deit-tiny-distilled-patch16-224 | image-classification | transformers


VLM, Vison 모델 검색

In [9]:
# Vision-Language 모델 찾기
vlm_models = api.list_models(
    pipeline_tag="image-text-to-text",
    sort="downloads",
    limit=20
)

print("=== Top VLM Models ===")
for m in vlm_models:
    print(f"- {m.id}")

=== Top VLM Models ===
- Qwen/Qwen2.5-VL-3B-Instruct
- vikhyatk/moondream2
- Qwen/Qwen2.5-VL-7B-Instruct
- deepseek-ai/DeepSeek-OCR
- OpenGVLab/InternVL3-78B
- openvla/openvla-7b
- Qwen/Qwen2.5-VL-32B-Instruct
- Qwen/Qwen3-VL-8B-Instruct
- Qwen/Qwen2-VL-2B-Instruct
- google/gemma-3-27b-it
- Qwen/Qwen2-VL-7B-Instruct
- google/gemma-3-12b-it
- tencent/HunyuanOCR
- llava-hf/llava-1.5-7b-hf
- OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF
- OpenGVLab/InternVL2-2B
- nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1
- OpenGVLab/InternVL2-1B
- google/gemma-3-4b-it
- Qwen/Qwen3-VL-30B-A3B-Instruct


In [10]:
# 특정 조직 모델 검색
# 특정 조직의 모델만 검색
qwen_models = api.list_models(
    author="Qwen",
    sort="downloads",
    limit=10
)

for m in qwen_models:
    print(f"- {m.id}")

- Qwen/Qwen2.5-VL-3B-Instruct
- Qwen/Qwen2.5-3B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen3-0.6B
- Qwen/Qwen2.5-1.5B-Instruct
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen3-1.7B
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen2.5-32B-Instruct


데이터셋 검색

In [11]:
datasets = api.list_datasets(
    language="ko",
    sort="downloads",
    limit=10
)

for ds in datasets:
    print(f"{ds.id} - {ds.downloads:,} downloads")

allenai/c4 - 510,409 downloads
CohereLabs/xP3x - 134,680 downloads
jobs-git/HPLT2.0_cleaned - 93,857 downloads
legacy-datasets/wikipedia - 90,778 downloads
wikimedia/wikipedia - 78,879 downloads
AaronZ345/GTSinger - 54,396 downloads
amphion/Emilia-Dataset - 45,441 downloads
hltcoe/megawika - 42,757 downloads
uonlp/CulturaX - 40,688 downloads
Viet-Mistral/CulturaY - 36,025 downloads


### 모델 카드 이해하기

모델 카드 = yaml 메타데이터 + 마크다운 본문

```yaml
---
language: en
license: mit
library_name: transformers
tags:
  - image-classification
  - pytorch
datasets:
  - imagenet-1k
metrics:
  - accuracy
base_model: google/vit-base-patch16-224
pipeline_tag: image-classification
---

# Model Card for ViT-Base

This model is a Vision Transformer (ViT) ...
```

In [12]:
from huggingface_hub import ModelCard

# Hub에서 Model Card 로드
card = ModelCard.load("google/vit-base-patch16-224")

# 메타데이터 확인
print("=== Metadata ===")
print(card.data.to_dict())

# 주요 정보 추출
print(f"License: {card.data.license}")
print(f"Library: {card.data.library_name}")
print(f"Tags: {card.data.tags}")
print(f"Base Model: {card.data.base_model}")

# Markdown 본문
print("\n=== Card Text (first 500 chars) ===")
print(card.text[:500])

=== Metadata ===
{'datasets': ['imagenet-1k', 'imagenet-21k'], 'license': 'apache-2.0', 'tags': ['vision', 'image-classification'], 'widget': [{'src': 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg', 'example_title': 'Tiger'}, {'src': 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg', 'example_title': 'Teapot'}, {'src': 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg', 'example_title': 'Palace'}]}
License: apache-2.0
Library: None
Tags: ['vision', 'image-classification']
Base Model: None

=== Card Text (first 500 chars) ===

# Vision Transformer (base-sized model) 

Vision Transformer (ViT) model pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224, and fine-tuned on ImageNet 2012 (1 million images, 1,000 classes) at resolution 224x224. It was introduced in the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2

예시: 좋은 모델 선별

In [13]:
def evaluate_model_quality(model_id: str) -> dict:
    """모델 품질 평가 체크리스트"""
    api = HfApi()
    info = api.model_info(model_id, securityStatus=True)
    card = ModelCard.load(model_id)
    
    quality = {
        "model_id": model_id,
        "downloads": info.downloads,
        "likes": info.likes,
        "has_license": card.data.license is not None,
        "has_model_card": len(card.text) > 100,
        "library": card.data.library_name,
        "tags": card.data.tags or [],
        "security_status": getattr(info, "security_repo_status", None),
    }
    
    # 점수 계산 (간단한 휴리스틱)
    score = 0
    if quality["downloads"] > 10000: score += 2
    if quality["likes"] > 100: score += 1
    if quality["has_license"]: score += 2
    if quality["has_model_card"]: score += 2
    if quality["library"] in ["transformers", "diffusers", "sentence-transformers"]: score += 1
    
    quality["quality_score"] = score
    return quality

# 사용 예시
result = evaluate_model_quality("google/vit-base-patch16-224")
print(result)

{'model_id': 'google/vit-base-patch16-224', 'downloads': 3929903, 'likes': 931, 'has_license': True, 'has_model_card': True, 'library': None, 'tags': ['vision', 'image-classification'], 'security_status': {'scansDone': True, 'filesWithIssues': [{'path': 'README.md', 'level': 'error'}]}, 'quality_score': 7}


### 모델 상세 정보 조회 

In [14]:
from huggingface_hub import HfApi

In [15]:
api = HfApi()

# 모델 메타데이터
info = api.model_info(
    "meta-llama/Llama-3.2-1B",
    securityStatus=True
)

info

ModelInfo(id='meta-llama/Llama-3.2-1B', author='meta-llama', sha='4e20de362430cd3b72f300e6b0f18e50e7166e08', created_at=datetime.datetime(2024, 9, 18, 15, 3, 14, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 10, 24, 15, 8, 3, tzinfo=datetime.timezone.utc), private=False, disabled=False, downloads=2424486, downloads_all_time=None, gated='manual', gguf=None, inference=None, inference_provider_mapping=None, likes=2277, library_name='transformers', tags=['transformers', 'safetensors', 'llama', 'text-generation', 'facebook', 'meta', 'pytorch', 'llama-3', 'en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th', 'arxiv:2204.05149', 'arxiv:2405.16406', 'license:llama3.2', 'text-generation-inference', 'endpoints_compatible', 'region:us'], pipeline_tag='text-generation', mask_token=None, card_data={'base_model': None, 'datasets': None, 'eval_results': None, 'language': ['en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th'], 'library_name': 'transformers', 'license': 'llama3.2', 'license_

In [16]:
print(f"Model ID: {info.id}")
print(f"Author: {info.author}")
print(f"Downloads: {info.downloads:,}")
print(f"Likes: {info.likes}")
print(f"Tags: {info.tags}")
print(f"Last Modified: {info.last_modified}")
print(f"Library: {info.library_name}")
print(f"Pipeline: {info.pipeline_tag}")

# 파일 목록
print("\n=== Files ===")
for sibling in info.siblings:
    size = sibling.size
    size_str = f"{size:,} bytes" if isinstance(size, int) else "size unknown"
    print(f"  {sibling.rfilename} ({size_str})")

Model ID: meta-llama/Llama-3.2-1B
Author: meta-llama
Downloads: 2,424,486
Likes: 2277
Tags: ['transformers', 'safetensors', 'llama', 'text-generation', 'facebook', 'meta', 'pytorch', 'llama-3', 'en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th', 'arxiv:2204.05149', 'arxiv:2405.16406', 'license:llama3.2', 'text-generation-inference', 'endpoints_compatible', 'region:us']
Last Modified: 2024-10-24 15:08:03+00:00
Library: transformers
Pipeline: text-generation

=== Files ===
  .gitattributes (size unknown)
  LICENSE.txt (size unknown)
  README.md (size unknown)
  USE_POLICY.md (size unknown)
  config.json (size unknown)
  generation_config.json (size unknown)
  model.safetensors (size unknown)
  original/consolidated.00.pth (size unknown)
  original/params.json (size unknown)
  original/tokenizer.model (size unknown)
  special_tokens_map.json (size unknown)
  tokenizer.json (size unknown)
  tokenizer_config.json (size unknown)
