## 1. Intro to Agents

In [1]:
from dotenv import load_dotenv
load_dotenv()  # .env 로드

True

로그인

In [2]:
from huggingface_hub import login

login()

모델 검색 및 탐색

In [3]:
from huggingface_hub import HfApi

api = HfApi()

In [4]:
api

<huggingface_hub.hf_api.HfApi at 0x10a8a1160>

In [5]:
models = api.list_models(
    sort = "downloads",
    direction=-1,
    limit=5
) # 다운로드 수 기준 상위 5개


Sorting is always descending.


In [6]:
models

<generator object HfApi.list_models at 0x10ab6d1c0>

In [7]:
for model in models:
    print(f"{model.id} - Downloads: {model.downloads:,}")

sentence-transformers/all-MiniLM-L6-v2 - Downloads: 163,821,522
google-bert/bert-base-uncased - Downloads: 52,328,731
google/electra-base-discriminator - Downloads: 44,177,141
Falconsai/nsfw_image_detection - Downloads: 36,258,777
sentence-transformers/all-mpnet-base-v2 - Downloads: 24,311,379


In [8]:
# 특정 조직 모델 검색
# 특정 조직의 모델만 검색
qwen_models = api.list_models(
    author="Qwen",
    sort="downloads",
    limit=10
)

for m in qwen_models:
    print(f"- {m.id}")

- Qwen/Qwen2.5-VL-3B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen3-VL-2B-Instruct
- Qwen/Qwen3-0.6B
- Qwen/Qwen2.5-1.5B-Instruct
- Qwen/Qwen2.5-0.5B-Instruct
- Qwen/Qwen2.5-3B-Instruct
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-32B-Instruct


In [9]:
# 특정 조직 모델 검색
# 특정 조직의 모델만 검색
tg_models = api.list_models(
    pipeline_tag="text-generation",      # Task 필터
    sort="downloads",
    limit=10
)

for m in tg_models:
    print(f"- {m.id}")

- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen3-0.6B
- openai-community/gpt2
- Qwen/Qwen2.5-1.5B-Instruct
- meta-llama/Llama-3.1-8B-Instruct
- openai/gpt-oss-20b
- Qwen/Qwen2.5-0.5B-Instruct
- Qwen/Qwen2.5-3B-Instruct
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B


In [10]:
# 특정 조직 모델 검색
# 특정 조직의 모델만 검색
trending_models = api.list_models(
    pipeline_tag="text-generation",      # Task 필터
    sort="trending_score",
    limit=15
)

for m in trending_models:
    print(f"- {m.id}")

- MiniMaxAI/MiniMax-M2.5
- zai-org/GLM-5
- Nanbeige/Nanbeige4.1-3B
- inclusionAI/Ring-2.5-1T
- unsloth/MiniMax-M2.5-GGUF
- Qwen/Qwen3-Coder-Next
- jdopensource/JoyAI-LLM-Flash
- openbmb/MiniCPM-SALA
- unsloth/GLM-5-GGUF
- DMindAI/DMind-3
- unsloth/Qwen3-Coder-Next-GGUF
- Fortytwo-Network/Strand-Rust-Coder-14B-v1
- nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese
- lm-provers/QED-Nano
- inclusionAI/Ling-2.5-1T


### 모델 카드 이해하기

모델 카드 = yaml 메타데이터 + 마크다운 본문

```yaml
---
language: en
license: mit
library_name: transformers
tags:
  - image-classification
  - pytorch
datasets:
  - imagenet-1k
metrics:
  - accuracy
base_model: google/vit-base-patch16-224
pipeline_tag: image-classification
---

# Model Card for ViT-Base

This model is a Vision Transformer (ViT) ...
```

In [11]:
from huggingface_hub import ModelCard

# Hub에서 Model Card 로드
card = ModelCard.load("google/gemma-2-27b-it")

# 메타데이터 확인
print("=== Metadata ===")
print(card.data.to_dict())

# 주요 정보 추출
print(f"License: {card.data.license}")
print(f"Library: {card.data.library_name}")
print(f"Tags: {card.data.tags}")
print(f"Base Model: {card.data.base_model}")

# Markdown 본문
print("\n=== Card Text (first 500 chars) ===")
print(card.text[:500])

=== Metadata ===
{'base_model': 'google/gemma-2-27b', 'library_name': 'transformers', 'license': 'gemma', 'pipeline_tag': 'text-generation', 'extra_gated_heading': 'Access Gemma on Hugging Face', 'extra_gated_prompt': 'To access Gemma on Hugging Face, you’re required to review and agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging Face and click below. Requests are processed immediately.', 'extra_gated_button_content': 'Acknowledge license'}
License: gemma
Library: transformers
Tags: None
Base Model: google/gemma-2-27b

=== Card Text (first 500 chars) ===



# Gemma 2 model card

**Model Page**: [Gemma](https://ai.google.dev/gemma/docs)

**Resources and Technical Documentation**:

* [Responsible Generative AI Toolkit][rai-toolkit]
* [Gemma on Kaggle][kaggle-gemma]
* [Gemma on Vertex Model Garden][vertex-mg-gemma]

**Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-2

예시: 좋은 모델 선별

In [12]:
def evaluate_model_quality(model_id: str) -> dict:
    """모델 품질 평가 체크리스트"""
    api = HfApi()
    info = api.model_info(model_id, securityStatus=True)
    card = ModelCard.load(model_id)
    
    quality = {
        "model_id": model_id,
        "downloads": info.downloads,
        "likes": info.likes,
        "has_license": card.data.license is not None,
        "has_model_card": len(card.text) > 100,
        "library": card.data.library_name,
        "tags": card.data.tags or [],
        "security_status": getattr(info, "security_repo_status", None),
    }
    
    # 점수 계산 (간단한 휴리스틱)
    score = 0
    if quality["downloads"] > 10000: score += 2
    if quality["likes"] > 100: score += 1
    if quality["has_license"]: score += 2
    if quality["has_model_card"]: score += 2
    if quality["library"] in ["transformers", "diffusers", "sentence-transformers"]: score += 1
    
    quality["quality_score"] = score
    return quality

# 사용 예시
result = evaluate_model_quality("google/gemma-2-27b-it")
print(result)

{'model_id': 'google/gemma-2-27b-it', 'downloads': 396313, 'likes': 559, 'has_license': True, 'has_model_card': True, 'library': 'transformers', 'tags': [], 'security_status': {'scansDone': True, 'filesWithIssues': [{'path': 'model-00005-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00009-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00004-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00008-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00003-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00001-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00011-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00010-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00007-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00002-of-00012.safetensors', 'level': 'unscanned'}, {'path': 'model-00006-of-00012.safetensors', 'level': 'unscanned'}]}, 'quality_score': 8}


### 모델 상세 정보 조회 

In [13]:
from huggingface_hub import HfApi

In [14]:
api = HfApi()

# 모델 메타데이터
info = api.model_info(
    "meta-llama/Llama-3.2-1B",
    securityStatus=True
)

info

ModelInfo(id='meta-llama/Llama-3.2-1B', author='meta-llama', sha='4e20de362430cd3b72f300e6b0f18e50e7166e08', created_at=datetime.datetime(2024, 9, 18, 15, 3, 14, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 10, 24, 15, 8, 3, tzinfo=datetime.timezone.utc), private=False, disabled=False, downloads=1597282, downloads_all_time=None, gated='manual', gguf=None, inference=None, inference_provider_mapping=None, likes=2295, library_name='transformers', tags=['transformers', 'safetensors', 'llama', 'text-generation', 'facebook', 'meta', 'pytorch', 'llama-3', 'en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th', 'arxiv:2204.05149', 'arxiv:2405.16406', 'license:llama3.2', 'text-generation-inference', 'endpoints_compatible', 'region:us'], pipeline_tag='text-generation', mask_token=None, card_data={'base_model': None, 'datasets': None, 'eval_results': None, 'language': ['en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th'], 'library_name': 'transformers', 'license': 'llama3.2', 'license_

In [15]:
print(f"Model ID: {info.id}")
print(f"Author: {info.author}")
print(f"Downloads: {info.downloads:,}")
print(f"Likes: {info.likes}")
print(f"Tags: {info.tags}")
print(f"Last Modified: {info.last_modified}")
print(f"Library: {info.library_name}")
print(f"Pipeline: {info.pipeline_tag}")

# 파일 목록
print("\n=== Files ===")
for sibling in info.siblings:
    size = sibling.size
    size_str = f"{size:,} bytes" if isinstance(size, int) else "size unknown"
    print(f"  {sibling.rfilename} ({size_str})")

Model ID: meta-llama/Llama-3.2-1B
Author: meta-llama
Downloads: 1,597,282
Likes: 2295
Tags: ['transformers', 'safetensors', 'llama', 'text-generation', 'facebook', 'meta', 'pytorch', 'llama-3', 'en', 'de', 'fr', 'it', 'pt', 'hi', 'es', 'th', 'arxiv:2204.05149', 'arxiv:2405.16406', 'license:llama3.2', 'text-generation-inference', 'endpoints_compatible', 'region:us']
Last Modified: 2024-10-24 15:08:03+00:00
Library: transformers
Pipeline: text-generation

=== Files ===
  .gitattributes (size unknown)
  LICENSE.txt (size unknown)
  README.md (size unknown)
  USE_POLICY.md (size unknown)
  config.json (size unknown)
  generation_config.json (size unknown)
  model.safetensors (size unknown)
  original/consolidated.00.pth (size unknown)
  original/params.json (size unknown)
  original/tokenizer.model (size unknown)
  special_tokens_map.json (size unknown)
  tokenizer.json (size unknown)
  tokenizer_config.json (size unknown)


### Serverless API

In [19]:
from huggingface_hub import InferenceClient

client = InferenceClient(
    model = "Qwen/Qwen3-8B"
    )


In [20]:
completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

In [21]:
print(completion.choices[0].message)

ChatCompletionOutputMessage(role='assistant', content="<think>\nOkay, the user is asking for the capital of France. Let me think. I know that France is a country in Europe. The capital is a city that serves as the seat of government. I remember that Paris is the capital of France. But wait, I should make sure I'm not confusing it with another city. Let me recall some facts. France has several major cities like Lyon, Marseille, and Nice, but none of those are the capital. Paris is the largest city and the political, economic, and cultural center. The government buildings, like the Eiffel Tower and the Louvre, are in Paris. Also, the French president lives in the Elysée Palace in Paris. So yes, Paris is definitely the capital. I don't think there's any other city that could be considered the capital. Maybe the user is testing basic knowledge or just confirming. Either way, the answer is Paris. I should present it clearly and maybe add a bit of context to be helpful.\n</think>\n\nThe capi

In [22]:
print(completion.choices[0].message.content)

<think>
Okay, the user is asking for the capital of France. Let me think. I know that France is a country in Europe. The capital is a city that serves as the seat of government. I remember that Paris is the capital of France. But wait, I should make sure I'm not confusing it with another city. Let me recall some facts. France has several major cities like Lyon, Marseille, and Nice, but none of those are the capital. Paris is the largest city and the political, economic, and cultural center. The government buildings, like the Eiffel Tower and the Louvre, are in Paris. Also, the French president lives in the Elysée Palace in Paris. So yes, Paris is definitely the capital. I don't think there's any other city that could be considered the capital. Maybe the user is testing basic knowledge or just confirming. Either way, the answer is Paris. I should present it clearly and maybe add a bit of context to be helpful.
</think>

The capital of France is **Paris**. It is the largest city in the c

In [23]:
client = InferenceClient(
    model = "meta-llama/Llama-3.2-3B-Instruct"
    )

prompt="""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
The capital of France is<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message.content)

The capital of France is Paris.


In [24]:
output = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "The capital of France is"},
    ],
    stream=False,
    max_tokens=1024,
    extra_body={'thinking': {'type': 'disabled'}},
)
print(output.choices[0].message.content)

Paris.
