In [None]:
from huggingface_hub import HfApi, ModelFilter
from transformers import AutoModel

from datasets import load_dataset_builder, load_dataset

# Introduction to Hugging Face

# Transformers and the Hub

## Searching the Hub with Python

In [None]:
# Client object used for every request to the Hugging Face Hub
api = HfApi()

# Query the Hub: text-classification models, ordered by download count
# (direction=-1 requests descending order), keeping only a single result
models = api.list_models(
    task="text-classification",
    sort="downloads",
    direction=-1,
    limit=1,
)

# Materialise the returned iterable so it can be indexed
modelList = list(models)

# Show the identifier of the first (and only) match
top_model = modelList[0]
print(top_model.modelId)

## Saving a model

In [None]:
# Hub identifier of the checkpoint to download
modelId = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the pretrained checkpoint through the generic AutoModel entry point
# NOTE(review): this checkpoint is a fine-tuned classifier; presumably the
# generic AutoModel class loads only the base network without the
# classification head -- confirm that is what should be saved.
model = AutoModel.from_pretrained(modelId)

# Persist the model locally in a folder named after the checkpoint
target_dir = f"models/{modelId}"
model.save_pretrained(save_directory=target_dir)

# Working with datasets

## Inspecting datasets

In [None]:
# Obtain the builder object for the dataset so its metadata can be inspected
reviews_builder = load_dataset_builder(
    "derenrich/wikidata-en-descriptions-small"
)

# Pull the feature schema out of the builder's info and display it
dataset_features = reviews_builder.info.features
print(dataset_features)

## Loading datasets

In [None]:
# Fetch only the "train" split of the English 2023-11-01 Wikipedia configuration
wikipedia = load_dataset(
    path="wikimedia/wikipedia",
    name="20231101.en",
    split="train",
)

# Report how many rows the loaded split contains
print(f"The length of the dataset is {len(wikipedia)}")

## Manipulating datasets

In [None]:
def _mentions_football(row):
    """Return True when the row's "text" field contains the substring "football"."""
    return "football" in row["text"]


# Keep only the documents whose text mentions football
filtered = wikipedia.filter(_mentions_football)

# Take the first matching document as a one-row sample dataset
example = filtered.select(range(1))

# Display the text of that sample document
sample_text = example[0]["text"]
print(sample_text)