# Loading the API

In [None]:
from huggingface_hub import HfApi 
import tqdm as notebook_tqdm

# Client object used by the later cells to query the Hugging Face Hub.
api = HfApi() 

# Left disabled: this would materialize the unfiltered list_models() result.
#list(api.list_models())

# Filtering Models

In [None]:
from huggingface_hub import ModelFilter

# Ask the Hub for the single most-downloaded text-classification model.
# The task tag is passed as a plain string, which `list_models` accepts
# directly for its `filter` argument.
models = api.list_models(
    filter="text-classification",
    sort="downloads",
    direction=-1,  # -1 requests descending order
    limit=1,
)

# Materialize the returned iterable so it can be indexed
modelList = list(models)

# Guard the lookup so an empty result does not raise IndexError
if modelList:
    print(modelList[0].modelId)
else:
    print("No models matched the filter.")

# Downloading a Model

In [None]:
from transformers import AutoModel

# Hub identifier of the checkpoint to fetch
modelId = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the pretrained model through the generic AutoModel class
model = AutoModel.from_pretrained(modelId)

# Write the model to a local folder named after the checkpoint
save_dir = f"models/{modelId}"
model.save_pretrained(save_directory=save_dir)

# Working with Datasets

Datasets can be browsed [here](https://huggingface.co/datasets). Each dataset has a dataset card that describes it.

The *datasets* library lets you access, download, modify, use, and share all of these datasets straight away.

These datasets are usually quite big. Check them before downloading and mind the disk space.





In [None]:
from datasets import load_dataset_builder

# Create the builder for the dataset and grab its info object once
data_builder = load_dataset_builder("derenrich/wikidata-en-descriptions-small")
dataset_info = data_builder.info

# Show the dataset's description and its features
print('Description: ', dataset_info.description)
print('Features: ', dataset_info.features)

In [None]:
from datasets import load_dataset 

# No split given: load the 'imdb' dataset as returned by default
data = load_dataset("imdb")


In [None]:
# Same dataset, but requesting only the 'train' split
data = load_dataset(
    "imdb",
    split="train",
)

In [None]:
# Configuration parameter: the second argument selects a named configuration
# of the dataset. Left disabled in this notebook.
# NOTE(review): confirm on the dataset card that this config name exists for this repo.
#data=load_dataset('wikipedia', '20231101.en')

These datasets are stored in Apache Arrow format, which enables efficient processing of large data and faster querying.



# Pipelines 

Auto classes are general-purpose classes for working with models, tokenizers, configurations, processors, and feature extractors. They are flexible and direct.

## AutoModels 

AutoModels are Auto classes that download a model directly.

There is an AutoModel class for each type of task.

In [None]:
from transformers import AutoModelForSequenceClassification 

# Load the checkpoint through the sequence-classification Auto class
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)



## AutoTokenizers

Prepare text input data.

It is recommended to use the tokenizer that is paired with the model.



In [None]:
from transformers import AutoTokenizer 

# Load the tokenizer from the same checkpoint name as the model
tokenizer_checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

## Pipeline Module 

Contains all task-specific steps 

Best for quickly performing tasks 

### Task Pipelines 

The module contains a task-specific pipeline for each task. These task pipelines leverage the Auto classes: they download the model and the relevant processing components.



In [None]:
from transformers import pipeline

from transformers import (
    SummarizationPipeline, 
    TextClassificationPipeline, 
    AudioClassificationPipeline,
    ImageSegmentationPipeline, 
    QuestionAnsweringPipeline
)

### Creating a pipeline 

When creating a pipeline you can specify a task, a model, or both. If one of them is missing, Hugging Face falls back to a default value for the missing parameter.

In [None]:
# Task only: a default model is used for the missing `model` parameter.
task_pipeline = pipeline(task='text-classification')

# Model only: the missing `task` parameter is filled in for this checkpoint.
my_pipeline = pipeline(model='distilbert-base-uncased-finetuned-sst-2-english')


In [None]:
# Specify both the task and the model so nothing is left to defaults.
# (Variable name corrected from the misspelled `my_pipelne`.)
my_pipeline = pipeline(
    task='text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)


In [None]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'

# Instantiate the model and its paired tokenizer from the same checkpoint
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# With an already-instantiated model object, pipeline() has no Hub id from
# which to infer the task or locate a tokenizer, so both are passed explicitly.
my_pipeline = pipeline(
    task='text-classification',
    model=model,
    tokenizer=tokenizer,
)

In [None]:
from transformers import pipeline 

# Build the classifier by naming both the task and the checkpoint
my_pipeline = pipeline(
    task='text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

In [None]:
# Named `text` rather than `input` so the built-in input() is not shadowed
text = 'Hi, welcome to this course'

# Run the pipeline on the sentence; as the last expression of the cell,
# the result is displayed as the cell output.
my_pipeline(text)