# Intro to Hugging face

Similar to what we previously did in [Intro to PyBuda Wrapping](./Intro_to_PyBudaWrapping.ipynb), Hugging Face abstracts out the model architecture, model structure, dataset and sometimes data loading. 

In [14]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

In [2]:
# Load model directly
from transformers import AutoImageProcessor, AutoModelForImageClassification

processor = AutoImageProcessor.from_pretrained("farleyknight/mnist-digit-classification-2022-09-04")
model = AutoModelForImageClassification.from_pretrained("farleyknight/mnist-digit-classification-2022-09-04")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from datasets import load_dataset

ds = load_dataset("ylecun/mnist")

In [4]:
train_dataset = ds['train']
test_dataset = ds['test']

In [5]:
PIL_to_tensor = transforms.PILToTensor()

In [22]:
class CustomDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        print('item: ', item.items())
        image = item['image']
        label = item['label']
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [27]:
train_dataset = CustomDataset(train_dataset, transform=PIL_to_tensor)
test_dataset = CustomDataset(test_dataset, transform=PIL_to_tensor)

item:  dict_items([('image', <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x138061FD0>), ('label', 5)])


In [28]:
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [29]:
data, labels = next(iter(train_dataloader))

item:  dict_items([('image', <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x138568970>), ('label', 8)])
item:  dict_items([('image', <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x138568460>), ('label', 2)])
item:  dict_items([('image', <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x138568E50>), ('label', 9)])
item:  dict_items([('image', <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x138568520>), ('label', 5)])


In [30]:
print(labels)

tensor([8, 2, 9, 5])


In [None]:
output = model(data)
print(output)

### NLP Demo

[Official Hugging Face Demo](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/pytorch/translation.ipynb#scrollTo=EsoqUtVeh5u-)

### TTS Demo

[Official Hugging Face Demo](https://huggingface.co/docs/transformers/en/model_doc/speech_to_text)

In [None]:
#ripped straight from hugging face
import torch
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
from datasets import load_dataset

model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")

ds = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")

inputs = processor(ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt")
generated_ids = model.generate(
    inputs["input_features"],
    attention_mask=inputs["attention_mask"],
    forced_bos_token_id=processor.tokenizer.lang_code_to_id["fr"],
)

translation = processor.batch_decode(generated_ids, skip_special_tokens=True)
translation