# 1. 강의영상 

# 2. ref 

ref: <https://huggingface.co/docs/transformers/tasks/audio_classification>

# 3. imports 

In [1]:
import datasets
import transformers
import torchvision.transforms
import evaluate
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# 4. 코드정리 

In [4]:
# Scratch arithmetic cell. NOTE(review): the cell does not say what this product
# represents -- confirm its purpose or remove it.
32_000 * 3

96000

In [None]:
## Step1 
# Load the en-US "train" split of PolyAI/minds14, hold out 20% of it as a test
# split, drop the path / transcription / lang_id columns, and cast the "audio"
# column to datasets.Audio(sampling_rate=16_000).
# The split is seeded so that re-running the cell yields the same train/test
# partition (an unseeded split changes on every run).
minds = (
    datasets.load_dataset("PolyAI/minds14", name="en-US", split="train")
    .train_test_split(test_size=0.2, seed=42)
    .remove_columns(["path", "transcription", "english_transcription", "lang_id"])
    .cast_column("audio", datasets.Audio(sampling_rate=16_000))
)
# Feature extractor loaded from the same "facebook/wav2vec2-base" checkpoint
# that the model is loaded from in Step2.
feature_extractor = transformers.AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
def preprocess_function(examples):
    """Run the notebook-level ``feature_extractor`` over a batch of examples.

    Args:
        examples: A batch whose ``"audio"`` entry is a sequence of items, each
            exposing its waveform under the ``"array"`` key.

    Returns:
        Whatever ``feature_extractor`` returns when called on the collected
        waveforms with the extractor's own ``sampling_rate``,
        ``max_length=16000`` and ``truncation=True``.
    """
    waveforms = []
    for clip in examples["audio"]:
        waveforms.append(clip["array"])
    return feature_extractor(
        waveforms,
        sampling_rate=feature_extractor.sampling_rate,
        max_length=16000,
        truncation=True,
    )
# Apply preprocess_function batch-wise, dropping the raw "audio" column, then
# rename the "intent_class" column to "label".
encoded_minds = minds.map(preprocess_function, remove_columns="audio", batched=True)
encoded_minds = encoded_minds.rename_column("intent_class", "label")
## Step2 
labels = minds["train"].features["intent_class"].names
# Class ids are kept as integers: the model config documents label2id as
# Dict[str, int] and id2label as Dict[int, str]. Storing str(i) here would make
# model.config.label2id hand back strings instead of usable class indices.
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}
num_labels = len(id2label)
# Load the pretrained checkpoint with the class count and label maps above.
model = transformers.AutoModelForAudioClassification.from_pretrained(
    "facebook/wav2vec2-base", num_labels=num_labels, label2id=label2id, id2label=id2label
)
## Step3 
# Metric object used by compute_metrics.
accuracy = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    """Score a set of predictions with the notebook-level ``accuracy`` metric.

    Args:
        eval_pred: Object exposing ``predictions`` (reduced with an argmax
            over axis 1) and ``label_ids`` (used as the references).

    Returns:
        The result of ``accuracy.compute`` for the argmax predictions against
        ``eval_pred.label_ids``.
    """
    scores = eval_pred.predictions
    references = eval_pred.label_ids
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)
# Training configuration, grouped by concern.
training_args = transformers.TrainingArguments(
    # --- output ---
    output_dir="my_awesome_mind_model",
    push_to_hub=False,
    # --- optimisation ---
    num_train_epochs=10,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=2,
    # A fixed warm-up step count is used here in place of warmup_ratio=0.1.
    # NOTE(review): confirm that 500 does not exceed the total number of
    # optimizer steps of this run (the inference cell loads "checkpoint-140").
    warmup_steps=500,
    # --- evaluation / checkpointing / logging ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)
# Wire the model, the encoded train/test splits and the metric function together.
trainer = transformers.Trainer(
    args=training_args,
    model=model,
    tokenizer=feature_extractor,
    train_dataset=encoded_minds["train"],
    eval_dataset=encoded_minds["test"],
    compute_metrics=compute_metrics,
)
trainer.train()
## Step4 

In [9]:
# Step4 
# Load the en-US "train" split again and cast its "audio" column to
# datasets.Audio(sampling_rate=16_000).
dataset = datasets.load_dataset("PolyAI/minds14", name="en-US", split="train")
dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
sampling_rate = dataset.features["audio"].sampling_rate
# Take the file path stored with the first example; the pipeline is called
# with this path.
first_audio = dataset[0]["audio"]
audio_file = first_audio["path"]
# Build an audio-classification pipeline from a checkpoint directory under the
# training output_dir.
classifier = transformers.pipeline(
    task="audio-classification",
    model="my_awesome_mind_model/checkpoint-140",
)
classifier(audio_file)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'score': 0.07800574600696564, 'label': 'cash_deposit'},
 {'score': 0.07708185911178589, 'label': 'freeze'},
 {'score': 0.0737047791481018, 'label': 'business_loan'},
 {'score': 0.07282004505395889, 'label': 'direct_debit'},
 {'score': 0.0727161318063736, 'label': 'atm_limit'}]

# 5. 살펴보기 