In [None]:
import pandas as pd
import numpy as np
import torch
import torchaudio
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
import time
import warnings

warnings.filterwarnings('ignore')

# set_audio_backend is deprecated in torchaudio 2.x (the I/O backend is picked
# by the dispatcher), so only call it when this torchaudio build exposes it.
if hasattr(torchaudio, "set_audio_backend"):
    torchaudio.set_audio_backend("sox_io")

# Fall back to the CPU so this cell still runs on a machine without a GPU
# instead of failing inside `.to("cuda")`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model and its processor
processor = Wav2Vec2Processor.from_pretrained("/content/command_processor")
model = Wav2Vec2ForSequenceClassification.from_pretrained("/content/command_model").to(device)


In [None]:
def pad_waveform(waveform, target_length):
    """Right-pad a waveform with zeros along its last (time) dimension.

    Args:
        waveform: Tensor whose last dimension is time, e.g. ``(time,)``,
            ``(channels, time)`` or ``(batch, channels, time)``.
        target_length: Minimum number of samples the result must have.

    Returns:
        The input tensor itself when it already has at least
        ``target_length`` samples (it is never truncated); otherwise a new
        tensor zero-padded at the end to exactly ``target_length`` samples.
    """
    # Measure the same axis that F.pad extends. Reading shape[1] only matched
    # the padded axis for 2-D input: it raised on 1-D tensors and computed the
    # wrong amount for 3-D ones.
    current_length = waveform.shape[-1]
    if current_length < target_length:
        padding = target_length - current_length
        waveform = torch.nn.functional.pad(waveform, (0, padding), "constant", 0)
    return waveform

def extract_features(audio_file, target_length=144648):
    """Load an audio file, zero-pad it and run it through the global processor.

    Args:
        audio_file: Path (or file-like object) accepted by ``torchaudio.load``.
        target_length: Minimum number of samples after padding. The default
            is the LONGEST_LENGTH value computed earlier in the project.

    Returns:
        The ``input_values`` tensor produced by ``processor``.
    """
    loaded_waveform, native_rate = torchaudio.load(audio_file)
    padded_waveform = pad_waveform(loaded_waveform, target_length)

    # NOTE(review): squeeze() only yields a 1-D signal for single-channel
    # audio - confirm the inputs are mono.
    samples = padded_waveform.squeeze().numpy()

    # NOTE(review): the file's native rate is forwarded without resampling -
    # confirm it matches the rate the processor expects.
    encoded = processor(
        samples,
        sampling_rate=native_rate,
        return_tensors="pt",
        padding=True,
    )
    return encoded.input_values

def collate_fn(batch, padding_value=None):
    """Collate ``(input_values, label)`` pairs into one padded batch.

    Args:
        batch: Sequence of ``(input_values, label)`` tuples, where
            ``input_values`` is a 1-D tensor and ``label`` is an integer id.
        padding_value: Value used to right-pad shorter sequences. When
            ``None`` (the default, which is how ``DataLoader`` calls this
            function) the feature extractor's own ``padding_value`` is used.

    Returns:
        Tuple ``(input_values, labels)``: a ``(batch, longest_length)`` tensor
        and a 1-D ``torch.long`` tensor of label ids.
    """
    if padding_value is None:
        # The inputs are raw audio features, so they must be padded with the
        # feature extractor's padding value. The tokenizer's pad_token_id is
        # a vocabulary index and would inject that number as audio amplitude
        # whenever it is not 0.
        padding_value = processor.feature_extractor.padding_value

    input_values = [item[0] for item in batch]
    labels = [item[1] for item in batch]
    input_values = torch.nn.utils.rnn.pad_sequence(
        input_values, batch_first=True, padding_value=float(padding_value)
    )
    labels = torch.tensor(labels, dtype=torch.long)
    return input_values, labels


In [None]:
class CommandDataset(Dataset):
    """Map-style dataset yielding ``(input_values, label)`` for command clips.

    Each row of ``dataset`` must provide a ``'path'`` column (audio file) and
    a ``'class'`` column (command name).

    Args:
        dataset: pandas DataFrame with ``'path'`` and ``'class'`` columns.
        label_map: Optional mapping from class name to integer id. When
            omitted, ids follow the order of first appearance of each class
            in ``dataset``.
        target_sample_rate: Sample rate, in Hz, that audio is converted to
            before being handed to the processor.

    NOTE(review): ids derived from the order of this dataset are only
    meaningful if they match the ids the model was trained with - confirm, or
    pass ``label_map`` explicitly.
    """

    def __init__(self, dataset, label_map=None, target_sample_rate=16000):
        self.dataset = dataset
        self.target_sample_rate = target_sample_rate
        # Resample transforms are built lazily, one per source rate actually
        # seen, instead of assuming every file is 48 kHz.
        self._resamplers = {}
        if label_map is None:
            # Build the mapping from the data this instance was given rather
            # than from a global DataFrame defined in another cell.
            label_map = {cmd: idx for idx, cmd in enumerate(dataset['class'].unique())}
        self.label_map = dict(label_map)

    def __len__(self):
        return len(self.dataset)

    def _resample(self, waveform, sample_rate):
        """Return ``waveform`` converted from ``sample_rate`` to the target rate."""
        if sample_rate == self.target_sample_rate:
            return waveform
        resampler = self._resamplers.get(sample_rate)
        if resampler is None:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=self.target_sample_rate
            )
            self._resamplers[sample_rate] = resampler
        return resampler(waveform)

    def __getitem__(self, idx):
        """Load, resample and featurize the clip at position ``idx``.

        Returns:
            Tuple ``(input_values, label)`` with a 1-D tensor and an int id.

        Raises:
            ValueError: If the row's class is not present in ``label_map``.
        """
        row = self.dataset.iloc[idx]
        audio_path = row['path']
        command_class = row['class']
        if command_class not in self.label_map:
            raise ValueError(f"Unknown class: {command_class}")
        label = self.label_map[command_class]

        waveform, sample_rate = torchaudio.load(audio_path)
        waveform = self._resample(waveform, sample_rate)

        # Average the channels so the processor always receives one 1-D
        # signal. For mono files this is the same data squeeze() produced; for
        # multi-channel files squeeze() left a 2-D array.
        mono = waveform.mean(dim=0)

        input_values = processor(
            mono.numpy(), sampling_rate=self.target_sample_rate, return_tensors='pt'
        ).input_values
        # squeeze(0) drops only the batch axis added by the processor.
        return input_values.squeeze(0), label


In [None]:
# Read the test split; the dataset and evaluation code expect 'path' and
# 'class' columns.
test_df = pd.read_csv("/content/test_data.csv")

# Wrap the split in the dataset and batch it in file order.
test_dataset = CommandDataset(dataset=test_df)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
    collate_fn=collate_fn,
)

# Use the GPU when one is visible, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)


In [None]:
def evaluate_pipeline2(model, processor, dataloader, valid_commands):
    """Run the classifier over ``dataloader`` and print evaluation metrics.

    Prints a scikit-learn classification report, the mean and standard
    deviation of the per-batch inference time in seconds, and the share of
    predictions whose class id has no entry in ``valid_commands``.

    Args:
        model: Classifier whose forward pass returns an object with ``.logits``.
        processor: Unused; kept so existing calls keep working.
        dataloader: Iterable of ``(input_values, labels)`` batches.
        valid_commands: List of command names; position ``i`` names class id ``i``.

    Returns:
        None. All results are printed.
    """
    # tqdm was never imported by this notebook; import it here and degrade to
    # plain iteration when it is not installed.
    try:
        from tqdm.auto import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    model.eval()
    # Follow the model's own device instead of relying on a global `device`.
    model_device = next(model.parameters()).device
    on_cuda = model_device.type == "cuda"

    predictions = []
    references = []
    inference_times = []

    # NOTE(review): batches are padded but no attention mask is passed to the
    # model - confirm this matches how the model was trained.
    with torch.no_grad():
        for input_values, labels in progress(dataloader):
            input_values = input_values.to(model_device)

            # CUDA kernels run asynchronously, so wait for pending work on
            # both sides of the timed region; otherwise only the launch time
            # is measured.
            if on_cuda:
                torch.cuda.synchronize(model_device)
            start_time = time.perf_counter()
            logits = model(input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
            if on_cuda:
                torch.cuda.synchronize(model_device)
            inference_times.append(time.perf_counter() - start_time)

            predictions.extend(predicted_ids.cpu().tolist())
            references.extend(labels.tolist())

    if not predictions:
        # Avoid a ZeroDivisionError and NaN statistics on an empty dataloader.
        print("No samples were evaluated; the dataloader is empty.")
        return

    id_to_command = dict(enumerate(valid_commands))
    valid_predictions = [id_to_command.get(p, "DESCONHECIDO") for p in predictions]
    valid_references = [id_to_command.get(r, "DESCONHECIDO") for r in references]

    # Filter prediction/reference PAIRS together. Filtering the two lists
    # independently shifts them against each other as soon as one side holds
    # an unknown id. Pairs with an unknown on either side are left out of the
    # report; the unknown rate is printed separately below.
    scored_pairs = [
        (ref, pred)
        for ref, pred in zip(valid_references, valid_predictions)
        if ref in valid_commands and pred in valid_commands
    ]
    filtered_references = [ref for ref, _ in scored_pairs]
    filtered_predictions = [pred for _, pred in scored_pairs]

    report = classification_report(filtered_references, filtered_predictions, labels=valid_commands, zero_division=0)
    mean_inference_time = np.mean(inference_times)
    std_inference_time = np.std(inference_times)
    unknown_commands_percentage = (valid_predictions.count("DESCONHECIDO") / len(valid_predictions)) * 100

    print("Classification Report:")
    print(report)
    print(f"Mean inference time: {mean_inference_time}")
    print(f"Std inference time: {std_inference_time}")
    print(f"Unknown commands: {unknown_commands_percentage:.2f}%")

# Evaluate the model on the test set; class ids follow the order in which
# each class first appears in test_df.
evaluate_pipeline2(
    model=model,
    processor=processor,
    dataloader=test_loader,
    valid_commands=test_df['class'].unique().tolist(),
)
