# Gender Classification

In [1]:
import logging
from transformers import logging as hf_logging
from transformers import pipeline, logging as hf_logging
import torchaudio
from torchaudio.transforms import Resample
import numpy as np

# Limit Hugging Face Transformers output to error-level messages.
hf_logging.set_verbosity_error()
# Configure the root logger so only ERROR and above are emitted.
logging.basicConfig(level=logging.ERROR)

def main():
    """Classify one audio file with the gender model and print the label.

    Reads the module-level names ``gender_model_path`` (model passed to the
    audio-classification pipeline) and ``audio_file_path`` (file to load).
    The audio is reduced to its first channel, resampled to 16 kHz when its
    native rate differs, and handed to the pipeline as a NumPy array. The
    ``"label"`` of the first entry in the pipeline result is printed.
    """
    target_rate = 16000
    classifier = pipeline("audio-classification", model=gender_model_path)

    signal, native_rate = torchaudio.load(audio_file_path)

    # The first dimension is treated as the channel axis: keep only channel 0
    # while preserving that leading dimension.
    channel_count = signal.shape[0]
    if channel_count > 1:
        signal = signal[:1]

    # Resample only when the file is not already at the target rate.
    if native_rate != target_rate:
        to_target = Resample(orig_freq=native_rate, new_freq=target_rate)
        signal = to_target(signal)

    # Drop the singleton channel dimension before calling the pipeline.
    samples = signal.numpy().squeeze()

    predictions = classifier(samples)
    top_prediction = predictions[0]
    print(top_prediction["label"])

if __name__ == "__main__":
    # main() reads these two names as module-level globals, so they must be
    # assigned before it is called.
    # NOTE(review): the paths are relative and use backslash separators,
    # which only act as path separators on Windows — confirm the target platform.
    audio_file_path = r"Gender-Classification\Test-Dataset\37.wav"
    gender_model_path = r"Gender-Classification\model"
    main()

# Noise Detection

##### Small Model

In [None]:
import logging
from transformers import pipeline, logging as hf_logging
from transformers import Wav2Vec2Processor, Wav2Vec2ForAudioFrameClassification
import torch
import librosa
from scipy.stats import mode

# Limit Transformers output and root-logger output to error-level messages.
hf_logging.set_verbosity_error()
logging.basicConfig(level=logging.ERROR)

# Relative, backslash-separated path to the base noise-detection checkpoint.
noise_base_model = r"Noise-Detection\base_model"
# Loaded once at module level; predict() uses both objects as globals.
processor = Wav2Vec2Processor.from_pretrained(noise_base_model)
model = Wav2Vec2ForAudioFrameClassification.from_pretrained(noise_base_model)

def read_audio(file_path, target_sr=16000):
    """Load an audio file with librosa and return only its samples.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The sample array from ``librosa.load``; the sampling rate that is
        returned alongside it is discarded.
    """
    loaded = librosa.load(file_path, sr=target_sr)
    samples = loaded[0]
    return samples

def predict(audio):
    """Classify a waveform by majority vote over the per-frame predictions.

    Uses the module-level ``processor`` and ``model``.

    Args:
        audio: Waveform samples accepted by ``processor``; the processor is
            told they are sampled at 16 kHz.

    Returns:
        The ``model.config.id2label`` entry for the class id predicted for
        the largest number of frames (the smallest id wins a tie), or
        ``"Manual Review Needed"`` when that id has no label or the model
        produced no frame predictions.
    """
    inputs = processor(audio, return_tensors="pt", sampling_rate=16000, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the per-frame argmax is taken on the raw logits.
    predicted_ids = torch.argmax(logits, dim=-1)

    # Vote across every frame. The ids still carry a leading batch dimension,
    # and a mode taken along the default axis 0 would reduce over that batch
    # dimension instead of over the frames, so flatten first.
    frame_ids = predicted_ids.flatten()
    if frame_ids.numel() == 0:
        return "Manual Review Needed"

    # bincount + argmax yields the most frequent id as a plain int, which
    # matches the int keys of id2label.
    most_common = int(torch.bincount(frame_ids).argmax())
    return model.config.id2label.get(most_common, "Manual Review Needed")

# Replace the loaded model's label map with the two classes predict() reports.
model.config.id2label = {0: "clean", 1: "noisy"}

# Classify a single file and print the result.
# NOTE(review): this sample lives under the Gender-Classification test set —
# confirm it is the intended input for noise detection.
file_path = r"Gender-Classification\Test-Dataset\12.wav"
audio = read_audio(file_path)
classification = predict(audio)
print("The audio was classified as:", classification)

##### Large Model

In [None]:
import logging
from transformers import pipeline, logging as hf_logging
from transformers import Wav2Vec2Processor, Wav2Vec2ForAudioFrameClassification
import torch
import librosa
from scipy.stats import mode

# Limit Transformers output and root-logger output to error-level messages.
hf_logging.set_verbosity_error()
logging.basicConfig(level=logging.ERROR)

# NOTE(review): the local path assigned first is immediately overwritten by
# the model identifier on the next line, so the local directory is never
# used — confirm which source is intended.
noise_large_model = r"Noise-Detection\large_model"
noise_large_model = "facebook/wav2vec2-large-960h-lv60-self"

# Loaded once at module level; predict() uses both objects as globals.
# NOTE(review): presumably this checkpoint was not trained for frame
# classification, in which case the classification head would not carry
# trained weights — verify before trusting the predictions.
processor = Wav2Vec2Processor.from_pretrained(noise_large_model)
model = Wav2Vec2ForAudioFrameClassification.from_pretrained(noise_large_model)

def read_audio(file_path, target_sr=16000):
    """Load an audio file with librosa and return only its samples.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The sample array from ``librosa.load``; the sampling rate that is
        returned alongside it is discarded.
    """
    loaded = librosa.load(file_path, sr=target_sr)
    samples = loaded[0]
    return samples

def predict(audio):
    """Classify a waveform by majority vote over the per-frame predictions.

    Uses the module-level ``processor`` and ``model``.

    Args:
        audio: Waveform samples accepted by ``processor``; the processor is
            told they are sampled at 16 kHz.

    Returns:
        The ``model.config.id2label`` entry for the class id predicted for
        the largest number of frames (the smallest id wins a tie), or
        ``"Manual Review Needed"`` when that id has no label or the model
        produced no frame predictions.
    """
    inputs = processor(audio, return_tensors="pt", sampling_rate=16000, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the per-frame argmax is taken on the raw logits.
    predicted_ids = torch.argmax(logits, dim=-1)

    # Vote across every frame. The ids still carry a leading batch dimension,
    # and a mode taken along the default axis 0 would reduce over that batch
    # dimension instead of over the frames, so flatten first.
    frame_ids = predicted_ids.flatten()
    if frame_ids.numel() == 0:
        return "Manual Review Needed"

    # bincount + argmax yields the most frequent id as a plain int, which
    # matches the int keys of id2label.
    most_common = int(torch.bincount(frame_ids).argmax())
    return model.config.id2label.get(most_common, "Manual Review Needed")

# Replace the loaded model's label map with the two classes predict() reports.
model.config.id2label = {0: "clean", 1: "noisy"}

# Classify a single file and print the result.
# NOTE(review): this sample lives under the Gender-Classification test set —
# confirm it is the intended input for noise detection.
file_path = r"Gender-Classification\Test-Dataset\12.wav"
audio = read_audio(file_path)
classification = predict(audio)
print("The audio was classified as:", classification)

##### Finetuned Model

In [None]:
import logging
from transformers import pipeline, logging as hf_logging
from transformers import Wav2Vec2Processor, Wav2Vec2ForAudioFrameClassification
import torch
import librosa
from scipy.stats import mode

# Limit Transformers output and root-logger output to error-level messages.
hf_logging.set_verbosity_error()
logging.basicConfig(level=logging.ERROR)

# NOTE(review): this local finetuned-model path is overwritten two lines
# below, so the finetuned directory is never loaded — confirm which source is
# intended. The variable name also still says "large" in this cell.
noise_large_model = r"Noise-Detection\finetuned_model"

noise_large_model = "facebook/wav2vec2-large-xlsr-53"
# Loaded once at module level; predict() uses both objects as globals.
# NOTE(review): Wav2Vec2Processor needs tokenizer files as well as a feature
# extractor config — verify that this identifier provides both.
processor = Wav2Vec2Processor.from_pretrained(noise_large_model)
model = Wav2Vec2ForAudioFrameClassification.from_pretrained(noise_large_model)

def read_audio(file_path, target_sr=16000):
    """Load an audio file with librosa and return only its samples.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The sample array from ``librosa.load``; the sampling rate that is
        returned alongside it is discarded.
    """
    loaded = librosa.load(file_path, sr=target_sr)
    samples = loaded[0]
    return samples

def predict(audio):
    """Classify a waveform by majority vote over the per-frame predictions.

    Uses the module-level ``processor`` and ``model``.

    Args:
        audio: Waveform samples accepted by ``processor``; the processor is
            told they are sampled at 16 kHz.

    Returns:
        The ``model.config.id2label`` entry for the class id predicted for
        the largest number of frames (the smallest id wins a tie), or
        ``"Manual Review Needed"`` when that id has no label or the model
        produced no frame predictions.
    """
    inputs = processor(audio, return_tensors="pt", sampling_rate=16000, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the per-frame argmax is taken on the raw logits.
    predicted_ids = torch.argmax(logits, dim=-1)

    # Vote across every frame. The ids still carry a leading batch dimension,
    # and a mode taken along the default axis 0 would reduce over that batch
    # dimension instead of over the frames, so flatten first.
    frame_ids = predicted_ids.flatten()
    if frame_ids.numel() == 0:
        return "Manual Review Needed"

    # bincount + argmax yields the most frequent id as a plain int, which
    # matches the int keys of id2label.
    most_common = int(torch.bincount(frame_ids).argmax())
    return model.config.id2label.get(most_common, "Manual Review Needed")

# Replace the loaded model's label map with the two classes predict() reports.
model.config.id2label = {0: "clean", 1: "noisy"}

# Classify a single file and print the result.
# NOTE(review): this sample lives under the Gender-Classification test set —
# confirm it is the intended input for noise detection.
file_path = r"Gender-Classification\Test-Dataset\12.wav"
audio = read_audio(file_path)
classification = predict(audio)
print("The audio was classified as:", classification)