<a href="https://colab.research.google.com/github/kamalesh003/NoiseCancellationTranscriptionModel/blob/main/Tesla_Oscillator_Noise_Cancellation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install soundfile torch transformers librosa



In [9]:
import numpy as np
import scipy.signal as signal
import soundfile as sf
import librosa
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

class NoiseCancellationTranscriptionModel:
    """Suppress a narrow frequency band in an audio file, then transcribe it.

    The audio is passed through a Butterworth band-stop filter centred on
    ``center_freq`` and the filtered waveform is decoded to text with a
    Wav2Vec2 CTC model (greedy arg-max decoding).
    """

    def __init__(self, model_name_or_path="facebook/wav2vec2-large-960h",
                 center_freq=1000.0, bandwidth=200.0, order=4,
                 sampling_rate=16000):
        """Load the pretrained processor/model and store the filter settings.

        Args:
            model_name_or_path: Identifier or path handed to
                ``from_pretrained`` for both the processor and the CTC model.
            center_freq: Centre of the rejected band, in the same units as
                ``sampling_rate`` (Hz).
            bandwidth: Full width of the rejected band (Hz); the band spans
                ``center_freq - bandwidth / 2`` to ``center_freq + bandwidth / 2``.
            order: Order passed to ``scipy.signal.butter``.
            sampling_rate: Rate the audio is resampled to on load and the rate
                reported to the processor.
        """
        self.processor = Wav2Vec2Processor.from_pretrained(model_name_or_path)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_name_or_path)
        self.center_freq = center_freq
        self.bandwidth = bandwidth
        self.order = order
        self.sampling_rate = sampling_rate

    def apply_bandstop_filter(self, audio_data):
        """Return ``audio_data`` with the configured band attenuated.

        Args:
            audio_data: Array-like of audio samples taken at
                ``self.sampling_rate``.

        Returns:
            The filtered samples as a NumPy array with the same shape as the
            input.

        Raises:
            ValueError: If the stop band does not lie strictly between 0 and
                the Nyquist frequency, or if it has non-positive width.
        """
        nyquist_rate = 0.5 * self.sampling_rate
        low_edge = self.center_freq - self.bandwidth / 2.0
        high_edge = self.center_freq + self.bandwidth / 2.0

        # Fail early with a message in Hz instead of letting SciPy complain
        # about normalised critical frequencies.
        if not 0.0 < low_edge < high_edge < nyquist_rate:
            raise ValueError(
                f"Stop band [{low_edge}, {high_edge}] Hz must satisfy "
                f"0 < low < high < Nyquist ({nyquist_rate} Hz)."
            )

        # Second-order sections keep the (2 * order)-pole band-stop filter
        # numerically well conditioned; a single high-order (b, a) polynomial
        # is prone to round-off error for narrow bands.
        sos = signal.butter(
            self.order,
            [low_edge / nyquist_rate, high_edge / nyquist_rate],
            btype='bandstop',
            output='sos',
        )

        # Causal filtering, i.e. the same response the (b, a) form would give.
        return signal.sosfilt(sos, audio_data)

    def transcribe_audio(self, audio_file):
        """Load an audio file, band-stop filter it and transcribe the result.

        Args:
            audio_file: Path (or other source accepted by ``librosa.load``)
                of the recording to transcribe.

        Returns:
            The list of decoded strings produced by the processor's
            ``batch_decode`` (one entry for the single input waveform).
        """
        # librosa resamples to the configured rate while loading.
        audio_data, _ = librosa.load(audio_file, sr=self.sampling_rate)
        filtered_audio = self.apply_bandstop_filter(audio_data)

        # Hand the processor a float32 array directly; converting the whole
        # waveform to a Python list first is slow and gains nothing.
        waveform = np.asarray(filtered_audio, dtype=np.float32)
        inputs = self.processor(waveform, sampling_rate=self.sampling_rate,
                                return_tensors="pt", padding=True)
        # Keep inputs on the same device as the model in case it was moved.
        inputs = inputs.to(self.model.device)

        with torch.no_grad():
            # Forward every tensor the processor produced, so an
            # attention_mask is used whenever the checkpoint provides one.
            logits = self.model(**inputs).logits

        # Greedy CTC decoding: most likely token per frame, then collapse.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.processor.batch_decode(predicted_ids)

        return transcription


In [10]:

# Demo: notch-filter a sample recording and print what the model hears.
if __name__ == "__main__":
    # Recording to process (path inside the Colab runtime).
    audio_file = '/content/eng_m10.wav'

    # Build the combined filter + speech-recognition pipeline with defaults.
    model = NoiseCancellationTranscriptionModel()

    # Filter the audio, run recognition and show the decoded text.
    transcription = model.transcribe_audio(audio_file)
    print("Transcription:", transcription)


Some weights of the model checkpoint at facebook/wav2vec2-large-960h were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
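You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.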

Transcription: ["PLANS THOUGH WELL UNDERWAY FOR RACES TO MARS AN THE MOON IN NINETEEN NINETY TWO BY SOLA'S SALE THE RACE TO MARS IS TO COMMEMORATE CALUMITY'S JOURNEY TO THE NEW WORLD FIVE HUNDRED YEARS AGO AND THE WANT OF THE MOON IS TO PROMOTE THE USE OF SOLA SALES IN SPACE EXPLORATION"]
