In [1]:
#!pip install torchaudio==0.13.0
#!pip install torch==1.13.0
import pyaudio
import wave
import time
import os
import whisper
from MODEL import GenderDetect_V01
import torch
from torch.utils.data import Dataset
import torchaudio
import soundfile
import random

# Prefer the GPU whenever CUDA is usable; otherwise run everything on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

class PRIDICTION:
    """Turn one audio file into a fixed-length feature tensor for inference.

    The pipeline applied by ``__getitem__`` is: load -> move to ``device`` ->
    random gain -> resample -> mix down to one channel -> cut / right-pad to
    ``num_samples`` -> ``transformation``.

    Parameters
    ----------
    audio_dir : path-like
        Stored on the instance only; ``__getitem__`` uses the path passed to it.
    transformation : torch.nn.Module
        Feature extractor applied to the prepared waveform. It is moved to
        ``device`` on construction.
    target_sample_rate : int
        Sample rate the waveform is resampled to when the file's rate differs.
    num_samples : int
        Exact number of samples (second dimension) the waveform is cut or
        zero-padded to before the transformation.
    device : str or torch.device
        Device the waveform and the transformation are placed on.
    """

    def __init__(self,audio_dir,transformation,target_sample_rate,num_samples,device):
        self.audio_dir = audio_dir
        self.device = device
        self.transformation = transformation.to(self.device)
        self.target_sample_rate = target_sample_rate
        self.num_samples = num_samples

    def __getitem__(self, audio_dir):
        """Load ``audio_dir`` and return ``transformation`` applied to the prepared waveform."""
        signal, sr = torchaudio.load(audio_dir)
        signal = signal.to(self.device)
        # NOTE(review): a random gain in [2, 3] makes the features differ from
        # call to call for the same file. This looks like training-time
        # augmentation left in the inference path - confirm before removing.
        signal = random.uniform(2,3)*signal
        signal = self._resample_if_necessary(signal, sr)
        signal = self._mix_down_if_necessary(signal)
        signal = self._cut_if_necessary(signal)
        signal = self._right_pad_if_necessary(signal)
        signal = self.transformation(signal)
        return signal

    def _resample_if_necessary(self, signal, sr):
        """Resample ``signal`` from ``sr`` to ``target_sample_rate``; no-op when they match."""
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            # The resampler is constructed on the CPU, but the signal has
            # already been moved to self.device; keep both on the same device
            # so the resampling does not fail when running on CUDA.
            resampler = resampler.to(signal.device)
            signal = resampler(signal)
        return signal

    def _mix_down_if_necessary(self, signal):
        """Average all channels (dim 0) into one, keeping the channel dimension."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _cut_if_necessary(self, signal):
        """Drop every sample past ``num_samples`` along dim 1."""
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        return signal

    def _right_pad_if_necessary(self, signal):
        """Zero-pad the end of dim 1 so the signal is ``num_samples`` long."""
        length_signal = signal.shape[1]
        if length_signal < self.num_samples:
            num_missing_samples = self.num_samples - length_signal
            # (left, right) padding of the last dimension: pad on the right only.
            last_dim_padding = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_padding)
        return signal


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def predict_user_input(audio_dir,transformation,target_sample_rate,num_samples,device):
    """Classify the speaker of one audio file as "female" or "male".

    The result is printed as ``Predicted: '<label>'`` and also returned.

    Parameters
    ----------
    audio_dir : str
        Path of the audio file to classify.
    transformation : torch.nn.Module
        Feature extractor applied to the waveform (callers in this notebook
        pass a ``MelSpectrogram``).
    target_sample_rate : int
        Sample rate the audio is resampled to before feature extraction.
    num_samples : int
        Number of samples the waveform is cut / padded to.
    device : str or torch.device
        Device used for the features and the model.

    Returns
    -------
    str
        The predicted label, one of ``"female"`` or ``"male"``.
    """
    class_mapping = ["female",
                "male"]

    # Use the arguments the caller supplied instead of rebuilding hard-coded
    # copies of them inside the function.
    user_input = PRIDICTION(audio_dir,transformation,target_sample_rate,num_samples,device)

    GD = GenderDetect_V01(input_shape=1,
                  output_shape=len(class_mapping))
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict_saved = torch.load("model_0_state.pth", map_location=device)
    GD.load_state_dict(state_dict_saved)
    # The features live on `device`; the model has to be on the same device.
    GD.to(device)
    GD.eval()

    # Add the batch dimension: [batch size, num_channels, fr, time]
    features = user_input[audio_dir].unsqueeze(0)

    with torch.no_grad():
        predictions = GD(features)

    # predictions[0] holds the scores of the single item in the batch.
    predicted_index = int(predictions[0].argmax(0))
    predicted = class_mapping[predicted_index]

    print(f"Predicted: '{predicted}'")
    return predicted


In [3]:
# ---- Settings that never change between iterations (built once, not per loop) ----
SAMPLE_RATE = 22050*3
NUM_SAMPLES = 22050*3

device = "cuda" if torch.cuda.is_available() else "cpu"

mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=80
)

_SERIAL_FILE = "Record_Serial.txt"
# Raw string: the backslashes are path separators, not escape sequences.
_RECORD_DIR = r"D:\PyTorch_Try_02\AUDIO\input"
_RECORD_RATE = 44100
_RECORD_CHUNK = 1024

# Whisper model, loaded on the first transcription request and then reused.
_whisper_model = None


def _next_recording_serial():
    """Return the current recording number as a string.

    The counter file is (re)created with "1" when it is missing or does not
    contain an integer.
    """
    try:
        with open(_SERIAL_FILE, "r") as serial:
            s = serial.read().strip()
        int(s)  # validate now so the increment after recording cannot fail
    except (OSError, ValueError):
        s = "1"
        with open(_SERIAL_FILE, "w") as serial:
            serial.write(s)
    return s


def _record_until_interrupted(path):
    """Record mono 16-bit audio from the microphone into ``path`` until interrupted.

    Recording stops on KeyboardInterrupt (e.g. interrupting the kernel). The
    audio stream and the PyAudio instance are released even if an error occurs.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format = pyaudio.paInt16,channels = 1,rate = _RECORD_RATE,input= True, frames_per_buffer = _RECORD_CHUNK)
        frames = []
        try:
            while True:
                frames.append(stream.read(_RECORD_CHUNK))
        except KeyboardInterrupt:
            pass
        finally:
            stream.stop_stream()
            stream.close()
        sample_width = audio.get_sample_size(pyaudio.paInt16)
    finally:
        audio.terminate()

    with wave.open(path,"wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(_RECORD_RATE)
        wf.writeframes(b''.join(frames))


def _offer_transcription(path):
    """Ask whether to transcribe ``path`` and print the Whisper transcript on 'Y'/'y'."""
    global _whisper_model
    transcribe = input("Do want to transcribe:")
    if transcribe == 'Y' or transcribe == 'y':
        if _whisper_model is None:
            _whisper_model = whisper.load_model("base")
        result = _whisper_model.transcribe(path)
        print(result["text"])


while True:
    try:
        choice = int(input("\nEnter 1.To record\n2.To select an existing file:"))
    except ValueError:
        # Non-numeric input used to crash the loop; ask again instead.
        print("Invalid choice!!")
        continue

    if choice == 1:
        s = _next_recording_serial()
        file = s+".wav"
        path = os.path.join(_RECORD_DIR,file)

        _record_until_interrupted(path)

        # Advance the counter only after the recording has been written.
        with open(_SERIAL_FILE,"w") as serial:
            serial.write(str(int(s)+1))

        #predict call
        predict_user_input(path,mel_spectrogram,SAMPLE_RATE,NUM_SAMPLES,device)
        _offer_transcription(path)

    else:
        # Raw string: in a normal literal "\1" is an octal escape and would
        # print a control character instead of the example file name.
        path = input(r"Enter path of 'wav' file(format eg->D:\Folder_Name\1.wav):")
        # Tolerate surrounding whitespace and the quotes added by "Copy as path".
        path = path.strip().strip('"')
        if os.path.exists(path):
            #predict call
            predict_user_input(path,mel_spectrogram,SAMPLE_RATE,NUM_SAMPLES,device)
            _offer_transcription(path)
        else:
            print("Invalid Path!!")

    conti = str(input("Do you want to try again??(Y/N):"))
    if conti != "Y" and conti != "y":
        print("Thank You!!")
        break



Enter 1.To record
2.To select an existing file:2
Enter path of 'wav' file(format eg->D:\PyTorch_Try_02\AUDIO.wav):D:\PyTorch_Try_02\AUDIO\test\female\B4_1.wav
Predicted: 'male'
Do want to transcribe:y




 And my Furby Boom Tai, which is a toy that lives with me like my pet. All the inspiration around me gives me ideas and motivates me to keep doing something unique. Like, to write songs with my own, to make my school notes on cool post-its, to write songs to learn my faith in school topics and to decorate my books.
Do you want to try again??(Y/N):n
Thank You!!
