In [1]:
!pip install pydub
!pip install SpeechRecognition transformers


Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.1-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.1


In [2]:
import os
from pydub import AudioSegment

def convert_all_to_pcm_wav(input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if filename.endswith(".wav"):
            input_path = os.path.join(input_folder, filename)
            output_path = os.path.join(output_folder, filename)

            convert_to_pcm_wav(input_path, output_path)

def convert_to_pcm_wav(input_path, output_path):
    try:
        # Load the audio file
        sound = AudioSegment.from_file(input_path)

        # Export as PCM WAV
        sound.export(output_path, format="wav", codec="pcm_s16le")

        print(f"Conversion successful: {output_path}")

    except Exception as e:
        print(f"Conversion failed for {input_path}: {e}")

# Example usage
input_folder = "/content/drive/MyDrive/Audio3"
output_folder = "/content/drive/MyDrive/Audioclass3"

convert_all_to_pcm_wav(input_folder, output_folder)



Conversion successful: /content/drive/MyDrive/Audioclass3/4001.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4002.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4004.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4005.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4008.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4006.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4009.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4003.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4007.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4011.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4010.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4013.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4014.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4012.wav
Conversion successful: /content/drive/MyDrive/Audioclass3/4016

In [4]:
import os
import csv
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play

def convert_to_wav(input_path, output_path):
    sound = AudioSegment.from_file(input_path)
    sound.export(output_path, format="wav")

def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        transcription = recognizer.recognize_google(audio_data)
        return transcription
    except sr.UnknownValueError:
        print(f"Unable to transcribe audio: {audio_path}")
        return None
    except sr.RequestError as e:
        print(f"Recognition request failed for {audio_path}: {e}")
        return None

def generate_transcripts_and_save_csv(input_folder):
    output_csv = os.path.join(input_folder, "transcripts.csv")

    with open(output_csv, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Audio_File', 'Transcription'])

        for filename in os.listdir(input_folder):
            if filename.endswith((".mp3", ".wav")):
                audio_path = os.path.join(input_folder, filename)

                # Convert to WAV if the file is not in PCM WAV format
                if not audio_path.lower().endswith(".wav"):
                    wav_path = os.path.splitext(audio_path)[0] + ".wav"
                    convert_to_wav(audio_path, wav_path)
                    audio_path = wav_path

                transcribed_text = transcribe_audio(audio_path)

                if transcribed_text is not None:
                    csv_writer.writerow([filename, transcribed_text])

    print(f"Transcripts saved to: {output_csv}")

if __name__ == "__main__":
    main_folder = "/content/drive/MyDrive/Audioclass3"
    generate_transcripts_and_save_csv(main_folder)


Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4008.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4006.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4009.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4011.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4010.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4012.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4015.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4018.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4024.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4031.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4028.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4033.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4038.wav
Unable to transcribe audio: /content/drive/MyDrive/Audioclass3/4

In [8]:
!pip install flask-ngrok

from flask import Flask, render_template, request
from flask_ngrok import run_with_ngrok
import torch
from transformers import BertTokenizer, BertForSequenceClassification

app = Flask(__name__)
run_with_ngrok(app)  # Start ngrok when the app is run

# Load the pre-trained model and tokenizer
model_path = '/content/drive/MyDrive/callrecord/model'  # Replace with the actual path
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=4)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Function to predict the category
def predict_category(text):
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt'
    )

    with torch.no_grad():
        outputs = model(inputs['input_ids'], attention_mask=inputs['attention_mask'])
        logits = outputs.logits

    predicted_class = torch.argmax(logits, dim=1).item()
    return predicted_class

# Flask routes
@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    if request.method == 'POST':
        text = request.form['text']
        predicted_class = predict_category(text)
        return render_template('index.html', text=text, predicted_class=predicted_class)

if __name__ == '__main__':
    app.run()




OSError: Error no file named pytorch_model.bin, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory /content/drive/MyDrive/callrecord/model.