<a href="https://colab.research.google.com/github/sudomason/ADSBexchange-MLAT/blob/main/Whisper_atcosim3v1.1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install transformers torch librosa soundfile

import os
import torch
from transformers import pipeline
import librosa
import numpy as np
from google.colab import files
import soundfile as sf

# Build the speech-recognition pipeline once; transcribe_audio reads it as `pipe`.
try:
    # Prefer the GPU when CUDA is present, otherwise run on the CPU.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="luigisaetta/whisper-atcosim3",
        device=device,
    )
    print(f"Pipeline set up successfully on device: {device}")
except Exception as setup_error:
    # Report the failure, then re-raise so the cell stops here.
    print(f"Error setting up pipeline: {setup_error}")
    raise

def transcribe_audio(file_path):
    """Transcribe one audio file with the module-level ``pipe`` ASR pipeline.

    The file is read with soundfile, averaged down to mono when it has more
    than one dimension, resampled to 16 kHz when its sample rate differs,
    peak-normalized, cast to float32 and handed to the pipeline.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text when the pipeline output is a dict (or a
        non-empty list whose first item is a dict) with a ``"text"`` key;
        ``str(output)`` for any other output shape; ``None`` if any step
        raised (the error is printed, not propagated).
    """
    try:
        # Load audio using soundfile
        audio, sr = sf.read(file_path)

        if len(audio) == 0:
            raise ValueError("The audio file is empty or could not be read correctly.")

        print(f"Processing {file_path}")
        print(f"Audio shape: {audio.shape}, Sample rate: {sr}")

        # Convert multi-channel audio to mono by averaging across axis 1
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)

        # Resample to 16kHz if necessary
        if sr != 16000:
            print(f"Resampling from {sr} Hz to 16000 Hz")
            audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
            sr = 16000

        print(f"Processed audio shape: {audio.shape}, Sample rate: {sr}")

        # Peak-normalize. An all-silent clip has a peak of 0; dividing by it
        # would turn every sample into NaN, so such a clip is left unscaled.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak
        audio = audio.astype(np.float32)

        # Transcribe using the pipeline
        transcription = pipe({"raw": audio, "sampling_rate": sr})

        print(f"Raw pipeline output: {transcription}")

        if isinstance(transcription, dict) and "text" in transcription:
            return transcription["text"]
        if isinstance(transcription, list) and len(transcription) > 0 and "text" in transcription[0]:
            return transcription[0]["text"]

        print(f"Unexpected transcription format: {type(transcription)}")
        return str(transcription)

    except Exception as e:
        # Best-effort per file: report and signal failure with None so the
        # caller can carry on with the remaining uploads.
        print(f"An error occurred processing {file_path}: {str(e)}")
        print(f"Error type: {type(e).__name__}")
        return None

def upload_and_process_files():
    """Prompt for a Colab upload and transcribe every uploaded .wav file.

    Each .wav upload is written to the working directory under its own name
    and passed to ``transcribe_audio``. Uploads whose name does not end in
    ``.wav`` (case-insensitive) are reported and left out of the result.

    Returns:
        dict mapping each .wav filename to its transcription, or to the
        string "Transcription failed or returned null" when
        ``transcribe_audio`` returned a falsy value.
    """
    print("Please upload your .wav files:")
    uploaded = files.upload()

    results = {}

    for filename, content in uploaded.items():
        if not filename.lower().endswith('.wav'):
            # Tell the user rather than dropping the file without a trace.
            print(f"Skipping {filename}: not a .wav file")
            continue

        with open(filename, 'wb') as f:
            f.write(content)

        transcription = transcribe_audio(filename)
        results[filename] = transcription or "Transcription failed or returned null"

    return results

# Main execution: upload and transcribe, then write and download one report.
try:
    transcriptions = upload_and_process_files()

    # One three-line record per file: name, transcription, dashed separator.
    separator = "-" * 50
    with open('transcriptions.txt', 'w') as report:
        for name, text in transcriptions.items():
            report.write(
                f"File: {name}\n"
                f"Transcription: {text}\n"
                f"{separator}\n"
            )

    print("Transcriptions saved to 'transcriptions.txt'")
    files.download('transcriptions.txt')
except Exception as run_error:
    # Report the failure without re-raising.
    print(f"An error occurred during processing: {run_error}")
    print(f"Error type: {type(run_error).__name__}")

In [None]:
# prompt: combine 'transcriptions' into single csv

import csv

# Write the `transcriptions` dict as CSV: a header row, then one row per file.
with open('transcriptions.csv', 'w', newline='') as out:
    rows = csv.writer(
        out,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    rows.writerow(['File', 'Transcription'])
    rows.writerows([name, text] for name, text in transcriptions.items())

# Hand the finished CSV to the browser for download.
files.download('transcriptions.csv')


Export timestamped transcriptions as one .txt file per recording, plus a combined .txt and a combined .csv

In [None]:
# Install required libraries
!pip install transformers torch librosa soundfile pandas

import os
import torch
from transformers import pipeline
import librosa
import numpy as np
from google.colab import files
import soundfile as sf
import pandas as pd
from datetime import timedelta

# Create the ASR pipeline once; transcribe_audio reads it as `pipe`.
try:
    # Run on CUDA when torch reports it as available, else on the CPU.
    has_gpu = torch.cuda.is_available()
    device = "cuda" if has_gpu else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="luigisaetta/whisper-atcosim3",
        device=device,
    )
    print(f"Pipeline set up successfully on device: {device}")
except Exception as setup_error:
    # Print the reason, then re-raise so the cell stops here.
    print(f"Error setting up pipeline: {setup_error}")
    raise

def format_timestamp(seconds):
    """Format a time offset in seconds as an ``H:MM:SS`` string.

    Args:
        seconds: Offset in seconds (int or float); it is rounded to the
            nearest whole second. May be ``None`` when no timestamp is
            available.

    Returns:
        ``str(timedelta)`` of the rounded offset, e.g. ``"0:01:05"``, or
        ``"unknown"`` when ``seconds`` is ``None``.
    """
    # The Whisper pipeline can report a chunk's end timestamp as None (for
    # example when the audio is cut off mid-word); round(None) would raise
    # TypeError and abort the whole export, so map it to a placeholder.
    if seconds is None:
        return "unknown"
    return str(timedelta(seconds=round(seconds)))

def transcribe_audio(file_path):
    """Transcribe one audio file into timestamped chunks.

    The file is read with soundfile, averaged down to mono when it has more
    than one dimension, resampled to 16 kHz when its sample rate differs,
    peak-normalized, cast to float32 and handed to the module-level ``pipe``
    with ``return_timestamps=True``.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The value of the pipeline output's ``"chunks"`` key when the output
        is a dict containing it; ``[]`` for any other output shape; ``None``
        if any step raised (the error is printed, not propagated).
    """
    try:
        # Load audio using soundfile
        audio, sr = sf.read(file_path)

        if len(audio) == 0:
            raise ValueError("The audio file is empty or could not be read correctly.")

        print(f"Processing {file_path}")
        print(f"Audio shape: {audio.shape}, Sample rate: {sr}")

        # Convert multi-channel audio to mono by averaging across axis 1
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)

        # Resample to 16kHz if necessary
        if sr != 16000:
            print(f"Resampling from {sr} Hz to 16000 Hz")
            audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
            sr = 16000

        print(f"Processed audio shape: {audio.shape}, Sample rate: {sr}")

        # Peak-normalize. An all-silent clip has a peak of 0; dividing by it
        # would turn every sample into NaN, so such a clip is left unscaled.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak
        audio = audio.astype(np.float32)

        # Transcribe using the pipeline with timestamps
        transcription = pipe({"raw": audio, "sampling_rate": sr}, return_timestamps=True)

        print(f"Raw pipeline output: {transcription}")

        if isinstance(transcription, dict) and "chunks" in transcription:
            return transcription["chunks"]

        print(f"Unexpected transcription format: {type(transcription)}")
        return []

    except Exception as e:
        # Best-effort per file: report and signal failure with None so the
        # caller can carry on with the remaining uploads.
        print(f"An error occurred processing {file_path}: {str(e)}")
        print(f"Error type: {type(e).__name__}")
        return None

def upload_and_process_files():
    """Prompt for a Colab upload and transcribe every uploaded .wav file.

    Each .wav upload is written to the working directory under its own name
    and passed to ``transcribe_audio``. Uploads whose name does not end in
    ``.wav`` (case-insensitive) are reported and left out of the result.

    Returns:
        dict mapping each .wav filename to whatever ``transcribe_audio``
        returned for it, or to the string
        "Transcription failed or returned null" when that value was falsy
        (``None`` or an empty result).
    """
    print("Please upload your .wav files:")
    uploaded = files.upload()

    results = {}

    for filename, content in uploaded.items():
        if not filename.lower().endswith('.wav'):
            # Tell the user rather than dropping the file without a trace.
            print(f"Skipping {filename}: not a .wav file")
            continue

        with open(filename, 'wb') as f:
            f.write(content)

        transcription = transcribe_audio(filename)
        results[filename] = transcription or "Transcription failed or returned null"

    return results

# Main execution: upload and transcribe, then export one transcript per file
# plus a combined TXT and a combined CSV.
try:
    transcriptions = upload_and_process_files()

    # Column order of the combined CSV. Passing it to the DataFrame explicitly
    # means the CSV still gets a header row when nothing was transcribed.
    columns = ['File', 'Start Time', 'End Time', 'Text']

    all_transcriptions = []
    for filename, chunks in transcriptions.items():
        if not isinstance(chunks, list):
            # Failed files carry a message string instead of a chunk list.
            print(f"Skipping {filename}: {chunks}")
            continue

        # Format every chunk once; the same rows feed the per-file transcript
        # and the combined outputs.
        rows = [
            {
                'File': filename,
                'Start Time': format_timestamp(chunk['timestamp'][0]),
                'End Time': format_timestamp(chunk['timestamp'][1]),
                'Text': chunk['text'],
            }
            for chunk in chunks
        ]

        # Save individual transcript
        transcript_path = f'{filename}_transcript.txt'
        with open(transcript_path, 'w', encoding='utf-8') as f:
            for row in rows:
                f.write(f"[{row['Start Time']} - {row['End Time']}] {row['Text']}\n")
        print(f"Individual transcript saved to '{transcript_path}'")
        files.download(transcript_path)

        all_transcriptions.extend(rows)

    # Save all transcriptions to a single TXT file
    with open('all_transcriptions.txt', 'w', encoding='utf-8') as f:
        for trans in all_transcriptions:
            f.write(f"File: {trans['File']}\n")
            f.write(f"[{trans['Start Time']} - {trans['End Time']}] {trans['Text']}\n")
            f.write("-" * 50 + "\n")

    print("Combined transcriptions saved to 'all_transcriptions.txt'")
    files.download('all_transcriptions.txt')

    # Save all transcriptions to a single CSV file
    df = pd.DataFrame(all_transcriptions, columns=columns)
    df.to_csv('all_transcriptions.csv', index=False)
    print("Combined transcriptions saved to 'all_transcriptions.csv'")
    files.download('all_transcriptions.csv')

except Exception as e:
    # Report the failure without re-raising.
    print(f"An error occurred during processing: {str(e)}")
    print(f"Error type: {type(e).__name__}")