# SpeedScribe Colab

___

<a href="https://fakeyou.com/"><img src="https://fakeyou.com/fakeyou/FakeYou-Logo.png" alt="FakeYou Logo. Click here to go to the official website."></a>


This is an ASR transcription notebook for Tacotron2 and similar TTS models such as VITS.

Notebook author - [justinjohn-03](https://github.com/justinjohn0306)
___

In [None]:
# @markdown ### Clone GitHub repository
!git clone https://github.com/justinjohn0306/SpeedScribe.git
!pip install faster-whisper ctranslate2==4.4.0

# Add the repository path to Python’s search path
import sys
sys.path.append('/content/SpeedScribe')

# Import the ASR function from the script
from SpeedScribe import execute_asr


In [None]:
# @markdown ### Choose the source of the WAV files
import os
source_option = "Google Drive"  # @param ["Google Drive", "Local"]

if source_option == "Google Drive":
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Provide the path to the ZIP file in Google Drive
    zip_file = "/content/drive/MyDrive/path_to_your_zip_file.zip"  # @param {type:"string"}

else:
    # Upload the ZIP file locally
    from google.colab import files
    uploaded = files.upload()
    # An empty result means the upload was cancelled; stop with a clear message
    # instead of the bare StopIteration that next(iter({})) would raise.
    if not uploaded:
        raise ValueError("No file was uploaded. Re-run this cell and choose a ZIP file.")
    zip_file = next(iter(uploaded))  # Get the uploaded file name

# Fail early if the path does not point at a file (e.g. the placeholder Drive
# path was left unchanged) rather than later, inside the extraction step.
if not os.path.isfile(zip_file):
    raise FileNotFoundError(
        f"ZIP file not found: {zip_file}. Check the path and re-run this cell."
    )

# Get the name of the ZIP file without the extension
zip_filename = os.path.splitext(os.path.basename(zip_file))[0]

# Set the output folder based on the ZIP file's name
output_folder = f"/content/{zip_filename}"

# Function to handle unzipping
def unzip_files(zip_file, extract_to="./dataset"):
    """Unpack every member of a ZIP archive into a directory.

    Args:
        zip_file: Path of the ZIP archive to read.
        extract_to: Directory that receives the extracted members.

    Returns:
        The ``extract_to`` value, unchanged.
    """
    from zipfile import ZipFile

    with ZipFile(zip_file, mode='r') as archive:
        archive.extractall(path=extract_to)

    print(f"Files extracted to {extract_to}")
    return extract_to

# Extract the archive into the per-ZIP output folder. The returned path is kept
# in `extract_to`, which the transcription cell passes to execute_asr as its
# first argument.
extract_to = unzip_files(zip_file, extract_to=output_folder)


In [None]:
# @markdown ### Define the parameters for transcription

# Model to use; passed to execute_asr as its `model_size` argument.
# The dropdown choices are listed in the @param annotation.
model_size = "faster-whisper-large-v3-turbo-ct2"  # @param ["tiny", "base", "small", "medium", "large-v2", "large-v3", "faster-whisper-large-v3-turbo-ct2"]

# Select the language for transcription ("Automatic Detection" is one of the choices)
language_name = "English"  # @param ["Automatic Detection", "Afrikaans", "Amharic", "Arabic", "Assamese", "Azerbaijani", "Bashkir", "Belarusian", "Bulgarian", "Bengali", "Tibetan", "Breton", "Bosnian", "Catalan", "Czech", "Welsh", "Danish", "German", "Greek", "English", "Spanish", "Estonian", "Basque", "Persian", "Finnish", "Faroese", "French", "Galician", "Gujarati", "Hausa", "Hawaiian", "Hebrew", "Hindi", "Croatian", "Haitian Creole", "Hungarian", "Armenian", "Indonesian", "Icelandic", "Italian", "Japanese", "Javanese", "Georgian", "Kazakh", "Khmer", "Kannada", "Korean", "Latin", "Luxembourgish", "Lingala", "Lao", "Lithuanian", "Latvian", "Malagasy", "Maori", "Macedonian", "Malayalam", "Mongolian", "Marathi", "Malay", "Maltese", "Burmese", "Nepali", "Dutch", "Norwegian Nynorsk", "Norwegian", "Occitan", "Punjabi", "Polish", "Pashto", "Portuguese", "Romanian", "Russian", "Sanskrit", "Sindhi", "Sinhala", "Slovak", "Slovenian", "Shona", "Somali", "Albanian", "Serbian", "Sundanese", "Swedish", "Swahili", "Tamil", "Telugu", "Tajik", "Thai", "Turkmen", "Tagalog", "Turkish", "Tatar", "Ukrainian", "Urdu", "Uzbek", "Vietnamese", "Yiddish", "Yoruba", "Chinese", "Cantonese"]

# Numeric precision; passed to execute_asr as its last argument.
# NOTE(review): presumably the model's compute type — confirm against SpeedScribe.
precision = "float16"  # @param ["float16", "float32", "int8"]


In [None]:
# @markdown ### Start the transcription process
import os
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="huggingface_hub")

# Re-import the Whisper entry point right before use. The SenseVoice section of
# this notebook binds a different function to the same global name
# `execute_asr`; if that section was run last, the call below would otherwise
# reach the wrong implementation and fail on its different argument list.
from SpeedScribe import execute_asr

# Get the name of the ZIP file without the extension
zip_filename = os.path.splitext(os.path.basename(zip_file))[0]

# Set the output folder to be based on the ZIP file's name
output_folder = f"/content/{zip_filename}"

# Run the transcription process on the extracted dataset. The return value is
# used below as the path of the file that was written.
output_file_path = execute_asr(extract_to, output_folder, model_size, language_name, precision)

# Rename the transcript after the ZIP file, inside the output folder
final_output_file_path = os.path.join(output_folder, f"{zip_filename}.txt")
os.rename(output_file_path, final_output_file_path)

# Download the transcription file
from google.colab import files
files.download(final_output_file_path)


# Transcribe using SenseVoice

In [None]:
# @markdown ### Clone GitHub repository
!git clone https://github.com/justinjohn0306/SpeedScribe.git
!pip install funasr==1.1.3 modelscope

# Add the repository path to Python’s search path
import sys
sys.path.append('/content/SpeedScribe')

# Import the ASR function from the script
from sensevoice import execute_asr


In [None]:
# @markdown ### Choose the source of the WAV files
import os
source_option = "Google Drive"  # @param ["Google Drive", "Local"]

if source_option == "Google Drive":
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Provide the path to the ZIP file in Google Drive
    zip_file = "/content/drive/MyDrive/test.zip"  # @param {type:"string"}

else:
    # Upload the ZIP file locally
    from google.colab import files
    uploaded = files.upload()
    # An empty result means the upload was cancelled; stop with a clear message
    # instead of the bare StopIteration that next(iter({})) would raise.
    if not uploaded:
        raise ValueError("No file was uploaded. Re-run this cell and choose a ZIP file.")
    zip_file = next(iter(uploaded))  # Get the uploaded file name

# Fail early if the path does not point at a file (e.g. the default Drive path
# does not exist) rather than later, inside the extraction step.
if not os.path.isfile(zip_file):
    raise FileNotFoundError(
        f"ZIP file not found: {zip_file}. Check the path and re-run this cell."
    )

# Get the name of the ZIP file without the extension
zip_filename = os.path.splitext(os.path.basename(zip_file))[0]

# Set the output folder based on the ZIP file's name
output_folder = f"/content/{zip_filename}"

# Function to handle unzipping
def unzip_files(zip_file, extract_to="./dataset"):
    """Unpack every member of a ZIP archive into a directory.

    Args:
        zip_file: Path of the ZIP archive to read.
        extract_to: Directory that receives the extracted members.

    Returns:
        The ``extract_to`` value, unchanged.
    """
    from zipfile import ZipFile

    with ZipFile(zip_file, mode='r') as archive:
        archive.extractall(path=extract_to)

    print(f"Files extracted to {extract_to}")
    return extract_to

# Extract the archive into the per-ZIP folder. The returned path is kept in
# `extract_to`, which the transcription cell passes to execute_asr as its
# first argument.
extract_to = unzip_files(zip_file, extract_to=output_folder)


In [None]:
# @markdown ### Define the parameters for transcription

# Select the language for transcription (language code; "auto" is one of the choices)
language_name = "ja"  # @param ["auto", "zh", "en", "ja", "yue", "ko"]

# Select the device; passed to execute_asr as its `device` argument
device = "cuda"  # @param ["cuda", "cpu"]


In [None]:
# @markdown ### Start the transcription process
import os
import logging
import warnings

# Silence modelscope's log output and UserWarnings
logging.getLogger("modelscope").setLevel(logging.ERROR)
warnings.filterwarnings("ignore", category=UserWarning, module="modelscope")

# Re-import the SenseVoice entry point right before use. The Whisper section of
# this notebook binds a different function to the same global name
# `execute_asr`; if that section was run last, the call below would otherwise
# reach the wrong implementation and fail on its different argument list.
from sensevoice import execute_asr

# Name of the ZIP file without the extension; used to name the transcript
zip_filename = os.path.splitext(os.path.basename(zip_file))[0]

# Transcripts go to a dedicated folder, separate from the extracted dataset
output_folder = "/content/output_transcriptions"

os.makedirs(output_folder, exist_ok=True)

# Run the transcription process on the extracted dataset. The return value is
# used below as the path of the file that was written.
output_file_path = execute_asr(extract_to, output_folder, language_name, device)

# Rename the transcript after the ZIP file, inside the output folder
final_output_file_path = os.path.join(output_folder, f"{zip_filename}.txt")

os.rename(output_file_path, final_output_file_path)

# Download the transcription file
from google.colab import files
files.download(final_output_file_path)


# **Misc**

In [None]:
#@markdown ### **Dot-processing for unprocessed transcripts**
#@markdown This section allows you to add dots at the end of each line in your transcript file.
#@markdown You can upload the transcript file from your local system and process it here.

from google.colab import files

# Upload your transcript file from your local system
uploaded = files.upload()

# `uploaded` is falsy when nothing was provided; otherwise its first key is
# taken as the name of the file to process.
if uploaded:
    input_filename = next(iter(uploaded))
    # The processed copy gets the same name with a "processed_" prefix
    output_filename = "processed_" + input_filename

    # Function to ensure each line in the transcript ends with a dot
    def DotsAdderTotxtFile(input_file, output_file):
        """Copy a transcript, making every non-empty line end with punctuation.

        Each line is stripped of surrounding whitespace. A '.' is appended
        unless the line already ends with '.', '?' or '!'. Blank lines are
        written back as blank lines rather than being turned into a lone '.'.

        Args:
            input_file: Path of the transcript to read (UTF-8 text).
            output_file: Path of the processed transcript to write (UTF-8 text).
        """
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale default
        with open(input_file, 'r', encoding='utf-8') as f_input, \
                open(output_file, 'w', encoding='utf-8') as f_output:
            for line in f_input:
                line = line.strip()
                # Add a dot only to non-empty lines that don't end with '.', '?', or '!'
                if line and not line.endswith(('.', '?', '!')):
                    line += '.'
                f_output.write(line + '\n')

    # Call the function with the uploaded file
    DotsAdderTotxtFile(input_filename, output_filename)

    print(f"Dots Processing complete! Processed file: {output_filename}")
    print("You can download the processed file now.")

    # Provide download link for the processed file
    files.download(output_filename)
else:
    # Reached when `uploaded` is empty, i.e. no file was provided
    print("No file uploaded. Please upload a transcript file to process.")
