In [1]:
from google.colab import drive

# Attach Google Drive to the runtime's filesystem at the mount point below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install google-cloud-speech

Collecting google-cloud-speech
  Downloading google_cloud_speech-2.23.0-py2.py3-none-any.whl (274 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/274.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/274.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.5/274.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: google-cloud-speech
Successfully installed google-cloud-speech-2.23.0


In [3]:
import os
# Location of the service-account key file on the mounted Drive; exported through
# the environment variable so later cells can pick it up.
SERVICE_ACCOUNT_KEY_PATH = "/content/drive/MyDrive/PhD/Forced_alignment/mindful-server-408912-9b007c83d67b.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_PATH

In [4]:
from google.cloud import speech_v1p1beta1 as speech
from google.cloud import storage

In [5]:
# Create a GCS client
storage_client = storage.Client()

# Set your GCS bucket name
bucket_name = "audio_files_srprki_jezik"

# List all objects in the specified bucket
blobs = storage_client.list_blobs(bucket_name)

# Nested mapping of collected audio files:
#   {user_folder_name: {folder_name: [object path, ...]}}
wav_files_by_user_folder = {}

# Iterate through all objects in the bucket, keeping only .wav files
for blob in blobs:
    if not blob.name.lower().endswith('.wav'):
        continue

    # The first two path components name the user folder and its sub-folder.
    path_parts = blob.name.split('/')
    if len(path_parts) < 2:
        # A .wav stored directly at the bucket root has a single path
        # component; unpacking it into two names would raise ValueError and
        # abort the whole listing, so report it and move on.
        print(f"Skipping {blob.name}: no user/folder prefix in object path")
        continue
    user_folder_name, folder_name = path_parts[:2]

    # setdefault creates the inner dict / list the first time a key is seen.
    user_entry = wav_files_by_user_folder.setdefault(user_folder_name, {})
    user_entry.setdefault(folder_name, []).append(blob.name)


In [6]:
# Language passed to the recognizer for every request
language_code = 'sr-Latn'

# Client object through which recognition requests are sent
client = speech.SpeechClient()

# Recognition settings, collected as keyword arguments before building the config.
# Word time offsets are enabled because the transcript writer reads per-word
# start/end times from the response.
recognition_settings = dict(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=44100,
    language_code=language_code,
    enable_word_time_offsets=True,
)
config = speech.RecognitionConfig(**recognition_settings)

In [7]:
def process_transcript(audio_path, response,
                       output_dir="/content/drive/MyDrive/PhD/Forced_alignment/transcript"):
    """Write the transcripts and per-word timestamps of ``response`` to a text file.

    The output file is ``{output_dir}/{audio_path without extension}_transcript.txt``.
    For every result, in order, one ``Transcript: ...`` line is written followed by
    one ``Word: ..., start: ..., end: ...`` line per word of that result.

    Args:
        audio_path: Path of the audio object, e.g. ``"user/folder/name.wav"``;
            its extension is dropped to build the output file name.
        response: Recognition response. Only ``response.results`` is read, and
            only the first alternative of each result is used.
        output_dir: Root directory under which the transcript file is written.
            Missing parent directories are created.

    Returns:
        None. No file is created when ``response.results`` is empty.
    """
    results = list(response.results)
    if not results:
        return

    stem = os.path.splitext(audio_path)[0]
    output_file_name = f"{output_dir}/{stem}_transcript.txt"

    parent_dir = os.path.dirname(output_file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Open the file once for the whole response. Re-opening it in 'w' mode for
    # each result would truncate everything written for the earlier results.
    # UTF-8 is set explicitly so non-ASCII letters are written the same way
    # regardless of the runtime's locale.
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        for result in results:
            alternative = result.alternatives[0]
            output_file.write(f"Transcript: {alternative.transcript}\n")

            for word_info in alternative.words:
                # The offsets expose .seconds/.microseconds (timedelta-style);
                # total_seconds() yields the same value as a single float.
                start_time = word_info.start_time.total_seconds()
                end_time = word_info.end_time.total_seconds()
                output_file.write(
                    f"Word: {word_info.word}, start: {start_time}, end: {end_time}\n"
                )


In [8]:
# Transcribe every collected file, writing the results on Drive under a
# folder tree that mirrors the bucket's <user>/<folder>/ layout.

# (audio_path, exception) pairs for files that could not be processed, kept so
# the failures can be inspected or retried after the run.
failed_files = []

for user_folder, folders in wav_files_by_user_folder.items():
    print(f"User: {user_folder}")
    print(f"Number of folders: {len(folders)}")

    # Create the per-user folder on Drive if it doesn't exist
    folder_path = f"/content/drive/MyDrive/PhD/Forced_alignment/transcript/{user_folder}"
    if not os.path.exists(folder_path):
        os.makedirs(folder_path, exist_ok=True)
        print(f"Folder created: {folder_path}")

    for folder, files in folders.items():
        print(f"Folder: {folder}")
        print(f"Number of files: {len(files)}")

        # Create the sub-folder on Drive if it doesn't exist
        folder_path = f"/content/drive/MyDrive/PhD/Forced_alignment/transcript/{user_folder}/{folder}"
        if not os.path.exists(folder_path):
            os.makedirs(folder_path, exist_ok=True)
            print(f"Folder created: {folder_path}")

        for audio_path in files:
            # Configure the audio input: the file is referenced by its GCS URI
            audio = speech.RecognitionAudio(uri=f"gs://{bucket_name}/{audio_path}")
            # Perform the speech recognition
            try:
                response = client.recognize(config=config, audio=audio)
                process_transcript(audio_path, response)
            except Exception as exc:
                # Best effort: one bad file must not stop the batch, but the
                # reason is reported instead of being silently dropped.
                # Catching Exception rather than using a bare except also
                # leaves KeyboardInterrupt alone, so the run can be stopped.
                failed_files.append((audio_path, exc))
                print(f"{audio_path} -> {type(exc).__name__}: {exc}")

if failed_files:
    print(f"Files that failed: {len(failed_files)}")

User: 1052_Resampled
Number of folders: 5
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled
Folder: 0
Number of files: 59
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled/0
Folder: 1
Number of files: 60
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled/1
1052_Resampled/1/2_1_0_mvmd15n__19-04-21-05-48-07.wav
Folder: 2
Number of files: 59
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled/2
Folder: 3
Number of files: 62
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled/3
Folder: 4
Number of files: 62
Folder created: /content/drive/MyDrive/PhD/Forced_alignment/transcript/1052_Resampled/4
