<a href="https://colab.research.google.com/github/renatgabitov/Google-Sheets-Custom-Formulas/blob/main/Transcribe_Videos_Represent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set Up

[Watch tutorial](https://youtu.be/_RUhxtGG83c)

Created by Renat Gabitov from [Represent](https://www.representstudio.com).

In [None]:
# Access Your Google Drive
# Mounting makes the Drive contents reachable through the runtime's filesystem.

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
!pip install assemblyai
import assemblyai as aai

In [None]:
# Add the API key.
# Either paste the key between the quotes below, or leave the placeholder as it
# is: the key is then taken from the ASSEMBLYAI_API_KEY environment variable
# or, if that is not set, typed into a hidden prompt, so the secret never has
# to be saved inside the notebook.
import os
from getpass import getpass

api_key = 'XXXXXX_API_KEY_XXXXXXX'

# The untouched placeholder is recognised by its 'XXXXXX_' prefix.
if api_key.startswith('XXXXXX_'):
    api_key = os.environ.get('ASSEMBLYAI_API_KEY') or getpass('AssemblyAI API key: ')

aai.settings.api_key = api_key

In [None]:
# Enter the file path below.
# Later cells derive the output folder and the output file names from this
# value: everything up to the last '/' becomes the folder the results are
# written to, and the file name without its extension is reused in each
# output name.
file_path = "XXXXXX_FILE_URL_XXXXXX"


## File Naming

In [None]:
# Split the input path into folder, file name, stem and extension.
# Only the text of the path is inspected; nothing is read from disk.

# Extract the filename from the input path (everything after the last '/').
# rfind returns -1 when there is no '/', which makes the whole path the filename.
filename_start = file_path.rfind('/') + 1
filename = file_path[filename_start:]

# Remove the file extension (everything after the last '.') and keep it
# separately. A name without any '.' gets an empty extension instead of
# raising IndexError.
if '.' in filename:
    clean_filename, file_extension = filename.rsplit('.', 1)
else:
    clean_filename, file_extension = filename, ''
print(file_extension)

# Get file folder, including its trailing '/', so a file name can be appended
# to it directly. Cutting by position rather than splitting on the file name's
# text stays correct when that text also occurs earlier in the path.
file_folder = file_path[:filename_start]

# Create the file naming structure
def create_filename(base_name, extension, stem=None):
    """Build an output file name of the form "<base_name> <stem><extension>".

    Args:
        base_name: Label placed at the front of the name, e.g. "Transcript ".
        extension: Suffix appended unchanged, including its leading dot,
            e.g. ".txt".
        stem: Source file name without its extension. When omitted, the
            module-level ``clean_filename`` is used, which keeps existing
            two-argument calls working exactly as before.

    Returns:
        The assembled file name. One space is always inserted between
        ``base_name`` and the stem, in addition to any trailing space that
        ``base_name`` already carries.
    """
    if stem is None:
        # Default to the stem derived from the notebook's input file.
        stem = clean_filename

    return f"{base_name} {stem}{extension}"

# Preview the naming scheme using a sample label and extension.
example_name = create_filename("Base Name Example: ", ".txt")
print(example_name)

## Speed up file processing

In [None]:
# Convert video to audio to speed up upload


def extract_audio(original_file_path):
    """Extract the audio track of a video into an MP3 file next to it.

    Runs the ``ffmpeg`` command-line tool to write a 2-channel, 44100 Hz,
    48k-bitrate MP3 whose path is ``original_file_path`` with the text after
    its last '.' replaced by ``mp3``. The ``ffmpeg`` executable must be
    available on the PATH; no Python ffmpeg package is needed.

    On success the module-level ``file_path`` is switched to the new MP3 so
    that later cells work with the audio file. On failure ``file_path`` is
    left untouched, so later cells still see a file that exists.

    Args:
        original_file_path: Path of the video file to read.

    Returns:
        The path of the MP3 file on success, or ``None`` when ffmpeg could not
        be started or exited with an error.
    """
    import subprocess

    global file_path

    # New file path: same folder and stem, '.mp3' extension
    audio_file_path = original_file_path.rsplit('.', 1)[0] + '.mp3'

    # The arguments are passed as a list, without a shell, so quotes, '$' or
    # backticks inside the path are treated as literal characters.
    command = [
        "ffmpeg",
        "-y",  # overwrite an MP3 left by an earlier run instead of prompting
        "-i", original_file_path,
        "-vn",  # leave out the video stream
        "-ar", "44100",
        "-ac", "2",
        "-ab", "48k",
        "-f", "mp3",
        audio_file_path,
    ]

    try:
        # Execute the ffmpeg command
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: ffmpeg ran and reported a failure.
        # OSError: ffmpeg could not be started at all (e.g. not installed).
        print(f"An error occurred: {e}")
        return None

    # Only switch to the MP3 once it has actually been written.
    file_path = audio_file_path
    print(f"Audio extracted successfully! \n New File Path: {file_path}")
    return audio_file_path

# Extensions treated as video; for these the audio track is extracted first.
video_formats = ['mp4', 'mov', 'webm']

# Look at the extension of the current file_path rather than the
# file_extension computed earlier: extract_audio switches file_path to the
# '.mp3' file, and re-running this cell must not try to convert that MP3 again.
current_extension = file_path.rsplit('.', 1)[-1].lower()

if current_extension in video_formats:
    extract_audio(file_path)

# Transcription

In [None]:
# Transcription settings

# General options, forwarded as keyword arguments to aai.TranscriptionConfig
config_general = dict(
    language_code="en",       # specify language
    speaker_labels=True,      # identifies speakers
    entity_detection=True,    # entity detection
    iab_categories=True,      # topic detection
    auto_chapters=True,       # chapter detection
    word_boost=["aws", "azure", "google cloud"],  # boost words for higher transcription accuracy
    boost_param="high",
)

config = aai.TranscriptionConfig(**config_general)

# Custom spelling.
# Each key is the correct spelling; its list holds the common misspellings.
custom_spelling = {
    "OpenAI": ["open ai"],
    "GPT-4": ["gpt four"],
}
config.set_custom_spelling(custom_spelling)

# The transcriber used by the following cells, built from the settings above
transcriber = aai.Transcriber(config=config)

# Transcribe

In [None]:
# Transcribe

transcript = transcriber.transcribe(file_path)

# A failed job comes back as a transcript in the error state rather than as an
# exception, so stop here instead of announcing success and failing later
# while the result files are being written.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

print("Transcription is complete.")

## Save a spreadsheet with breakdown by sentence

In [None]:
import csv

# Function to convert milliseconds to seconds format
def milliseconds_to_seconds(milliseconds):
    """Format a duration given in milliseconds as "HH:MM:SS".

    The part of the duration below one second is dropped. Each field is
    padded to a width of two; the hours field grows beyond two digits for
    durations of 100 hours or more.
    """
    whole_seconds = milliseconds // 1000
    whole_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return f"{hrs:02}:{mins:02}:{secs:02}"

# Define the CSV file name
csv_file_name = file_folder + create_filename("Sentences: ", ".csv")

# Treat a missing utterance list as empty, so the file is still written with
# its header row instead of the loop failing on None.
utterances = transcript.utterances or []

# Open the CSV file for writing. The encoding is stated explicitly so that
# non-ASCII text is written the same way regardless of the system default.
with open(csv_file_name, 'w', newline='', encoding='utf-8') as csvfile:
    # Create a CSV writer object
    csvwriter = csv.writer(csvfile)

    # Write the header row
    csvwriter.writerow(['Start Time', 'End Time', 'Speaker', 'Text'])

    # Write one row per utterance: start, end, speaker label and spoken text
    for utterance in utterances:
        csvwriter.writerow([
            milliseconds_to_seconds(int(utterance.start)),
            milliseconds_to_seconds(int(utterance.end)),
            f"Speaker {utterance.speaker}",
            utterance.text,
        ])


## Save text-only transcript

In [None]:
paragraphs = transcript.get_paragraphs()

# One paragraph per block, separated by a blank line
full_transcript = "\n\n".join(paragraph.text for paragraph in paragraphs)

# Save the transcript

transcript_file_name = file_folder + create_filename("Transcript ",".txt")

# The encoding is stated explicitly so that non-ASCII text is written the
# same way regardless of the system default.
with open(transcript_file_name, 'w', encoding='utf-8') as transcript_file:
    transcript_file.write(full_transcript)


## Save Subtitles

In [None]:

# Export the transcript as SRT subtitles, requesting 32 characters per caption
srt = transcript.export_subtitles_srt(chars_per_caption=32)

subtitles_file_name = file_folder + create_filename("Subtitles: ", ".srt")

# Save the subtitle file. The encoding is stated explicitly so that non-ASCII
# text is written the same way regardless of the system default.
with open(subtitles_file_name, 'w', encoding='utf-8') as subtitles_file:
    subtitles_file.write(srt)

## Save Chapters

In [None]:
# Treat a missing chapter list as empty so the cell still writes a file
# instead of failing while looping over None.
chapters = transcript.chapters or []

chapters_file_name = file_folder + create_filename("Chapters ", ".txt")

# Get general chapter titles: one "<start> - <title>" line per chapter
chapter_strings = [
    f"{milliseconds_to_seconds(chapter.start)} - {chapter.gist}"
    for chapter in chapters
]

# Spacer between the list of titles and the summaries
chapter_strings.append("\n\n")

# Get summaries: numbered title, time range and summary for each chapter
for i, chapter in enumerate(chapters, start=1):
    chapter_time = milliseconds_to_seconds(chapter.start) + " --> " + milliseconds_to_seconds(chapter.end)
    chapter_strings.append(f"#{i} {chapter.gist}\n{chapter_time}\n{chapter.summary} \n\n")

full_chapters = "\n".join(chapter_strings)

# The encoding is stated explicitly so that non-ASCII text is written the
# same way regardless of the system default.
with open(chapters_file_name, 'w', encoding='utf-8') as chapters_file:
    chapters_file.write(full_chapters)