<a href="https://colab.research.google.com/github/sanailahi1/openai.fm/blob/main/OpenAI_fm_automated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the dependencies used by the cells below: the selenium and pydub
# Python packages, plus the chromium-chromedriver system package.
# The leading "!" is notebook shell-escape syntax, so this cell only runs
# inside IPython/Colab, not as a plain Python script.
# NOTE(review): pygame is imported by the next cell but is not installed here;
# presumably it is preinstalled on the runtime - confirm.
!pip install selenium pydub
!apt-get update
!apt install chromium-chromedriver

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import UnexpectedAlertPresentException, TimeoutException, NoAlertPresentException

import os
from selenium.webdriver.chrome.options import Options
from pydub import AudioSegment
import pygame
import re
import glob, time



# 1. Make sure the download target exists before Chrome is pointed at it
download_dir = '/content'
os.makedirs(download_dir, exist_ok=True)

# 2. Build the Chrome options: command-line flags first, then download prefs
chrome_options = webdriver.ChromeOptions()
for chrome_flag in ("--no-sandbox", "--headless"):
    chrome_options.add_argument(chrome_flag)

# Preferences that send downloads to download_dir without prompting
download_prefs = {
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "profile.default_content_settings.popups": 0,
    "safebrowsing.enabled": True,
    "safebrowsing.disable_download_protection": True,
}
chrome_options.add_experimental_option("prefs", download_prefs)

# 3. Start the browser; Service() is created with no arguments
driver = webdriver.Chrome(options=chrome_options, service=Service())

# 4. Load the target site once in the initial tab and pause for the page load
website_url = "https://www.openai.fm"
driver.get(website_url)
time.sleep(3)

# Function to split text into parts of up to max_length chars, preferring sentence ends
def split_text_into_parts(text, max_length=900):
    """
    Split ``text`` into non-empty parts of at most ``max_length`` characters.

    Each window of ``max_length`` characters is cut at the first rule that applies:

    1. If the remaining text fits in the window, it is taken whole.
    2. Otherwise the part ends just after the last dot ('.') in the window.
    3. If the window has no dot, it ends at the last space in the window.
    4. If it has neither, the window is cut at exactly ``max_length`` characters
       so the limit is never exceeded.

    Parts are stripped of surrounding whitespace; parts that are empty after
    stripping are dropped.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per part (must be >= 1).

    Returns:
        A list of non-empty strings, in their original order.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    parts = []
    start = 0
    text_length = len(text)

    while start < text_length:
        end = min(start + max_length, text_length)

        if end == text_length:
            # The remainder fits in a single part; no need to split it further.
            split_point = text_length
        else:
            window = text[start:end]
            last_dot = window.rfind('.')
            last_space = window.rfind(' ')
            if last_dot != -1:
                # Keep the dot with the sentence it terminates.
                split_point = start + last_dot + 1
            elif last_space > 0:
                # A space at index 0 would not advance, hence the "> 0".
                split_point = start + last_space
            else:
                # No natural boundary: hard cut so the limit is still honoured.
                split_point = end

        part = text[start:split_point].strip()
        if part:
            # Skip whitespace-only segments instead of emitting empty parts.
            parts.append(part)
        start = split_point

    return parts




# Drop every character that lies outside the Basic Multilingual Plane
def remove_non_bmp_characters(text):
    """Return ``text`` with all code points above U+FFFF removed."""
    highest_bmp_code_point = 0xFFFF
    return ''.join(
        character for character in text
        if ord(character) <= highest_bmp_code_point
    )

# Drive the site end to end: read the text, split it, open one tab per part,
# generate and download the audio on each tab, wait for the downloads to land
# on disk, and always shut the browser down afterwards.

# Voice-style instructions typed into the site's instructions textarea on every tab
VOICE_INSTRUCTIONS = '''Affect/personality: A cheerful guide

Tone: Friendly, clear, and reassuring, creating a calm atmosphere and making the listener feel confident and comfortable.

Pronunciation: Clear, articulate, and steady, ensuring each instruction is easily understood while maintaining a natural, conversational flow.

Pause: Brief, purposeful pauses after key instructions (e.g., "cross the street" and "turn right") to allow time for the listener to process the information and follow along.

Emotion: Warm and supportive, conveying empathy and care, ensuring the listener feels guided and safe throughout the journey.'''  # Adjust as needed

# Upper bound (seconds) to wait for the browser to finish writing the downloads
DOWNLOAD_WAIT_SECONDS = 120

# Text to convert
text_to_convert = input("Enter the text to convert: ")

# Split the text into parts; drop empty parts so no tab is opened for nothing
text_parts = [part for part in split_text_into_parts(text_to_convert) if part]

# Paths of the .mp3 files that appeared in download_dir during this run
downloaded_files = []

# Snapshot the .mp3 files already present so new downloads can be told apart
mp3_pattern = os.path.join(download_dir, "*.mp3")
existing_mp3_files = set(glob.glob(mp3_pattern))
expected_downloads = 0  # number of download clicks that succeeded

try:
    if not text_parts:
        print("No text to convert.")

    # Create multiple tabs based on the number of text parts
    # (one less since the first tab is already open)
    for _ in range(len(text_parts) - 1):
        driver.execute_script("window.open('');")

    # Fill in, generate and download on each tab
    for i, part in enumerate(text_parts):
        driver.switch_to.window(driver.window_handles[i])  # Switch to the respective tab

        # Open the website in the current tab
        driver.get(website_url)
        time.sleep(3)  # Wait for the page to load

        try:
            # Select the voice
            character_name = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div/main/div[1]/div/div[2]/div/div[6]/div"))
            )
            character_name.click()

            # Replace the default instructions with our own
            instructions_field = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div/main/div[2]/div[1]/div[2]/div/textarea"))
            )
            instructions_field.clear()
            instructions_field.send_keys(VOICE_INSTRUCTIONS)
            time.sleep(0.5)  # Wait for the field to be filled

            text_input = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "prompt"))
            )
            text_input.clear()

            # Remove non-BMP characters from the text part before typing it
            sanitized_part = remove_non_bmp_characters(part)
            if not sanitized_part:
                print(f"Nothing left to send on tab {i+1} after sanitizing. Skipping this part.")
                continue

            # Fill in the text input with the sanitized part text
            text_input.send_keys(sanitized_part)

            # Wait until the generate button is clickable and then click it
            generate_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div/footer/div/div/div[3]/div"))
            )
            generate_button.click()

            # Wait for the download button to become clickable, then click it
            download_button = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div/footer/div/div/div[1]"))
            )
            download_button.click()
            expected_downloads += 1

        except UnexpectedAlertPresentException:
            print("Caught an unexpected alert. Attempting to accept it.")
            try:
                alert = driver.switch_to.alert
                alert_text = alert.text
                print(f"Alert Text: {alert_text}")
                alert.accept() # Or alert.dismiss() depending on desired behavior
                print("Alert accepted.")
            except NoAlertPresentException:
                print("No alert was present after catching UnexpectedAlertPresentException.")
            continue # Move on to the next tab/part
        except TimeoutException:
            print(f"Timeout waiting for element on tab {i+1}. Skipping this part.")
            continue # Move on to the next tab/part
        except Exception as e:
            # Best effort: one failing tab must not abort the remaining parts
            print(f"An unexpected error occurred on tab {i+1}: {e}")
            continue # Move on to the next tab/part

    # Do not close the browser while downloads are still being written:
    # wait until every expected .mp3 has appeared and no partial download
    # (assumed to be Chrome's "*.crdownload" temp files) is left.
    if expected_downloads:
        partial_pattern = os.path.join(download_dir, "*.crdownload")
        deadline = time.time() + DOWNLOAD_WAIT_SECONDS
        while True:
            new_mp3_files = set(glob.glob(mp3_pattern)) - existing_mp3_files
            partial_files = glob.glob(partial_pattern)
            if len(new_mp3_files) >= expected_downloads and not partial_files:
                break
            if time.time() >= deadline:
                print(f"Timed out waiting for downloads: got {len(new_mp3_files)} of {expected_downloads} files.")
                break
            time.sleep(1)
        downloaded_files.extend(sorted(new_mp3_files))
finally:
    # quit() closes every tab and ends the driver session, even after an error
    driver.quit()



In [None]:
from pydub import AudioSegment
import glob
import os

# Merge every downloaded .mp3 in folder_path, in download order, into one WAV file.

# Path to your folder containing audio files
folder_path = "/content" #current directory
output_path = "merged_output.wav"


def _download_index(path):
    """Return the numeric duplicate counter of a download, or 0 if it has none.

    The files in this folder are named "name.mp3", "name (1).mp3",
    "name (2).mp3", ...; the un-numbered file is the first download and
    therefore maps to 0.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.endswith(")") and " (" in stem:
        counter = stem[stem.rfind("(") + 1:-1]
        if counter.isdecimal():
            return int(counter)
    return 0


# Grab all .mp3 files and order them by download counter. A plain alphabetical
# sort would put "(10)" before "(2)" and the un-numbered first file last.
audio_files = sorted(
    glob.glob(os.path.join(folder_path, "*.mp3")),
    key=lambda path: (_download_index(path), path),
)

# Initialize an empty AudioSegment
combined = AudioSegment.empty()

print(f"Found {len(audio_files)} .mp3 files to attempt to merge.")

# Loop through and append each file, trying to load as WAV first, then MP3
for audio_file in audio_files:
    print(f"Attempting to process: {audio_file}")
    try:
        # Try loading as WAV first, as suggested by manual download behavior
        audio = AudioSegment.from_wav(audio_file)
        combined += audio
        print(f"Successfully processed as WAV: {audio_file}")
    except Exception as e_wav:
        print(f"Could not process {audio_file} as WAV: {e_wav}")
        try:
            # If WAV fails, try loading as MP3
            audio = AudioSegment.from_mp3(audio_file)
            combined += audio
            print(f"Successfully processed as MP3: {audio_file}")
        except Exception as e_mp3:
            # Best effort: an undecodable file is reported and skipped
            print(f"Could not process {audio_file} as MP3 either: {e_mp3}")
            print(f"Skipping file: {audio_file} due to decoding errors.")

print(f"Attempted to merge {len(audio_files)} files into {output_path}")
if len(combined) > 0:
    # Export the result as WAV (since the content is likely WAV)
    combined.export(output_path, format="wav")
    print("Merging seems successful.")
else:
    # Nothing was decoded: do not overwrite an earlier merged file with an empty one
    print("Merging resulted in an empty output. Check downloaded files and error messages above.")

Found 10 .mp3 files to attempt to merge.
Attempting to process: /content/openai-fm-fable-audio.mp3
Successfully processed as WAV: /content/openai-fm-fable-audio.mp3
Attempting to process: /content/openai-fm-fable-audio (1).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (1).mp3
Attempting to process: /content/openai-fm-fable-audio (2).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (2).mp3
Attempting to process: /content/openai-fm-fable-audio (3).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (3).mp3
Attempting to process: /content/openai-fm-fable-audio (4).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (4).mp3
Attempting to process: /content/openai-fm-fable-audio (5).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (5).mp3
Attempting to process: /content/openai-fm-fable-audio (6).mp3
Successfully processed as WAV: /content/openai-fm-fable-audio (6).mp3
Attempting to process: /content/openai-fm-f

In [None]:

# Show what is currently in /content, to check that everything is right
content_root = '/content'
print("Contents of /content:")
for entry_name in os.listdir(content_root):
    entry_path = os.path.join(content_root, entry_name)
    if os.path.isfile(entry_path):
        # Regular files are reported together with their size
        size_in_bytes = os.path.getsize(entry_path)
        print(f"- {entry_name} (file, size: {size_in_bytes} bytes)")
        continue
    if os.path.isdir(entry_path):
        print(f"- {entry_name} (directory)")

Contents of /content:
- .config (directory)
- openai-fm-fable-audio.mp3 (file, size: 2839244 bytes)
- openai-fm-fable-audio (5).mp3 (file, size: 2042444 bytes)
- openai-fm-fable-audio (2).mp3 (file, size: 3165644 bytes)
- openai-fm-fable-audio (8).mp3 (file, size: 3187244 bytes)
- openai-fm-fable-audio (4).mp3 (file, size: 3585644 bytes)
- merged_output.wav (file, size: 28068044 bytes)
- openai-fm-fable-audio (3).mp3 (file, size: 3124844 bytes)
- openai-fm-fable-audio (9).mp3 (file, size: 417644 bytes)
- openai-fm-fable-audio (7).mp3 (file, size: 3540044 bytes)
- openai-fm-fable-audio (6).mp3 (file, size: 3295244 bytes)
- openai-fm-fable-audio (1).mp3 (file, size: 2870444 bytes)
- sample_data (directory)
