In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm
import json
from pydub import AudioSegment
from utils.text_to_speech import TextToSpeech, VOICES

In [None]:
# Every lesson this notebook can build. Each name is the folder name used
# under language_notes/spotify_lessons/.
LESSONS = (
    'lesson_1_basics',
    'lesson_2_basic_phrases',
    'lesson_3_transportation_nouns',
    'lesson_4_transportation_phrases',
    'lesson_5_money',
    'lesson_6_hotel',
    'lesson_7_important_words',
    'lesson_8_directions',
    'lesson_9_numbers',
    'lesson_10_more_numbers',
    'lesson_11_big_numbers',
    'lesson_12_very_big_numbers',
    'lesson_13_at_the_restaurant',
    'lesson_14_shopping',
    'lesson_15_time_general',
    'lesson_16_time_of_day',
    'lesson_17_questions',
    'lesson_18_shops',
    'lesson_19_nouns',
    'lesson_20_adjectives_and_adverbs',
)

# The lesson to build on this run; set it to any entry of LESSONS.
LESSON = 'lesson_20_adjectives_and_adverbs'
assert LESSON in LESSONS, f"Unknown lesson: {LESSON}"
print(LESSON)

# Multiplier applied to the Italian clip's duration to get the length of each
# silence gap inserted into the combined audio.
SILENCE_RATE = 2

In [None]:
# Load the selected lesson's transcript; its 'english' and 'italian' columns
# hold the text that is synthesized further down.
transcript_csv = f"language_notes/spotify_lessons/{LESSON}/transcript.csv"
df = pd.read_csv(transcript_csv)

In [None]:
# Quick look at the transcript: number of rows and the available columns.
df.shape[0], df.columns

In [None]:
# Create the text-to-speech helper (imported from utils.text_to_speech); its
# synthesize() method is called once per phrase and language in the loop below.
tts = TextToSpeech()

In [None]:
# Synthesize every transcript row in both languages. For each row we keep the
# original text plus the metadata returned by tts.synthesize().
json_obj = []
for _row_index, row in tqdm(df.iterrows(), total=len(df)):
    obj = {}
    for lang in 'english', 'italian':
        try:
            synth_obj = tts.synthesize(
                text=row[lang],
                voice_name=VOICES[lang]['male'],
                # Italian is spoken slower than the English prompt.
                speaking_rate=0.75 if lang == 'italian' else 1.0,
                pitch=0,
                verbose=False,
            )
        except Exception as e:
            # Show which row failed, then re-raise with the original traceback.
            print(row)
            print(e)
            raise
        obj[lang] = {"text": row[lang]}
        # Copy the synthesis metadata in a fixed order so data.json stays stable.
        for field in ('hash', 'audio_file', 'voice_name', 'speaking_rate', 'pitch'):
            obj[lang][field] = synth_obj[field]

    json_obj.append(obj)

In [None]:
# Persist the synthesis results. The context manager guarantees the file is
# flushed and closed before the next cell reads it back; UTF-8 is explicit
# because ensure_ascii=False writes accented characters verbatim.
with open(f"language_notes/spotify_lessons/{LESSON}/data.json", "w", encoding="utf-8") as data_file:
    json.dump(
        json_obj,
        data_file,
        indent=2,
        ensure_ascii=False,
    )

In [None]:
# Reload the lesson metadata from disk so the following cells can be run
# without repeating the synthesis; the file handle is closed right after.
with open(f'language_notes/spotify_lessons/{LESSON}/data.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

In [None]:
def get_duration(path):
    """Return the length of the audio file at ``path`` in seconds.

    Any error raised while loading the file is printed and ``None`` is
    returned instead, so callers must handle a ``None`` result.
    """
    try:
        return AudioSegment.from_file(path).duration_seconds
    except Exception as err:
        print(f"Error reading audio file: {err}")
    return None

In [None]:
# Measure every synthesized clip and store its length, truncated to whole
# milliseconds, next to the clip's other metadata.
for sample_i, sample_obj in tqdm(enumerate(data), total=len(data)):
    for lang in 'english', 'italian':
        path = sample_obj[lang]['audio_file']
        assert os.path.exists(path), f"Audio file {path} does not exist."
        duration = get_duration(path)
        # get_duration() signals a failed read by returning None; stop here with
        # the offending path instead of a TypeError from `None * 1000`.
        if duration is None:
            raise RuntimeError(f"Could not read the duration of {path}.")
        data[sample_i][lang]['duration_ms'] = int(duration * 1000)

In [None]:
def combine_audio_files(input_paths, output_path):
    """Concatenate audio files in order and export the result as an MP3.

    Args:
        input_paths: Iterable of audio file paths, joined in the given order.
            The same path may appear more than once.
        output_path: Destination path of the exported MP3 file.

    Raises:
        ValueError: If ``input_paths`` is empty, since there is nothing to export.
    """
    input_paths = list(input_paths)
    if not input_paths:
        raise ValueError("combine_audio_files() needs at least one input path.")

    combined_audio = None
    for path in input_paths:
        segment = AudioSegment.from_file(path)
        # The first clip seeds the result; every later clip is appended to it.
        combined_audio = segment if combined_audio is None else combined_audio + segment
    combined_audio.export(output_path, format="mp3")

In [None]:
# Build one practice clip per sample: English prompt, pause, Italian, pause,
# Italian again, pause. Clips that already exist on disk are not rebuilt.
for sample_i, sample_obj in tqdm(enumerate(data), total=len(data)):
    italian_meta = sample_obj['italian']
    english_clip = sample_obj['english']['audio_file']
    italian_clip = italian_meta['audio_file']

    # Pause length = Italian duration x SILENCE_RATE, rounded down to a multiple
    # of 100 ms so it matches one of the silence files on disk.
    scaled_ms = italian_meta['duration_ms'] * SILENCE_RATE
    silence_duration = int((scaled_ms // 100) * 100)
    silence_path = f'data/audio/silence/{silence_duration}ms.mp3'
    assert os.path.exists(silence_path), f"Silence file {silence_path} does not exist."

    combined_audio_path = f'language_notes/spotify_lessons/{LESSON}/audio/{sample_i}.mp3'
    os.makedirs(os.path.dirname(combined_audio_path), exist_ok=True)
    if os.path.exists(combined_audio_path):
        print(f"Skipping {combined_audio_path} as it already exists.")
    else:
        combine_audio_files(
            [english_clip, silence_path, italian_clip, silence_path, italian_clip, silence_path],
            combined_audio_path,
        )
    data[sample_i]['combined_audio_file'] = combined_audio_path

In [None]:
# Save the metadata again, now including durations and combined-audio paths.
# The context manager closes the file so later reads see the complete content.
with open(f'language_notes/spotify_lessons/{LESSON}/data.json', 'w', encoding='utf-8') as data_file:
    json.dump(data, data_file, indent=2, ensure_ascii=False)

In [None]:
# Root of the repository that holds the generated lesson data. Override it with
# the LANGUAGE_APP_REPO_DIR environment variable; the default is the original
# hard-coded location.
REPO_DIR = os.environ.get('LANGUAGE_APP_REPO_DIR', '/Users/stevie/repos/language_app')
assert os.path.exists(REPO_DIR), f"Repository directory {REPO_DIR} does not exist."

# Lessons folder of the frontend project (override with
# LANGUAGE_APP_FRONTEND_LESSONS_DIR). It must already exist, so a wrong path
# fails here instead of silently creating a new directory tree.
FRONTEND_LESSONS_DIR = os.environ.get(
    'LANGUAGE_APP_FRONTEND_LESSONS_DIR',
    '/Users/stevie/repos/language-app-frontend/assets/lessons',
)
assert os.path.isdir(FRONTEND_LESSONS_DIR), f"Frontend lessons directory {FRONTEND_LESSONS_DIR} does not exist."

# Per-lesson destination folder; created on first use, left alone on re-runs.
DEST_DIR = os.path.join(FRONTEND_LESSONS_DIR, LESSON)
os.makedirs(DEST_DIR, exist_ok=True)

In [None]:
# Start the export stage from the metadata saved on disk (addressed through
# REPO_DIR), closing the file handle as soon as it has been parsed.
json_path = os.path.join(REPO_DIR, f'language_notes/spotify_lessons/{LESSON}/data.json')
with open(json_path, encoding='utf-8') as data_file:
    data = json.load(data_file)

In [None]:
# Sanity check: the first sample's combined clip must be reachable from
# REPO_DIR, i.e. the stored paths are relative to the repository root.
path_rel = data[0]['combined_audio_file']
first_clip_abs = os.path.join(REPO_DIR, path_rel)
assert os.path.exists(first_clip_abs)
print(path_rel)

In [None]:
def _stage_audio_asset(source_rel_path, subfolder):
    """Copy one audio file into the lesson's frontend folder.

    Args:
        source_rel_path: Path of the audio file, joined onto REPO_DIR.
        subfolder: Folder below ``DEST_DIR/audio`` that receives the copy.

    Returns:
        The placeholder string ``require("./audio/<subfolder>/<file>"):::``.
        The trailing ``:::`` marks the value so the export step can strip the
        JSON quotes around it.
    """
    source_path = os.path.join(REPO_DIR, source_rel_path)
    assert os.path.exists(source_path), f"Audio file {source_path} does not exist."
    rel_new_path = f'./audio/{subfolder}/{os.path.basename(source_path)}'
    new_path = os.path.join(DEST_DIR, rel_new_path)
    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    # Files already present in the destination are left untouched.
    if not os.path.exists(new_path):
        shutil.copyfile(source_path, new_path)
    return f'require("{rel_new_path}"):::'


# Copy every clip into the frontend project and replace the stored paths with
# require(...) placeholders that point at the copies.
for sample_i, sample_obj in tqdm(enumerate(data), total=len(data)):
    # combined audio file
    data[sample_i]['combined_audio_file'] = _stage_audio_asset(
        sample_obj['combined_audio_file'], 'combined'
    )

    # individual audio files
    for language in 'english', 'italian':
        data[sample_i][language]['audio_file'] = _stage_audio_asset(
            sample_obj[language]['audio_file'], 'individual'
        )

In [None]:
# Write the samples as a JavaScript module: serialize them as JSON, then strip
# the quotes around each `require("..."):::` placeholder so the generated file
# contains real require() calls instead of strings.
js_path = os.path.join(REPO_DIR, f'language_notes/spotify_lessons/{LESSON}/data.js')
text = json.dumps(data, indent=2, ensure_ascii=False)
# In the JSON text a placeholder reads "require(\"<path>\"):::". Rewrite only
# that exact prefix and suffix, so escaped quotes inside lesson text stay
# escaped; unescaping every \" would emit broken JavaScript string literals.
text = text.replace('"require(\\"', 'require("').replace('\\"):::"', '")')
# NOTE(review): `samples` is assigned without const/let; in a strict-mode ES
# module that is an error -- confirm how the frontend bundler treats this file.
text = f"samples = {text}"
text += ';\nexport default samples;'
with open(js_path, 'w', encoding='utf-8') as js_file:
    js_file.write(text)

In [None]:
# Publish the generated module, the JSON metadata and the source transcript
# into the lesson's frontend folder, overwriting any earlier copies.
lesson_dir = os.path.join(REPO_DIR, f'language_notes/spotify_lessons/{LESSON}')
for file_name in ('data.js', 'data.json', 'transcript.csv'):
    shutil.copyfile(
        os.path.join(lesson_dir, file_name),
        os.path.join(DEST_DIR, file_name),
    )