Setting up the environment

In [2]:
import sys
sys.path.insert(0, '..')  # Add parent directory (WEEK 2) to Python path

import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wavfile
from openai import OpenAI
import io
import os
import json
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Audio, display, Markdown
import time
import librosa
import librosa.display
import soundfile as sf
from dotenv import load_dotenv
import gradio as gr
from src.data_processor import process_input
from src.llm_processor import generate_script
from src.tts_generator import generate_audio

# Load environment variables (API Keys)
from dotenv import load_dotenv

# Pull the variables defined in the .env file one directory up into the
# process environment.
load_dotenv("../.env")

# OpenAI client, authenticated with whatever OPENAI_API_KEY resolves to.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Output folders used by this notebook; exist_ok makes re-running the cell safe.
os.makedirs('recordings', exist_ok=True)
os.makedirs('results', exist_ok=True)

print("✅ Setup complete!")

✅ Setup complete!


Setting up OpenAI API key access

In [3]:
import os
from dotenv import load_dotenv

# "../.env" is resolved one folder above the notebook's working directory.
# NOTE(review): this folder was described as the WEEK 1 root here, while the
# sys.path comment in the setup cell calls the parent "WEEK 2" - confirm which.
load_dotenv("../.env")

# Look the key up once, then report whether it was found.
api_key = os.getenv("OPENAI_API_KEY")

print(
    "✅ API Key successfully loaded!"
    if api_key
    else "❌ API Key not found. Check your .env file location."
)

✅ API Key successfully loaded!


Step 3: Text Pre-processing & Chunking

Split the loaded text into sentence-aligned chunks so that each request stays within the API's character limit.

In [4]:
import re
import os

# Load text from the "Text input" folder
rec_dir = 'Text input'

# os.listdir() returns entries in arbitrary order, so sort the matches to make
# the choice of txts[0] reproducible across runs and machines.
txts = sorted(f for f in os.listdir(rec_dir) if f.lower().endswith('.txt'))
if not txts:
    raise FileNotFoundError(f"No .txt files found in {rec_dir!r}")

# Only the first (alphabetically smallest) .txt file is processed.
fname = os.path.join(rec_dir, txts[0])
with open(fname, 'r', encoding='utf-8') as f:
    full_text = f.read()

print(f"✅ Loaded {len(full_text)} characters from {fname}")

def chunk_text_by_sentences(text, max_chars=4000):
    """
    Split text into chunks of at most max_chars characters, breaking on
    sentence boundaries wherever possible.

    Whitespace (including newlines) is first collapsed to single spaces.
    Sentences are then packed greedily into chunks. A single sentence that
    is longer than max_chars is split at word boundaries (and, for a word
    that is itself too long, in the middle of the word) so that no returned
    chunk ever exceeds max_chars.

    Args:
        text: The input string to split.
        max_chars: Maximum length of each returned chunk. Must be >= 1.

    Returns:
        A list of non-empty strings, each at most max_chars long. Empty or
        whitespace-only input yields an empty list.

    Raises:
        ValueError: If max_chars is smaller than 1.
    """
    import textwrap  # local import so the function does not depend on another cell

    if max_chars < 1:
        raise ValueError(f"max_chars must be a positive integer, got {max_chars!r}")

    # 1. Basic cleaning: remove extra whitespace/newlines
    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        return []

    # 2. Split text into sentences: break on the spaces that follow . ! or ?
    sentences = re.split(r'(?<=[.!?]) +', text)

    chunks = []
    current_chunk = ""

    for sentence in sentences:
        if len(sentence) > max_chars:
            # A sentence that can never fit on its own is broken into pieces
            # that do, instead of being emitted as an over-limit chunk.
            pieces = textwrap.wrap(
                sentence,
                width=max_chars,
                break_long_words=True,
                break_on_hyphens=False,
            )
        else:
            pieces = [sentence]

        for piece in pieces:
            # current_chunk carries a trailing space, and one more character
            # is reserved for the space after this piece; this is the same
            # conservative accounting as before, so ordinary text is chunked
            # at the same boundaries.
            if len(current_chunk) + len(piece) + 1 <= max_chars:
                current_chunk += piece + " "
            else:
                # Never emit an empty chunk when nothing has been collected yet.
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = piece + " "

    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks

# Chunk the loaded text using the function's default max_chars limit.
text_chunks = chunk_text_by_sentences(full_text)

# Report how many chunks were produced and the size of each one.
print(f"Processed {len(text_chunks)} chunks for TTS.")
for i, chunk in enumerate(text_chunks):
    # enumerate() is 0-based; i+1 gives human-friendly chunk numbers.
    print(f"Chunk {i+1} length: {len(chunk)} characters")

✅ Loaded 8040 characters from Text input\supplements_scorecard_notes.txt
Processed 3 chunks for TTS.
Chunk 1 length: 3949 characters
Chunk 2 length: 3629 characters
Chunk 3 length: 429 characters
