This notebook generates the voice-over audio locally.
- The script is either passed in directly or fetched from the vector DB (the most recent unprocessed entry).
- The TTS API then converts the script into an audio file (the code below writes WAV; convert to MP3 afterwards if needed).

In [31]:
from TTS.api import TTS
import soundfile as sf
import librosa

In [32]:
from langchain_community.llms import Ollama # will be used for prompting
from langchain.vectorstores import Chroma # will be used for vectordb store
from langchain_ollama import OllamaEmbeddings
from langchain.schema import Document # will be used to store text in vector store 
import os

In [33]:
# Directory (relative to the notebook) where the Chroma store is persisted.
PERSIST_DIR = "vectordb"

# Both the embedding function and the prompting LLM use the local "llama3" Ollama model.
embedding = OllamaEmbeddings(model="llama3")
llm = Ollama(model="llama3")

# Open the persisted vector store, wired to the embedding function above.
db = Chroma(embedding_function=embedding, persist_directory=PERSIST_DIR)

In [34]:
from langchain.schema import Document  # or adjust import to your LangChain version

def get_most_recent_unprocessed_idea(db):
    """Return the newest stored idea whose metadata is not flagged as processed.

    Args:
        db: Vector store exposing ``get(include=[...])`` that returns a dict
            with parallel ``"documents"`` and ``"metadatas"`` lists (and,
            when available, ``"ids"``).

    Returns:
        A ``Document`` built from the selected entry's text and metadata, or
        ``None`` when every entry is processed or the store is empty.

    Notes:
        * An entry counts as unprocessed when its metadata has no truthy
          ``"processed"`` value; entries stored without metadata are treated
          as unprocessed.
        * Entries are ranked by their ``"timestamp"`` metadata value. On ties
          the first entry in store order wins.
        * Entries without a timestamp cannot be ranked; one of them is only
          returned (first in store order) when no timestamped candidate
          exists, instead of aborting the lookup with a ``KeyError``.
        * Timestamps are compared with ``>`` as stored, so they must be
          mutually comparable (a mix of types still raises ``TypeError``).
    """
    results = db.get(include=["documents", "metadatas"])

    documents = results.get("documents") or []
    metadatas = results.get("metadatas") or [None] * len(documents)
    store_ids = results.get("ids") or [None] * len(documents)

    newest = None        # (timestamp, store_id, metadata, text) of best ranked entry
    unranked = None      # first unprocessed entry that carries no timestamp

    for store_id, meta, text in zip(store_ids, metadatas, documents):
        meta = meta or {}  # the store may hand back None for metadata-less entries
        if meta.get("processed", False):
            continue

        timestamp = meta.get("timestamp")
        if timestamp is None:
            if unranked is None:
                unranked = (None, store_id, meta, text)
        elif newest is None or timestamp > newest[0]:
            # Strict '>' keeps the earliest entry among equal timestamps.
            newest = (timestamp, store_id, meta, text)

    chosen = newest if newest is not None else unranked
    if chosen is None:
        print("No unprocessed ideas found.")
        return None

    _, store_id, meta, text = chosen
    document = Document(page_content=text, metadata=meta)

    # Prefer the ID recorded in the metadata; fall back to the store's own ID.
    print(f"Most recent unprocessed idea ID: {meta.get('id') or store_id}")
    return document


In [35]:
# Fetch the newest unprocessed idea and, when one exists, show its script text.
if (most_recent_idea := get_most_recent_unprocessed_idea(db)):
    print(most_recent_idea.page_content)


Most recent unprocessed idea ID: 45a68668-ca97-4a70-b313-8d2cbb0c3d0d
Here's a brand new script idea for an AWS-related Instagram video:

**Title:** "Unlocking Serverless Power with AWS Lambda"

**Hook:** "Ready to level up your app development game? Let's talk about serverless computing and how AWS Lambda can revolutionize the way you build!"

**What it is:** "AWS Lambda is a serverless compute service that runs your code in response to events, without you having to manage any servers. Think of it like a superhero sidekick – it does all the heavy lifting so you can focus on writing amazing apps!"

**When to use it:** "Use AWS Lambda when you need to process data streams, handle API requests, or integrate with other services like Alexa or Google Assistant. It's perfect for real-time analytics, IoT, or machine learning projects where scalability and reliability are key."

**How to use it (high-level):** "To get started, simply define your function in code, upload it to AWS Lambda, and c

In [36]:
# --- Text-to-speech settings (tune here) ---
TTS_MODEL_NAME = "tts_models/en/ljspeech/glow-tts"
RAW_AUDIO_PATH = "output_raw.wav"   # audio exactly as synthesized
FINAL_AUDIO_PATH = "output.wav"     # audio after the speed-up
SPEED_FACTOR = 1.5                  # values > 1 shorten the clip

# Fail early with a clear message instead of an AttributeError on `.page_content`
# when the lookup above found nothing to narrate.
if most_recent_idea is None:
    raise RuntimeError("No unprocessed idea available; nothing to synthesize.")
if not most_recent_idea.page_content.strip():
    raise ValueError("The selected idea has empty text; nothing to synthesize.")

tts = TTS(model_name=TTS_MODEL_NAME, progress_bar=False, gpu=False)

# Generate audio
tts.tts_to_file(
    text=most_recent_idea.page_content,
    file_path=RAW_AUDIO_PATH,
)

# Speed up by SPEED_FACTOR; sr=None makes librosa keep the file's own sample rate,
# which is then reused when writing the final file.
y, sr = librosa.load(RAW_AUDIO_PATH, sr=None)
y_fast = librosa.effects.time_stretch(y, rate=SPEED_FACTOR)
sf.write(FINAL_AUDIO_PATH, y_fast, sr)

 > tts_models/en/ljspeech/glow-tts is already downloaded.
 > vocoder_models/en/ljspeech/multiband-melgan is already downloaded.
 > Using model: glow_tts
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:0
 | > fft_size:1024
 | > power:1.1
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:50.0
 | > mel_fmax:7600.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 > Vocoder Model: multiband_melgan
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resam