In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.agents import initialize_agent, load_tools, AgentType
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL

In [10]:
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load the variables it defines.
# NOTE(review): presumably this is where the API key for the Gemini model
# created later in the notebook comes from — confirm.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [46]:
llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')

In [25]:
# Build an agent driven by `llm` and equipped with a single PythonREPLTool.
# NOTE(review): a Python REPL tool presumably executes model-generated code in
# this kernel — confirm it is only run in a disposable environment.
agent = create_python_agent(
     llm = llm,
     tool = PythonREPLTool()
)

In [30]:
agent.run('code for prime numbers')

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 16.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


'```python\ndef is_prime(n):\n  """\n  Checks if n is a prime number (assuming n > 1).\n  """\n  for i in range(2, int(n**0.5) + 1):\n    if n % i == 0:\n      return False\n  return True\n\nprimes = []\nfor i in range(2, 100):\n  if is_prime(i):\n    primes.append(i)\n\nprint(primes)\n``` \n```\n[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n```'

In [37]:
class ScriptAgent:
    """Minimal wrapper that forwards a prompt to the notebook-level ``llm``."""

    def generate_script(self, prompt):
        """Invoke ``llm`` with ``prompt`` and return the ``content`` of its reply."""
        return llm.invoke(prompt).content

In [38]:
s = ScriptAgent()

In [40]:
a = s.generate_script('write a script for youtube short for the title drink water regularly')

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


In [17]:
from langchain.agents import tool

### script generation agent

In [49]:
@tool
def generate_script(prompt: str) -> str:
    """Returns a script"""
    # Forward the prompt to the notebook-level `llm` and hand back only the
    # text content of its reply.
    return llm.invoke(prompt).content

In [55]:
# Rebinds `agent` (earlier bound to the Python REPL agent) to a new agent whose
# only tool is `generate_script`. No `agent=` type is passed here, so whatever
# default `initialize_agent` applies is used.
agent = initialize_agent(
    tools=[generate_script],
    llm = llm
)

In [None]:
import langchain

# Enable LangChain's global debug tracing for this one run only. The `finally`
# clause guarantees the flag is switched back off even when the run raises
# (e.g. the 429 quota errors seen earlier in this notebook) or is interrupted;
# otherwise every later cell would keep emitting debug traces.
langchain.debug = True
try:
    agent.run('write a script for youtube short for the title drink water regularly')
finally:
    langchain.debug = False

### Text-to-Image agent

In [64]:
from diffusers import DiffusionPipeline
import torch

# Load the Stable Diffusion XL base pipeline, requesting the "fp16" weight
# variant from safetensors files and float16 as the torch dtype.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")

# # if using torch < 2.0
# # pipe.enable_xformers_memory_efficient_attention()

# # pipe.to("cuda")
# The pipeline is not moved to the GPU wholesale (see the commented-out line
# above); model CPU offload is enabled instead.
# NOTE(review): presumably this keeps sub-models on the CPU and moves each one
# to the GPU only while it is in use — confirm against the diffusers docs.
pipe.enable_model_cpu_offload()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [65]:
import bitsandbytes as bnb
import torch.nn as nn

def quantize_model_to_4bit(model):
    """Swap every ``nn.Linear`` found inside ``model`` for a ``bnb.nn.Linear4bit``.

    Each replacement is built with the same ``in_features`` / ``out_features``
    and bias setting as the layer it replaces, and is pointed at the original
    layer's weight (and bias) tensors through ``.data``.

    The model is modified in place and the same object is returned.

    NOTE(review): only the raw tensors are copied over here; whether
    bitsandbytes actually packs them to 4 bits afterwards (e.g. on a later
    device move) is not visible in this function — confirm.
    """
    # Snapshot the targets first so the module tree is not edited while
    # ``named_modules()`` is still walking it.
    linear_layers = [
        (qualified_name, layer)
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, nn.Linear)
    ]

    for qualified_name, layer in linear_layers:
        # "a.b.c" -> parent path "a.b", attribute "c"; walk down to the parent.
        parent_path, _, attr_name = qualified_name.rpartition('.')
        owner = model
        for step in (parent_path.split('.') if parent_path else ()):
            owner = getattr(owner, step)

        has_bias = layer.bias is not None
        replacement = bnb.nn.Linear4bit(layer.in_features, layer.out_features, bias=has_bias)
        replacement.weight.data = layer.weight.data
        if has_bias:
            replacement.bias.data = layer.bias.data
        setattr(owner, attr_name, replacement)

    return model

# Quantize the UNet part of the pipeline
pipe.unet = quantize_model_to_4bit(pipe.unet)

In [66]:
# Run the pipeline on one text prompt, take the first image it returns and
# display it.
# NOTE(review): `image.show()` presumably opens an external viewer; ending the
# cell with a bare `image` would render it inline in the notebook — confirm.
prompt = "a boy wait in a bustand"
output = pipe(prompt)
image = output.images[0]
image.show()

  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [33]:
@tool
def generate_image(prompt):
    """generates image"""
    # The docstring above is left untouched on purpose: LangChain's @tool uses
    # it as the tool description shown to the model.
    import os  # local import so this cell does not depend on another cell

    output_path = 'outputs/images/generated_img.jpg'

    # Make sure the destination folder exists *before* running the expensive
    # diffusion pipeline; saving into a missing directory would otherwise fail
    # only after the image had already been generated.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Generate with the notebook-level `pipe` and keep the first image.
    image = pipe(prompt).images[0]
    image.show()
    image.save(output_path)
    return image

In [34]:
# Agent whose only tool is `generate_image`, driven by the same `llm` as the
# other agents in this notebook. No `agent=` type is passed, so whatever
# default `initialize_agent` applies is used.
image_agent = initialize_agent(
    tools = [generate_image],
    llm = llm
)

In [None]:
import langchain

# Enable LangChain's global debug tracing for this one run only. The `finally`
# clause resets the flag even if the run raises or is interrupted (image
# generation is slow and likely to be cancelled by hand), so later cells do not
# keep emitting debug traces.
langchain.debug = True
try:
    image = image_agent.run('a boy dreaming his future')
finally:
    langchain.debug = False

### Audio generation 

In [60]:
from gtts import gTTS

@tool
def generate_audio(script):
    """Generates speech for given script"""
    # Synthesize English speech for `script` with gTTS, write it to a fixed
    # file name, and return that name.
    filename = "output.mp3"
    gTTS(text=script, lang='en').save(filename)
    return filename

# Example usage
script = "This is a test script for generating audio using Google Text-to-Speech."
audio_file = generate_audio(script)
print(f"Generated audio: {audio_file}")


In [58]:
generate_audio('a boy dreaming his future')

[32;1m[1;3m[tool/start][0m [1m[tool:generate_audio] Entering Tool run with input:
[0m"a boy dreaming his future"
[36;1m[1;3m[tool/end][0m [1m[tool:generate_audio] [806ms] Exiting Tool run with output:
[0m"output.mp3"


'output.mp3'

In [None]:
from gtts import gTTS

def generate_speech(text, speed=1.0, lang='en', filename='output.mp3', use_cache=True, voice=None):
    """
    Convert ``text`` to speech with gTTS and save it to ``filename``.

    This function drives gTTS only through ``text``, ``lang`` and the boolean
    ``slow`` flag, so ``speed`` is reduced to a choice between "normal" and
    "slow" speech.

    Parameters:
    - text (str): The text to be converted to speech.
    - speed (float): Requested speed (default: 1.0, normal speed). Values
      below 1.0 select slow speech; 1.0 and above select normal speech.
    - lang (str): Language code (default: 'en' for English).
    - filename (str): Output file name (default: 'output.mp3').
    - use_cache (bool): Accepted for backward compatibility; not used.
    - voice (str or None): Accepted for backward compatibility; not applied,
      because nothing passed to gTTS here selects a voice.

    Returns:
    - str: Filename of the saved speech file.
    """
    # Fix: the previous mapping (`slow=False if speed > 1.0 else True`) turned
    # the documented "normal" default of 1.0 into slow speech. Only speeds
    # strictly below normal should request the slow mode.
    slow = speed < 1.0

    # The earlier `tts.speed = ...` / `tts.voice = ...` assignments were plain
    # attribute writes on the gTTS object with no effect on the synthesized
    # audio, so they have been dropped rather than left to mislead readers.
    tts = gTTS(text=text, lang=lang, slow=slow)
    tts.save(filename)
    return filename

In [None]:
# Generate speech with custom speed and language
text = "Hello, how are you today?"
filename = generate_speech(text, speed=1.5, lang='en', filename='custom_speed.mp3')

# Generate speech with a specific voice
text = "Bonjour, comment ça va aujourd'hui?"
filename = generate_speech(text, speed=1.0, lang='fr', filename='french_voice.mp3', voice='fr')

print(f"Speech generated and saved as: {filename}")

In [None]:
import pyttsx3

def generate_speech(text, lang='en', speed=200, voice='default', filename='output.mp3'):
    """
    Render ``text`` to an audio file through a pyttsx3 engine.

    Parameters:
    - text (str): The text to synthesize.
    - lang (str): Language code (default: 'en'). Accepted but not read by
      this function.
    - speed (int): Value written to the engine's 'rate' property
      (default: 200).
    - voice (str): 'default' selects the first voice the engine reports; any
      other value is matched as a substring of each voice's name and the
      first match wins.
    - filename (str): Path handed to ``save_to_file``
      (default: 'output.mp3').

    Returns:
    - str: ``filename``, unchanged.

    Raises:
    - ValueError: If ``voice`` is not 'default' and no usable voice whose
      name contains it is found.
    """
    tts_engine = pyttsx3.init()
    available = tts_engine.getProperty('voices')

    if voice != 'default':
        # First voice whose name contains the requested text, if any.
        chosen_id = next(
            (candidate.id for candidate in available if voice in candidate.name),
            None,
        )
        if not chosen_id:
            raise ValueError(f"Voice '{voice}' not found.")
    else:
        chosen_id = available[0].id

    tts_engine.setProperty('voice', chosen_id)
    tts_engine.setProperty('rate', speed)

    # Queue the file synthesis, then let the engine process its queue.
    tts_engine.save_to_file(text, filename)
    tts_engine.runAndWait()

    return filename

# Example usage
text = "Hello, how are you today?"
filename = generate_speech(text, lang='en', speed=150, voice='default', filename='output.mp3')
print(f"Speech generated and saved as: {filename}")

In [None]:
# Queue the same sentence once for every voice the engine reports, switching
# the 'voice' property before each one, then process the queue once at the end.
# NOTE(review): `runAndWait()` is called only after the loop; whether each
# queued utterance keeps the voice that was set when it was queued presumably
# depends on the pyttsx3 driver — confirm.
engine = pyttsx3.init()
voices = engine.getProperty('voices')
for voice in voices:
   engine.setProperty('voice', voice.id)
   engine.say('The quick brown fox jumped over the lazy dog.')
engine.runAndWait()

In [None]:
import pyttsx3

def list_voices():
    """Print id, name, languages, gender and age of every pyttsx3 voice.

    A fresh engine is created, its 'voices' property is read, and one labelled
    line per attribute is printed for each voice, followed by a dashed
    separator line. Nothing is returned.
    """
    # (label printed, attribute read) in display order.
    fields = (
        ("ID", "id"),
        ("Name", "name"),
        ("Languages", "languages"),
        ("Gender", "gender"),
        ("Age", "age"),
    )
    for entry in pyttsx3.init().getProperty('voices'):
        for label, attribute in fields:
            print(f"{label}: {getattr(entry, attribute)}")
        print("-----------")

# List available voices
list_voices()

In [None]:
from gtts import gTTS

def generate_speech(text, lang='en', slow=False, filename='output.mp3', tld='com'):
    """
    Generates speech for the given script.

    Parameters:
    - text (str): The text to be converted to speech.
    - lang (str): Language code (default: 'en' for English).
    - slow (bool): Whether to use slow speed (default: False).
    - filename (str): Output file name (default: 'output.mp3').
    - tld (str): Top-level domain of the Google Translate host to query
      (default: 'com'). With gTTS this is what selects a regional accent,
      e.g. 'co.uk', 'com.au', 'co.in'.

    Returns:
    - str: Filename of the saved speech file.
    """
    tts = gTTS(text=text, lang=lang, slow=slow, tld=tld)
    tts.save(filename)
    return filename

# Example usage with different accents
# Region-suffixed codes such as 'en-uk' or 'en-au' are deprecated in gTTS, so
# each accent is requested through `tld` with lang='en' instead.
text = "Hello, how are you today?"

# British English
filename_uk = generate_speech(text, lang='en', tld='co.uk', filename='output_uk.mp3')
print(f"Speech generated and saved as: {filename_uk}")

# American English
filename_us = generate_speech(text, lang='en', tld='com', filename='output_us.mp3')
print(f"Speech generated and saved as: {filename_us}")

# Australian English
filename_au = generate_speech(text, lang='en', tld='com.au', filename='output_au.mp3')
print(f"Speech generated and saved as: {filename_au}")

# Indian English
filename_in = generate_speech(text, lang='en', tld='co.in', filename='output_in.mp3')
print(f"Speech generated and saved as: {filename_in}")
