In [6]:
from crewai import Task, Agent, Crew, Process

In [7]:
import re

In [8]:
from langchain_google_genai import ChatGoogleGenerativeAI, HarmBlockThreshold, HarmCategory
llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key='AIzaSyBKo19PtvV9oSMRr4R1wJUueyWOL4n5e5c')

In [31]:
script_agent = Agent(
    role='Senior content Writer',
    goal='To write shot and sweet narration for YouTube short videos',
    backstory="""You are a content writer for a YouTube creator.
    With a flair for simplifying complex topics, you craft engaging narratives that captivate and educate audience.
    You are responsible for writing engaging narrations for the YouTube short videos.
    Your narrations are concise and engaging and always grabs audience's attention.""",
    verbose=True,
    llm=llm,
    allow_delegation = False
)

image_descriptive_agent = Agent(
    role='Visuals writer',
    goal='To create stunning visuals for the YouTube short videos',
    backstory='You are an expert in visual storytelling and creating compelling imagery.',
    verbose=True,
    llm=llm,
    allow_delegation = False
)

img_generator_agent = Agent(
    role='Media Content Creator',
    goal='To generate images for YouTube short videos based on the provided narration',
    backstory='You are an expert in creating multimedia content.',
    verbose=True,
    llm=llm,
    allow_delegation=False,
    tools=[image_generator]
)


story_writing_task = Task(
    description='Write an engaging narration for a YouTube short video on the topic: {topic}',
    expected_output="""A short paragraph suitable for narrating in five seconds also provides immensive experice to audience. Folow the below example for output length and format.

    **Example:**

    **topic:**
    Powerful Kings of History

    **narration:**
    In the pages of history, powerful kings have shaped the destinies of nations.
    From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations.
    Their leadership continues to inspire awe and fascination to this day.
    """,
    agent=script_agent
)

img_text_task = Task(
    description='Given the narration, write text for each sentence in the narration that is  used as a prompt for an image generation.',
    expected_output="""Follow the below example for output format.

    **Example:**

    **narration:**
    In the pages of history, powerful kings have shaped the destinies of nations. From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations. Their leadership continues to inspire awe and fascination to this day.

    **text descriptions:**
    <narration>In the pages of history, powerful kings have shaped the destinies of nations.<narration>
    <image>An epic portrayal of ancient kings standing triumphantly, clad in regal attire, commanding their kingdoms with strength and wisdom, amidst grandeur and splendor.<image>
    <narration>From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations.<narration>
    <image>Dramatic portraits of Alexander the Great and Genghis Khan, adorned in battle armor, leading their armies across vast landscapes and leaving a lasting mark on history.<image>
    <narration>Their leadership continues to inspire awe and fascination to this day.<narration>
    <image>A powerful visual of kings seated on thrones, symbols of authority and ambition, evoking admiration and wonder, against a backdrop of their enduring achievements.<image>
    """,
    agent=image_descriptive_agent,
    context=[story_writing_task]
)

img_generation_task = Task(
    description='Pass the text enclosed in <image> tag into the used including the tags',
    expected_output="""path of the folder where images are saved""",
    context = [img_text_task],
    agent=img_generator_agent
)

In [10]:
from langchain.tools import tool
@tool
def text_extrator(narration):
    """Extracts text used for image generation and speech generation from given narration"""
    text_for_image_generation = re.findall(r'<image>(.*?)<image>', narration)
    text_for_speech_generation = re.findall(r'<narration>(.*?)<narration>', narration)
    return text_for_image_generation, text_for_speech_generation

In [1]:
from diffusers import DiffusionPipeline, StableDiffusionXLPipeline, DPMSolverSinglestepScheduler
import bitsandbytes as bnb
import torch.nn as nn
import torch

pipe = StableDiffusionXLPipeline.from_pretrained("sd-community/sdxl-flash", torch_dtype=torch.float16).to('cuda')
pipe.scheduler = DPMSolverSinglestepScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

def quantize_model_to_4bit(model):
    replacements = []

    # Collect layers to be replaced
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            replacements.append((name, module))

    # Replace layers
    for name, module in replacements:
        # Split the name to navigate to the parent module
        *path, last = name.split('.')
        parent = model
        for part in path:
            parent = getattr(parent, part)

        # Create and assign the quantized layer
        quantized_layer = bnb.nn.Linear4bit(module.in_features, module.out_features, bias=module.bias is not None)
        quantized_layer.weight.data = module.weight.data
        if module.bias is not None:
            quantized_layer.bias.data = module.bias.data
        setattr(parent, last, quantized_layer)

    return model

pipe.unet = quantize_model_to_4bit(pipe.unet)
pipe.enable_model_cpu_offload()

  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [4]:
import pyttsx3
import os

def generate_speech(text, lang='en', speed=170, voice='default', num=0):
    """
    Generates speech for given script.
    """
    engine = pyttsx3.init()
    
    # Set language and voice
    voices = engine.getProperty('voices')
    if voice == 'default':
        voice_id = voices[0].id
    else:
        # Try to find the voice with the given name
        voice_id = None
        for v in voices:
            if voice in v.name:
                voice_id = v.id
                break
        if not voice_id:
            raise ValueError(f"Voice '{voice}' not found.")
    
    engine.setProperty('voice', voice_id)
    engine.setProperty('rate', speed)
    print(os.path.join(os.path.dirname(os.path.abspath(__file__)), f'outputs/speech/speech_{num}.mp3'))
    engine.save_to_file(text, os.path.join(os.path.dirname(os.path.abspath(__file__)), f'outputs/speech/speech_{num}.mp3'))
    engine.runAndWait()

In [15]:
# @tool
def image_generator(narration, images_dir='./outputs/images.jpg'):
    """Generates images for the given narration.
    Saves it to images_dir and return path"""
    image = pipe(narration, num_inference_steps=6, guidance_scale=2, width=720, height=1280, verbose=0).images[0]
    image.save('outputs/image.png')#os.path.join(images_dir, f'image.jpg'))
    return f'image generated for {narration} and saved to directory {images_dir}'

# @tool
def speech_generator(text, speech_dir='./outputs/audio'):
    """Generates speech for given text"""
    generate_speech(text, speech_dir, num=0)
    return f'speech generated for {text} and saved to directory {speech_dir}'

In [32]:
crew = Crew(
    agents=[script_agent, image_descriptive_agent, img_generator_agent],
    tasks=[story_writing_task, img_text_task, img_generation_task],
    process = Process.sequential,
    # cache = True,
    verbose=2
)



In [None]:
result = crew.kickoff(inputs={'topic': 'Democracy in India'})

In [5]:
import os

In [16]:
image_generator('india')

  0%|          | 0/6 [00:00<?, ?it/s]

FileNotFoundError: [Errno 2] No such file or directory: 'outputs/image.png'

In [50]:
os.path.exists('C:\\Users\\prudh\\Desktop\\Python\\Generative AI\\GenerativeAI-Projects\\10sec Shorts\\outpus\\images')

True

In [30]:
import re
re.findall(r'<image>(.*?)<image>', result)

[]

In [31]:
re.findall(r'<narration>(.*?)<narration>', result)

[]

In [32]:
re.findall(r'<speech>(.*?)</speech>', result)

[]