In [1]:
from crewai import Task, Agent, Crew, Process
from langchain.tools import tool
import re
import os
from langchain_groq import ChatGroq
llm = ChatGroq(model='llama3-70b-8192', temperature=1.0, max_tokens=2048)

In [37]:
@tool
def process_script(script):
    """Used to process the script into dictionary format"""
    dict = {}
    dict['text_for_image_generation'] = re.findall(r'<image>(.*?)<image>', script)
    dict['text_for_speech_generation'] = re.findall(r'<narration>.*?<narration>', script)
    return dict

In [38]:
script_agent = Agent(
    role='Senior Content Writer',
    goal='Craft engaging, concise, and informative narrations for YouTube short videos',
    backstory="""As a seasoned content writer, you excel at breaking down complex topics into captivating narratives that educate and entertain audiences. Your expertise lies in writing concise, attention-grabbing scripts for YouTube short videos.""",
    verbose=True,
    llm=llm,
    allow_delegation=False
)

image_descriptive_agent = Agent(
    role='Visual Storyteller',
    goal='Design stunning, contextually relevant visuals for YouTube short videos',
    backstory='With a keen eye for visual storytelling, you create compelling imagery that elevates the narrative and captivates the audience. You will ',
    verbose=True,
    llm=llm,
    allow_delegation=False
)

ScriptSynthesizer = Agent(
    role='Script Processor',
    goal='To process scripts by extracting text for image and speech generation',
    backstory='you are designed to assist in the automated creation of YouTube Shorts by processing scripts and preparing them for further steps in the content creation pipeline.',
    verbose=True,
    llm=llm,
    allow_delegation=False,
    tools=[process_script]
)

In [39]:
story_writing_task = Task(
    description='Write an engaging narration for a YouTube short video on the topic: {topic}',
    expected_output="""A short paragraph suitable for narrating in five seconds also provides immensive experice to audience. Folow the below example for output length and format.

    **Example:**

    **topic:**
    Powerful Kings of History

    **narration:**
    In the pages of history, powerful kings have shaped the destinies of nations.
    From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations.
    Their leadership continues to inspire awe and fascination to this day.
    """,
    agent=script_agent
)

img_text_task = Task(
    description='Given the narration,visually describe each sentence in the narration which will be used as a prompt for an image generation.',
    expected_output="""Sentences encoded in <narration> and <image> tags. Follow the below example for output format.

    example input:
    In the pages of history, powerful kings have shaped the destinies of nations. From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations. Their leadership continues to inspire awe and fascination to this day.

    outupt format:
    <narration>In the pages of history, powerful kings have shaped the destinies of nations.<narration>
    <image>An epic portrayal of ancient kings standing triumphantly, clad in regal attire, commanding their kingdoms with strength and wisdom, amidst grandeur and splendor.<image>
    <narration>From Alexander the Great to Genghis Khan, their conquests have etched unforgettable legacies across civilizations.<narration>
    <image>Dramatic portraits of Alexander the Great and Genghis Khan, adorned in battle armor, leading their armies across vast landscapes and leaving a lasting mark on history.<image>
    <narration>Their leadership continues to inspire awe and fascination to this day.<narration>
    <image>A powerful visual of kings seated on thrones, symbols of authority and ambition, evoking admiration and wonder, against a backdrop of their enduring achievements.<image>
    """,
    agent=image_descriptive_agent,
    context=[story_writing_task]
)

process_script_task = Task(
    description="Extract text for image and speech generation from a provided script.",
    expected_output="A dictionary containing lists of texts for image generation and speech generation.",
    agent=ScriptSynthesizer
)

In [40]:
crew = Crew(
    agents=[script_agent, image_descriptive_agent, ScriptSynthesizer],
    tasks=[story_writing_task, img_text_task, process_script_task],
    process = Process.sequential,
    cache = True,
    # memory=True,
    verbose=2
)

result = crew.kickoff(inputs={'topic': 'Indian People'})



[1m[95m [2024-07-02 11:58:17][DEBUG]: == Working Agent: Senior Content Writer[00m
[1m[95m [2024-07-02 11:58:17][INFO]: == Starting Task: Write an engaging narration for a YouTube short video on the topic: Indian People[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI now can give a great answer!

Final Answer:

**topic:**
Indian People

**narration:**
From the snow-capped Himalayas to the sun-kissed beaches of Kerala, India is home to a vibrant tapestry of cultures, traditions, and histories.
With a rich heritage of spices, spirituality, and innovation, Indian people have contributed significantly to the world's diversity.
From Mahatma Gandhi's non-violent revolution to the digital pioneers of Bengaluru, their stories continue to inspire and fascinate us.

(Note: The narration is concise, engaging, and informative, suitable for a 5-second YouTube short video, providing an immersive experience for the audience.)[0m

[1m> Finished chain.[0m
[1m[92m [2024-

In [41]:
print(result)

{'text_for_image_generation': ["A breathtaking aerial shot of the majestic Himalayas, giving way to a seamless transition of a bustling Indian street, showcasing vibrant colors, ornate architecture, and diverse people from different regions, set against the backdrop of a stunning Kerala beach at sunset, symbolizing the country's rich cultural diversity.", "A visually striking mosaic of iconic Indian elements, including intricate spice patterns, sacred geometric shapes, and ancient Indian instruments, blending seamlessly with modern innovations, such as sleek robots, 3D models, and futuristic cityscapes, representing the country's profound impact on global diversity.", "A powerful split-screen visual, with Mahatma Gandhi's iconic Salt March image on one side, juxtaposed with a modern, sleek, and vibrant portrayal of Bengaluru's thriving tech industry, complete with coding screens, futuristic gadgets, and innovators at work, symbolizing the nation's journey from non-violent revolution to

In [42]:
type(result)

str

In [5]:
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips, VideoFileClip
import cv2
import os

def apply_zoom_in_effect(clip, zoom_factor=1.2):
    width, height = clip.size
    duration = clip.duration

    def zoom_in_effect(get_frame, t):
        frame = get_frame(t)
        zoom = 1 + (zoom_factor - 1) * (t / duration)
        new_width, new_height = int(width * zoom), int(height * zoom)
        resized_frame = cv2.resize(frame, (new_width, new_height))
        
        # Calculate the position to crop the frame to the original size
        x_start = (new_width - width) // 2
        y_start = (new_height - height) // 2
        cropped_frame = resized_frame[y_start:y_start + height, x_start:x_start + width]
        
        return cropped_frame

    return clip.fl(zoom_in_effect, apply_to=['mask'])

def create_video_from_images_and_audio(images_dir, speeches_dir, zoom_factor=1.2):
    """Creates video using images and audios with zoom-in effect"""
    images_paths = os.listdir(images_dir)
    audio_paths = os.listdir(speeches_dir)
    print(images_paths, audio_paths)
    clips = []
    
    for i in range(min(len(images_paths), len(audio_paths))):
        # Load the image
        img_clip = ImageClip(os.path.join(images_dir, images_paths[i]))
        
        # Load the audio file
        audioclip = AudioFileClip(os.path.join(speeches_dir, audio_paths[i]))
        
        # Set the duration of the video clip to the duration of the audio file
        videoclip = img_clip.set_duration(audioclip.duration)
        
        # Apply zoom-in effect to the video clip
        zoomed_clip = apply_zoom_in_effect(videoclip, zoom_factor)
        
        # Add audio to the zoomed video clip
        zoomed_clip = zoomed_clip.set_audio(audioclip)
        
        clips.append(zoomed_clip)
    
    # Concatenate all video clips
    final_clip = concatenate_videoclips(clips)
    
    # Write the result to a file
    final_clip.write_videofile("outputs/final_video/final_video.mp4", codec='libx264', fps=24)
    
    return "final_video.mp4"

# Example usage
image_paths = "outputs/images"
audio_paths = "outputs/speeches"

video_path = create_video_from_images_and_audio(image_paths, audio_paths, zoom_factor=1.2)
print(f"Video created at: {video_path}")

['image1.jpg', 'image2.jpg', 'image3.jpg'] ['speech_1.mp3', 'speech_2.mp3', 'speech_3.mp3']
Moviepy - Building video outputs/final_video/final_video.mp4.
MoviePy - Writing audio in final_videoTEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video outputs/final_video/final_video.mp4



                                                              

Moviepy - Done !
Moviepy - video ready outputs/final_video/final_video.mp4
Video created at: final_video.mp4
