<a href="https://colab.research.google.com/github/reshmi56/ML-Projects/blob/main/video_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install streamlit ngrok spacy transformers torch torchvision torchaudio moviepy gtts diffusers
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m100.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [15]:
%%writefile app.py

import streamlit as st
import spacy
from diffusers import StableDiffusionPipeline
import torch
from PIL import Image
import numpy as np
from moviepy.editor import ImageSequenceClip, VideoFileClip, AudioFileClip
from gtts import gTTS
import os

@st.cache_resource
def load_nlp():
    """Load the spaCy English pipeline once per server process.

    Streamlit re-executes this script on every widget interaction;
    ``st.cache_resource`` keeps the loaded model alive across those reruns
    instead of reloading it each time.
    """
    return spacy.load("en_core_web_sm")


@st.cache_resource
def load_pipeline(model_id):
    """Load the Stable Diffusion pipeline for ``model_id`` once and reuse it.

    The pipeline is moved to the GPU when CUDA is available and otherwise
    stays on the CPU, so the app still starts on a machine without a GPU
    (generation will be much slower there).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return StableDiffusionPipeline.from_pretrained(model_id).to(device)


# Load NLP model
nlp = load_nlp()

# Load the Stable Diffusion model
model_id = "CompVis/stable-diffusion-v1-4"
pipe = load_pipeline(model_id)

# Page header, followed by the free-text prompt that drives every later step
st.title("Text to Video and Audio Generator")

# The second argument of st.text_area is the pre-filled default value
text_input = st.text_area(
    label="Enter a description of the scene:",
    value="A cat sits on the roof while the sun sets in the background.",
)

if st.button("Generate Video and Audio"):

    # Step 3: Text Analysis and Understanding
    def extract_entities_actions(text):
        """Run the spaCy pipeline over ``text`` and summarize it.

        Returns a ``(entities, actions)`` tuple where ``entities`` is a list
        of ``(entity text, entity label)`` pairs taken from ``doc.ents`` and
        ``actions`` is the list of lemmas of every token tagged ``VERB``.
        """
        parsed = nlp(text)

        named = []
        for span in parsed.ents:
            named.append((span.text, span.label_))

        verbs = []
        for tok in parsed:
            if tok.pos_ == 'VERB':
                verbs.append(tok.lemma_)

        return named, verbs

    entities, actions = extract_entities_actions(text_input)

    # Show the analysis result on the page
    st.write("Entities:", entities)
    st.write("Actions:", actions)

    # Step 4: Scene Generation
    def generate_image_from_text(description):
        """Render one image for ``description`` and return it resized to 512x512."""
        rendered = pipe(description).images[0]
        return rendered.resize((512, 512))

    # Hand unused cached CUDA memory back before the generation loop starts
    torch.cuda.empty_cache()

    # Generate frames (you can modify this to generate different frames based on actions)
    frames = []
    for _ in range(10):
        frames.append(generate_image_from_text(text_input))

    # Preview: only the first frame is shown as a still image
    st.image(frames[0], caption="Generated Image", use_column_width=True)

    # Step 5: Animation and Motion
    def create_video_from_frames(frames, fps=24):
        """Encode ``frames`` into ``output_video.mp4`` at ``fps`` frames per second.

        Each frame is converted to a NumPy array before being handed to
        ``ImageSequenceClip``; the file is written with the libx264 codec.
        """
        arrays = []
        for frame in frames:
            arrays.append(np.array(frame))
        ImageSequenceClip(arrays, fps=fps).write_videofile("output_video.mp4", codec="libx264")

    create_video_from_frames(frames)
    # Embed the freshly written silent video in the page
    st.video("output_video.mp4")

    # Step 6: Audio and Speech Generation
    def generate_speech_from_text(text, filename="output_audio.mp3"):
        """Synthesize ``text`` with gTTS and save the result to ``filename``.

        Args:
            text: The text to speak.
            filename: Destination path of the audio file.
        """
        tts = gTTS(text)
        tts.save(filename)

    generate_speech_from_text(text_input)
    # Playback happens in the browser through st.audio. The audio file must not
    # be passed to a shell: a bare file name is interpreted as a command to run.
    st.audio("output_audio.mp3")

    # Step 7: Integration and Rendering
    def combine_video_and_audio(video_path, audio_path, output_path="final_output.mp4"):
        """Attach the audio track at ``audio_path`` to the video at ``video_path``.

        The result is written to ``output_path`` with the libx264 codec.
        Both source clips are closed afterwards, even if encoding fails, so
        the readers they hold are released on every run.

        Args:
            video_path: Path of the video file to read.
            audio_path: Path of the audio file to attach.
            output_path: Path of the combined file to write.
        """
        video = VideoFileClip(video_path)
        try:
            audio = AudioFileClip(audio_path)
            try:
                final_clip = video.set_audio(audio)
                final_clip.write_videofile(output_path, codec="libx264")
            finally:
                audio.close()
        finally:
            video.close()

    combine_video_and_audio("output_video.mp4", "output_audio.mp3")
    st.video("final_output.mp4")


Overwriting app.py


In [16]:
!pip install pyngrok
from pyngrok import ngrok

# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTHTOKEN = "2l0MDPELw940YwOaFXRY5shaDjK_suzqhPRA7FGEmQmNg8tn"  # Replace with your actual authtoken
ngrok.set_auth_token(NGROK_AUTHTOKEN)

# Kill any previous instances of ngrok
!pkill -f streamlit

# Launch the Streamlit app
!streamlit run app.py &>/dev/null&

# Create an ngrok tunnel to the Streamlit app
public_url = ngrok.connect(8501,"http")
print(public_url)

NgrokTunnel: "https://82f2-34-139-13-133.ngrok-free.app" -> "http://localhost:8501"
