# In [None]:
import torch
import cv2

# Define your text-to-video PyTorch model
class TextToVideoModel(torch.nn.Module):
    """Placeholder model that maps text feature vectors to image frames.

    Two linear layers are applied in sequence: ``text_embedding`` projects the
    input features to an embedding, and ``text_to_video`` projects that
    embedding to a flat frame which ``forward`` reshapes to
    ``(channels, frame_height, frame_width)``.

    This is just a placeholder architecture; replace it with your actual model.

    Args:
        text_dim: Size of the last dimension of the input feature tensor.
        embed_dim: Size of the intermediate text embedding.
        channels: Number of channels per generated frame (3 for RGB).
        frame_height: Height of each generated frame in pixels.
        frame_width: Width of each generated frame in pixels.
    """

    def __init__(
        self,
        text_dim: int = 100,
        embed_dim: int = 256,
        channels: int = 3,
        frame_height: int = 64,
        frame_width: int = 64,
    ):
        super().__init__()
        # Keep the output shape on the instance so forward() always reshapes
        # to the same dimensions the final layer was built for.
        self.channels = channels
        self.frame_height = frame_height
        self.frame_width = frame_width
        # The attribute names below become the state_dict keys, so they must
        # stay stable for existing checkpoints to keep loading.
        self.text_embedding = torch.nn.Linear(text_dim, embed_dim)
        self.text_to_video = torch.nn.Linear(
            embed_dim, channels * frame_height * frame_width
        )

    def forward(self, text_input: torch.Tensor) -> torch.Tensor:
        """Return frames shaped ``(N, channels, frame_height, frame_width)``.

        ``N`` is inferred from the input: every row of features yields one
        frame, and a single 1-D feature vector yields ``N == 1``.
        """
        text_embedding = self.text_embedding(text_input)
        video_frames = self.text_to_video(text_embedding)
        return video_frames.view(
            -1, self.channels, self.frame_height, self.frame_width
        )

# Load pre-trained PyTorch model
model = TextToVideoModel()
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# machine without CUDA; the freshly constructed model lives on the CPU anyway.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source (and consider passing
# weights_only=True on PyTorch versions that support it).
model.load_state_dict(
    torch.load('pretrained_text_to_video_model.pth', map_location='cpu')
)
# Switch to inference mode.
model.eval()

def generate_video_with_text(text_input, output_filename='output.mp4', *, fps=24.0):
    """Generate frames from preprocessed text features and write them to a video.

    The module-level ``model`` is run on ``text_input`` and every frame it
    returns is written to ``output_filename`` with OpenCV's ``mp4v`` codec.

    Args:
        text_input: Already-preprocessed numeric text features (tensor, NumPy
            array or nested list of numbers). Raw strings are not accepted.
        output_filename: Path of the video file to create.
        fps: Frame rate of the written video.

    Returns:
        ``output_filename``, unchanged.

    Raises:
        TypeError: If ``text_input`` is a raw string instead of numeric features.
        RuntimeError: If OpenCV cannot open ``output_filename`` for writing.
    """
    if isinstance(text_input, str):
        raise TypeError(
            "text_input must be preprocessed numeric features, not a raw string"
        )

    # Force float32 so integer-valued features do not produce an int64 tensor,
    # which the model's float Linear layers would reject.
    text_input_tensor = torch.as_tensor(text_input, dtype=torch.float32)

    # Generate video frames from text input using pre-trained PyTorch model
    with torch.no_grad():
        video_frames = model(text_input_tensor)

    # Scale to the 0-255 range (assumes the model emits values in [0, 1] --
    # TODO confirm) and clamp before the uint8 cast so out-of-range values
    # saturate instead of wrapping around.
    video_frames = (video_frames * 255).clamp(0, 255).byte()
    _, _, frame_height, frame_width = video_frames.shape

    # OpenCV expects height x width x channel frames in BGR order, so flip the
    # RGB channel axis, move channels last, and make the memory contiguous.
    bgr_frames = video_frames.flip(1).permute(0, 2, 3, 1).contiguous().cpu().numpy()

    # Initialize video writer (OpenCV takes the frame size as (width, height))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    try:
        if not video_writer.isOpened():
            raise RuntimeError(f"Could not open video file for writing: {output_filename}")
        # Write video frames to video file
        for frame in bgr_frames:
            video_writer.write(frame)
    finally:
        # Release the writer even if writing fails, so the file handle is closed.
        video_writer.release()

    return output_filename

def _encode_text(text, feature_dim):
    """Encode ``text`` as a list of ``feature_dim`` floats in [0, 1].

    PLACEHOLDER encoding: the UTF-8 bytes of the text are scaled by 1/255,
    truncated to ``feature_dim`` values and zero-padded on the right. Replace
    this with the preprocessing the pre-trained model was actually trained with.
    """
    features = [byte / 255.0 for byte in text.encode("utf-8")[:feature_dim]]
    features.extend([0.0] * (feature_dim - len(features)))
    return features


# Get input from user
text_input = input("Enter the text you want to generate video from: ")
# The model consumes a fixed-length numeric vector, so the raw string has to be
# encoded first; passing the string itself cannot be converted to a tensor.
text_features = _encode_text(text_input, model.text_embedding.in_features)
output_filename = generate_video_with_text(text_features)
print(f"Video generated: {output_filename}")
