# Task 1 — AI Video Generator


This notebook implements the full pipeline requested in the assignment:
1. Fetch trending news (Google News RSS or search)
2. Extract summary and key points
3. Generate a 30–60s script using OpenAI (or fallback template)
4. Fetch 3-6 images from Unsplash or Pexels
5. Create TTS audio (gTTS) or cloud TTS if available
6. Assemble a video using MoviePy with text overlays and export `out.mp4`


**Important:** Replace API keys in the `.env` cell before running. Respect websites' robots.txt and terms of service. For sites that disallow scraping, use RSS or official APIs.

In [21]:
!pip install --quiet requests beautifulsoup4 moviepy gTTS Pillow openai python-dotenv keybert sentence-transformers yake pafy youtube_dl validators
!apt-get update -qq && apt-get install -qq -y ffmpeg imagemagick

^C


In [12]:
%%writefile .env
OPENAI_API_KEY=
UNSPLASH_KEY=yX2mnqbopnDkIQRElP1sfhnFircFyvA40YGEy3KUkug
PEXELS_KEY=qNV8NI1Gx00j3KMfwgdqbsNVnjiyLxJ7wWPW8lfdCQsNz1gnqQYBUywS
# If posting or using Wordpress, set WP_URL, WP_USER, WP_PASS (used in Task 2 notebook)
WP_URL=https://yourwordpresssite.com
WP_USER=shafiq2201069cs
WP_PASS=Amishkhan@123


# Load env
from dotenv import load_dotenv
import os
load_dotenv('.env')


print('ENV loaded. Remember to edit .env with your keys.')

Overwriting .env


In [4]:
import os, json, time, requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from pathlib import Path

load_dotenv('.env')

Path('task1_outputs').mkdir(parents=True, exist_ok=True)
Path('task1_outputs/images').mkdir(parents=True, exist_ok=True)



In [6]:
from bs4 import BeautifulSoup
import requests

def fetch_google_news_rss(query=None, max_items=5):
    if query:
        url = f"https://news.google.com/rss/search?q={requests.utils.requote_uri(query)}"
    else:
        url = "https://news.google.com/rss"
    r = requests.get(url, timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'xml')
    items = soup.find_all('item')[:max_items]
    articles = []
    for it in items:
        articles.append({
            'title': it.title.text,
            'link': it.link.text,
            'pubDate': it.pubDate.text,
            'desc': it.description.text
        })
    return articles

arts = fetch_google_news_rss(max_items=5)
for i,a in enumerate(arts):
    print(i+1, a['title'])

1 Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says - The Guardian
2 Thailand launches airstrikes at Cambodia as border tensions reignite - Reuters
3 Democrats Call for Releasing Video of Deadly Boat Strike in the Caribbean - The New York Times
4 Water leak in Louvre damages hundreds of books - BBC
5 Chinese carrier holds air drills as Tokyo, Beijing trade barbs - Reuters


In [8]:
articles = arts
for idx, a in enumerate(articles):
    print(f"[{idx}] {a['title']}")

chosen_idx = 0
article = articles[chosen_idx]
print('\nSelected:', article['title'])

summary = article.get('desc','')
print('\nSummary (from RSS):')
print(summary)

[0] Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says - The Guardian
[1] Thailand launches airstrikes at Cambodia as border tensions reignite - Reuters
[2] Democrats Call for Releasing Video of Deadly Boat Strike in the Caribbean - The New York Times
[3] Water leak in Louvre damages hundreds of books - BBC
[4] Chinese carrier holds air drills as Tokyo, Beijing trade barbs - Reuters

Selected: Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says - The Guardian

Summary (from RSS):
<ol><li><a href="https://news.google.com/rss/articles/CBMipgFBVV95cUxNaThNb1E5YU9LRFBzbG9wNm9rSE5jNkR0UU9RYjVjQ0RaYzJtR0YtLVN2b2ZGQ200MlVhdmRNSG93TlJIWkpQd19JMDlUYVJxd2kxU3hoWHRWRUVFTUpHVkctSkJNWldka0RwbkZybVJlT1drb1ZETHg4OWpqUXVVbTlTY0hUSFpkQjltdWRGRnVDbFpWYVlXdlVDam1HeVphMkw0R1RR?oc=5" target="_blank">Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says</a>&nbsp;&nbsp;<font color="#6f6f6f">The Guardian</font></li><li><a href="https://new

In [14]:
# --- CELL 7: Generate Video Script (With Safe Fallback) ---

import os, json

# Read API key from .env
from openai import OpenAI
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Automatically detect if key is available
use_openai = bool(OPENAI_API_KEY)

def generate_script_with_openai(headline, summary, length_seconds=45):
    """
    Generates a 30–60 sec video script using OpenAI (only if API key exists).
    """
    client = OpenAI(api_key=OPENAI_API_KEY)

    prompt = f"""
Create a short 30–60 second video script for a news update video.
Return ONLY valid JSON with:
- voice_script (3–6 sentences)
- overlays (3 very short overlay lines)

Headline: {headline}
Summary: {summary}
"""

    # New OpenAI API call format
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=300,
        temperature=0.7
    )

    raw_output = response.choices[0].message.content

    # Try to decode JSON properly
    try:
        return json.loads(raw_output)
    except:
        # If model returns text instead of JSON, convert safely
        lines = [l.strip() for l in raw_output.split("\n") if l.strip()]
        return {
            "voice_script": raw_output,
            "overlays": lines[:3] if len(lines) >= 3 else ["News Update", "Breaking Info", "More Details Soon"]
        }


def fallback_generate_script(headline, summary):
    """
    Safe fallback generator if no OpenAI key or API quota exceeded.
    """
    voice = (
        f"{headline}. {summary} "
        f"This is a brief update providing the key highlights. "
        f"Stay tuned for more information as the story develops."
    )

    overlays = [
        headline[:40],        # shortened headline
        "Quick Update",
        "Stay Informed"
    ]

    return {"voice_script": voice, "overlays": overlays}


# --- MAIN EXECUTION ---
try:
    if use_openai:
        print("OpenAI key found — generating script with OpenAI...")
        script_json = generate_script_with_openai(article["title"], summary)
    else:
        print("No OpenAI key found — using fallback generator.")
        script_json = fallback_generate_script(article["title"], summary)

except Exception as e:
    print("OpenAI failed — switching to fallback.", e)
    script_json = fallback_generate_script(article["title"], summary)


# --- SHOW OUTPUT ---
print("\nVoice Script:\n", script_json["voice_script"])
print("\nOverlay Text:", script_json["overlays"])


OpenAI key found — generating script with OpenAI...
OpenAI failed — switching to fallback. Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

Voice Script:
 Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says - The Guardian. <ol><li><a href="https://news.google.com/rss/articles/CBMipgFBVV95cUxNaThNb1E5YU9LRFBzbG9wNm9rSE5jNkR0UU9RYjVjQ0RaYzJtR0YtLVN2b2ZGQ200MlVhdmRNSG93TlJIWkpQd19JMDlUYVJxd2kxU3hoWHRWRUVFTUpHVkctSkJNWldka0RwbkZybVJlT1drb1ZETHg4OWpqUXVVbTlTY0hUSFpkQjltdWRGRnVDbFpWYVlXdlVDam1HeVphMkw0R1RR?oc=5" target="_blank">Republicans in Congress mocked Trump privately, Marjorie Taylor Greene says</a>&nbsp;&nbsp;<font color="#6f6f6f">The Guardian</font></li><li><a href="https://news.google.com/rss/article

In [16]:
import os
UNSPLASH_KEY = os.getenv('UNSPLASH_KEY')
PEXELS_KEY = os.getenv('PEXELS_KEY')

def fetch_unsplash_images(query, count=4, out_dir='task1_outputs/images'):
    if not UNSPLASH_KEY:
        raise ValueError('UNSPLASH_KEY missing in .env')
    url = 'https://api.unsplash.com/search/photos'
    headers = {'Authorization': f'Client-ID {UNSPLASH_KEY}'}
    params = {'query': query, 'per_page': count}
    r = requests.get(url, params=params, headers=headers, timeout=15)
    r.raise_for_status()
    data = r.json()
    img_urls = [it['urls']['regular'] for it in data['results'][:count]]
    files = []
    for i,u in enumerate(img_urls):
        fn = f"{out_dir}/{i+1}.jpg"
        with requests.get(u, stream=True, timeout=30) as rr:
            rr.raise_for_status()
            with open(fn, 'wb') as f:
                for chunk in rr.iter_content(1024):
                    f.write(chunk)
        files.append(fn)
    return files

query = article['title']
try:
    images = fetch_unsplash_images(query, count=4)
    print('Downloaded images:', images)
except Exception as e:
    print('Unsplash fetch failed:', str(e))
    print('If you do not have an API key, upload images manually to task1_outputs/images/')
    images = sorted(list(Path('task1_outputs/images').glob('*.jpg')))
    images = [str(p) for p in images]
    print('Existing images:', images)

Downloaded images: ['task1_outputs/images/1.jpg', 'task1_outputs/images/2.jpg', 'task1_outputs/images/3.jpg', 'task1_outputs/images/4.jpg']


In [18]:
from gtts import gTTS
from pathlib import Path

voice_text = script_json['voice_script']
tts_file = 'task1_outputs/voice_tts.mp3'

try:
    tts = gTTS(voice_text)
    tts.save(tts_file)
    print('Saved TTS to', tts_file)
except Exception as e:
    print('gTTS failed:', e)
    print('Check internet connection or use offline TTS')

Saved TTS to task1_outputs/voice_tts.mp3


In [24]:
!apt-get update
!apt-get install -y imagemagick

import os

policy_file = "/etc/ImageMagick-6/policy.xml"
if os.path.exists(policy_file):
    with open(policy_file, "r") as f:
        policy = f.read()

    policy = policy.replace("<policy domain=\"path\" rights=\"none\" pattern=\"@*\" />", "")
    policy = policy.replace("<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\" />", "")

    with open(policy_file, "w") as f:
        f.write(policy)

print("ImageMagick installed + policy fixed!")

0% [Working]            Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
0% [Connecting to archive.ubuntu.com (91.189.92.24)] [Connected to cloud.r-proj                                                                               Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (65.9.86.28)] [Conne                                                                               Hit:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
0% [Waiting for headers] [Connecting to r2u.stat.illinois.edu (192.17.190.167)]                                                                               Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Waiting for headers] [Connecting to r2u.stat.illinois.edu (192.17.190.167)]                                                                               Hit:5 https://cli.github.com/packages stable InRelease
0% [Waiting for

In [25]:
from moviepy.config import change_settings
change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

In [36]:
# --- CELL 10 (fixed): Render text overlays with Pillow (no ImageMagick) ---
from moviepy.editor import ImageClip, concatenate_videoclips, CompositeVideoClip, AudioFileClip
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import os

image_files = images
seconds = 45
per_img = max(3, int(seconds / max(1, len(image_files))))

# Build image clips
clips = []
for img in image_files:
    clip = ImageClip(img).set_duration(per_img).resize(width=720)
    clips.append(clip)

video = concatenate_videoclips(clips, method='compose')

# Prepare overlays (render with Pillow)
overlays = script_json.get("overlays", [])
text_clips = []
per_overlay = max(2, int(seconds / max(1, len(overlays)))) if overlays else 2
start = 0

# Video dimensions
w, h = video.size  # width, height

# Choose a font. Colab has DejaVu fonts; fall back to default if not found.
def get_font(size=36):
    possible = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
    ]
    for p in possible:
        if os.path.exists(p):
            return ImageFont.truetype(p, size=size)
    return ImageFont.load_default()

font = get_font(size=36)

def text_dimensions(draw, text, font):
    """
    Return (width, height) for given text using draw.textbbox (works across Pillow versions).
    """
    bbox = draw.textbbox((0,0), text, font=font)
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    return width, height

def make_text_image(text, width, height=120, font=font, padding=20, bg_alpha=150):
    """
    Create a transparent PNG (numpy array) with centered text at given width.
    bg_alpha: background alpha (0 transparent .. 255 opaque)
    """
    # Create transparent background
    img = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Background rectangle with semi-transparent black
    rect_h = height
    draw.rectangle(((0, 0), (width, rect_h)), fill=(0, 0, 0, bg_alpha))

    # Wrap text if too long
    max_w = width - 2*padding
    lines = []
    words = text.split()
    line = ""
    for wword in words:
        test = (line + " " + wword).strip()
        tw, th = text_dimensions(draw, test, font)
        if tw <= max_w:
            line = test
        else:
            if line:
                lines.append(line)
            line = wword
    if line:
        lines.append(line)

    # compute total text height
    line_heights = [text_dimensions(draw, l, font)[1] for l in lines]
    total_h = sum(line_heights)
    # start y to center vertically in the rect
    y = (rect_h - total_h) // 2
    for i, l in enumerate(lines):
        tw, th = text_dimensions(draw, l, font)
        x = (width - tw) // 2
        draw.text((x, y), l, font=font, fill=(255, 255, 255, 255))
        y += line_heights[i]

    return np.array(img)

# Create ImageClips from text images and position them
for ov in overlays:
    txt_img = make_text_image(ov, width=w, height=100, font=font, padding=20, bg_alpha=140)
    txt_clip = ImageClip(txt_img).set_duration(per_overlay).set_start(start)
    # position at bottom center with a small margin
    txt_clip = txt_clip.set_position(("center", h - 100 - 20))
    text_clips.append(txt_clip)
    start += per_overlay

# If there were no overlays, you can optionally add one generic overlay
if not text_clips:
    default_img = make_text_image("Quick Update", width=w, height=100, font=font, bg_alpha=140)
    dc = ImageClip(default_img).set_duration(seconds).set_position(("center", h - 100 - 20))
    text_clips.append(dc)

# Attach audio
audio = AudioFileClip(tts_file)
final_audio = audio.set_duration(video.duration)

final = CompositeVideoClip([video, *text_clips]).set_audio(final_audio)

out_path = "task1_outputs/out.mp4"
final.write_videofile(out_path, fps=24, bitrate="4000k")

print("Video saved to", out_path)

Moviepy - Building video task1_outputs/out.mp4.
MoviePy - Writing audio in outTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video task1_outputs/out.mp4





Moviepy - Done !
Moviepy - video ready task1_outputs/out.mp4
Video saved to task1_outputs/out.mp4


In [30]:
!apt-get update
!apt-get install -y imagemagick

import os

policy_file = "/etc/ImageMagick-6/policy.xml"
if os.path.exists(policy_file):
    with open(policy_file, "r") as f:
        policy = f.read()

    policy = policy.replace("<policy domain=\"path\" rights=\"none\" pattern=\"@*\" />", "")
    policy = policy.replace("<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\" />", "")

    with open(policy_file, "w") as f:
        f.write(policy)

print("ImageMagick installed + policy fixed!")

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Connecting to security.ubuntu.com (91.189.91.82)] [Connected to cloud.r-pro                                                                               Hit:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
0% [Waiting for headers] [Connecting to security.ubuntu.com (91.189.91.82)] [Wa                                                                               Hit:3 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
0% [Connecting to security.ubuntu.com (91.189.91.82)] [Waiting for headers] [Co                                                                               Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Connecting to security.ubuntu.com (91.189.91.82)] [Connecting to r2u.stat.i                                                                               Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu22

In [37]:
from google.colab import files
files.download('task1_outputs/out.mp4')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
report = f"""
# Task 1 — Report


Input article: {article['link']}
Generated script:
{voice_text}
Images used: {images}
Output video: task1_outputs/out.mp4


Steps followed: Google News RSS -> Script generation -> Unsplash images -> gTTS -> MoviePy assembly


Limitations: TTS quality; image licensing; scraping restrictions.
"""
with open('task1_outputs/Task1_Report.md','w') as f:
    f.write(report)

files.download('task1_outputs/Task1_Report.md')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>