# Capture & Automate Conversational Search Videos

1. Run `google-chrome` at least once from the Linux shell to configure Chrome.
2. From Linux Chrome, log into `google.com` and `bing.com` with the desired user account.
3. Run this notebook. Videos are saved to `%USERPROFILE%\repos\intro\videos` (copy/paste the path into File Explorer).

In [None]:
#| default_exp chatshot
#| export

# The text typed into each search box (keyword search, query or prompt).
prompt = "Write a Hello World program in Python"

# Browser behaviour: slow_mo is handed to Playwright's `slow_mo` launch
# option; width/height are used for both the viewport and the recorded video.
slow_mo = 10
width, height = 1280, 1800

# Pauses passed to asyncio.sleep(): a short one after each page load and a
# long one after submitting the prompt so the answer is captured on video.
short_delay, long_delay = 2, 25

# NOTE(review): the next two settings are not referenced by any other cell
# visible in this notebook; min_file_size is presumably a byte count (200 KiB).
delete_after_days_old = 7
min_file_size = 1024 * 200

# Repository root (with trailing slash) and the folder that receives videos.
REPO = "/home/ubuntu/repos/intro/"
video_folder = REPO + "videos"

## Delete previously captured videos.

In [None]:
#| export
import os
from pathlib import Path


# Make sure the capture folder exists, then empty it so this run starts clean.
# parents=True lets the very first run succeed even when the folders above
# it have not been created yet.
video_dir = Path(video_folder)
video_dir.mkdir(parents=True, exist_ok=True)

# Only regular files are removed; sub-directories are left alone.
for entry in video_dir.iterdir():
    if entry.is_file():
        entry.unlink()
        print(f"Deleted {entry.name}")

print("Deletion of prior video files complete.")

Deleted 2023-08-28.mp4
Deleted b60ae2bbcbbc5ceb445aa923865f2734.webm
Deletion of all files complete.


## Open Google Chrome and perform searches

In [None]:
#| export
import asyncio
import nest_asyncio
from playwright.async_api import async_playwright


# Probe for get_ipython (normally only defined under IPython/Jupyter).
# If the name is missing we are running as a plain script, so run headless;
# otherwise keep the browser window visible.
try:
    get_ipython()
except NameError:
    headless = True
else:
    headless = False

print(f"Capturing video headless: {headless}")

# Patch asyncio with nest_asyncio, then create a fresh event loop and install
# it as the current one (the author notes this is necessary for Playwright in
# Jupyter).
# NOTE(review): `loop` is not referenced again in the code visible in this
# notebook - confirm whether the get_event_loop() call is still needed.
nest_asyncio.apply()
loop = asyncio.get_event_loop()
new_loop = asyncio.new_event_loop()
asyncio.set_event_loop(new_loop)


async def browser_automation():
    """Records Google and Bing conversational web search as video.

    Launches the installed Google Chrome with the persistent user profile in
    ``/home/ubuntu/.config/google-chrome/``, opens one page, submits the
    module-level ``prompt`` to Bing and then to Google, and waits
    ``long_delay`` seconds after each submission. Recording is configured
    through ``record_video_dir=video_folder``.

    Reads the module-level settings ``headless``, ``video_folder``,
    ``width``, ``height``, ``slow_mo``, ``short_delay``, ``long_delay`` and
    ``prompt``.

    Returns:
        None. Prints "Done capturing videos." once the context is closed.
    """

    # Use the Playwright instance owned by the context manager. Starting a
    # second one with async_playwright().start() would never be stopped.
    async with async_playwright() as playwright:
        context = await playwright.chromium.launch_persistent_context(
            accept_downloads=True,
            args=["--window-position=100,100"],
            channel="chrome",
            downloads_path="/home/ubuntu/Downloads",
            executable_path="/usr/bin/google-chrome",
            headless=headless,
            no_viewport=False,
            record_video_dir=video_folder,
            record_video_size={"width": width, "height": height},
            slow_mo=slow_mo,
            user_data_dir="/home/ubuntu/.config/google-chrome/",
            viewport={"width": width, "height": height},
        )

        try:
            # Open the browser window
            page = await context.new_page()

            # Perform a Bing Search
            await page.goto("https://www.bing.com/")
            await asyncio.sleep(short_delay)
            bing_box = page.get_by_placeholder("Ask me anything")
            await bing_box.click()
            await bing_box.type(prompt)
            await bing_box.press("Enter")
            await asyncio.sleep(long_delay)

            # Perform a Google Search
            await page.goto("https://www.google.com/")
            await asyncio.sleep(short_delay)
            google_box = page.get_by_label("Search", exact=True)
            await google_box.click()
            await google_box.type(prompt)
            await google_box.press("Enter")
            await asyncio.sleep(long_delay)
        finally:
            # Always close the context, even when a step above fails, so
            # Chrome and its profile lock are released.
            await context.close()

        print("Done capturing videos.")


# Run the browser automation coroutine to completion.
# NOTE(review): nest_asyncio.apply() was called earlier in this cell, so
# asyncio.run() presumably executes on the event loop installed there
# rather than creating its own - confirm against the nest_asyncio docs.
asyncio.run(browser_automation())

Done capturing videos.


## Delete extra file and convert webm to mp4

In [None]:
#| export
# Collect every regular file currently in the capture folder.
video_files = [f for f in Path(video_folder).iterdir() if f.is_file()]

# Deletes the smallest of multiple files (only one file is removed per run).
if not video_files:
    # min() would raise ValueError on an empty list; report it instead.
    print("No video files found.")
elif len(video_files) == 1:
    print("Already down to 1 file.")
else:
    smallest_file = min(video_files, key=lambda f: f.stat().st_size)
    # iterdir() already yields full paths, so no join with the folder is needed.
    smallest_file.unlink()
    print(f"Deleted the smallest file: {smallest_file}")

print("Done deleting smallest files.")

Deleted the smallest file: /home/ubuntu/repos/intro/videos/593f02f8fcbb0fb15f267a90993f3dbd.webm
Done deleting smallest files.


## Find the Newest .webm File in Folder

In [None]:
#| export
def _is_webm_file(path):
    """Return True if *path* is an existing file whose name ends in .webm (any case)."""
    return path.is_file() and path.name.lower().endswith(".webm")


# Keep only the WebM recordings that are still present on disk.
webm_files = list(filter(_is_webm_file, video_files))

# Most recently modified recording, or None when there is none at all.
newest_webm_file = max(
    webm_files,
    key=lambda path: path.stat().st_mtime,
    default=None,
)
print(f"Found to convert: {newest_webm_file}")

Found to convert: /home/ubuntu/repos/intro/videos/ffe10f808b34de51989740314ad202b0.webm


## Converts webm to mp4 for iPhone compatibility

In [None]:
#| export
import subprocess
from datetime import date


def convert_webm_to_mp4(input_path, output_path, *, crf=40, fps=15, speed=2.0):
    """Convert a WebM recording into a silent, sped-up H.264 MP4 using ffmpeg.

    Args:
        input_path: Source video file (``str`` or ``pathlib.Path``).
        output_path: Destination .mp4 file (``str`` or ``pathlib.Path``).
            An existing file is overwritten.
        crf: x264 constant rate factor passed to ``-crf``; higher numbers
            mean lower quality (51 = lowest).
        fps: Output frame rate passed to ``-r``.
        speed: Playback speed multiplier; the default 2.0 yields the
            ``setpts=0.5*PTS`` filter.

    Raises:
        TypeError: If ``input_path`` or ``output_path`` is None.
        ValueError: If ``speed`` is not greater than zero.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found
            (raised by ``subprocess.run``).
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries the tail of ffmpeg's stderr output.
    """
    if input_path is None or output_path is None:
        raise TypeError("input_path and output_path must be paths, not None")
    if speed <= 0:
        raise ValueError("speed must be greater than zero")

    cmd = [
        "ffmpeg",
        "-y",  # Never stop at the interactive "overwrite?" prompt
        "-i", str(input_path),
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", str(crf),  # Low quality by default (51 = lowest)
        "-r", str(fps),
        "-vf", f"setpts={1 / speed}*PTS",  # Adjust playback speed using setpts filter
        "-an",  # This disables audio
        str(output_path),
    ]

    result = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,  # Runs unattended; nothing to read from the terminal
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Surface ffmpeg failures instead of silently carrying on without an mp4.
    if result.returncode != 0:
        details = result.stderr.decode(errors="replace")[-2000:]
        raise RuntimeError(
            f"ffmpeg exited with status {result.returncode}:\n{details}"
        )


# Name the mp4 after today's date and keep it in the same folder as the
# recordings (video_folder) rather than repeating the path literally.
output_mp4_file = f"{video_folder}/{date.today()}.mp4"

# Stop with a clear message when there is no recording to convert.
if newest_webm_file is None:
    raise SystemExit("No .webm recording found to convert.")

convert_webm_to_mp4(newest_webm_file, output_mp4_file)

print("Done converting.")

Done converting.


## Sends email with video as an attachment

In [None]:
#| export
import smtplib
from email import encoders
from datetime import datetime
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart


from html import escape  # Makes the prompt and file name safe inside the HTML body

# Files holding the sender's login and the list of recipients.
from_email_txt = f"{REPO}service/mail_from.txt"
to_email_txt = f"{REPO}service/mail_to.txt"

if not Path(from_email_txt).exists() or not Path(to_email_txt).exists():
    print("The email from-file should be a gmail on line 1 and an App Password on line 2.")
    print("The email to-file can be 1 email per line.")
    raise SystemExit("Email credentials not found.")

file_name = Path(output_mp4_file).name

# Create the plain-text email
msg_text = f'''{prompt}

Your video recording of a Google and Bing search for {file_name} is attached.

Regards'''

# Create the HTML email (escaped so characters like < or & in the prompt
# cannot break the markup)
msg_html = f'''<html><head></head><body><h1>{escape(prompt)}</h1>
<p>Your video recording of a Google and Bing search for {escape(file_name)} is attached.</p>
<h3>Regards</h3></body></html>'''

# Sender address on the first non-blank line, password on the second.
# Blank lines (e.g. a trailing newline) are ignored instead of breaking
# the two-value unpacking.
with open(from_email_txt) as fh:
    credentials = [line.strip() for line in fh if line.strip()]
if len(credentials) < 2:
    raise SystemExit("The email from-file needs an address on line 1 and an App Password on line 2.")
email, paswd = credentials[:2]

# One recipient per line; blank lines are skipped.
with open(to_email_txt) as fh:
    mail_to = [line.strip() for line in fh if line.strip()]
if not mail_to:
    raise SystemExit("The email to-file lists no recipients.")

# Build the complete message before opening any network connection, so a
# missing attachment cannot leave an SMTP session dangling.
msgdict = MIMEMultipart()
msgdict.preamble = 'This is a multi-part message in MIME format.'
msgdict['From'] = email
msgdict['To'] = ', '.join(mail_to)
msgdict['Subject'] = prompt

# Create plain text and HTML alternatives
msg_alts = MIMEMultipart('alternative')
msgdict.attach(msg_alts)
msg_alts.attach(MIMEText(msg_text))
msg_alts.attach(MIMEText(msg_html, 'html'))

# Attach the video: the MIME type says it is an mp4, while the disposition
# must be "attachment" ("video" is not a valid disposition type).
mimecats = MIMEBase('video', 'mp4')
mimecats.set_payload(Path(output_mp4_file).read_bytes())
encoders.encode_base64(mimecats)
mimecats.add_header('Content-Disposition', 'attachment', filename=file_name)
msgdict.attach(mimecats)

# The context manager sends QUIT and closes the socket even if login or
# sending raises.
with smtplib.SMTP('smtp.gmail.com', 587) as server:
    server.ehlo()
    server.starttls()
    server.login(email, paswd)

    try:
        server.sendmail(email, mail_to, msgdict.as_string())
    except (smtplib.SMTPException, OSError) as exc:
        # Report the failure with its cause rather than hiding every error.
        print(f'error sending mail: {exc}')
    else:
        print('Email sent')

The email from-file should be a gmail on line 1 and an App Password on line 2.
The email to-file can be 1 email per line.


SystemExit: Email credentials not found.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## Creates service/chatshot.py file

In [None]:
#| hide

# Export the notebook's `#| export` cells to a .py module through nbdev.
# The module name comes from the `#| default_exp chatshot` directive in the
# first code cell; the section heading names the result service/chatshot.py.
# NOTE(review): the output folder is decided by the nbdev project settings,
# which are not shown in this notebook - confirm.
# Per the author, the resulting .py file is run as a Linux service.
import nbdev

nbdev.nbdev_export()