In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math, json, os
import torch
import itertools
from sentence_transformers import SentenceTransformer
import ffmpeg


# 1. Get transcript from Video. Here I use Deepgram - https://console.deepgram.com/

In [4]:
with open("resources/videos/NUSFutureWork-Deepgram.json") as f:
    deepgram_json = json.load(f)
f.close()

transcript = deepgram_json['results']['channels'][0]['alternatives'][0]['transcript']
paragraphs = deepgram_json['results']['channels'][0]['alternatives'][0]['paragraphs']
sentences_obj = [_ for _ in list(itertools.chain(*[para['sentences'] for para in paragraphs['paragraphs']]))] # needed for start and end times
sentences = [_['text'] for _ in list(itertools.chain(*[para['sentences'] for para in paragraphs['paragraphs']]))]
sentence_lengths = [int((_['end']-_['start'])*1) for _ in list(itertools.chain(*[para['sentences'] for para in paragraphs['paragraphs']]))]

## print script sentences
# for sentence in sentences:
#     print(sentence)

len(sentences), len(sentence_lengths)

(79, 79)

# 2. Generate summary using any LLM. Here I use ChatGPT. 

## ChatGPT Prompt ==> 'Summarize this transcript "<contents>". In the summary, use very short sentences. Each sentence must be a distinct topic in the transcript. Do not use abbreviations and titles.'

In [7]:
chat_gpt_summary =  "The event welcomed attendees to the second National University of Singapore Lifelong Learning Festival, themed Future Work 2 point 0: AI in Action. The opening remarks emphasized the importance of embracing artificial intelligence's transformative potential across industries. The University’s President highlighted the growing impact of artificial intelligence, likening it to earlier technological revolutions like personal computers and the internet. The University has been proactive in integrating artificial intelligence across its programs. It launched the National University of Singapore AI Institute to foster research and collaborations with industry and government. The institution also developed policies and training initiatives to guide staff and students on using artificial intelligence in education, including workshops on generative AI for course design and assessment. Moreover, the University’s administrative departments use artificial intelligence to enhance productivity. Staff training in data literacy and artificial intelligence has reached thousands, preparing employees for AI-driven tasks. The University also supports lifelong learning through a wide selection of courses, certificates, and modular learning options in artificial intelligence and other fields. Recognizing the cost of education as a barrier, the University announced a 40% tuition fee rebate for Singaporeans and permanent residents on over 80 master’s programs. The speaker concluded with a call to use artificial intelligence to empower, rather than replace, human workers. The event closed with thanks to the School of Continuing and Lifelong Learning and all participants."
chatgpt_summary_sentences = []

for sentence in chat_gpt_summary.split("."):
    if len(sentence) > 0:
        chatgpt_summary_sentences.append(sentence.strip()+".")

# print(chatgpt_summary_sentences)

# 3. Score each sentence in the original transcript to its similarity to each summary sentence

In [9]:
# 1. Load a pretrained Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

In [10]:
chatgpt_embeddings = model.encode(chatgpt_summary_sentences)
sentences_embeddings = model.encode(sentences)

In [12]:
# Two ways of similarity. Any one is ok!

similarities = model.similarity(chatgpt_embeddings, sentences_embeddings) # Not used
for ind1, similarity in enumerate(similarities):
    for ind2, s in enumerate(similarity):
        # similarities[ind1][ind2] = s * sentence_lengths[ind2]# Dont do this. Sentence len is skewing up things.
        similarities[ind1][ind2] = s 

cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
cos_similarities = []
for chatgpt_embedding in chatgpt_embeddings:
    chatgpt_sim = []
    for ind, sentences_embedding in enumerate(sentences_embeddings):
        # chatgpt_sim.append(cos(torch.from_numpy(chatgpt_embedding), torch.from_numpy(sentences_embedding)).numpy()*sentence_lengths[ind]) # Dont do this. Sentence len is skewing up things.
        chatgpt_sim.append(cos(torch.from_numpy(chatgpt_embedding), torch.from_numpy(sentences_embedding)).numpy()*sentence_lengths[ind])
    cos_similarities.append(chatgpt_sim)

In [13]:
len(similarities), len(cos_similarities)

(12, 12)

In [14]:
# For each summary sentence print the sentence from the original transcript which max similarity score. i.e., print the sentence in the orig transcript which encompasses max topic similarity to the summary sentence. 
for i in range(len(chatgpt_summary_sentences)):
    print(chatgpt_summary_sentences[i], '==>', \
          sentences[np.argmax(similarities[i])], \
          similarities[i][np.argmax(similarities[i])], \
          sentence_lengths[np.argmax(similarities[i])])
    print('--------')

The event welcomed attendees to the second National University of Singapore Lifelong Learning Festival, themed Future Work 2 point 0: AI in Action. ==> The theme for this year's learning festival is Future Work 2.0, AI in Action. tensor(0.7325) 7
--------
The opening remarks emphasized the importance of embracing artificial intelligence's transformative potential across industries. ==> It may sound like a paradox, but AI is for real and in action, transforming work and industries. tensor(0.6801) 8
--------
The University’s President highlighted the growing impact of artificial intelligence, likening it to earlier technological revolutions like personal computers and the internet. ==> And this brings together AI researchers from across the university to accelerate frontier AI research as well as to boost real world impact. tensor(0.6425) 12
--------
The University has been proactive in integrating artificial intelligence across its programs. ==> NUS has also been proactively guiding and

In [15]:
len(cos_similarities), len(chatgpt_summary_sentences), len(sentence_lengths), len(sentences)

(12, 12, 79, 79)

# 4. Define capacity (in seconds) and run knapsack DP

In [16]:
capacity = 30 #40 seconds | capacity

#knapsack problem

In [17]:
items = [] # sentences
weights = [] # sentence lengths
values = [] # similarities

item_starts = [] #start and stop times in video
item_ends = []

for i in range(len(chatgpt_summary_sentences)):
    items.append(sentences[np.argmax(similarities[i])])
    weights.append(sentence_lengths[np.argmax(similarities[i])])
    values.append(similarities[i][np.argmax(similarities[i])])

    item_starts.append(sentences_obj[np.argmax(similarities[i])]['start'])
    item_ends.append(sentences_obj[np.argmax(similarities[i])]['end'])


In [18]:
n = len(items)
dp = [[0 for i in range(capacity+1)] for w in range(n+1)] #15 X 401

In [19]:
for i in range(n+1):
    for w in range(capacity+1):
        if i==0 or w==0:
            dp[i][w] = 0
        elif weights[i-1] <= w:
            dp[i][w] = max(values[i-1]+dp[i-1][w-weights[i-1]], dp[i-1][w])
        else:
            dp[i][w] = dp[i-1][w]
    w = capacity
    chosen_items = []

    for i in range(n, 0, -1):
        if dp[i][w] != dp[i-1][w]:
            chosen_items.append(items[i-1])
            w -= weights[i-1]
    total_value = dp[n][capacity]

In [21]:
chosen_items[::-1], total_value

(["The theme for this year's learning festival is Future Work 2.0, AI in Action.",
  'It may sound like a paradox, but AI is for real and in action, transforming work and industries.',
  'Without much ado, I would like to thank the school of continuing and lifelong learning for putting this event together.'],
 tensor(2.0718))

# 5. Final video clips for final summary video based on chosen items

In [24]:
probe = ffmpeg.probe('resources/videos/NUS FutureWork.mp4')
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
fps = int(video_info['r_frame_rate'].split('/')[0])
fps, video_info['r_frame_rate']

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


(25, '25/1')

In [28]:
input_file = ffmpeg.input('resources/videos/NUS FutureWork.mp4')


tot = 0
for i in range(len(chosen_items)):
    snip = sentences_obj[sentences.index(chosen_items[::-1][i])]
    tot += (snip['end']-snip['start'])
    print(snip['start']*fps, snip['end']*fps)

    out_temp = 'resources/videos/output_'+str(i)+'.mp4'
    if os.path.exists(out_temp):
        os.remove(out_temp)
        
    pts='PTS-STARTPTS'
    vid = input_file.trim(start=snip['start'], end=snip['end']).setpts(pts)
    aud = (input_file
          .filter_('atrim', start=snip['start'], end=snip['end'])
          .filter_('asetpts', pts))
    
    vid_aud = ffmpeg.concat(vid, aud, v=1, a=1)
    output_file = ffmpeg.output(vid_aud, out_temp, format='mp4')
    ffmpeg.run(output_file)
print(tot)

4603.625 4796.125


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enabl

5089.49975 5298.375


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enabl

22805.625 23024.12425


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enabl

24.794979999999953


# 6. Create ffmpeg commands (to run on command line) to concat chosen clips

In [29]:
final_out = 'resources/videos/output_final.mp4'
if os.path.exists(final_out):
    os.remove(final_out)

str_command = 'ffmpeg '
str_command_args = ' -filter_complex "'
for i in range(len(chosen_items)):
    str_command += ' -i resources/videos/output_'+str(i)+'.mp4'
    str_command_args += '['+str(i)+':v]['+str(i)+':a]'

str_command_args += 'concat=n='+str(len(chosen_items))+':v=1:a=1" -vsync vfr '+final_out
str_command += str_command_args

print("Run this on command line. Ensure you have ffmpeg installed => \n" + str_command)

Run this on command line. Ensure you have ffmpeg installed => 
ffmpeg  -i resources/videos/output_0.mp4 -i resources/videos/output_1.mp4 -i resources/videos/output_2.mp4 -filter_complex "[0:v][0:a][1:v][1:a][2:v][2:a]concat=n=3:v=1:a=1" -vsync vfr resources/videos/output_final.mp4
