In [None]:
import os
import wget
import json
from RAG.system import RAGSystem
from hyperparameters import merge
from dotenv import load_dotenv
from qa_datasets import tuning_dataset
from tqdm import tqdm

In [None]:
# Load the .env configuration first, then read the data root from the
# process environment.
load_dotenv()
DATA_DIR = os.environ.get("DATA_DIR")
# Stop right away with a readable message when the variable is missing,
# because every later cell builds its paths from DATA_DIR.
assert DATA_DIR is not None, "Define DATA_DIR in .env file!"

In [None]:
# Build the RAG system used by the rest of the notebook. The seed is kept in
# a named constant so the value passed as random_seed is easy to find.
RAG_RANDOM_SEED = 17
rag = RAGSystem(hypers=merge, data_dir=DATA_DIR, random_seed=RAG_RANDOM_SEED)

In [None]:
# Keep only the tuning questions whose 'course' field matches the MIT
# psychology course title.
PSYCHOLOGY_COURSE = 'MIT 9.00SC Introduction to Psychology, Fall 2011'
psychology_course_questions = [
    entry for entry in tuning_dataset if entry['course'] == PSYCHOLOGY_COURSE
]

print(f"Psychology course questions: {len(psychology_course_questions)}\n")
# The first match is reused by the following cells as the example query.
first_question = psychology_course_questions[0]
print(f"First question: {json.dumps(first_question, indent=2)}")

In [None]:
# Query the RAG system with the example question, using its default options.
# The call stays last in the cell so the notebook displays its return value.
example_question = first_question['question']
rag.ask(example_question)

In [None]:
# Ask the same example question again, this time with force_answer enabled.
# NOTE(review): presumably this makes the system answer even when it would
# otherwise decline; confirm against RAGSystem.ask.
forced_question = first_question['question']
rag.ask(forced_question, force_answer=True)

In [None]:
# Lectures to fetch, written as one (url, directory, filename) row per video.
# Each row is expanded into a dict with the keys 'url', 'directory' and
# 'filename', in that order.
videos_to_download = [
    dict(zip(('url', 'directory', 'filename'), lecture))
    for lecture in (
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec01_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 1 - Introduction.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec02_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 2 - Science & Research.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec03_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 3 - Brain I: Structure and Functions.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec04_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 4 - Brain II: Methods of Research.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec05_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 5 - Vision I.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec06_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 6 - Vision II.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec07_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 7 - Attention.mp4',
        ),
        (
            'http://www.archive.org/download/MIT9.00SCF11/MIT9_00SCF11_lec08_300k.mp4',
            'MIT9.00SCF11',
            'Lecture 8 - Consciousness.mp4',
        ),
    )
]

In [None]:
# Name of the sub-directory of DATA_DIR under which all videos are stored.
VIDEO_DIR_NAME = 'videos'

# Download every listed lecture that is not already present on disk, so the
# cell can be re-run without fetching files again.
for video in (pbar := tqdm(videos_to_download)):
    video_folder = os.path.join(DATA_DIR, VIDEO_DIR_NAME, video['directory'])
    # exist_ok=True replaces the separate existence check, which was racy
    # and cost an extra filesystem call.
    os.makedirs(video_folder, exist_ok=True)

    # Build the destination path once; it is needed for both the existence
    # check and the download target.
    video_file = os.path.join(video_folder, video['filename'])
    if not os.path.exists(video_file):
        pbar.set_description(f"Downloading {video['filename']} to {video_folder}")
        wget.download(video['url'], out=video_file)

In [None]:
# Collect every video file stored as DATA_DIR/VIDEO_DIR_NAME/<folder>/<video>.
videos_root = os.path.join(DATA_DIR, VIDEO_DIR_NAME)
videos_to_preprocess = []
# os.listdir returns entries in arbitrary order; sorting keeps the "first
# video" and the preprocessing order the same from run to run.
for folder in sorted(os.listdir(videos_root)):
    folder_path = os.path.join(videos_root, folder)
    if not os.path.isdir(folder_path):
        # A stray file directly under the videos root (e.g. .DS_Store) would
        # make the nested listdir raise NotADirectoryError.
        continue
    for video in sorted(os.listdir(folder_path)):
        video_path = os.path.join(folder_path, video)
        if not os.path.isfile(video_path):
            # Nested directories are not videos; skip them.
            continue
        # The folder-relative path serves as the video's identifier.
        unique_video_name = os.path.join(folder, video)
        videos_to_preprocess.append({
            'unique_video_name': unique_video_name,
            'video_path': video_path,
        })

print(f"Videos to preprocess: {len(videos_to_preprocess)}\n")

# Guard the preview: indexing an empty list would raise IndexError.
if videos_to_preprocess:
    first_video = videos_to_preprocess[0]
    print(f"First video: {json.dumps(first_video, indent=2)}")
else:
    first_video = None
    print(f"No videos found under {videos_root}")

In [None]:
# Run the RAG system's preprocessing for each collected video, showing the
# name of the video currently being handled on the progress bar.
progress = tqdm(videos_to_preprocess)
for video in progress:
    progress.set_description(f"Preprocessing {video['unique_video_name']}")
    rag.preprocess_video(video['unique_video_name'], video['video_path'])