# Setup

In [None]:
import sys
import os

# Put the parent of the current working directory on the import path so the
# imports in the following cells can be resolved from there.
sys.path.append(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir))
)

In [None]:
from constants import video_ids, search_keywords, playlist_ids, channel_ids
from tests.utils import measure_time

# Load the API developer key from the local "devKey" file.
# The context manager closes the file handle deterministically, and strip()
# removes surrounding whitespace so that a trailing newline in the file does
# not become part of the key.
with open("devKey", encoding="utf-8") as key_file:
    devKey = key_file.read().strip()

# Type 1: Iterable Foreman

## CommentThreadsForeman

In [None]:
from yt_pipeline.foreman import CommentThreadsForeman
from yt_pipeline.retriever import PipeSettings, RetrieverSettings


# Run the comment-threads foreman over every entry of video_ids.
worker = CommentThreadsForeman()
results = worker.invoke(
    iterable=video_ids,
    developerKey=devKey,
    pipe_settings=PipeSettings(retrieval="all", max_page=1),
    retriever_settings=RetrieverSettings(
        output_folder="backup/CommentThreadsRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the comment-threads run; the bare name on the
# last line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

## PlaylistItemsForeman

In [None]:
from yt_pipeline.foreman import PlaylistItemsForeman
from yt_pipeline.retriever import PipeSettings, RetrieverSettings


# Run the playlist-items foreman over every entry of playlist_ids.
worker = PlaylistItemsForeman()
results = worker.invoke(
    iterable=playlist_ids,
    developerKey=devKey,
    pipe_settings=PipeSettings(retrieval="all", max_page=3),
    retriever_settings=RetrieverSettings(
        output_folder="backup/PlaylistItemsRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the playlist-items run; the bare name on the
# last line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

In [None]:
# results.thumbnails maps a key to a list of thumbnail dicts; expand it into
# one row per thumbnail, tagging each row with the key it came from.
flattened = []
for playlist_id, thumbnails in results.thumbnails.items():
    flattened.extend(
        {
            "playlistId": playlist_id,
            "quality": thumb["quality"],
            "url": thumb["url"],
            "width": thumb["width"],
            "height": thumb["height"],
        }
        for thumb in thumbnails
    )

# Show the flattened rows as a table.
df = pd.DataFrame(data=flattened)
df

## PlaylistsForeman

In [None]:
from yt_pipeline.foreman import PlaylistsForeman
from yt_pipeline.retriever import PipeSettings, RetrieverSettings


# Run the playlists foreman over every entry of channel_ids.
worker = PlaylistsForeman()
results = worker.invoke(
    iterable=channel_ids,
    developerKey=devKey,
    pipe_settings=PipeSettings(retrieval="all", max_page=3),
    retriever_settings=RetrieverSettings(
        output_folder="backup/PlaylistsRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the playlists run; the bare name on the last
# line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

## SearchForeman

In [None]:
from yt_pipeline.foreman import SearchForeman
from yt_pipeline.retriever import PipeSettings, RetrieverSettings, SearchTypeCheckboxProps, SearchParamProps

# Four search requests for the first keyword against the same channel; they
# differ only in the extra filters passed (duration, publish date, category).
iterable = [
    SearchParamProps(
        search_keywords[0],
        channelId="UC84t1K5ri-7u9bFCaUKTXDA",
        order="relevance",
    ),
    SearchParamProps(
        search_keywords[0],
        channelId="UC84t1K5ri-7u9bFCaUKTXDA",
        videoDuration="long",
        order="relevance",
    ),
    SearchParamProps(
        search_keywords[0],
        channelId="UC84t1K5ri-7u9bFCaUKTXDA",
        videoDuration="long",
        order="relevance",
        publishedAfter="2023-02-01T13:00:02Z",
    ),
    SearchParamProps(
        search_keywords[0],
        channelId="UC84t1K5ri-7u9bFCaUKTXDA",
        videoCategoryId="19",
        order="relevance",
    ),
]

# Only the video checkbox is enabled here. SearchTypeCheckboxProps can also be
# given playlist=True and channel=True (not exercised in this cell).
types = SearchTypeCheckboxProps(video=True)
worker = SearchForeman(types=types)
results = worker.invoke(
    iterable=iterable,
    developerKey=devKey,
    pipe_settings=PipeSettings(retrieval="all", max_page=1),
    retriever_settings=RetrieverSettings(
        output_folder="backup/SearchRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the search run; the bare name on the last
# line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

# Type 2: Unique Foreman

## VideosForeman

In [None]:
from yt_pipeline.foreman import VideosForeman
from yt_pipeline.retriever import RetrieverSettings


# Run the videos foreman over every entry of video_ids.
worker = VideosForeman()
results = worker.invoke(
    iterable=video_ids,
    developerKey=devKey,
    retriever_settings=RetrieverSettings(
        output_folder="backup/VideosRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the videos run; the bare name on the last
# line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

In [None]:
# results.thumbnails maps a key to a list of thumbnail dicts; expand it into
# one row per thumbnail, tagging each row with the key it came from.
# NOTE(review): this run was invoked over video_ids, so the keys are presumably
# video IDs and the column is labelled "videoId" rather than "playlistId" —
# confirm against what VideosForeman actually returns.
flattened = []
for video_id, thumbnails in results.thumbnails.items():
    flattened.extend(
        {
            "videoId": video_id,
            "quality": thumb["quality"],
            "url": thumb["url"],
            "width": thumb["width"],
            "height": thumb["height"],
        }
        for thumb in thumbnails
    )

# Show the flattened rows as a table.
df = pd.DataFrame(flattened)
df

## ChannelsForeman

In [None]:
from yt_pipeline.foreman import ChannelsForeman
from yt_pipeline.retriever import RetrieverSettings


# Run the channels foreman over every entry of channel_ids.
worker = ChannelsForeman()
results = worker.invoke(
    iterable=channel_ids,
    developerKey=devKey,
    retriever_settings=RetrieverSettings(
        output_folder="backup/ChannelsRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the channels run; the bare name on the last
# line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df

# Type 3: CaptionsForeman

In [None]:
from yt_pipeline.foreman import CaptionsForeman
from yt_pipeline.foreman.captions import CaptionsParams
from yt_pipeline.retriever import RetrieverSettings


# Build one CaptionsParams per video, using only the first ten video IDs,
# then run the captions foreman over them.
iterable = [
    CaptionsParams(videoId=video_id)
    for video_id in video_ids[:10]
]
worker = CaptionsForeman()
results = worker.invoke(
    iterable=iterable,
    developerKey=devKey,
    retriever_settings=RetrieverSettings(
        output_folder="backup/CaptionsRetriever",
        multithread=True,
    ),
    max_workers=16,
)

In [None]:
import pandas as pd

# Tabulate the main records of the captions run; the bare name on the last
# line makes the notebook render the frame.
main_df = pd.DataFrame(data=results.main_records)
main_df