In [1]:
# data science
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer

import youtube.get_youtube_data as get_youtube_data
import machine_learning.embedding as embedding

In [2]:
# Step 1. Get the youtube API key. For now we can just grab 1, but in the future
#         we will need to obtain a list of API keys due to the high quota cost of
#         performing a youtube search.
YOUTUBE_API_KEY = get_youtube_data.get_youtube_api_key()
# YOUTUBE_API_KEYS = get_youtube_data.get_youtube_api_key_list()

In [3]:
# Step 2. Build the youtube client to make API calls.
youtube = get_youtube_data.make_client(YOUTUBE_API_KEY)

In [4]:
# Step 3. Perform a youtube search with a user-specified query.
youtube_df = get_youtube_data.search_youtube(
    youtube,
    query='Patrick Bet David',
    max_vids=15,        # youtube accepts 50 as the max value
    order='relevance'   # default is relevance
)

In [5]:
youtube_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   video_id                13 non-null     object
 1   published_at            13 non-null     object
 2   channel_id              13 non-null     object
 3   title                   13 non-null     object
 4   description             13 non-null     object
 5   channel_title           13 non-null     object
 6   thumbnail_default_url   13 non-null     object
 7   thumbnail_medium_url    13 non-null     object
 8   thumbnail_high_url      13 non-null     object
 9   thumbnail_standard_url  11 non-null     object
 10  thumbnail_maxres_url    10 non-null     object
 11  tags                    11 non-null     object
 12  video_duration          11 non-null     object
 13  video_caption           11 non-null     object
 14  video_view_count        11 non-null     object
 15  video_li

In [6]:
youtube_df.sample(5)

Unnamed: 0,video_id,published_at,channel_id,title,description,channel_title,thumbnail_default_url,thumbnail_medium_url,thumbnail_high_url,thumbnail_standard_url,thumbnail_maxres_url,tags,video_duration,video_caption,video_view_count,video_like_count,video_comment_count
7,UZAYgSUPF6Q,2023-06-14T02:00:16Z,UCAjmXPKv1zpYftSDAMeJz8A,Patrick Bet-David Explains Who George Soros is...,Watch Full Episode - https://youtu.be/TGx_HUPw...,FLAGRANT CLIPS,https://i.ytimg.com/vi/UZAYgSUPF6Q/default.jpg,https://i.ytimg.com/vi/UZAYgSUPF6Q/mqdefault.jpg,https://i.ytimg.com/vi/UZAYgSUPF6Q/hqdefault.jpg,https://i.ytimg.com/vi/DO_pELccUVI/sddefault.jpg,https://i.ytimg.com/vi/DO_pELccUVI/maxresdefau...,"[Patrick Bet-David, Valeutainment, The BetDavi...",3132.0,False,445458.0,15654.0,2810.0
8,I3UORgx3DBw,2023-02-15T14:00:11Z,UCIHdDJ0tjn_3j-FS7s_X1kQ,10 Rules For Making Your First Million,Do you have a goal of one day earning 1 millio...,Valuetainment,https://i.ytimg.com/vi/I3UORgx3DBw/default.jpg,https://i.ytimg.com/vi/I3UORgx3DBw/mqdefault.jpg,https://i.ytimg.com/vi/I3UORgx3DBw/hqdefault.jpg,https://i.ytimg.com/vi/MDjPdoSbcvs/sddefault.jpg,https://i.ytimg.com/vi/MDjPdoSbcvs/maxresdefau...,"[Entrepreneur, Entrepreneurs, Entrepreneurship...",45.0,False,117252.0,6424.0,83.0
10,MDjPdoSbcvs,2023-05-10T13:00:26Z,UCIHdDJ0tjn_3j-FS7s_X1kQ,Patrick Bet-David&#39;s Multi Millionaire Diet,shorts #short #valuetainment #patrickbetdavid.,Valuetainment,https://i.ytimg.com/vi/MDjPdoSbcvs/default.jpg,https://i.ytimg.com/vi/MDjPdoSbcvs/mqdefault.jpg,https://i.ytimg.com/vi/MDjPdoSbcvs/hqdefault.jpg,https://i.ytimg.com/vi/lrcIcd-RL-E/sddefault.jpg,https://i.ytimg.com/vi/lrcIcd-RL-E/maxresdefau...,"[david pakman patrick bet david, david pakman ...",6562.0,False,197691.0,6062.0,4098.0
11,4XL4P2IZLxo,2023-07-05T13:00:07Z,UCIHdDJ0tjn_3j-FS7s_X1kQ,LGBTQ Movement Exposed: Shocking History Behin...,"In this episode, Patrick exposes the shocking ...",Valuetainment,https://i.ytimg.com/vi/4XL4P2IZLxo/default.jpg,https://i.ytimg.com/vi/4XL4P2IZLxo/mqdefault.jpg,https://i.ytimg.com/vi/4XL4P2IZLxo/hqdefault.jpg,,,,,,,,
0,2hzPRsnXOJs,2023-07-11T15:10:47Z,UCGX7nGXpz-CmO_Arg-cgJ7A,Lauren Chen | PBD Podcast | Ep. 284,"In this Episode, Lauren Chen joins the podcast...",PBD Podcast,https://i.ytimg.com/vi/2hzPRsnXOJs/default.jpg,https://i.ytimg.com/vi/2hzPRsnXOJs/mqdefault.jpg,https://i.ytimg.com/vi/2hzPRsnXOJs/hqdefault.jpg,https://i.ytimg.com/vi/2hzPRsnXOJs/sddefault.jpg,https://i.ytimg.com/vi/2hzPRsnXOJs/maxresdefau...,"[Patrick Bet-David, Valeutainment, The BetDavi...",6576.0,False,147624.0,7778.0,849.0


In [7]:
filter_sent = "Politics"
list_of_videos = ["Who'se Really Supporting Russia",
                  "The Perfect Hillary Clinton Analogy",
                  "The Evolution of Alex Jones",\
                  "Patrick Bet David on The Breakfast Club",
                  "The Truth About The 2020 Election",
                  "Kobe Bryant’s Last Great Interview"]
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embedding.filter_out_embed(model,filter_sent,list_of_videos)


['Patrick Bet David on The Breakfast Club',
 'Kobe Bryant’s Last Great Interview']

In [8]:
titles = youtube_df['title'].tolist()
print("titles:",titles)
print("api test")
print(embedding.filter_out_embed(model,filter_sent,titles))

titles: ['Lauren Chen | PBD Podcast | Ep. 284', 'Patrick Bet David - Home Prices Skyrocket w/ Supply Dwindling - U.S. Housing Market Update 2023', 'Why Hollywood Elites Don’t Want You To Watch Sound Of Freedom', 'Patrick Bet-David Responds To Jason Whitlock Calling Him Out', 'Patrick Bet-David Offers Tucker Carlson $100 Million To Join Valuetainment', 'Exclusive: Andrew Tate UNCENSORED Interview with Patrick Bet-David', 'Col. Douglas Macgregor | PBD Podcast | Ep. 283', 'Patrick Bet-David Explains Who George Soros is &amp; Why He&#39;s Important in Society', '10 Rules For Making Your First Million', 'EMERGENCY Podcast | Reaction To Tucker Carlson Leaving Fox News &amp; Don Lemon Being Fired By CNN', 'Patrick Bet-David&#39;s Multi Millionaire Diet', 'LGBTQ Movement Exposed: Shocking History Behind It', 'The Entire Patrick Bet-David Fiasco']
api test
['Lauren Chen | PBD Podcast | Ep. 284', 'Patrick Bet David - Home Prices Skyrocket w/ Supply Dwindling - U.S. Housing Market Update 2023', '