In [1]:
from googleapiclient.discovery import build
import pandas as pd
from dotenv import load_dotenv
import os

In [2]:
# Load variables from a local .env file into the process environment,
# then read the YouTube Data API key from it.
load_dotenv()
api_key = os.getenv('YT_API_KEY')

# Fail fast with a clear message: os.getenv returns None for a missing
# variable, which would otherwise only surface later as an opaque API error.
if not api_key:
    raise RuntimeError(
        "YT_API_KEY is not set; define it in the environment or in a .env file."
    )

### Find trending videos per query

In [3]:
# Parameters for the video search: up to 50 US-region "fishing" videos of
# medium duration published after 1 Aug 2023, ordered by view count.
search_params = {
    'part': 'snippet',
    'maxResults': 50,
    'q': 'fishing',
    'order': 'viewCount',
    'type': 'video',
    'regionCode': 'US',
    'videoDuration': 'medium',
    'publishedAfter': '2023-08-01T00:00:00Z',
}

search_resource = build('youtube', 'v3', developerKey=api_key).search()
req_json = search_resource.list(**search_params).execute()

In [4]:
# Pull the fields of interest out of every search hit, one list per column.
# The lists stay in the namespace because later cells read user_id / vid_id.
search_hits = req_json['items']
user = [hit['snippet']['channelTitle'] for hit in search_hits]
user_id = [hit['snippet']['channelId'] for hit in search_hits]
vid_id = [hit['id']['videoId'] for hit in search_hits]
title = [hit['snippet']['title'] for hit in search_hits]
date = [hit['snippet']['publishTime'] for hit in search_hits]

# One row per video returned by the search.
df_temp = pd.DataFrame(
    dict(
        username=user,
        user_id=user_id,
        vid_id=vid_id,
        date=date,
        title=title,
    )
)

### Find subscribers per channel

In [5]:
# Comma-separated channel IDs with duplicates removed. dict.fromkeys keeps
# first-seen order, so the string is identical on every run (iterating a set
# of strings can change order between kernel restarts).
user_id_out = ','.join(dict.fromkeys(user_id))

In [6]:
# Fetch the statistics part for every channel listed in user_id_out.
channels_resource = build('youtube', 'v3', developerKey=api_key).channels()
subs_json = channels_resource.list(
    part='statistics',
    id=user_id_out,
).execute()

In [7]:
# Map channel ID -> subscriber count, exactly as returned by the API.
subs = {}
# .get guards against a response without an 'items' key (no channel matched).
for channel in subs_json.get('items', []):
    channel_stats = channel['statistics']
    # A channel's statistics may lack 'subscriberCount' (e.g. when the owner
    # hides it). Skip those channels instead of raising KeyError; they end up
    # with missing subscribers after the later left merge.
    if 'subscriberCount' not in channel_stats:
        continue
    subs[channel['id']] = channel_stats['subscriberCount']

In [8]:
# Two-column frame (channel ID, subscriber count) with the count cast to int.
df_subs = pd.DataFrame(list(subs.items()), columns=['user_id', 'subscribers'])
df_subs = df_subs.astype({'subscribers': int})

### Find views per video

In [9]:
# Comma-separated video IDs with duplicates removed. dict.fromkeys keeps
# first-seen order, so the string is identical on every run (iterating a set
# of strings can change order between kernel restarts).
vid_id_out = ','.join(dict.fromkeys(vid_id))

In [10]:
# Fetch statistics and content details for every video listed in vid_id_out.
videos_resource = build('youtube', 'v3', developerKey=api_key).videos()
views_json = videos_resource.list(
    part='statistics, contentDetails',
    id=vid_id_out,
).execute()

In [11]:
# Inspection cell: pretty-print the raw videos().list response to see which
# fields are available under each item ('statistics', 'contentDetails', ...).
from pprint import pprint
pprint(views_json)

{'etag': 'HsxaCXhaVntBp5IyuJOo88uHebA',
 'items': [{'contentDetails': {'caption': 'false',
                               'contentRating': {},
                               'definition': 'hd',
                               'dimension': '2d',
                               'duration': 'PT17M48S',
                               'licensedContent': True,
                               'projection': 'rectangular'},
            'etag': 'oW1R-iGp35Mx3_rDWl_OJIXLh14',
            'id': 'MMSFmoNzQEE',
            'kind': 'youtube#video',
            'statistics': {'favoriteCount': '0',
                           'likeCount': '103238',
                           'viewCount': '2570204'}},
           {'contentDetails': {'caption': 'false',
                               'contentRating': {},
                               'definition': 'hd',
                               'dimension': '2d',
                               'duration': 'PT10M46S',
                               'licensedContent': Tr

In [12]:
# Map video ID -> view count, exactly as returned by the API.
views = {}
# The loop variable is deliberately NOT called `vid_id`: that name holds the
# list of video IDs built from the search results, and overwriting it with a
# single ID string breaks any re-run of the cell that builds vid_id_out.
# .get guards against a response without an 'items' key.
for video in views_json.get('items', []):
    view_count = video['statistics'].get('viewCount')
    # Skip videos whose statistics carry no view count so the later int cast
    # does not fail; they end up with missing views after the left merge.
    if view_count is None:
        continue
    views[video['id']] = view_count

In [13]:
# Two-column frame (video ID, view count) with the count cast to int.
df_views = pd.DataFrame(list(views.items()), columns=['vid_id', 'views'])
df_views = df_views.astype({'views': int})

### Merge

In [14]:
# Attach subscriber counts (by channel) and view counts (by video) to the
# search results, then derive views per subscriber.
df = (
    df_temp
    .merge(df_subs, how='left', on='user_id')
    .merge(df_views, how='left', on='vid_id')
    .assign(viewsToSubs=lambda merged: merged['views'] / merged['subscribers'])
)

### Which videos received the most views per subscriber?

In [15]:
# Highest views-per-subscriber first: these videos reached well beyond the
# channel's existing audience.
df.sort_values('viewsToSubs', ascending=False)

Unnamed: 0,username,user_id,vid_id,date,title,subscribers,views,viewsToSubs
4,The Lawless Tide,UCqD-eS-qRKsGi9LfT-4jiJA,RpNfMFNz2VM,2023-10-18T20:55:00Z,Someone Called the GAME WARDEN Because I Was C...,32600,4437572,136.12184
31,Tom Epic,UC57qJQqeVCq2BUTqvpBsHXw,EGhvfudN390,2023-08-31T21:06:11Z,DIY Saltwater Fish Pond At The Beach With A...,18500,851710,46.038378
23,Whimsy World,UCn-yQfooWz39iQQpWYVOHdA,MlQlpU3uyss,2023-10-16T11:42:13Z,Amazing! Attractive Best Best Big Fish Market ...,28300,1185058,41.874841
40,Sabang Angler,UCxBVjI9xUjgcsJ-_z2aHjgQ,8pXx4MUDXJM,2023-09-05T11:05:21Z,Galit na Galit mga isda sa Pain ko !,20100,647668,32.222289
46,Adventure Fishing UK,UC7Da9nUj8TXcKwQlULbcpBQ,QQ7CU-CLkz0,2023-09-10T13:30:12Z,I Hiked HOURS to Fish this TINY Mountain Pond....,25700,573893,22.330467
38,"Kapuso mo, Jessica Stories",UC_ktPLZUDrSXNOyFzhQbMpA,iXJqAxXqKS8,2023-10-15T11:00:29Z,"Kapuso mo, Jessica soho: DAMBUHALANG COBRA, SI...",69300,684191,9.872886
8,Final Affliction,UCcOCstS20fFk383lSHo7ifw,KvB73Ziaa70,2023-09-08T07:11:38Z,The HORRIFYING Last Minutes of Gloria Serge EA...,371000,3258808,8.783849
33,Dung Girl Fishing,UCytnlBRb1zB8QUAZDvJr6lw,Z8PaNFb8Is8,2023-08-27T12:44:44Z,Amazing Fishing | Beautiful Girl Fishing Big a...,88600,742422,8.379481
7,village food taste,UCaCUtSfruaAfE3gqeT5ZHVg,6_fesOKe7NY,2023-09-30T09:33:38Z,Pond Water Fish Catching and Cooking Fish Grav...,416000,3387712,8.143538
5,Wild Fighter,UCc_lepicYywQXEvyeJcz1TA,dMZTwIDN5Bk,2023-08-14T12:26:05Z,Shark Attack Man In Sea Fishing Boat | Fun Mad...,572000,4159510,7.271871


### Retrieve Audio Transcripts

In [19]:
from youtube_transcript_api import YouTubeTranscriptApi

In [58]:
def get_transcript(vid_id):
    """Return a video's caption text as a single string.

    Each kept caption segment is followed by one space. Segments whose text
    starts with ``[`` (bracketed annotations, presumably non-speech tags such
    as ``[Music]``) and segments with empty text are left out.

    Args:
        vid_id: ID of the video whose transcript should be fetched.

    Returns:
        The concatenated caption text, or ``""`` if the transcript could not
        be fetched or processed (best-effort: this function never raises, so
        it can be applied across a whole column).
    """
    try:
        srt = YouTubeTranscriptApi.get_transcript(vid_id)
        spoken_segments = [
            segment['text']
            for segment in srt
            # startswith() is safe on an empty string. Indexing text[0] raised
            # IndexError on empty segments, and the blanket handler below then
            # threw away the entire transcript.
            if segment['text'] and not segment['text'].startswith('[')
        ]
        return "".join(text + " " for text in spoken_segments)
    except Exception:
        # Deliberate best-effort: a video without an available transcript
        # (or any fetch error) yields an empty transcript.
        return ""

In [60]:
# Fetch one transcript per video ID (one call to get_transcript per row).
df['audio_transcript'] = df['vid_id'].map(get_transcript)

### LDA from All Audio Transcripts

In [115]:
import nltk
from nltk.corpus import stopwords

# Make sure the NLTK stop-word corpus is available locally. quiet=True keeps
# the downloader's log (which includes the local nltk_data path) out of the
# saved notebook output.
nltk.download('stopwords', quiet=True)

# Domain-specific stop words, each listed exactly once and grouped by theme.
# The set of words is unchanged from the earlier, heavily duplicated list.
custom_stop_words = [
    # Fishing vocabulary shared by virtually every video in the sample
    "fishing", "fish", "angler", "catch", "angling", "bait", "lure", "hook",
    "reel", "rod", "line", "boat", "water", "lake", "river", "ocean",
    "pond", "species", "tackle", "equipment", "fishermen", "caught",
    # YouTube / audience boilerplate
    "video", "videos", "watch", "watches", "watching", "watcher", "watchers",
    "view", "views", "viewer", "viewers",
    "subscribe", "subscriber", "subscribers", "channel", "channels",
    "comment", "comments", "like", "likes", "dislike", "dislikes",
    "share", "shares", "click", "clicks", "notification", "bell",
    "audience", "fan", "fans", "follow", "follower", "followers",
    "clip", "clips", "content", "playlist", "playlists",
    "upload", "uploads", "post", "episode", "episodes",
    "season", "seasons", "series", "show", "shows", "vlog",
    # Content-format words
    "tip", "trick", "tutorial", "how-to", "guide", "advice",
    # Recency words
    "new", "latest", "today", "recent",
    # Promotion / commerce
    "check", "visit", "website", "shop", "store", "purchase", "buy", "sale",
    "discount", "offer", "promo", "code", "link", "description",
    # Social media
    "social", "media", "twitter", "facebook", "instagram",
    # Conversational filler
    "huge", "one", "found", "back", "ever", "right", "going", "oh", "got",
    "go", "get", "gonna", "yeah", "good",
]

# Union of NLTK's English stop words and the custom list defined above.
stop_words = {*stopwords.words('english'), *custom_stop_words}

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Kyrie\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [133]:
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import LdaModel
from gensim import matutils


# All transcripts as plain strings; keep only those longer than 1000 characters.
documents = df['audio_transcript'].tolist()
documents_cleaned = [text for text in documents if len(text) > 1000]

# TF-IDF features: drop terms found in more than 85% of documents, keep at
# most 1000 terms, and exclude the combined stop-word list.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.85,
    max_features=1000,
    stop_words=list(stop_words),
)
tfidf_matrix = tfidf_vectorizer.fit_transform(documents_cleaned)

# Wrap the sparse matrix as a Gensim corpus (rows are documents, hence
# documents_columns=False) and map each column index to its term.
corpus = matutils.Sparse2Corpus(tfidf_matrix, documents_columns=False)
id2word = dict(enumerate(tfidf_vectorizer.get_feature_names_out()))

# Fit the LDA model on the TF-IDF corpus with a fixed seed.
num_topics = 15  # number of topics to extract
lda_model = LdaModel(
    corpus=corpus,
    id2word=id2word,
    num_topics=num_topics,
    passes=20,
    random_state=42,
)

# Show the 10 highest-weighted words of each topic.
topics = lda_model.print_topics(num_words=10)
for topic_summary in topics:
    print(topic_summary)

(0, '0.001*"tide" + 0.001*"sergeant" + 0.001*"bro" + 0.001*"puffer" + 0.001*"pool" + 0.001*"majors" + 0.001*"waves" + 0.001*"know" + 0.001*"guys" + 0.001*"little"')
(1, '0.008*"treasure" + 0.006*"trash" + 0.005*"squid" + 0.005*"joe" + 0.005*"grill" + 0.004*"lobster" + 0.004*"money" + 0.004*"giant" + 0.003*"woo" + 0.003*"island"')
(2, '0.008*"tank" + 0.004*"house" + 0.004*"ben" + 0.003*"stop" + 0.003*"le" + 0.003*"hannah" + 0.003*"flooded" + 0.003*"derek" + 0.003*"yo" + 0.003*"leak"')
(3, '0.009*"mac" + 0.009*"shark" + 0.005*"dogghouse" + 0.005*"bull" + 0.004*"tunnel" + 0.004*"bye" + 0.004*"tank" + 0.004*"beach" + 0.004*"dog" + 0.004*"fisherman"')
(4, '0.001*"plush" + 0.001*"please" + 0.001*"pop" + 0.001*"pool" + 0.001*"pole" + 0.001*"point" + 0.001*"possible" + 0.001*"plenty" + 0.001*"plants" + 0.001*"plate"')
(5, '0.011*"kitty" + 0.006*"creek" + 0.006*"catfish" + 0.006*"crappie" + 0.006*"sarang" + 0.005*"thailand" + 0.004*"bass" + 0.004*"tail" + 0.004*"india" + 0.004*"rat"')
(6, '0.00

### Prompt ChatGPT for analysis of results:

"Here is an LDA output for topic frequency from popular YouTube videos involving the search query "fishing":

{insert topic output}

With this information, suggest 10 topics for videos I should make as a fishing youtuber based on this data"

### ChatGPT's Response
"
Based on the topics extracted from popular fishing YouTube videos, here are 10 potential video topics for you as a fishing YouTuber:

1. Exploring Tide Patterns: Create videos exploring different tide patterns and their impact on fishing success.
2. Treasure and Trash Diving: Dive into underwater treasure and trash hunting, showcasing your findings.
3. Aquarium Tank Setup: Share tips on setting up and maintaining aquarium tanks, especially focusing on fish suitable for beginners.
4. Shark Fishing Techniques: Teach viewers various techniques for shark fishing, including bait selection and safe catch-and-release practices.
5. Night Fishing Adventure: Capture the excitement of night fishing, discussing best practices and unique challenges faced during nighttime angling.
6. Creative Cooking with Seafood: Showcase creative seafood recipes, demonstrating unique and delicious ways to cook fish and seafood.
7. Exotic Fish Species: Explore exotic fish species found in different parts of the world, providing insights into their habitat and behavior.
8. DIY Fishing Gear: Share DIY projects related to fishing gear, such as making your own lures, rods, or tackle boxes.
9. Fishing in Unusual Locations: Take your audience to unconventional fishing spots, like bridges, piers, or even urban areas, and demonstrate successful fishing techniques there.
10. Piano Music and Fishing: Combine your passion for fishing with music by creating relaxing piano compositions inspired by fishing experiences. Share these compositions with your viewers, creating a unique ambiance for your channel.


Remember to tailor these ideas to your unique style and preferences, ensuring they align with your audience's interests and expectations. Good luck with your fishing YouTube channel!

"