In [3]:
def search_videos_chnl(query, channelId):
    """Search one channel for videos matching `query`, ordered by view count.

    Args:
        query: Search string passed as the ``q`` parameter.
        channelId: Channel id the search is restricted to.

    Returns:
        The raw response of a single ``search().list`` call (at most 50
        results). Pagination is not followed.
    """
    params = {
        'part': 'snippet',
        'q': query,
        'channelId': channelId,
        'type': 'video',
        'order': 'viewCount',
        'maxResults': 50,
    }
    # TODO: keep requesting pages while the response carries a next-page token.
    return youtube.search().list(**params).execute()

In [4]:
def search_videos(query):
    """Search for videos matching `query`, ordered by view count.

    Args:
        query: Search string passed as the ``q`` parameter.

    Returns:
        The raw response of a single ``search().list`` call (at most 50
        results). Pagination is not followed.
    """
    params = {
        'part': 'snippet',
        'q': query,
        'type': 'video',
        'order': 'viewCount',
        'maxResults': 50,
    }
    # TODO: keep requesting pages while the response carries a next-page token.
    return youtube.search().list(**params).execute()

In [2]:
def get_relevant_videos(query, search_response):
    """Keep only the search results whose title contains `query`.

    Args:
        query: Substring looked for in each title (case-sensitive match).
        search_response: Mapping with an ``'items'`` list; every item must
            provide ``['id']['videoId']`` and ``['snippet']['title']``.

    Returns:
        dict mapping video id -> title for the matching results.
    """
    titles_by_id = {
        item['id']['videoId']: item['snippet']['title']
        for item in search_response['items']
    }

    matching = {}
    for vid, title in titles_by_id.items():
        if query in title:
            matching[vid] = title
    return matching

In [3]:
def get_video_stats(video_dict):
    """Fetch snippet, statistics and content details for a set of videos.

    Args:
        video_dict: Mapping whose keys are video ids (the values are not
            used here).

    Returns:
        pandas.DataFrame with one row per video returned by the API and the
        columns ``video_id`` plus every field listed in ``stats_to_keep``.
        A field the API did not return is stored as None. An empty
        `video_dict` yields an empty DataFrame without calling the API.
    """
    # Output columns, grouped by the response part they are read from.
    stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                     'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                     'contentDetails': ['duration', 'definition', 'caption']
                    }
    # The API names this statistic 'favoriteCount'. The original column name
    # is kept so existing consumers of the DataFrame are unaffected, but the
    # value is now read from the key the API actually returns.
    api_field_names = {'favouriteCount': 'favoriteCount'}
    # NOTE(review): videos().list is understood to accept at most 50 ids per
    # call - confirm against the API reference.
    batch_size = 50

    video_ids = list(video_dict.keys())
    all_video_info = []

    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(batch)
        ).execute()

        for video in response.get('items', []):
            video_info = {'video_id': video['id']}
            for part, fields in stats_to_keep.items():
                # A missing part or field becomes None instead of being
                # hidden behind a bare ``except``.
                section = video.get(part) or {}
                for field in fields:
                    video_info[field] = section.get(api_field_names.get(field, field))
            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

In [4]:
def get_video_comments(video_df, min_likes=100):
    """Collect well-liked top-level comments for every video in `video_df`.

    Videos whose ``commentCount`` is null or the string ``"0"`` are skipped.
    Unlike the previous version, `video_df` itself is NOT modified; callers
    that relied on the in-place filtering must filter the frame themselves.

    Args:
        video_df: DataFrame with ``video_id`` and ``commentCount`` columns.
        min_likes: Minimum like count a comment needs to be kept
            (defaults to the previously hard-coded 100).

    Returns:
        pandas.DataFrame built from the records produced by
        ``get_clean_comments``; empty when no video has comments.
    """
    comment_counts = video_df['commentCount']
    has_comments = comment_counts.notnull() & (comment_counts != "0")
    video_ids = list(video_df.loc[has_comments, 'video_id'])

    all_cmt_info = []
    for vidId in video_ids:
        # Only the first page (up to 100 threads, ordered by relevance) is read.
        vid_comment_response = youtube.commentThreads().list(
            part='id, snippet',
            videoId=vidId,
            order='relevance',
            maxResults=100,
        ).execute()
        all_cmt_info = get_clean_comments(all_cmt_info, vid_comment_response, min_likes)

    return pd.DataFrame(all_cmt_info)

In [4]:
def get_channel_comments(channelId):
    """Collect every top-level comment thread related to a channel.

    Pages through ``commentThreads().list`` (100 threads per page, ordered
    by relevance) until a response no longer carries a ``nextPageToken``.

    Args:
        channelId: Channel id passed as ``allThreadsRelatedToChannelId``.

    Returns:
        pandas.DataFrame built from the records produced by
        ``get_clean_comments`` with a like threshold of 0.
    """
    base_params = {
        'part': 'id, snippet',
        'allThreadsRelatedToChannelId': channelId,
        'order': 'relevance',
        'maxResults': 100,
    }

    page = youtube.commentThreads().list(**base_params).execute()
    collected = get_clean_comments([], page, 0)

    # The loop condition alone decides when paging stops.
    while page.get('nextPageToken', None):
        page = youtube.commentThreads().list(
            pageToken=page['nextPageToken'], **base_params
        ).execute()
        collected = get_clean_comments(collected, page, 0)

    return pd.DataFrame(collected)

In [1]:
def get_clean_comments(all_cmt_info, comment_response, threshold=0):
    """Append flattened records for sufficiently liked top-level comments.

    Args:
        all_cmt_info: List the new records are appended to (mutated in place).
        comment_response: Comment-thread response; its ``'items'`` are read.
            A response without ``'items'`` contributes nothing.
        threshold: Minimum ``likeCount`` a top-level comment needs to be
            kept. Defaults to 0.

    Returns:
        The same `all_cmt_info` list, for convenient reassignment.

    Each record holds ``id``, ``reply_count`` and the fields in
    ``stats_to_keep``; a field missing from the comment snippet is None.
    """
    stats_to_keep = ('videoId', 'textOriginal', 'authorDisplayName', 'publishedAt', 'likeCount')

    for item in comment_response.get('items', []):
        thread_snippet = item['snippet']
        top_comment = thread_snippet['topLevelComment']
        cmt_snippet = top_comment['snippet']

        if cmt_snippet['likeCount'] < threshold:
            continue

        cmt_info = {
            'id': top_comment['id'],
            'reply_count': thread_snippet['totalReplyCount'],
        }
        # .get() replaces the bare ``except`` that used to hide every error.
        for k in stats_to_keep:
            cmt_info[k] = cmt_snippet.get(k)
        all_cmt_info.append(cmt_info)

    return all_cmt_info

In [5]:
def get_video_names(videoId):
    """Look up the title of the video with id `videoId`.

    Returns the ``snippet.title`` of the first item in the ``videos().list``
    response; raises IndexError when the response has no items.
    """
    response = youtube.videos().list(part='id, snippet', id=videoId).execute()
    first_item = response['items'][0]
    return first_item['snippet']['title']

In [1]:
def filtered_positive_comments(cmt):
    """Tell whether `cmt` contains any of the positive keywords.

    The check is a plain, case-sensitive substring test against a fixed
    keyword list.

    Args:
        cmt: Comment text.

    Returns:
        True if at least one keyword occurs in `cmt`, otherwise False.
    """
    positive_keywords = (
        'great', 'excellent', 'outstanding', 'brilliant', 'knowledge', 'respect',
        'subscribed', 'masterpiece', 'impressed', 'story', 'storytelling',
        'informational', 'informative', 'entertaining', 'appreciated', 'amazing',
        'awesome', 'gifted', 'storyteller', 'beautiful', 'wonderful', 'perfect',
        'insight', 'best', 'fan', 'epic', 'love', 'genius', 'wow', 'thank',
        'treasure', 'passion', 'pleasure', 'inform', 'educat', 'unreal',
        'incredible', 'quality', 'well done', 'magnificent', 'master',
        'appreciate', 'breakdown', 'kenshin', 'lawrence', 'expert', 'matrix',
    )
    return any(keyword in cmt for keyword in positive_keywords)

In [1]:
def get_author_comments(channelId, authorId):
    """Collect the top-level comments one author left on a channel's videos.

    The previous body referenced an undefined ``df_chnl``, called
    ``get_clean_comments`` without its threshold argument and never used
    `authorId`, so it could not run. This version pages through every
    comment thread related to the channel and keeps the threads whose
    top-level comment was written by `authorId`.

    Args:
        channelId: Channel id passed as ``allThreadsRelatedToChannelId``.
        authorId: Channel id of the comment author.
            NOTE(review): compared against
            ``snippet.authorChannelId.value`` of the top-level comment -
            confirm this is the identifier callers pass.

    Returns:
        pandas.DataFrame built from the records produced by
        ``get_clean_comments`` (like threshold 0) for the matching threads.
    """
    request_args = {
        'part': 'id, snippet',
        'allThreadsRelatedToChannelId': channelId,
        'order': 'relevance',
        'maxResults': 100,
    }

    def _written_by_author(item):
        cmt_snippet = item['snippet']['topLevelComment']['snippet']
        author_channel = cmt_snippet.get('authorChannelId') or {}
        return author_channel.get('value') == authorId

    author_cmt_info = []
    page_token = None
    while True:
        if page_token:
            request = youtube.commentThreads().list(pageToken=page_token, **request_args)
        else:
            request = youtube.commentThreads().list(**request_args)
        chnl_comment_response = request.execute()

        author_items = [item for item in chnl_comment_response.get('items', [])
                        if _written_by_author(item)]
        author_cmt_info = get_clean_comments(author_cmt_info, {'items': author_items}, 0)

        page_token = chnl_comment_response.get('nextPageToken')
        if not page_token:
            break

    return pd.DataFrame(author_cmt_info)

In [5]:
def get_comment_replies(comment, reply_response):
    """Append reply texts to the value stored under the first key of `comment`.

    Only the first ``(key, value)`` pair of `comment` is looked at. When
    ``comment['replies']`` is positive, that many ``textOriginal`` strings
    are read from ``reply_response['items']`` and appended to
    ``comment[first_key]``.

    Args:
        comment: Dict with a ``'replies'`` count; its first value must
            support ``append`` when the count is positive.
        reply_response: Mapping with an ``'items'`` list of replies.

    Returns:
        The value stored under the first key of `comment`.
    """
    for first_key, first_value in comment.items():
        reply_count = comment['replies']
        if reply_count > 0:
            for idx in range(reply_count):
                text = reply_response['items'][idx]['snippet']['textOriginal']
                comment[first_key].append(text)
        # Only the first entry of the dict is ever processed.
        break
    return first_value

In [1]:
def reply_response(parentId):
    """Retrieve the replies to one comment (first page, up to 100 replies).

    Args:
        parentId: Id of the comment whose replies are requested.

    Returns:
        The raw response of ``comments().list``.
    """
    request = youtube.comments().list(
        parentId=parentId,
        part='id, snippet',
        maxResults=100,
    )
    return request.execute()

In [3]:
def filtered_cmt_label(cmt, keyword_lst, index):
    """Tell whether `cmt` contains any keyword from `keyword_lst`.

    The previous loop stopped with False as soon as a non-matching keyword
    was equal to the last keyword, so a list with a duplicated last entry
    skipped the remaining keywords; an empty list returned None. Every
    keyword is now checked and the result is always a bool.

    Args:
        cmt: Comment text (case-sensitive substring match).
        keyword_lst: Sequence of keywords to look for.
        index: Unused; kept so existing callers keep working.

    Returns:
        True if at least one keyword occurs in `cmt`, otherwise False.
    """
    return any(w in cmt for w in keyword_lst)