In [25]:
import pandas as pd
import json
import numpy as np
import nltk
from nltk.corpus import stopwords
import re
import gensim
import math

In [26]:
global AVG_TAGS_PER_VIDEO, US_CA_GB_TOKEN_CORPUS, US_VIDEOS_DF, US_FINAL_DF
global CA_VIDEOS_DF, CA_FINAL_DF, GB_VIDEOS_DF, GB_FINAL_DF, US_CA_GB_FINAL_DF

In [27]:
#get rid of the punctuations and set all characters to lowercase
RE_PREPROCESS = r'\W+|\d+' #the regular expressions that matches all non-characters

#get rid of punctuation and make everything lowercase
#the code belows works by looping through the array of text
#for a given piece of text we invoke the `re.sub` command where we pass in the regular expression, a space ' ' to
#subsitute all the matching characters with
#we then invoke the `lower()` method on the output of the re.sub command
#to make all the remaining characters
#the cleaned document is then stored in a list
#once this list has been filed it is then stored in a numpy array

In [28]:
RE_REMOVE_URLS = r'http\S+'

In [29]:
def processFeatures(desc):
    try:
        desc = re.sub(RE_REMOVE_URLS, ' ', desc)
        return re.sub(RE_PREPROCESS, ' ', desc)
    except:
        return " "

In [30]:
def processDataFrame(data_frame, country_code='US'):
    data_frame.sort_values(by=['video_id', 'trending_date'], ascending=True, inplace=True)
    grouped_videos = data_frame.groupby(['video_id']).last().reset_index()
    
    #Reading categories from the json file depending on country_code
    json_location = './data/' + country_code +'_category_id.json'
    with open(json_location) as data_file:
        data = json.load(data_file)    
    categories = []
    for item in data['items']:
        category = {}
        category['category_id'] = int(item['id'])
        category['title'] = item['snippet']['title']
        categories.append(category)

    categories_df = pd.DataFrame(categories)
    # Merging videos data with category data
    final_df = grouped_videos.merge(categories_df, on = ['category_id'])
    final_df.rename(columns={'title_y': 'category', 'title_x': 'video_name'}, inplace=True)
    
    # Creating a features column that consists all features used for prediction.
    # Also creating a corpus column that consists of all data required to train the model.
    final_df['video_features'] = ''
    final_df['video_corpus'] = ''
    
    if final_df['video_name'].astype(str) is not None:
        final_df['video_features'] += final_df['video_name'].astype(str)

    if final_df['channel_title'].astype(str) is not None:
        final_df['video_features'] += final_df['channel_title'].astype(str)
        
    if final_df['description'].astype(str) is not None:
        final_df['video_features'] += final_df['description'].astype(str)
    
    final_df['video_corpus'] += final_df['video_features']
    if final_df['tags'].astype(str) is not None:
        final_df['video_corpus'] += final_df['tags'].astype(str)
    
        
    final_df['video_features'] = final_df['video_features'].apply(processFeatures)
    final_df['video_corpus'] = final_df['video_corpus'].apply(processFeatures)
    return final_df

In [31]:
def removeStopwords(documents):
    stopwords_list = stopwords.words('english')
    processed_corpus = []
    for document in documents:
        processed_document = []
        for word in document.split():
            if word not in stopwords_list:
                processed_document.append(word)
        processed_corpus.append(processed_document)
    return processed_corpus

In [32]:
def processCorpus(feature_corpus):
    feature_corpus = [comment.lower() for comment in feature_corpus]
    processed_feature_corpus = removeStopwords(feature_corpus)
    return processed_feature_corpus

In [33]:
def trainModel(token_corpus):
    model = gensim.models.Word2Vec(sentences=token_corpus, min_count=1, size = 32)
    model.train(token_corpus, total_examples=model.corpus_count, epochs=model.iter)
    model.save('word2vec_model.w2v')
    return model

In [34]:
def recommendTags(word2vec_model, input_words = ['trump', 'president'], number_of_tags = 10, model_name = 'word2vec_model.w2v'):
    global US_CA_GB_TOKEN_CORPUS
    tags = []
    if word2vec_model is None:
        gensim.models.Word2Vec.load(model_name)
    try:
        tags = word2vec_model.most_similar(positive=input_words, topn=number_of_tags)
    except:
        US_CA_GB_TOKEN_CORPUS.append(input_words)
        word2vec_model.build_vocab(US_CA_GB_TOKEN_CORPUS, update=True)
        word2vec_model.train(US_CA_GB_TOKEN_CORPUS, total_examples=word2vec_model.corpus_count, epochs=word2vec_model.iter)
        word2vec_model.save(model_name)
        tags = word2vec_model.most_similar(positive=input_words, topn=number_of_tags)
    
    return tags

In [35]:
def calculateAvgTagsPerVideo():
    total_tags = 0
    for tag_list in US_CA_GB_FINAL_DF['tags'].values:
        total_tags += len(tag_list.split('|'))
    return math.ceil(total_tags/len(US_CA_GB_FINAL_DF))

Running the algorithm for US, CA, and GB videos

In [36]:
def initializeAndFetchRecommendations():
    global US_VIDEOS_DF, US_FINAL_DF, CA_VIDEOS_DF, CA_FINAL_DF, GB_VIDEOS_DF, GB_FINAL_DF
    global US_CA_GB_FINAL_DF, US_CA_GB_FINAL_DF, AVG_TAGS_PER_VIDEO, US_CA_GB_TOKEN_CORPUS
    US_VIDEOS_DF = pd.read_csv('./data/USvideos.csv')
    US_FINAL_DF = processDataFrame(US_VIDEOS_DF, country_code='US')
    
    CA_VIDEOS_DF = pd.read_csv('./data/CAvideos.csv')
    CA_FINAL_DF = processDataFrame(CA_VIDEOS_DF, country_code='CA')
    
    GB_VIDEOS_DF = pd.read_csv('./data/GBvideos.csv')
    GB_FINAL_DF = processDataFrame(GB_VIDEOS_DF, country_code='GB')
        
    US_CA_GB_FINAL_DF = pd.concat([US_FINAL_DF, CA_FINAL_DF, GB_FINAL_DF])
    US_CA_GB_FINAL_DF.reset_index(inplace=True)
    
    US_CA_GB_TOKEN_CORPUS = processCorpus(US_CA_GB_FINAL_DF['video_corpus'].values)
    US_CA_GB_FINAL_DF['video_features'] = processCorpus(US_CA_GB_FINAL_DF['video_features'].values)
    US_CA_GB_FINAL_DF['video_corpus'] = US_CA_GB_TOKEN_CORPUS
        
    AVG_TAGS_PER_VIDEO = calculateAvgTagsPerVideo()
    word2vec_model = trainModel(US_CA_GB_TOKEN_CORPUS)
    tags = recommendTags(word2vec_model, input_words=['trump', 'president'], 
                     number_of_tags=AVG_TAGS_PER_VIDEO, 
                     model_name = 'word2vec_model.w2v')
    print(tags)
    

In [37]:
initializeAndFetchRecommendations()

[('donald', 0.9887422919273376), ('pocahontas', 0.9632692337036133), ('timespresident', 0.9601442813873291), ('trumpgolden', 0.9559758901596069), ('stupidity', 0.9556787014007568), ('clinton', 0.9549026489257812), ('rodrigo', 0.9482265710830688), ('presidency', 0.9479796886444092), ('napakahusay', 0.9451906681060791), ('pinoy', 0.9351366758346558), ('politcs', 0.9347646236419678), ('wikileaks', 0.9342126846313477), ('hillary', 0.9337647557258606), ('duterte', 0.9330844283103943), ('summit', 0.9324768781661987), ('newshourpresident', 0.9313408136367798), ('ukdonald', 0.9309765100479126), ('colluding', 0.9304609298706055), ('ndonal', 0.9290434122085571)]


## Dividing the dataset into training (80%) and testing sets (20%).

In [38]:
len(US_CA_GB_FINAL_DF)

3029

In [39]:
np.random.seed(seed=13579)
us_ca_gb_final_df_shuffled = US_CA_GB_FINAL_DF.iloc[np.random.permutation(len(US_CA_GB_FINAL_DF))]

In [40]:
train_size = 0.80
us_ca_gb_df_train = us_ca_gb_final_df_shuffled[:int((train_size)*len(us_ca_gb_final_df_shuffled))]
us_ca_gb_df_test = us_ca_gb_final_df_shuffled[int((train_size)*len(us_ca_gb_final_df_shuffled)):]

In [41]:
w2v_train_model = gensim.models.Word2Vec(sentences=us_ca_gb_df_train['video_corpus'], min_count=1, size = 32)
w2v_train_model.train(us_ca_gb_df_train['video_corpus'].values, total_examples=w2v_train_model.corpus_count, epochs=w2v_train_model.iter)
w2v_train_model.save('w2v_train_model.w2v')

In [42]:
w2v_train_model = gensim.models.Word2Vec.load('w2v_train_model.w2v')

In [43]:
predicted_tags = []
for idx in us_ca_gb_df_test.index:
    video_features = us_ca_gb_df_test.loc[idx, 'video_features']
    if len(video_features) > 0:
        tag_probability_list = recommendTags(w2v_train_model, input_words=video_features,
                                             number_of_tags=AVG_TAGS_PER_VIDEO, 
                                             model_name = 'w2v_train_model.w2v')
        predicted_tags.append([tag[0] for tag in tag_probability_list])

    

NameError: name 'US_CA_GB_TOKEN_CORPUS' is not defined

In [None]:
us_ca_gb_df_test['predicted_tags'] = predicted_tags

In [43]:
us_ca_gb_df_test['tags'] = us_ca_gb_df_test['tags'].apply(processFeatures)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [44]:
match_found = 0
count = 0
for idx in us_ca_gb_df_test.index:
    tag_list = us_ca_gb_df_test.loc[idx,'tags'].lower()
    tag_list = tag_list.split(' ')
    predicted_tag_list = us_ca_gb_df_test.loc[idx, 'predicted_tags']
    
    for i in range(len(tag_list)):
        if tag_list[i] in predicted_tag_list:
            match_found += 1
            break
    count += 1
print('Match found: ', match_found )
print('Accuracy: ', match_found/len(us_ca_gb_df_test))
    
    


['battle', 'universe', 'nerf', 'battle', 'universe', 'nerf', 'challenge', 'epic', 'nerf', 'battle', 'nerf', 'gun', 'fight', 'nerf', 'fight', 'team', 'edge', 'hi', '', 'hi', '', 'nerf', 'hi', '', 'studios', 'matthias', 'vlogs', 'nerf', 'nurf', 'nerf', 'war', 'nerf', 'battle', 'nerf', 'dungeons', 'dragons', 'team', 'edge', 'nerf', 'nerf', 'war', 'nerf', 'attack', '']
['involved', 'key', 'bialik', 'deserves', 'lives', 'working', 'rating', 'nostalgia', 'language', 'feels', '親愛的', 'begin', 'global', 'helping', 'wildlife', 'interesting', 'njim', 'disability', 'warming']
['graham', 'norton', 'graham', 'norton', 'show', 'official', 'entertainment', 'chat', 'show', 'graham', 'norton', 'full', 'episodes', 'the', 'graham', 'norton', 'show', 'full', 'eps', 'graham', 'norton', 'full', 'eps', 'watch', 'graham', 'norton', 'online', 'jason', 'momoa', 'hugh', 'grant', 'sarah', 'millican', 'kelly', 'clarkson', 'graham', 'norton', 'red', 'chair', 'game', 'of', 'thrones', '']
['trident', 'rick', 'katharin

['sri', 'lanka', 'sinhala', 'tv', 'derana', 'derana', 'dtv', 'lanka', 'premium', 'entertainment', 'deweni', 'inima', 'dewana', 'inima', 'deveni', 'inima', 'devana', 'inima', 'cricket', 'sri', 'lanka', 'cricket', 'cricket', 'match', 'dawana', 'inima', 'deweni', 'enima', 'deveni', 'enima', 'deweni', '', 'nd', 'innings', 'innings', 'second', 'inning', 'cricket', 'team', 'saranga', 'mendis', 'janaka', 'siriwardana', 'sidu', 'saman', 'edirimuni', 'best', 'teledrama', 'sri', 'lankan', 'teledrama', 'top', 'teledrama', 'sri', 'lanka', 's', 'best', 'play', 'cricket', 'love', 'romantic', 'school', 'school', 'love', 'adventure', 'fight', '']
['shayad', 'shehroz', 'ary', 'tanhai', 'séquence', 'dolby', 'allah', 'alif', 'philippine', 'naired', 'thediplomats', 'nbuffon', 'aisi', 'nandriod', 'schedule', 'directv', 'paramount', 'harpalgeo', 'relative']
['late', 'night', 'seth', 'meyers', 'closer', 'look', 'roy', 'moore', 'trump', 'putin', 'nbc', 'nbc', 'tv', 'television', 'funny', 'talk', 'show', 'come

['gourmet', 'corners', 'inventive', 'appetite', 'wellness', 'meal', 'shina', 'ribs', 'components', 'nestle', 'bledor', 'cravings', 'cornbread', 'nutracelle', 'pork', 'steaks', 'prioritization', 'potato', 'lanes']
['essiebutton', 'estée', 'lalonde', 'estee', 'lalonde', 'essie', 'button', 'essie', 'no', 'makeup', 'makeup', 'drugstore', 'makeup', 'tutorial', 'fashion', 'cute', 'outfits', 'makeup', 'tutorials', 'healthy', 'foods', 'eye', 'makeup', 'outfit', 'ideas', 'natural', 'makeup', 'easy', 'healthy', 'recipes', 'skin', 'care', 'products', 'best', 'makeup', 'beauty', 'favourites', 'favorites', 'november', '', '', 'haul', 'new', 'in', 'glossier', 'you', 'perfume', 'review', 'ouai', 'messy', 'hair', 'charlotte', 'tilbury', 'skincare', 'lancome', 'highlighter', 'glow', 'scandinavian', 'rupi', 'kaur', 'fashion', '']
['becm', 'nhome', 'nglass', 'naddiction', 'n___', 'n____________', 'nsloth', 'paperweight', '젤브라운', 'ntravel', 'pillow', 'n미디엄', 'nyellow', 'laundry', 'nstationery', '만다린', 'pe

['todrick', 'hall', 'pentatonix', 'gay', 'lgbt', 'justice', 'league', 'justice', 'league', 'movie', 'batman', 'wonder', 'woman', 'cyborg', 'the', 'flash', 'aquaman', 'j', 'fla', 'j', 'fla', 'samantha', 'harvey', 'clara', 'marz', 'mario', 'bautista', 'gary', 'clark', 'jr', 'come', 'together', 'come', 'together', 'song', 'cover', 'come', 'together', 'cover', 'dc', 'comics', 'super', 'heros', 'dc', 'entertainment', 'warner', 'brothers', 'wb', 'pictures', 'warner', 'bros', 'musical', 'beatles', 'beatle', 'cover', 'music', 'video', 'sam', 'harvey', 'alter', 'ego', 'superhero', 'beyonce', 'taylor', 'swift', '']
['blues', 'trailerblockbusterbasmati', 'avengers', 'prolific', 'ziegler', 'confident', 'culturally', 'isaacs', 'usfind', 'emoji', 'reverend', 'broadwaytodrickhallclick', 'recreating', 'raja', 'nace', 'portugal', 'roundtables', 'nworld', 'insensitive']
['sia', 'santa', 's', 'coming', 'for', 'us', 'holiday']
['snowmansiavevomove', 'ziegler', 'frosty', 'chemins', 'snowman', 'zhu', 'rayve

['perception', 'photographs', 'nyellow', 'yu', 'nsweat', 'ddr', 'chromeo', 'nstream', 'cassette', 'migos', 'geazymusicvevonew', 'namazon', 'liquidwe', 'damned', 'rosterteam', 'everywhere', 'ngoogle', 'pinkvevop', 'bendy']
['green', 'day', 'back', 'in', 'the', 'usa', 'god', 's', 'favorite', 'band', 'billie', 'joe', 'armstrong', 'tre', 'cool', 'mike', 'dirnt', '']
['beautiful', 'frosty', 'snowmansiavevomove', 'albums', 'single', 'omid', 'singles', 'watt', 'olaf', 'alternative', 'featuring', 'nstream', 'soulful', 'arrière', 'profiter', 'nlisten', 'worldstarhiphop', 'hybridisation', 'touring']
['ricky', 'rebel', 'glambert', 'pop', 'star', 'idol', 'boy', 'solo', 'artist', 'glam', 'lgbt', 'gay', 'queer', 'time', 'video', 'music', 'adam', 'lambert', 'lady', 'gaga', 'britney', 'spears', 'madonna', '']
['iiii', 'letters', 'tar', 'referred', 'djawadi', 'nuvola', 'trippy', 'tiges', 'nmailing', 'bleeding', 'datant', 'ramin', 'nivea', 'lakey', 'bianca', 'sight', 'freaky', 'audionautix', 'nbetter']


['africananoook', 'senegaltv', 'maroditv', 'omour', 'deviamagal', 'divamagal', 'devamagal', 'வமகள', 'geokhaani', 'deivamahal', 'geohar', 'ய', 'amasclevver', 'devaimagal', 'த', 'درامامسلسل', 'sn', 'bandfreaks', 'الاله']
['carmax', 'lamborghini', 'miura', 'miura', 'carmax', 'lamborghini', 'carmax', 'took', 'lamborghini', 'to', 'carmax', 'supercar', 'carmax', 'selling', 'my', 'car', 'at', 'acrmax', 'lambo', 'carmax', 'stradman', 'vehicle', 'virgins', 'doug', 'demuro', '']
['lamborghini', 'carest', 'pocketing', 'briefed', 'livechance', 'behave', 'nbetter', 'confused', 'alright', 'beetle', 'zhen', 'selfish', 'prefer', 'indulgents', 'scarab', 'vaughn', 'mobilité', 'titter', 'wishbone']
['telefoon', 'babbu', 'maan', 'mehfil', 'mitran', 'di', 'new', 'punjabi', 'songs', 'the', 'babbu', 'maan', 'store', 'hey', 'yolo', 'swag', 'music', 'latest', 'punjabi', 'songs', '', '', 'latest', 'punjabi', 'songs', 'new', 'punjabi', 'songs', '', '', 'punjabi', 'music', 'punjabi', 'music', '', '', 'punjabi', '

['emotional', 'meghan', 'markle', 'parents', 'thomas', 'doria', 'ragland', 'congratulations', 'engaged', 'harry', 'prince', 'harry', 'meghan', 'markle', '']
['nms', 'projected', 'hospital', 'dunne', 'whirlwind', 'proposal', 'cancel', 'humanitarian', 'aged', 'announcers', 'detained', 'netflixnetflixroyalty', 'marry', 'royal', 'commit', 'maguire', 'campaigner', 'taxation', 'hospitals']
['電視劇', '大陸電視劇', '猎场', '职场', '商战', '爱情', '都市', '胡歌', '陈龙', '孙红雷', '张嘉译', '祖峰', '李强', '胡兵', '菅纫姿', '万茜', '章龄之', '徐阁', '柯蓝', '罗海琼', '王茜', '董勇', '赵立新', 'huge', 'chenlong', 'sunhonglei', 'zhangjiayi', 'zufeng', 'liqiang', 'hubing', 'wanqian', 'zhanglingzhi', 'zhaolixin', '']
['组不怕老婆联盟', '造反', '三位老公', 'nstockings', '全能好丈夫', 'stockings', '赛制升级听歌猜人更难', 'n미디엄', 'n北京時間', '鄧紫棋強勢加盟', '만다린', '北京時間', 'matalan', 'marker', '吴京户外生存技能获赞', '剧情介绍', '라이너', '블랙', 'n렌즈']
['the', 'x', 'factor', 'x', 'factor', 'x', 'factor', 'uk', 'x', 'factor', '', '', 'simon', 'cowell', 'nicole', 'sharon', 'louis', 'talent', 'auditions', 'judge

['awards', 'music', '', '', 'american', 'music', 'awards', 'wochit', 'red', 'carpet', 'fashion', 'hailee', 'steinfeld', 'selena', 'gomez', 'e', 'news', 'e', 'entertainment', 'e', 'news', 'now', 'news', 'e', 'top', 'stories', 'pop', 'culture', 'breaking', 'news', 'breaking', 'live', 'interviews', 'e', 'style', 'collective', 'trending', 'jason', 'kennedy', 'catt', 'sadler', 'sibley', 'scoles', 'celeb', 'news', 'gossip', '']
['sza', 'sports', 'great', 'connect', 'mateo', 'interviews', 'power', 'primetime', 'coverage', 'etonline', 'local', 'vevo', 'fm', 'ema', 'worldstarhiphop', 'blackoutu', 'concerts', 'visit', 'bts']
['kandee', 'johnson', 'kandee', 'beauty', 'how', 'to', 'new', 'kylie', 'jenner', 'kylie', 'lip', 'kit', 'kylie', 'holiday', 'makeup', 'review', 'testing', 'makeup', 'kylie', 'cosmetics', 'kylie', 'cosmetics', 'holiday', '', '', 'holiday', 'makeup', 'holiday', 'wet', 'set', 'sugar', 'lip', 'spice', 'lip', 'kylie', 'cosmetics', 'review', 'testing', 'out', 'kandy', 'johnson', '

['baking', 'snacks', 'delicious', 'tasty', 'chocolate', 'ware', 'mathesonmunchiescook', 'snack', 'ncheesecloth', 'decorating', 'clusters', 'holidays', 'chai', 'tasted', 'saffron', 'adding', 'enlightening', 'dishes', 'taipeimikey']
['thomas', 'sanders', 'thomas', 'sanders', 'vine', 'thomas', 'sanders', 'vlog', 'thomas', 'sanders', 'channel', 'vine', 'comedy', 'thomas', 'sanders', 'ultimate', 'storytime', 'thomas', 'sanders', 'vines', 'storytime', 'guy', 'narrator', 'guy', 'vine', 'guy', 'musical', 'musicals', 'broadway', 'song', 'sing', 'singing', 'singers', 'songs', 'game', 'guitar', 'piano', 'theatre', 'theater', 'once', 'the', 'musical', 'once', 'the', 'producers', 'holy', 'musical', 'b', 'tman', 'holy', 'musical', 'batman', 'be', 'more', 'chill', 'urinetown', 'women', 'on', 'the', 'verge', 'of', 'a', 'nervous', 'breakdown', '', '', 'chump', 'street', 'hadestown', 'death', 'note', 'deathnote', '']
['available', 'nmy', 'follow', 'share', 'guys', 'blog', 'awesome', 'want', 'playlist', 

['⓸', 'matching', 'snl', 'monologues', 'bottoms', 'hairlate', 'birdlate', 'permanently', 'longer', 'njigar', 'nbus', 'refresh', 'meyerscarey', 'remake', 'show', 'nbastards', 'talk', 'balsamoon', 'kimmel']
['', 's', 'music', '', 's', 'songs', 'madonna', 'do', 'college', 'kids', 'know', '', 's', 'music', '', '', 'react', 'do', 'they', 'know', 'it', 'do', 'they', 'know', 'it', 'react', 'reaction', 'thefinebros', 'fine', 'brothers', 'fine', 'brothers', 'entertainment', 'finebros', 'fine', 'bros', 'fbe', 'laugh', 'challenge', 'try', 'not', 'to', 'laugh', 'try', 'to', 'watch', 'without', 'laughing', 'or', 'grinning', 'react', 'gaming', 'kids', 'versus', 'food', 'staff', 'reacts', 'lyric', 'breakdown', 'do', 'teens', 'know', 'do', 'parents', 'know', 'do', 'kids', 'know', 'do', 'elders', 'know', 'stevie', 'wonder', '', 's', 'music', 'hits', '', '', 's', '']
['christianmeme', 'zhu', 'cutter', 'galy', 'teters', 'iconjadensmithvevosyre', 'nerds', 'shrooty', 'slap', 'coordinators', 'mishra', 'nind

['wwe', 'world', 'wrestling', 'entertainment', 'wrestling', 'wrestler', 'wrestle', 'superstars', 'क', 'श', 'त', 'पहलव', 'न', 'डब', 'ल', 'डब', 'ल', 'ई', 'म', 'च', 'स', 'परस', 'ट', 'र', 'व', 'य', 'वस', 'य', 'क', 'क', 'श', 'त', 'مصارعه', 'raw', 'paul', 'heyman', 'brock', 'lesnar', 'raw', 'results', 'monday', 'night', 'raw', 'raw', 'winners', 'raw', 'videos', 'monday', 'night', 'raw', 'results', 'wwe', 'proposal', 'wwe', 'engagement', '']
['proposing', 'microtransaction', 'smack', 'timenext', 'strikeforce', 'win', 'superstar', 'keow', 'edubirdie', 'rematch', 'shaqtin', 'nba', 'rookie', 'capped', 'whistle', 'ufc', 'ea', 'championship', 'shield']
['valérie', 'plante', 'mairesse', 'montréal', 'tout', 'le', 'monde', 'en', 'parle', 'tlmep', 'maire', '']
['prise', 'cultivent', 'lui', 'quelques', 'aies', 'univers', 'ça', 'société', 'longtemps', 'expliquez', 'nbattletube', 'votre', 'vint', 'difficiles', 'controverse', 'personnes', 'rejoint', 'besoin', 'roquette']
['punjabi', 'songs', 'punjabi', 'b

['goodies', 'halmark', 'wanting', 'visiontime', 'philosophie', 'mullaigh', 'kuripu', 'undertake', 'nbilal', 'eleanor', 'tomscottgo', 'vikatantv', 'apne', 'disclosed', 'foraged', 'tget', 'vani', 'alot', 'nehraji']
['morning', 'joe', 'joe', 'scarborough', 'mika', 'brzezinski', 'willie', 'geist', 'msnbc', 'msnbc', 'news', 'msnbc', 'live', 'msnbc', 'tv', 'robert', 'mueller', 'investigation', 'mueller', 'investigating', 'jared', 'kushner', 'jared', 'kushner', 'contact', 'with', 'russia', 'jared', 'kushner', 'contact', 'with', 'israel', 'jared', 'kushner', 'russia', 'trump', 'white', 'house', 'fbi', 'donald', 'trump', 'presidential', 'elections', 'robert', 'mueller', 'russia', 'investigation', 'gop', '', '', 'election', 'trump', 'administration', 'vladimir', 'putin', 'mueller', 'investigation', 'russia', 'probe', 'russia', 'collusion', '']
['committee', 'elites', 'zeroes', 'cooperating', 'muller', 'issues', 'congress', 'jejomar', 'ottawa', 'corrupt', 'discredit', 'newsreproter', 'coordinate'

['christianmeme', 'npurchase', 'gdw', 'nerds', 'btw', 'peopleareawesome', 'nverse', 'iconjadensmithvevosyre', 'nbrooklin', 'nquality', 'know', 'ntea', 'nlydon', 'downnfvevoofficial', 'zhu', 'dubstep', 'molen', 'via', 'notifications']
['creaders', '万维tv', '万维读者网', '万维读报', '川普', '习近平', '双十一', '马云', '淘宝', '天猫', '沙特', '王子', '国王', '台湾', '解放军', '特朗普', '']
['creaders', '万维读者网', '万维tv', '淘宝', '완벽한', '双十一', '川普', '대조', '马云', '위', '中国', 'diştaş', 'break비트', '남성', '보컬의', '抓紧所剩不多的时间为素雅打算', '돋보이는', '国王', '王子']
['true', 'events', 'sony', 'entertainment', 'channel', 'conspiracy', 'set', 'tv', 'serial', 'crime', 'patrol', 'crime', 'patrol', 'diall', '', '', 'drama', 'anup', 'soni', 'affair', 'police', 'crimes', 'against', 'women', 'sony', 'tv', 'investigation', 'india', 'murder', 'case', 'indian', 'television', 'anoop', 'soni', 'murder', 'crimes', 'in', 'india', 'crime', 'patrol', 'latest', 'episode', 'hindi', 'crime', 'serials', 'sony', 'liv', 'crime', 'dial', '', '', 'setindia', 'india', 'crime', 'c

['morning', 'week', 'better', 'sunday', 'tonight', 'collider', 'days', 'snl', 'daily', 'biggest', '⓸', 'host', 'macysparade', 'obamacare', 'last', 'exclusive', 'corden', 'nmestalla', 'topic']
['dark', 'web', 'deep', 'web', 'darknet', 'dark', 'web', 'net', 'deep', 'dark', 'net', 'ross', 'william', 'ulbricht', 'ross', 'william', 'ulbricht', 'ross', 'ulbricht', 'silk', 'road', 'silk', 'road', 'fbi', 'tor', 'onion', 'i', 'p', 'freenet', 'hacker', 'anonymous', 'bitcoin', 'internet', '']
['pleurer', 'explication', 'loup', 'avis', 'expliquez', 'vienne', 'filme', 'cultivent', 'bossé', 'véritables', 'audiovisuel', 'téléphone', 'nmais', 'monde', 'droit', 'nmot', 'société', 'viennent', 'agréables']
['makeup', 'makeup', 'tutorial', 'surgery', 'transformation', 'surgery', 'makeup', 'tutorial', 'before', 'and', 'after', 'christen', 'dominique', 'christen', 'dominique', 'make', 'up', 'plastic', 'surgery', 'makeup', 'beauty', 'power', 'of', 'makeup', 'plastic', 'surgery', 'glam', 'full', 'coverage', '

['sistersinsweatgatorade', 'finalejames', 'boyfriend', 'movlogsbusiness', 'charleshi', 'penny', 'partrdge', 'singhiisuperwomanii', 'discussed', 'vire', 'tutorialjames', 'shamea', 'kendall', 'parks', 'subzero', 'wolf', 'galway', 'pebbleswe', 'lauhter']
['miranda', 'sings', 'tinder', 'tinder', 'takeover', 'miranda', 'sings', 'tinder', 'hijacks', 'dating', 'account', 'hijacks', 'tinder', 'account', 'dating', 'profile', 'hijacks', 'tinder', 'miranda', 'sings', 'funny', 'miranda', 'sings', '', '', 'colleen', 'miranda', 'sings', 'miranda', 'sings', 'challenge', 'miranda', 'sings', 'songs', 'mirandasings', '', 'miranda', 'sings', 'tinder', 'account', 'tinder', 'takeover', 'miranda', 'sings', 'miranda', 'sings', 'vanity', 'fair', 'vanity', 'fair', 'vanity', 'fair', 'magazine', 'vf', '']
['nerdwriter', 'brings', 'things', 'strangest', 'tips', 'elsagate', 'nparking', 'ways', 'fearless', 'dating', 'essays', 'calbelhere', 'fairvictoria', 'wiredwiredkeanu', 'ballinger', 'seriesfabio', 'weird', 'dis

['team', 'liquid', 'doublelift', 'piglet', 'liquid', 'na', 'lcs', 'lcs', '', '', 'pobelter', 'reignover', 'dardoch', 'tsm', 'tsm', 'doublelift', 'xmithie', 'clg', 'exodia', 'rebirth', 'squad', 'tl', 'tlwin', 'lets', 'go', 'liquid', 'liquid', '', 'franchise', 'feature', 'announcment', 'official', 'liquid', 'league', 'of', 'legends', 'lol', 'esports', 'dota', '', 'na', 'lcs', 'esports', 'north', 'america', 'league', 'championship', 'series', 'lol', 'roster', 'starting', 'na', 'lolesports', 'riot', 'games', 'nalcs', '']
['us', 'nwant', 'nget', 'follow', 'nfacebook', 'connected', 'nblog', 'blog', 'nfollow', 'tickets', 'gotta', 'sure', 'available', 'stay', 'soon', 'nsubscribe', 'nmerch', 'nwebsite', 'visit']
['jordan', 'sather', 'destroying', 'the', 'illusion', 'pedogate', 'pizzagate', 'israel', 'saudi', 'arabia', 'storm', '', 'chan', 'qanon', 'follow', 'the', 'white', 'rabbit', 'fiji', 'conspiracy', 'deep', 'state', 'uranium', 'one', 'trafficking', 'new', 'world', 'order', 'hillary', 'clin

['extreme', 'tan', 'tanning', 'tanning', 'bed', 'how', 'to', 'get', 'best', 'tan', 'black', 'to', 'white', 'white', 'to', 'tan', 'how', 'to', 'apply', 'fake', 'tan', 'tanning', 'routine', 'best', 'fake', 'tan', 'how', 'to', 'tan', 'faster', 'in', 'the', 'sun', 'how', 'to', 'make', 'tan', 'last', 'tanned', 'skin', 'tan', 'makeup', '']
['captioning', 'npatreon', 'suggest', 'nfollow', 'nproleter', 'rekt', 'ndude', 'ukbikeskills', 'provided', 'nbarnes', 'nmerch', 'nfacebook', 'tanning', 'royy_ledger', 'cc', 'nposters', 'spreadshirt', 'phosphate', 'audioblocks']
['reaction', 'time', 'reaction', 'teens', 'react', 'elders', 'react', 'kids', 'react', 'free', 'time', 'tal', 'fishman', 'family', 'friendly', 'facts', 'top', 'children', 'family', 'friendly', 'learn', 'game', 'time', '']
['follow', 'likes', 'working', 'nfind', 'livingbiginatinyhouse', 'connect', 'nrecent', 'polls', 'social', 'safe', 'ntwitter', 'share', 'sponsorship', 'say', 'enjoy', 'enge', 'videos', 'ntumblr', 'like']
['tanner', 

['captioning', 'npatreon', 'suggest', 'nfollow', 'nproleter', 'rekt', 'ndude', 'ukbikeskills', 'provided', 'nbarnes', 'nmerch', 'nfacebook', 'tanning', 'royy_ledger', 'cc', 'nposters', 'spreadshirt', 'phosphate', 'audioblocks']
['nails', 'nail', 'art', 'nail', 'tutorial', 'beauty', 'tutorial', 'nail', 'art', 'tutorial', 'diy', 'nails', 'easy', 'nail', 'art', 'diy', 'nail', 'art', 'cute', 'nail', 'art', 'simply', 'nailogical', 'holosexual', 'holo', 'holographic', 'diamond', 'glitter', 'holo', 'glitter', 'coffee', 'latte', 'cappuccino', 'diy', 'latte', 'holo', 'cappuccino', 'diamond', 'cappuccino', 'glitter', 'coffee', 'gold', 'coffee', 'edible', 'glitter', 'food', 'glitter', 'drink', 'glitter', 'disco', 'dust', 'cake', 'glitter', 'fancy', 'cappucino', 'diy', 'cappuccino', 'simplybakelogical', 'edible', 'holo', '']
['nailogicalwhen', 'ツ', 'thumbnail', 'notspons', '_famous', 'clickbait', '_believe', '_calling', '_awakened', 'nbag', 'nsally', 'coilgun', 'checked', 'n_______________________

['recent', 'date', 'go', 'bbc', 'female', 'marketplace', 'happening', 'new', 'alleged', 'order', 'events', 'latest', 'find', 'performanceclevver', 'taken', 'atlantis', 'live', 'biggest', 'rokshok']
['first', 'date', 'animation', 'animated', 'short', 'shorts', 'animation', 'shorts', 'cartoon', 'ihascupquake', 'redb', '', 'cupquake', 'red', 'cupquake', 'and', 'red', 'first', 'date', 'ihascupquake', 'first', 'date', 'funny', 'animation', 'funny', 'cartoon', 'funny', 'first', 'date', 'movies', 'jlo', 'anaconda', 'married', 'couple', 'couple', 'gamer', 'couple', 'our', 'first', 'date', 'ihascupquake', 'animated', 'cupquake', 'animation', 'ihascupquake', 'animation', '']
['check', 'awesome', 'read', 'much', 'amazing', 'available', 'share', 'watch', 'topic', 'get', 'nsubscribe', 'want', 'liked', 'playlist', 'follow', 'time', 'better', 'hit', 'nwebsite']
['shawn', 'johnson', 'andrew', 'east', 'shawn', 'east', 'shawn', 'and', 'andrew', 'olympian', 'nfl', 'player', 'athletes', 'vlog', 'couples',

['deivamagal', 'deiva', 'magal', 'deiva', 'magal', 'serial', 'vikatantv', 'deivamagal', 'episode', 'today', 'deivamagal', 'today', 'deivamagal', 'today', 'episode', 'deivamagal', 'episode', '', '', 'deivamagal', '', '', 'november', '', '', 'deiva', 'magal', 'serial', '', '', '', '', '', '', 'deiva', 'magal', 'episode', '', '', 'த', 'ய', 'வமகள', 'devamagal', 'deivamagal', 'episode', 'deviamagal', 'deivamahal', 'devaimagal', 'divamagal', '']
['thendral', 'வமகள', 'kanagu', 'ய', 'nithish', 'த', 'devamagal', 'favorite', 'devaimagal', 'deivamahal', 'ygs', 'clevverstyle', 'deviamagal', 'talkcollidervideoson', 'piriyamanaval', 'nthink', 'crocodiles', 'nwatch', 'fukreyreturnsfukrey']
['吳宗憲', '吳姍儒', '小明星大跟班', 'sandy', 'jacky', 'wu', '憲哥', '中天綜合台', 'ch', '', '中天電視台', '我愛小明星大跟班', '舒子晨', '康茵茵', '夏語心', '詹子晴', '成語蕎', '熊熊', '嘻小瓜', '斯亞', '小賴', '友情', '友誼', '舞蹈表演', '']
['中天新聞台', '每週一至週五晚間', '新聞深喉嚨', '賴岳謙', 'n國家與國際事務專家', '張斯綱', '巴紐案搬', '心中最軟的那塊', '能撇清', '億說', '中天電視', '張志豪', '又是黃志芳', '府', '否認錄音檔喬', '不受犯罪嫌疑

['favorite', 'site', 'durantthe', 'scene', 'factor', 'talkcollidervideoson', 'spoilers', 'hit', 'pledge', 'wonderful', 'stuttgart', 'compare', 'goodfella', 'welcome', 'deleted', 'greysloangrey', 'movies', 'upcoming', 'mistakes']
['kpop', '', 'thek', '원더케이', 'loen', '로엔', '뮤비', '티져', 'mv', 'teaser', '신곡', 'new', 'song', '한류', 'hallyu', 'ロエン', 'ミュージック', 'ミュージックビデオ', 'ケーポップ', '韓国の歌', 'アイドル', '韓流', '韓国', '아이돌', 'idol', 'kard', 'you', 'in', 'me', '비엠', '제이셉', '전소민', '전지우', 'bm', 'jseph', 'somin', 'jiwoo', '']
['용감한형제', '제이셉', '프로듀스', '비엠', '전소민', '느낌으로', '돌아온', '호흡이', '번의', '하이디', '에는', '무엘이', '앨범에서', '설레임을', 'twinkle', '풍미한', '사무엘', '아티스트가', '년대를']
['', 'none', '']
['counterattack', 'start', 'running', 'sublime', 'honor', 'maryse', 'wwethe', 'survivoar', 'superstar', 'stunning', 'tournamentcow', 'toward', 'chaotic', 'chopthe', 'miz', 'kosportsnetkevin', 'stage', 'mbappe', 'volley']
['marvel', 'comics', 'hip', 'hop', 'black', 'eyed', 'peas', 'masters', 'of', 'the', 'sun', 'the', 'zombie', '

['album', 'ft', 'naija', 'audio', 'everywhere', 'nollywood', 'storieshybrid', 'cellosget', 'managementbuy', 'nspotify', 'beautiful', 'mh', 'nnoel', 'swinsky', 'check', 'labrinth', 'together', 'nsubscribe', 'noelgallaghervevonoel']
['the', 'viral', 'fever', 'tvf', 'humour', 'qtiyapa', 'permanent', 'roommates', 'barely', 'speaking', 'drama', 'originals', 'comedy', 'sketch', '']
['ngladvin', 'mrighdeep', 'upaskar', 'picardo', 'umesh', 'nmanish', 'karmawala', 'badri', 'bhullar', 'nsejal', 'badala', 'dimple', 'nfilm', 'nbadri', 'khushpal', 'nashish', 'chavan', 'neeshita', 'bhatia']
['madison', 'beer', 'say', 'it', 'to', 'my', 'face', 'official', 'audio', 'ditto', 'music', 'pop', '']
['want', 'new', 'bad', 'version', 'online', 'nsubscribe', 'us', 'follow', 'playlist', 'awesome', 'nclick', 'music', 'perfect', 'ninstagram', 'good', 'great', 'nfacebook', 'go', 'amazing']
['noel', 'gallagher', 'noel', 'gallagher', 'high', 'flying', 'birds', 'oasis', 'chasing', 'yesterday', 'who', 'built', 'the',