<img src="resources/tag_metters_Logo.png" width="299" height="45">

## #Tag is invisible but it does actually track the trends
To understand how tags play the role by using it on NYTimes daily and how people really react about the topic by looking at other media platforms through NYTimes tags.

In [1]:
import sys
sys.executable

'/usr/local/opt/python/bin/python3.7'

In [2]:
sys.path

['/Users/hh/Documents/Pratt/Adv.ProjectsinVis/Monthly-Frequency-of-NYTimes-Tag',
 '/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python37.zip',
 '/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7',
 '/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/lib-dynload',
 '',
 '/Users/hh/Library/Python/3.7/lib/python/site-packages',
 '/usr/local/lib/python3.7/site-packages',
 '/usr/local/lib/python3.7/site-packages/IPython/extensions',
 '/Users/hh/.ipython']

In [4]:
import json
import requests
import pandas as pd
import numpy as np
import operator
import time
import praw
import nltk
import configparser

from apiclient import discovery
from datetime import datetime
from dateutil.relativedelta import relativedelta

from pytrends.request import TrendReq
from praw.models import MoreComments
from googleapiclient import discovery
from textblob import TextBlob
from pandas.io.json import json_normalize

import pprint
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey

from sqlalchemy.orm import sessionmaker

In [6]:
# Use CofigParser to safely store the password or key
config = configparser.ConfigParser()
config.read('key_pair.ini')

Times_key = config['Times']['key']

Reddit_client_id = config['Reddit']['client_id']
Reddit_client_secret = config['Reddit']['client_secret']
Reddit_username = config['Reddit']['username']
Reddit_password = config['Reddit']['password']
Reddit_user_agent = config['Reddit']['user_agent']

YOUTUBE_API_SERVICE_NAME = config['Youtube']['YOUTUBE_API_SERVICE_NAME']
YOUTUBE_API_VERSION = config['Youtube']['YOUTUBE_API_VERSION']
DEVELOPER_KEY = config['Youtube']['DEVELOPER_KEY']

### Storing Data using SQLalchemy

In [6]:
Base = declarative_base()

class Init(Base): 
    __tablename__ = 'metadata'
    __table_args__ = {'extend_existing': True}

    id = Column(Integer, primary_key=True)
    Tag = Column(String(255))
    Frequency = Column(String(255))
    Title = Column(String(255))
    Date = Column(String(25))
    Url = Column(String(255))
    img_URL = Column(String(255))

### New York Times API
https://developer.nytimes.com/apis

Archive API
- Finds all articles's metadata from archive APi (about 7000 every month): title, section_name, period, url, word_count, keywords

In [161]:
def get_NYTimes_metadata():

    # To get most frequently used tag in NYTimes
    today = str(datetime.today())
    
    monthly_archive = {}
    frequent_tags_archive = {}

    for yy in reversed(range(2019, int(today[:4]) + 1)):
        if str(yy) == today[:4]:
            ends = int(today[5:7]) + 1
            start = 1
        else:
            ends = 13
            if '0' in today[5:7]:
                start = int(today[6:7])
            else:
                start = int(today[5:7])
        for mm in reversed(range(start,ends)):
            print('--1--',yy,mm)
            parameters = {'api-key': Times_key}           
            archived_Url = 'https://api.nytimes.com/svc/archive/v1/'+ str(yy) +'/'+ str(mm) +'.json'
            archives = requests.get(archived_Url, params=parameters).json()

            monthly_article = []
            for a in archives['response']['docs']:
                articles = {}
                articles['title'] = a['headline']['main']
                articles['pub_date'] = a['pub_date'][:10]
                articles['url'] = a['web_url']
                if len(a['multimedia']) !=0 and a['multimedia'][0]['url']:
                    articles['thm_img'] = 'https://static01.nyt.com/' + a['multimedia'][0]['url']
                else:
                    articles['thm_img'] = 'no_image_found'
                articles['tags'] = [''.join(tag['value']) for tag in a['keywords']]
                monthly_article.append(articles)
            monthly_archive[str(yy)+'-'+str(mm)] = monthly_article
            
            tag_arr = []
            for m in monthly_article:
                for t in m['tags']:
                    for string in t.split(', '):
                        tag_arr.append(', '.join(string.split(', ')))
            
            count_tag = {}
            for tag in tag_arr:
                if tag in ['Trump', 'Donald J']:
                    tag = 'Donald Trump'
                if tag in ['Joseph R Jr', 'Biden']:
                    tag = 'Joe Biden'
                if tag in ['Brett M', 'Supreme Court (US)', 'Kavanaugh']:
                    tag = 'Brett Kavanaugh'
                if tag in ['Putin', 'Vladimir V']:
                    tag = 'Putin'
                if tag in ['Fla', 'Parkland']:
                    tag = 'Parkland'
                if tag in ['Coronavirus Aid', 'Relief', 'and Economic Security Act (2020)']:
                    tag = 'Coronavirus Aid, Relief, and Economic Security Act (2020)'
                if tag in ['School Shootings and Armed Attacks']:
                    tag = 'School Shootings'
                if tag in ['Shutdowns (Institutional)']:
                    tag = 'Shutdowns'
                if tag in ['New York City', 'NYC','NY)','New York State' ]:
                    tag = 'New York State'
                if tag in ['States (US)', 'United States']:
                     tag = 'United States'   
                if tag in ['Donald Trump', 'New York State','United States Politics and Government', 'Politics and Government', 'Books and Literature', 'Television', 'Movies', 'Real Estate and Housing (Residential)', 'United States', 'United States International Relations' 'Primaries and Caucuses', 'United States Economy', 'Elections']:
                    tag = ''

                if tag is not '':
                    if tag in count_tag:
                        count_tag[tag] += 1
                    else:
                        count_tag[tag] = 1   

#             # This variable is what we want to get in NYTimes
            tags_with_frequency = sorted(count_tag.items(),key=operator.itemgetter(1),reverse=True)[:11]
            frequent_tags_archive[str(yy)+'-'+str(mm)] = tags_with_frequency
    
    return monthly_archive, frequent_tags_archive

In [162]:
# monthly_archive = get_NYTimes_metadata()[0]
frequent_tags_archive = get_NYTimes_metadata()[1]

--1-- 2020 4
--1-- 2020 3
--1-- 2020 2
--1-- 2020 1
--1-- 2019 12
--1-- 2019 11
--1-- 2019 10
--1-- 2019 9
--1-- 2019 8
--1-- 2019 7
--1-- 2019 6
--1-- 2019 5
--1-- 2019 4


In [10]:
def get_NYTimes_metadata():

    # To get most frequently used tag in NYTimes
    today = str(datetime.today())
    monthly_archive = {}
    frequent_tags_archive = {}

    for yy in reversed(range(2019, int(today[:4]) + 1)):
        if str(yy) == today[:4]:
            ends = int(today[5:7]) + 1
            start = 1
        else:
            ends = 13
            if '0' in today[5:7]:
                start = int(today[6:7])
            else:
                start = int(today[5:7])
        for mm in reversed(range(start,ends)):
            print('--1--',yy,mm)
            parameters = {'api-key': Times_key}           
            archived_Url = 'https://api.nytimes.com/svc/archive/v1/'+ str(yy) +'/'+ str(mm) +'.json'
            archives = requests.get(archived_Url, params=parameters).json()

            monthly_article = []
            for a in archives['response']['docs']:
                articles = {}
                articles['title'] = a['headline']['main']
                articles['pub_date'] = a['pub_date'][:10]
                articles['url'] = a['web_url']
                if len(a['multimedia']) !=0 and a['multimedia'][0]['url']:
                    articles['thm_img'] = 'https://static01.nyt.com/' + a['multimedia'][0]['url']
                else:
                    articles['thm_img'] = 'no_image_found'
                articles['tags'] = [''.join(tag['value']) for tag in a['keywords']]
                monthly_article.append(articles)
            monthly_archive[str(yy)+'-'+str(mm)] = monthly_article

            tag_arr = []
            for m in monthly_article:
                for t in m['tags']:
                    for string in t.split(', '):
                        tag_arr.append(', '.join(string.split(', ')))
            
            count_tag = {}
            for tag in tag_arr:
                if tag in ['Trump', 'Donald J']:
                    tag = 'Donald Trump'
                if tag in ['Joseph R Jr', 'Biden']:
                    tag = 'Joe Biden'
                if tag in ['Brett M', 'Supreme Court (US)', 'Kavanaugh']:
                    tag = 'Brett Kavanaugh'
                if tag in ['Putin', 'Vladimir V']:
                    tag = 'Putin'
                if tag in ['Fla', 'Parkland']:
                    tag = 'Parkland'
                if tag in ['Coronavirus Aid', 'Relief', 'and Economic Security Act (2020)']:
                    tag = 'Coronavirus Aid, Relief, and Economic Security Act (2020)'
                if tag in ['School Shootings and Armed Attacks']:
                    tag = 'School Shootings'
                if tag in ['Shutdowns (Institutional)']:
                    tag = 'Shutdowns'
                if tag in ['New York City', 'NYC','NY)','New York State' ]:
                    tag = 'New York State'
                if tag in ['States (US)', 'United States']:
                     tag = 'United States'   
                if tag in ['Donald Trump', 'New York State','United States Politics and Government', 'Politics and Government', 'Books and Literature', 'Television', 'Movies', 'Real Estate and Housing (Residential)', 'United States', 'United States International Relations' 'Primaries and Caucuses', 'United States Economy', 'Elections']:
                    tag = ''

                if tag is not '':
                    if tag in count_tag:
                        count_tag[tag] += 1
                    else:
                        count_tag[tag] = 1   

            # This variable is what we want to get in NYTimes
            tags_with_frequency = sorted(count_tag.items(),key=operator.itemgetter(1),reverse=True)[:11]
            frequent_tags_archive[str(yy)+'-'+str(mm)] = tags_with_frequency

    return monthly_archive, frequent_tags_archive

In [11]:
## frequent_tags_archive = get_NYTimes_metadata()[1]
monthly_archive = get_NYTimes_metadata()[0]
frequent_tags_archive = get_NYTimes_metadata()[1]

--1-- 2020 5
--1-- 2020 4
--1-- 2020 3
--1-- 2020 2
--1-- 2020 1
--1-- 2019 12
--1-- 2019 11
--1-- 2019 10
--1-- 2019 9
--1-- 2019 8
--1-- 2019 7
--1-- 2019 6
--1-- 2019 5
--1-- 2020 5
--1-- 2020 4
--1-- 2020 3
--1-- 2020 2
--1-- 2020 1
--1-- 2019 12
--1-- 2019 11
--1-- 2019 10
--1-- 2019 9
--1-- 2019 8
--1-- 2019 7
--1-- 2019 6
--1-- 2019 5


In [15]:
frequent_tags_archive

{'2020-5': [('Coronavirus (2019-nCoV)', 320),
  ('Quarantines', 82),
  ('Joe Biden', 56),
  ('Quarantine (Life and Culture)', 53),
  ('Coronavirus Aid, Relief, and Economic Security Act (2020)', 36),
  ('Presidential Election of 2020', 35),
  ('Shutdowns', 30),
  ('Deaths (Fatalities)', 28),
  ('Democratic Party', 23),
  ('Epidemics', 21),
  ('Cooking and Cookbooks', 20)],
 '2020-4': [('Coronavirus (2019-nCoV)', 3978),
  ('Quarantines', 857),
  ('Coronavirus Aid, Relief, and Economic Security Act (2020)', 807),
  ('Parenting', 592),
  ('Presidential Election of 2020', 537),
  ('Epidemics', 435),
  ('Children and Childhood', 388),
  ('Joe Biden', 370),
  ('Primaries and Caucuses', 302),
  ('Deaths (Obituaries)', 287),
  ('Deaths (Fatalities)', 279)],
 '2020-3': [('Coronavirus (2019-nCoV)', 3463),
  ('Quarantines', 860),
  ('Joe Biden', 817),
  ('Presidential Election of 2020', 813),
  ('Epidemics', 650),
  ('Primaries and Caucuses', 532),
  ('Democratic Party', 424),
  ('Shutdowns', 372

In [23]:
dicts={}
for periode in frequent_tags_archive:
    for tag_with_F in frequent_tags_archive[periode]:
        if tag_with_F[0] in dicts:
            dicts[tag_with_F[0]] = tag_with_F[1] + dicts[tag_with_F[0]]
        else:
            dicts[tag_with_F[0]] = tag_with_F[1]
            
tags_with_frequency = sorted(dicts.items(),key=operator.itemgetter(1),reverse=True)[:11]
tags_with_frequency       

[('Coronavirus (2019-nCoV)', 8361),
 ('Presidential Election of 2020', 5962),
 ('Joe Biden', 4537),
 ('Democratic Party', 3805),
 ('United States International Relations', 2327),
 ('Trump-Ukraine Whistle-blower Complaint and Impeachment Inquiry', 2111),
 ('Quarantines', 1799),
 ('Epidemics', 1375),
 ('Impeachment', 1352),
 ('Primaries and Caucuses', 1306),
 ('China', 1253)]

### Find the most appeared tags among these days

In [9]:
general_tags = [('Donald Trump', 28),
 ('New York State', 28),
 ('United States Politics and Government', 28),
 ('Politics and Government', 28),
 ('Books and Literature', 28),
 ('Television', 28),
 ('Movies', 28),
 ('Real Estate and Housing (Residential)', 28),
 ('United States', 27),
 ('United States International Relations', 26),
 ('Primaries and Caucuses', ),
 ('United States Economy', ),
 ('United States Defense and Military Forces', ),
 ('Appointments and Executive Changes',),
 ('Weddings and Engagements', ),
 ('Elections',)]#,
#  ('Democratic Party', 25),
#  ('China', 25),
#  ('Art', 25),
#  ('Deaths (Obituaries)', 24),
#  ('Women and Girls', 21),
#  ('Elections', 20),
#  ('Republican Party', 20),
#  ('House of Representatives', 18),
#  ('Theater', 17),
#  ('Presidential Election of 2020', 16),
#  ('Weddings and Engagements', 15)]

### Pytrends API
https://pypi.org/project/pytrends/

Sample codes to see Google doesn't block my IP

In [111]:
pytrends = TrendReq(hl='en-US', tz=360, timeout=(10,25))
tag_arr = ['Coronavirus (2019-nCoV)']
pytrends.build_payload(tag_arr, cat=0, timeframe='2020-04-01 ' + str(datetime.now())[:10], geo='', gprop='')
time.sleep(2)
df = pytrends.interest_over_time().reset_index()
df.head()

Unnamed: 0,date,Coronavirus (2019-nCoV),isPartial
0,2020-04-01,85,False
1,2020-04-02,48,False
2,2020-04-03,100,False
3,2020-04-04,55,False
4,2020-04-05,41,False


In [94]:
df['date'].iloc[0]

Timestamp('2020-04-01 00:00:00')

In [170]:
# Get a unique tag collection for the search query
def get_trends_Tags(frequent_tags_archive):    
    frequent_tag_only = {}
    
    for time_period in frequent_tags_archive:
        tag_only = []
        for each in frequent_tags_archive[time_period]:    
            tag_only.append(each[0])
            frequent_tag_only[time_period] = tag_only
        
    pytrends = TrendReq(hl='en-US', tz=360)
    monthly_interests = {}
    
    for period in frequent_tag_only:
        print(period + ': ', len(result[period]))
        data = []
        for tag in frequent_tag_only[period]:
            if tag in ['Russian Interference in 2016 US Elections and Ties to Trump Associates']:
                tag = 'Russian interference in the 2016 United States elections'
            if tag in ['Homosexuality and Bisexuality']:
                tag = 'Homosexuality'
            if 'Trump-Ukraine' in tag:
                tag = 'Trump-Ukraine'
            if tag in ['Biden, Joseph R Jr']:
                tag = 'Joe Biden'
            if tag in ['Sanders, Bernard']:
                tag = 'Bernie Sanders'
            if tag in ['Buttigieg, Pete (1982- )']:
                tag = 'Pete Buttigieg'
            tag_arr = []
            tag_arr.append(tag)
#             print(tag)
            interest_over_time = {}
            
            datetyped_period = datetime.strptime(a, "%Y-%m")
            one_month_ago = (datetyped_period + relativedelta(months=-1)).replace(day=datetime.today().day).strftime("%Y-%m-%d")
            if period == str(datetime.today())[:7]:
                one_month_later = str(datetime.today())[:10]
            else:
                one_month_later = (datetyped_period + relativedelta(months=+1)).replace(day=datetime.today().day).strftime("%Y-%m-%d")
            time.sleep(2)
            try:
                pytrends.build_payload(tag_arr, cat=0, timeframe = one_month_ago + ' ' + one_month_later, geo='', gprop='')
                df = pytrends.interest_over_time().reset_index()
            except Exception as e:
                print(tag_arr, ', and which reason? ', e)
                pass
            
            interest_over_time['Tag'] = tag
            try:
                interest_over_time['StartDate'] = one_month_ago
                interest_over_time['EndDate'] = one_month_later
                interest_over_time['Rate'] = [rate for rate in df[tag]]
            except Exception as e:
                pass
            data.append(interest_over_time)     
        monthly_interests[period] = data
    return monthly_interests

In [121]:
monthly_interests = get_trends_Tags(frequent_tags_archive)
monthly_interests

Coronavirus (2019-nCoV)
1
2020-04-22
Deaths (Fatalities)
7
2020-04-28


### Reddit API
https://praw.readthedocs.io/en/latest/

In [173]:
def get_reddit_Posts(times_metadata):   
#     result = {}
#     for time_period in times_metadata:
#         data = []
#         for each in times_metadata[time_period]:     
#             data.append(each['Tag'])
#             result[time_period] = data  
            
    reddit = praw.Reddit(client_id = Reddit_client_id,
                         client_secret = Reddit_client_secret,
                         username = Reddit_username,
                         password = Reddit_password,
                         user_agent = Reddit_user_agent)
    result = {'2020-04':['Coronavirus (2019-nCoV)', 'Quarantines', 'Parenting'], '2019-04': ['China', 'Epidemics', 'Deaths (Fatalities)']}
 
    reddit_metadata = []
    for period in result:
        print(period + ': ', len(result[period]))
        reddit_posts = {}
        add_post_arr = []
        for tag in result[period]:
            if tag in ['Biden, Joseph R Jr']:
                tag = 'Joe Biden'
            if tag in ['Sanders, Bernard']:
                tag = 'Bernie Sanders'

            subreddit = reddit.subreddit('all')
            time.sleep(2)
            for i, post in enumerate(subreddit.search(tag, sort='relevance', syntax='lucene', limit=100)):
                if not post.stickied:
                    each_tag = {}
                    each_tag['Tag'] = tag
#                     print('--1--',tag)
                    each_tag['Title'] = post.title
                    each_tag['CreatedAt'] = datetime.fromtimestamp(post.created_utc).isoformat()[:10]
                    each_tag['DiscussionAbout'] = post.url
                    each_tag['Ups'] = post.ups
                    each_tag['Downs'] = post.downs
                    add_post_arr.append(each_tag)
        reddit_posts[period] = add_post_arr
        reddit_metadata.append(reddit_posts)
#         print('--2--',reddit_metadata)
    
#     add_tag = {}
#     for item in reddit_metadata:
#         for period in item: 
#             monthly_posts = []
#             byTime = {}
            
#             for each_post in item[period]:
#                 each_post['CreatedAt'] = each_post['CreatedAt'][:7]

#                 if each_post['Tag'] in add_tag:
#                     if each_post['CreatedAt'] in byTime:
#                         byTime[each_post['CreatedAt']][0] += 1
#                         byTime[each_post['CreatedAt']][1] += each_post['Ups']
#                     else:
#                         add_freq_votes = []
#                         byTime[each_post['CreatedAt']] = add_freq_votes
#                         add_freq_votes.append(1)
#                         add_freq_votes.append(each_post['Ups'])
#                 else:
#                     add_tag[each_post['Tag']] = byTime
#                     if each_post['CreatedAt'] in byTime:
#                         byTime[each_post['CreatedAt']][0] += 1
#                         byTime[each_post['CreatedAt']][1] += each_post['Ups']
#                     else:
#                         add_freq_votes = []
#                         byTime[each_post['CreatedAt']] = add_freq_votes
#                         add_freq_votes.append(1)
#                         add_freq_votes.append(each_post['Ups'])
# #                     print('--1--', add_tag)
#             monthly_posts.append(add_tag)
# #             print('--2--', monthly_posts)
    
# #     print('--3--', monthly_posts)
    return reddit_metadata

In [174]:
get_reddit_Posts(times_metadata)

2020-04:  3
2019-04:  3


[{'2020-04': [{'Tag': 'Coronavirus (2019-nCoV)',
    'Title': 'coronavirus_2019_ncov mods ban everyone who call them out on their shitty posts',
    'CreatedAt': '2020-04-27',
    'DiscussionAbout': 'https://www.reddit.com/r/WatchRedditDie/comments/g95tjl/coronavirus_2019_ncov_mods_ban_everyone_who_call/',
    'Ups': 3,
    'Downs': 0},
   {'Tag': 'Coronavirus (2019-nCoV)',
    'Title': 'Coronavirus 2019-ncov background with viral cells 7',
    'CreatedAt': '2020-04-27',
    'DiscussionAbout': 'https://www.reddit.com/r/u_Apprehensive323/comments/g99w6g/coronavirus_2019ncov_background_with_viral_cells_7/',
    'Ups': 1,
    'Downs': 0},
   {'Tag': 'Coronavirus (2019-nCoV)',
    'Title': 'World Health Organization (@WHO) tweeted this on 14 Jan 2020: "Preliminary investigations conducted by the Chinese authorities have found no clear evidence of human-to-human transmission of the novel #coronavirus (2019-nCoV) identified in #Wuhan, #China"',
    'CreatedAt': '2020-04-21',
    'DiscussionA

## Not using anymore

- Top Stories API: (about 30 articles by 7 days): title, pub_date, url, section, des_facet, geo_facet
- Newswire API: Finds by Archive API's url : des_facet
- Most Popular API
- Top Stories API, Times Tags API, Community API

### Youtube API
https://developers.google.com/youtube/v3/docs/search/list?hl=en_US

In [None]:
# creating Youtube Resource Object 
youtube_object = discovery.build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey = DEVELOPER_KEY)

In [None]:
def youtube_search_keyword(times_metadata):  
    # calling the search.list method to retrieve youtube search results 
    result = {}
    max_results = 1
    for time_period in times_metadata:
        data = []
        for each in times_metadata[time_period]:     
            data.append(each['Tag'])
            result[time_period] = data
        
    pytrends = TrendReq(hl='en-US', tz=360)
    monthly_interests = {}
    
    for i, period in enumerate(result):
        results = []
        for tag in times_metadata:
            print(tag)
            search_tags = youtube_object.search().list(q = tag, part = "id, snippet", order = 'viewCount', maxResults = max_results, publishedAfter = "2020-02-17T00:00:00Z").execute() 
            videos = []
            for item in search_tags.get("items", []):
                time.sleep(2)
                video = {}
                
#               Pull Date!!!
                video["Tag"] = tag
                video["videoId"] = item["id"]["videoId"]
                video["publishedAt"] = item['snippet']['publishedAt']
                video["title"] = item['snippet']['title']
                video['description'] = item['snippet']['description']
                stats = youtube_object.videos().list(part='statistics, snippet', id=item["id"]["videoId"]).execute()
                video['statistics'] = stats.get("items", [])[0]['statistics']
                videos.append(video)
            results.append(videos)
    return results

## Reddit

In [None]:
nltk.download('averaged_perceptron_tagger')

In [None]:
def get_sentimental_chart(reddit_metadata):
    tags_metadata = []
    for single_tag in reddit_metadata:
        tag_sentiment = {}
        avg_polarity = 0
        pos_count = 0
        neg_count = 0
        neutral_count = 0
        for single_comment in single_tag['Comments']: 
            
            # Get Word Sentimental analysis
            blobed_word = TextBlob(single_comment)
            if blobed_word.sentiment.polarity > 0:
                pos_count += 1
            elif blobed_word.sentiment.polarity == 0:
                neutral_count += 1
            else:
                neg_count += 1      
            avg_polarity += blobed_word.sentiment.polarity
            
            tag_sentiment['tag'] = single_tag['Tag']    
            tag_sentiment['avg_polarity'] = avg_polarity / len(single_tag['Comments'])
            tag_sentiment['pos_count'] = pos_count
            tag_sentiment['neg_count'] = neg_count
            tag_sentiment['neutral_count'] = neutral_count
            
            # Get Adjective words' frequencies
            adgs_arr = []
            for blobed_tags in blobed_word.tags:
                if blobed_tags[1] in ['JJ', 'JJR', 'JJS']:
                    adgs_arr.append(blobed_tags[0])
            adg_frequency = {}
            for adg in adgs_arr: 
                if adg in adg_frequency:
                    adg_frequency[adg] += 1
                else:
                    adg_frequency[adg] = 1 
            tag_sentiment['adg_frequency'] = sorted(adg_frequency.items(),key=operator.itemgetter(1),reverse=True)[:3]       
        tags_metadata.append(tag_sentiment)
        
    return tags_metadata

In [138]:
frequent_tags_archive = {'2020-04': [('Coronavirus (2019-nCoV)', 3591),
  ('Quarantines', 822)]}