In [1]:
# !pip3 install prettytable

In [2]:
import utilities
import os
import json
from collections import OrderedDict
from prettytable import PrettyTable

### Directory/file addresses & file read/writes

In [3]:
data_dir = 'data/'
skills_addr = os.path.join(data_dir,'us_skills_raw.json')

In [4]:
all_skills_raw = utilities.read_file(skills_addr)

### Processing raw skills

In [5]:
def process_skills(all_skills_raw):
    all_skills = {}

    for skill in all_skills_raw:
        try:
            skill_json = json.loads(skill)
            skill_id = skill_json['Skill_link'].split('/')[-2]

            all_skills[skill_id] = {}
            all_skills[skill_id] = skill_json
        except:
            print('Encountered an error while processing:', skill)
            break
    return all_skills

In [6]:
all_skills = process_skills(all_skills_raw)

In [7]:
len(all_skills)

61364

### Skill json structure

    'Main Category': 'Business & Finance',
    'SubCategory': 'Business & Finance',
    'Sample_Invocation_Utterances': ['“Alexa, ask PayPal to check my balance”', ... ],
    'Name': 'PayPal',
    'Developer': 'by PayPal',
    'Skill_permission': [],
    'Account_linking': 'Account linking required',
    'Review_Count': None,
    'Rating': '4.4 out of 5 stars',
    'Total_customer_that_rate_the_skill': '217',
    'Total_Customers_Reviews': '',
    'Cost': 'Free to Enable',
    'In_skill_purchase': None,
    'Skill_description': 'Description\nWant to send or request m...',
    'Skill_link': 'https://www.amazon.com/PayPal/dp/B075764QCX/ref=lp_14284819011_1_1?s=digital-skills&ie=UTF8&qid=1602769443&sr=1-1',
    'privacy_policy': 'https://www.paypal.com/us/webapps/mpp/ua/privacy-full',
    'Terms_of_use': 'https://www.paypal.com/us/webapps/mpp/ua/useragreement-full'}

In [8]:
def get_skill_categories(all_skills):
    categories = {}
    sub_categories = {}

    for key in all_skills:
        category = all_skills[key]['Main Category']
        sub_category = all_skills[key]['SubCategory']

        if category not in categories:
            categories[category] = 0

        if sub_category not in sub_categories:
            sub_categories[sub_category] = 0

        categories[category] += 1
        sub_categories[sub_category] += 1
    return categories, sub_categories

In [9]:
categories, sub_categories = get_skill_categories(all_skills)

ordered_categories = OrderedDict(sorted(categories.items(), key=lambda k: k[1], reverse=True))
ordered_sub_categories = OrderedDict(sorted(sub_categories.items(), key=lambda k: k[1], reverse=True))

In [10]:
def print_table(ordered_dict, count_limit = 20):
    count = 0
    t = PrettyTable(['Skill category', '# of skills'])
    for item in ordered_dict:
        t.add_row([item, ordered_dict[item]])
    print(t)

In [11]:
print_table(ordered_categories)
print_table(ordered_sub_categories)

+-------------------------+-------------+
|      Skill category     | # of skills |
+-------------------------+-------------+
|      Games & Trivia     |    12114    |
|      Music & Audio      |     8328    |
|        Lifestyle        |     7340    |
|  Education & Reference  |     5115    |
|           News          |     5025    |
|    Business & Finance   |     3672    |
|     Novelty & Humor     |     2920    |
|        Smart Home       |     2417    |
|           Kids          |     2270    |
|     Health & Fitness    |     2166    |
|          Social         |     1749    |
|       Productivity      |     1362    |
|       Food & Drink      |     1356    |
|          Sports         |     1300    |
|        Utilities        |     1226    |
| Travel & Transportation |     1205    |
|         Weather         |     715     |
|         Shopping        |     390     |
|       Movies & TV       |     381     |
|          Local          |     181     |
|      Connected Car      |     13