In [54]:
# !pip3 install prettytable
# !pip3 install tqdm
# !pip3 install ipywidgets 
# !jupyter nbextension enable --py widgetsnbextension

In [1]:
import utilities
import os
import json
from collections import OrderedDict
from prettytable import PrettyTable
from tqdm.notebook import tqdm

### Directory/file addresses & file read/writes

In [2]:
data_dir = 'data/'
skills_addr = os.path.join(data_dir,'us_skills_raw.json')

grouped_skills_addr = os.path.join(data_dir,'grouped_skills.json')
subgrouped_skills_addr = os.path.join(data_dir,'subgrouped_skills.json')
ungrouped_skills_addr = os.path.join(data_dir,'ungrouped_skills.json')

In [3]:
all_skills_raw = utilities.read_file(skills_addr)

### Processing raw skills

### Sorting skills based on number of ratings

In [4]:
def convert_to_json(all_skills_raw):
    for i in range(len(all_skills_raw)):
        all_skills_raw[i] = json.loads(all_skills_raw[i])
        if all_skills_raw[i]['Total_customer_that_rate_the_skill'] == '':
            all_skills_raw[i]['Total_customer_that_rate_the_skill'] = 0
        else:
            all_skills_raw[i]['Total_customer_that_rate_the_skill'] = int(all_skills_raw[i]['Total_customer_that_rate_the_skill'].replace(',',''))

    return all_skills_raw


def partition(arr, l, h):
    i = ( l - 1 )
    x = arr[h]['Total_customer_that_rate_the_skill']
    
    for j in range(l , h):
        if arr[j]['Total_customer_that_rate_the_skill'] <= x:

            # increment index of smaller element
            i = i+1
            arr[i], arr[j] = arr[j], arr[i]

    arr[i+1], arr[h] = arr[h], arr[i+1]
    return (i+1)


# Function to do Quick sort
# arr[] --> Array to be sorted,
# l --> Starting index,
# h --> Ending index
def quickSortIterative(arr, l, h):
    pbar = tqdm(total=h, position=0, leave=True)
    
    # Create an auxiliary stack
    size = h - l + 1
    stack = [0] * (size)

    # initialize top of stack
    top = -1

    # push initial values of l and h to stack
    top = top + 1
    stack[top] = l
    top = top + 1
    stack[top] = h

    # Keep popping from stack while is not empty
    while top >= 0:
        pbar.update(1)

        # Pop h and l
        h = stack[top]
        top = top - 1
        l = stack[top]
        top = top - 1

        # Set pivot element at its correct position in
        # sorted array
        p = partition(arr, l, h)

        # If there are elements on left side of pivot,
        # then push left side to stack
        if p-1 > l:
            top = top + 1
            stack[top] = l
            top = top + 1
            stack[top] = p - 1

        # If there are elements on right side of pivot,
        # then push right side to stack
        if p+1 < h:
            top = top + 1
            stack[top] = p + 1
            top = top + 1
            stack[top] = h

In [5]:
all_skills_raw = convert_to_json(all_skills_raw)

# Driver code to test above
len_skills = len(all_skills_raw)
quickSortIterative(all_skills_raw, 0, len_skills-1)

  0%|          | 0/88419 [00:00<?, ?it/s]

### Group skills by category/sub-category

### Skill json structure

    'Main Category': 'Business & Finance',
    'SubCategory': 'Business & Finance',
    'Sample_Invocation_Utterances': ['“Alexa, ask PayPal to check my balance”', ... ],
    'Name': 'PayPal',
    'Developer': 'by PayPal',
    'Skill_permission': [],
    'Account_linking': 'Account linking required',
    'Review_Count': None,
    'Rating': '4.4 out of 5 stars',
    'Total_customer_that_rate_the_skill': '217',
    'Total_Customers_Reviews': '',
    'Cost': 'Free to Enable',
    'In_skill_purchase': None,
    'Skill_description': 'Description\nWant to send or request m...',
    'Skill_link': 'https://www.amazon.com/PayPal/dp/B075764QCX/ref=lp_14284819011_1_1?s=digital-skills&ie=UTF8&qid=1602769443&sr=1-1',
    'privacy_policy': 'https://www.paypal.com/us/webapps/mpp/ua/privacy-full',
    'Terms_of_use': 'https://www.paypal.com/us/webapps/mpp/ua/useragreement-full'}

In [6]:
def group_skills(all_skills):
    grouped_skills = {}
    sub_grouped_skills = {}
    ungrouped_skills = {}
    
    pbar = tqdm(total=len(all_skills), position=0, leave=True)
    
    for skill in reversed(all_skills):
        pbar.update(1)
        try:
            skill_id = skill['Skill_link'].split('/')[-2]
            skill_cat = skill['Main Category'].replace(' ','').replace('&','-')
            skill_subcat = skill['SubCategory'].replace(' ','').replace('&','-')

            if skill_cat not in grouped_skills:
                grouped_skills[skill_cat] = {}
            
            if skill_subcat not in sub_grouped_skills:
                sub_grouped_skills[skill_subcat] = {}
            
            grouped_skills[skill_cat][skill_id] = {}
            grouped_skills[skill_cat][skill_id] = skill
            
            sub_grouped_skills[skill_subcat][skill_id] = {}
            sub_grouped_skills[skill_subcat][skill_id] = skill
            
            ungrouped_skills[skill_id] = {}
            ungrouped_skills[skill_id] = skill
        
        except Exception as ex:
            print('Encountered an error while processing:', str(ex))
            print(skill)
            break
            
    return grouped_skills, sub_grouped_skills, ungrouped_skills

In [7]:
grouped_skills, sub_grouped_skills, ungrouped_skills = group_skills(all_skills_raw)

  0%|          | 0/88420 [00:00<?, ?it/s]

In [86]:
utilities.write_json(grouped_skills_addr, grouped_skills)
utilities.write_json(subgrouped_skills_addr, sub_grouped_skills)
utilities.write_json(ungrouped_skills_addr, ungrouped_skills)

### List skill categories and sub categories 

In [8]:
def get_skill_categories(all_skills):
    categories = {}
    sub_categories = {}

    for key in all_skills:
        category = all_skills[key]['Main Category']
        sub_category = all_skills[key]['SubCategory']

        if category not in categories:
            categories[category] = 0

        if sub_category not in sub_categories:
            sub_categories[sub_category] = 0

        categories[category] += 1
        sub_categories[sub_category] += 1
    return categories, sub_categories

In [9]:
categories, sub_categories = get_skill_categories(ungrouped_skills)

ordered_categories = OrderedDict(sorted(categories.items(), key=lambda k: k[1], reverse=True))
ordered_sub_categories = OrderedDict(sorted(sub_categories.items(), key=lambda k: k[1], reverse=True))

In [10]:
def print_table(ordered_dict, count_limit = 20):
    count = 0
    t = PrettyTable(['Skill category', '# of skills'])
    for item in ordered_dict:
        t.add_row([item, ordered_dict[item]])
    print(t)

In [11]:
print_table(ordered_categories)
print_table(ordered_sub_categories)

+-------------------------+-------------+
|      Skill category     | # of skills |
+-------------------------+-------------+
|      Games & Trivia     |    12114    |
|      Music & Audio      |     8328    |
|        Lifestyle        |     7340    |
|  Education & Reference  |     5115    |
|           News          |     5025    |
|    Business & Finance   |     3672    |
|     Novelty & Humor     |     2920    |
|        Smart Home       |     2417    |
|           Kids          |     2270    |
|     Health & Fitness    |     2166    |
|          Social         |     1749    |
|       Productivity      |     1362    |
|       Food & Drink      |     1356    |
|          Sports         |     1300    |
|        Utilities        |     1226    |
| Travel & Transportation |     1205    |
|         Weather         |     715     |
|         Shopping        |     390     |
|       Movies & TV       |     381     |
|          Local          |     181     |
|      Connected Car      |     13

In [92]:
# for skill in ungrouped_skills:
#     print(ungrouped_skills[skill])
#     break
    
#     ID, NAME (' ', ',', replace with '-'), PERM (1/0), skill_link

In [13]:
for key in grouped_skills:
    print(key)
    break

Novelty-Humor


In [61]:
count = 0
key = 'Kids'
for skill in grouped_skills[key]:
    print(grouped_skills[key][skill]['Name'])
    count += 1
    if count > 50:
        break
#     break
    

The SpongeBob Challenge
Poop Poems
Amazon: Call Santa
Animal Sounds
The Name Game (banana-fana)
Santa Claus Poops
Scooby Doo! Mystery Inc. Theater
KidsBrushYourTeethSong
Baby-Shark Sing and Dance
Math Facts - Math Practice for Kids
Amazon Storytime
Choose Your Own Adventure
Santa Claus
Bedtime stories
Math Facts
Short Bedtime Stories
Kids Court
Animal Rescue
Wheels On The Bus
My Kids Are Animals!
Unicorns Rock
NORAD Tracks Santa
Amazon Math
Amazon: Call Easter Bunny
Disney Trivia
Alphabet Song
Santa Tracker
Dog Translator
Bruh Sound Effect #2
Disney Stories
The Story of Lucky Charms
Kids Animal Sounds
Zoo Walk
Chompers
Old MacDonald
Baby Dragon
Animal Game for Kids - Play and Learn
Pinkfong Baby Shark
ABC
You Choose Batman Adventures
Kids Sing Along
Multiplication Dojo
Bedtime Prayers
Nursery Rhymes
Tale of Peter Rabbit
Music Bop Adventures
Star Wars: Choose Your Destiny
Story Time
Bamboo Math
Let's Count
Dear Santa
