### Create JSON Files for the Shiny App in R
Anne Chen  
2016

### Import Modules

In [1]:
import json
from collections import Counter
from itertools import chain

import numpy as np
import pandas as pd

### Define Functions

In [2]:
def read_json(filename):
    '''Read a json file and return the parsed content.

    filename: path of the json file to load.
    Returns the Python object decoded from the file's json content.
    '''
    # context manager closes the file handle even if parsing raises
    with open(filename) as f:
        return json.load(f)

def append_two_dict(dict1, dict2):
    '''Merge two dictionaries based on keys.

    Every entry of dict1 is kept; entries of dict2 are added only for keys
    that dict1 does not already have. Returns a new dictionary, so neither
    argument is modified.
    '''
    # shallow copy: binding new_dict = dict1 would alias (and mutate) the caller's dict
    new_dict = dict1.copy()
    for key, val in dict2.items():
        # setdefault inserts only when the key is absent
        new_dict.setdefault(key, val)
    return new_dict

def unicode_to_ascii(lst):
    '''Encode every string of lst as ascii, dropping characters that cannot be encoded.'''
    # done up front so that writing the data into csv files later on does not raise errors
    encoded = []
    for text in lst:
        encoded.append(text.encode('ascii', 'ignore'))
    return encoded

def dic_str_to_num(dic):
    '''Return a new dictionary whose lists of number strings are converted to ints.

    Commas (thousands separators such as '1,234') are stripped before conversion.
    '''
    return {
        name: [int(text.replace(',', '')) for text in texts]
        for name, texts in dic.items()
    }

def encode_whole_dictionary(dic):
    '''Convert a whole dictionary (keys and lists of values) from unicode to ascii.'''
    new_dic = {}
    for key, val_lst in dic.items():
        # the key is encoded the same way unicode_to_ascii encodes each list item
        ascii_key = key.encode('ascii', 'ignore')
        new_dic[ascii_key] = unicode_to_ascii(val_lst)
    return new_dic

def write_json(name, dic):
    '''Dump a dictionary to the json file "<name>.json".'''
    with open(name + '.json', 'w') as out_file:
        json.dump(dic, out_file)

def get_tag_cloud_lst(tag_cloud):
    '''Get the tag cloud list: [(tag name, its frequency)], most frequent first.

    tag_cloud: dictionary whose values are lists of tags.
    Tags with the same frequency keep the order in which the counter yields them.
    '''
    # chain flattens all tag lists in one pass; reduce with list "+" re-copied
    # the accumulated list on every step
    counts = Counter(chain.from_iterable(tag_cloud.values()))
    # sorted() always returns a list, whereas dict.items() has no .sort() on Python 3
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

def reverse_dic(target, original_dic):
    '''Reverse a key-value dictionary to a value-key one, restricted to target.

    Every item of target becomes a key of the result; its value is the list of
    original_dic keys whose item list contains it (empty when none does).
    '''
    reversed_dic = {wanted: [] for wanted in target}
    for name, item_lst in original_dic.items():
        for item in item_lst:
            # items outside the target list are not tracked
            if item not in target:
                continue
            reversed_dic[item].append(name)
    return reversed_dic

def encode_dict_val(dic):
    '''Convert the keys of a dictionary from unicode to ascii.

    Only the keys are encoded (characters that cannot be encoded are dropped);
    the values are carried over unchanged.
    '''
    # iterate over items() instead of indexing keys()/values() in parallel:
    # dictionary views cannot be indexed on Python 3
    return {key.encode('ascii', 'ignore'): val for key, val in dic.items()}

### Read and Merge JSON Files

In [3]:
# Each dataset comes as a US-only file and a world file. They are merged
# (the US entry is kept when a key is in both) and converted from unicode to ascii.

# museum categories
us_category = read_json('./Data/museum_categories_USonly.json')
w_category = read_json('./Data/museum_categories_world.json')
category = encode_whole_dictionary(append_two_dict(us_category, w_category))

# tag clouds
us_tag_cloud = read_json('./Data/tag_clouds_USonly.json')
w_tag_cloud = read_json('./Data/tag_clouds_world.json')
tag_cloud = encode_whole_dictionary(append_two_dict(us_tag_cloud, w_tag_cloud))

# traveler types: the values are number strings, so convert them to numbers as well
us_traveler_type = read_json('./Data/traverler_type_USonly.json')
w_traveler_type = read_json('./Data/traverler_type_world.json')
traveler_type = encode_whole_dictionary(append_two_dict(us_traveler_type, w_traveler_type))
traveler_type = dic_str_to_num(traveler_type)

# image links: only the keys are encoded, the values are left untouched
us_img_link = read_json('./Data/img_links_USonly.json')
w_img_link = read_json('./Data/img_links_world.json')
img_link = encode_dict_val(append_two_dict(us_img_link, w_img_link))

# write files
write_json('./app/data/tags_cloud', tag_cloud)
write_json('./app/data/museum_img_link', img_link)

### Create JSON Files Needed to Build the Shiny App

In [4]:
### get target category
# flatten every museum's category list in one pass (reduce with list "+"
# re-copied the accumulated list on every step)
category_lst = list(chain.from_iterable(category.values()))
# a category is a target when its name contains any of these keywords
target_keywords = ('Museum', 'Galleries', 'Historic Sites', 'Landmarks')
target_category = [i for i in set(category_lst)
                   if any(word in i for word in target_keywords)]
# parenthesised form prints the same text under both Python 2 and Python 3
print(target_category)
dic_cat = reverse_dic(target_category, category)
# NOTE(review): written to the working directory, unlike the ./app/data/ outputs - confirm intended
write_json('museum_types', dic_cat)

['History Museums', 'Military Museums', 'Points of Interest & Landmarks', 'Natural History Museums', 'Art Museums', "Children's Museums", 'Historic Sites', 'Science Museums', 'Museums', 'Specialty Museums', 'Art Galleries', 'Sights & Landmarks']


In [5]:
### get target tags
tag_freq_lst = get_tag_cloud_lst(tag_cloud)
# the list is sorted by frequency, so this keeps the 100 most frequent tags
target_tags = tag_freq_lst[:100]
target_tags

[('on display', 792),
 ('gift shop', 415),
 ('rainy day', 300),
 ('couple of hours', 289),
 ('all ages', 287),
 ('special exhibits', 186),
 ('few hours', 180),
 ('two hours', 179),
 ('exhibits', 166),
 ('well worth a visit', 165),
 ('permanent collection', 164),
 ('worth a visit', 161),
 ('free admission', 154),
 ('audio guide', 153),
 ('beautiful building', 138),
 ('great collection', 129),
 ('information', 127),
 ('interesting exhibits', 127),
 ('entrance fee', 121),
 ('great for kids', 120),
 ('interactive exhibits', 117),
 ('hands on activities', 115),
 ('great exhibits', 110),
 ('great place to visit', 99),
 ('interactive displays', 94),
 ('guided tour', 92),
 ('well worth the visit', 91),
 ('whole family', 83),
 ('kids and adults', 83),
 ('traveling exhibits', 82),
 ('his life', 82),
 ('local history', 77),
 ('amazing collection', 77),
 ('free entry', 74),
 ('worth the trip', 74),
 ('great history', 73),
 ('civil war', 72),
 ('take your time', 71),
 ('nice collection', 70),
 ('in

In [7]:
### turn tag_freq_lst into a {tag name: frequency} dictionary and save it as a json file
# every element is a (tag name, frequency) pair, which dict() accepts directly
tag_count_dic = dict(tag_freq_lst)
write_json('./app/data/tag_counts', tag_count_dic)

In [8]:
### generate a dictionary mapping each tag of interest (key) to the museum names having it (value)
tag_of_interest = [
    'gift shop', 'rainy day', 'all ages', 'beautiful building',
    'audio guide', 'great for kids', 'guided tour',
    'interactive exhibits', 'interactive displays', 'modern art',
]
dic_tag = reverse_dic(tag_of_interest, tag_cloud)
# combine the two 'interactive ...' tags into one de-duplicated 'interactive' entry
merged = dic_tag['interactive exhibits'] + dic_tag['interactive displays']
dic_tag['interactive'] = list(set(merged))
# drop the two source entries now that they are combined into 'interactive'
for merged_key in ('interactive exhibits', 'interactive displays'):
    del dic_tag[merged_key]
write_json('museum_tags', dic_tag)

In [9]:
### remove 'Museums' from each category list and save the result as clean_category.json
clean_category = {}
for k, v in category.items():
    # list.remove drops the first 'Museums' entry in place, so the lists
    # held by category are modified as well
    if 'Museums' in v:
        v.remove('Museums')
    clean_category[k] = v
write_json('clean_category', clean_category)