# Xtern Work Sample Assessment
## Reference Information

In [2]:
# Housing Location: IUPUI -- 415 Porto Alegre St, Indianapolis, IN 46202

# Candidate 1: The Speak Easy -- 5255 Winthrop Ave #110, Indianapolis, IN 46220
# Candidate 2: zWORKS -- 85 E Cedar St #1502, Zionsville, IN 46077
# Candidate 3: Launch Fishers -- 12175 Visionary Way, Fishers, IN 46038
# Candidate 4: Industrious Mass Ave -- 350 Massachusetts Ave Suite 300, Indianapolis, IN 46204
# Candidate 5: Launch Indy -- 525 S Meridian St, Indianapolis, IN 46225

# Google Maps API Key: Redacted
# Documenu API Key: Redacted

In [433]:
# Importing Needed Tools/Libraries
import numpy as np
import pandas as pd
pd.options.display.width = 200
pd.options.display.max_colwidth = 20
import googlemaps
import requests
from requests.auth import HTTPBasicAuth

# Setting up google maps client
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that it
# never has to be pasted into (and committed with) the notebook. It falls back
# to an empty string, which matches the redacted placeholder used previously.
import os
G_API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY', '')
client = googlemaps.Client(G_API_KEY)

In [187]:
# Read the housing/coworking sites, order them by name, and query the
# Google Maps client's `places` endpoint once per address
locations = pd.read_csv('Data.csv')
locations.sort_values(by='Name', inplace=True)
responses = [client.places(address) for address in locations['Address']]
print(locations)

                   Name                                            Address       Type
0                 IUPUI        415 Porto Alegre St, Indianapolis, IN 46202    Housing
4  Industrious Mass Ave  350 Massachusetts Ave Suite 300, Indianapolis,...  Coworking
3        Launch Fishers             12175 Visionary Way, Fishers, IN 46038  Coworking
5           Launch Indy          525 S Meridian St, Indianapolis, IN 46225  Coworking
1        The Speak Easy     5255 Winthrop Ave #110, Indianapolis, IN 46220  Coworking
2                zWORKS          85 E Cedar St #1502, Zionsville, IN 46077  Coworking


In [188]:
# Appending latitudes and longitudes as new column in dataset
# Each entry is [latitude, longitude] taken from the first search result.
# The coordinates are read by key ('lat', 'lng') rather than relying on the
# order in which the response dict happens to list its values.
geoms = []
for response in responses:
    location = response.get('results')[0]['geometry']['location']
    geoms.append([location['lat'], location['lng']])
locations['geometry'] = geoms
print(locations[['Name', 'geometry']])

                   Name                   geometry
0                 IUPUI    [39.77389, -86.1849117]
4  Industrious Mass Ave    [39.765831, -86.157617]
3        Launch Fishers  [39.9667636, -86.0088088]
5           Launch Indy  [39.7593486, -86.1580467]
1        The Speak Easy  [39.8488729, -86.1412948]
2                zWORKS  [39.9514923, -86.2608697]


In [262]:
import pgeocode as geo
def get_dist(coordinates, origin=(39.77389, -86.1849117)):
    """Haversine distance between `coordinates` and `origin`, rounded to 2 decimals.

    Parameters
    ----------
    coordinates : sequence of two floats
        [latitude, longitude] of the point to measure.
    origin : sequence of two floats, optional
        [latitude, longitude] of the reference point. Defaults to the IUPUI
        housing coordinates, so existing single-argument calls are unchanged.

    Returns
    -------
    float
        The distance as reported by `pgeocode.haversine_distance`
        (stored below in the 'distance(km)' column).
    """
    distances = geo.haversine_distance([coordinates], [list(origin)])
    return np.round(distances, 2)[0]

# Distance of every site from IUPUI, then a clean 0..n-1 index so later cells
# can look sites up by position
locations['distance(km)'] = locations.geometry.map(get_dist)
locations.reset_index(drop=True, inplace=True)
print(locations)

                   Name                                            Address       Type                   geometry  distance(km)  Avg Price  Price Variance  Avg Rating  Rating Variance
0                 IUPUI        415 Porto Alegre St, Indianapolis, IN 46202    Housing    [39.77389, -86.1849117]          0.00       1.25            0.10        4.10             0.40
1  Industrious Mass Ave  350 Massachusetts Ave Suite 300, Indianapolis,...  Coworking    [39.765831, -86.157617]          2.50       1.57            0.24        4.37             0.17
2        Launch Fishers             12175 Visionary Way, Fishers, IN 46038  Coworking  [39.9667636, -86.0088088]         26.19       1.90            0.54        4.12             0.42
3           Launch Indy          525 S Meridian St, Indianapolis, IN 46225  Coworking  [39.7593486, -86.1580467]          2.81       1.58            0.27        4.18             0.30
4        The Speak Easy     5255 Winthrop Ave #110, Indianapolis, IN 46220  Coworking

## Gathering Up To 60 Closest & Most Prominent Nearby Restaurants for Each Location (Biased to 1 Mile Radius)
### Collected restaurants are within a 5-10 minute walk of the associated location

In [263]:
# https://maps.googleapis.com/maps/api/place/nearbysearch/json?&location=latitude,longitude
import time

# For every site, collect all result pages of a distance-ranked restaurant search.
# `nearby_restaurants[k]` is the list of raw `requests` responses (one per page)
# for the k-th row of `locations`.
nearby_search_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
nearby_restaurants = []
for coords in locations.geometry:
    # Query parameters go through `params` so requests handles the URL encoding.
    response = requests.get(
        nearby_search_url,
        params={
            'location': '{},{}'.format(coords[0], coords[1]),
            'type': 'restaurant',
            'rankby': 'distance',
            'key': G_API_KEY,
        },
        timeout=30,
    )
    response.raise_for_status()
    pages = [response]
    payload = response.json()  # parse each body once instead of on every check
    while 'next_page_token' in payload:
        time.sleep(2) # Accounts for API's delay in processing next page tokens
        response = requests.get(
            nearby_search_url,
            params={'pagetoken': payload['next_page_token'], 'key': G_API_KEY},
            timeout=30,
        )
        response.raise_for_status()
        pages.append(response)
        payload = response.json()
    nearby_restaurants.append(pages)

In [264]:
# Merge the 'results' of every page into one list of places per location
flattened_restaurants = [
    [place for page in pages for place in page.json()['results']]
    for pages in nearby_restaurants
]

### Saving and Importing Query Data

In [441]:
# Logging restaurant queries as json
# One '<location name>.json' file per site, holding the list of parsed page
# bodies. Sites and page lists are paired positionally, so the number of
# sites is no longer hard-coded, and the `with` block closes the file itself.
import json
for file_name, pages in zip(locations.Name, nearby_restaurants):
    with open(file_name + '.json', 'w') as fp:
        json.dump([page.json() for page in pages], fp)

In [336]:
# Read each location's saved pages back from '<location name>.json' and
# flatten their 'results' into a single list of places per location
import json
loaded_flattened_restaurants = []
for site_name in locations.Name:
    with open(site_name + '.json', 'r') as fp:
        saved_pages = json.load(fp)
        loaded_flattened_restaurants.append(
            [place for page in saved_pages for place in page['results']]
        )

## Building DataFrame and Resolving Missing Values

In [399]:
# Build one row per restaurant. Rows are collected in a list and turned into a
# DataFrame in a single step: appending to a DataFrame inside the loop copies
# the whole frame on every iteration, and `DataFrame.append` no longer exists
# in pandas 2.x.
restaurant_columns = ['Name', 'Rating', 'Num_Ratings', 'Price', 'Id', 'Status', 'AscLoc', 'BusTypes']
restaurant_records = []
for idx, loc in enumerate(flattened_restaurants):
    for place in loc:
        restaurant_records.append({
            'Name': place['name'],
            # Optional fields fall back to NaN ('null' for the status) when absent
            'Rating': place.get('rating', np.nan),
            'Num_Ratings': place.get('user_ratings_total', np.nan),
            'Price': place.get('price_level', np.nan),
            'Id': place['place_id'],
            'Status': place.get('business_status', 'null'),
            'AscLoc': locations.Name[idx],  # site whose search returned this place
            'BusTypes': place['types'],
        })
restaurants = pd.DataFrame(restaurant_records, columns=restaurant_columns)
print(restaurants.head())

                                                Name  Rating Num_Ratings  Price                           Id              Status AscLoc                                           BusTypes
0                     Café Soleil at Eskenazi Health     4.6          37    NaN  ChIJ3Z7ruzdXa4gRNsIfdlPQ5R4  CLOSED_TEMPORARILY  IUPUI  [restaurant, food, point_of_interest, establis...
1                                    Farmer's Fridge     4.5           2    NaN  ChIJtXOEuipXa4gRN_AKkdHuXWI         OPERATIONAL  IUPUI  [restaurant, food, point_of_interest, establis...
2  Ingram Micro Mobility Marketplace at Eskenazi ...     4.6           7    NaN  ChIJoawzbTdXa4gRklpVautN-Ko         OPERATIONAL  IUPUI  [restaurant, food, point_of_interest, establis...
3                                    Farmer's Fridge     NaN         NaN    NaN  ChIJXWWxhStXa4gRLaXQ8DswcMw         OPERATIONAL  IUPUI  [restaurant, food, point_of_interest, establis...
4                                 Boar's Head Bistro     NaN     

In [400]:
# Non-null count of every column per associated location, followed by the
# distinct values found in the Status column.
print(restaurants.groupby('AscLoc').count()) # Assuring roughly equal # of restaurants and inspecting missing values
print(restaurants.Status.unique()) # Assuring no permanently closed establishments

                      Name  Rating  Num_Ratings  Price  Id  Status  BusTypes
AscLoc                                                                      
IUPUI                   60      53           53     32  60      60        60
Industrious Mass Ave    60      57           57     38  60      60        60
Launch Fishers          60      57           57     44  60      60        60
Launch Indy             60      55           55     39  60      60        60
The Speak Easy          60      57           57     42  60      60        60
zWORKS                  60      59           59     48  60      60        60
['CLOSED_TEMPORARILY' 'OPERATIONAL']


In [401]:
# Fill every gap with the mean of the same column within the restaurant's own location
for column in ('Rating', 'Price', 'Num_Ratings'):
    restaurants[column] = restaurants.groupby('AscLoc')[column].transform(
        lambda values: values.fillna(values.mean())
    )
print(restaurants.groupby('AscLoc').count()) # Assuring no missing values

                      Name  Rating  Num_Ratings  Price  Id  Status  BusTypes
AscLoc                                                                      
IUPUI                   60      60           60     60  60      60        60
Industrious Mass Ave    60      60           60     60  60      60        60
Launch Fishers          60      60           60     60  60      60        60
Launch Indy             60      60           60     60  60      60        60
The Speak Easy          60      60           60     60  60      60        60
zWORKS                  60      60           60     60  60      60        60


In [402]:
# Per-location mean and variance of price and rating (rounded to 2 decimals).
# The aggregates are attached by matching `locations.Name` against the groupby
# index, so the result no longer depends on `locations` rows happening to be
# in the same order as the sorted group keys.
grouped_restaurants = restaurants.groupby('AscLoc')
restaurant_summaries = [
    ('Avg Price', grouped_restaurants.Price.mean()),
    ('Price Variance', grouped_restaurants.Price.var()),
    ('Avg Rating', grouped_restaurants.Rating.mean()),
    ('Rating Variance', grouped_restaurants.Rating.var()),
]
for column, per_location in restaurant_summaries:
    locations[column] = locations['Name'].map(per_location.round(2))
print(locations)

                   Name                                            Address       Type                   geometry  distance(km)  Avg Price  Price Variance  Avg Rating  Rating Variance  Avg Open  \
0                 IUPUI        415 Porto Alegre St, Indianapolis, IN 46202    Housing    [39.77389, -86.1849117]          0.00       1.25            0.10        4.10             0.40      9.73   
1  Industrious Mass Ave  350 Massachusetts Ave Suite 300, Indianapolis,...  Coworking    [39.765831, -86.157617]          2.50       1.97            0.63        4.14             0.47     11.86   
2        Launch Fishers             12175 Visionary Way, Fishers, IN 46038  Coworking  [39.9667636, -86.0088088]         26.19       1.48            0.22        4.06             0.46      9.71   
3           Launch Indy          525 S Meridian St, Indianapolis, IN 46225  Coworking  [39.7593486, -86.1580467]          2.81       1.90            0.54        4.12             0.42     12.17   
4        The Speak E

In [403]:
# Generic descriptors that carry little information about a restaurant
types_to_remove = ['point_of_interest', 'establishment', 'restaurant', 'food', 'meal_takeaway']

def prune_types(existing):
    """Return the entries of `existing`, in order, that are not in `types_to_remove`."""
    return [entry for entry in existing if entry not in types_to_remove]

# Replace every restaurant's type list with its pruned version
restaurants['BusTypes'] = restaurants['BusTypes'].map(prune_types)

In [404]:
# Inspecting most frequent business types by area
# For each location, print (type, count) pairs sorted by descending count, and
# collect every type seen anywhere in `all_unique` (first-seen order).
# The loop variable is named `bus_type` so the builtin `type` is not
# overwritten for the rest of the notebook session.
all_unique = []
for loc in restaurants.AscLoc.unique():
    type_counts = {}
    for bus_types in restaurants[restaurants.AscLoc == loc].BusTypes:
        for bus_type in bus_types:
            type_counts[bus_type] = type_counts.get(bus_type, 0) + 1
            if bus_type not in all_unique:
                all_unique.append(bus_type)
    # Stable sort: types with equal counts keep the order in which they were first seen
    freqs = sorted(type_counts.items(), key=lambda pair: pair[1], reverse=True)
    print(loc, freqs, '\n')

print(all_unique)

IUPUI [('store', 5), ('bar', 3), ('meal_delivery', 3), ('cafe', 1)] 

Industrious Mass Ave [('bar', 23), ('meal_delivery', 5), ('cafe', 3), ('store', 3), ('night_club', 2), ('bowling_alley', 1), ('bakery', 1), ('casino', 1), ('liquor_store', 1)] 

Launch Fishers [('store', 12), ('bar', 10), ('cafe', 7), ('meal_delivery', 4), ('convenience_store', 3), ('atm', 3), ('gas_station', 3), ('finance', 3), ('health', 3), ('car_wash', 2), ('bakery', 1), ('school', 1), ('night_club', 1)] 

Launch Indy [('bar', 26), ('store', 6), ('cafe', 3), ('meal_delivery', 3), ('night_club', 2), ('bakery', 1), ('bowling_alley', 1)] 

The Speak Easy [('bar', 17), ('store', 8), ('meal_delivery', 2), ('cafe', 2), ('bakery', 2), ('grocery_or_supermarket', 1), ('liquor_store', 1), ('gas_station', 1), ('convenience_store', 1)] 

zWORKS [('store', 11), ('bar', 8), ('cafe', 6), ('bakery', 2), ('meal_delivery', 2), ('gas_station', 1)] 

['cafe', 'store', 'bar', 'meal_delivery', 'bowling_alley', 'night_club', 'bakery', 

## Gathering info on hours

In [357]:
#https://maps.googleapis.com/maps/api/place/details/json?place_id=?fields=opening_hours
def time_format(time):
    """Insert a colon after the first two characters, e.g. '0930' -> '09:30'."""
    leading, trailing = time[:2], time[2:]
    return ':'.join((leading, trailing))

# Collecting arrays of opening and closing times for each restaurant in dataframe
# `hours` gets one entry per restaurant: a list of [open, close] pairs
# formatted by `time_format`, with 'none' standing in for a missing close time.
hours = []
for place_id in restaurants.Id:
    response = requests.get(
        'https://maps.googleapis.com/maps/api/place/details/json',
        params={'place_id': place_id, 'fields': 'opening_hours', 'key': G_API_KEY},
        timeout=30,
    )
    # Parse the body once. A response without 'result', 'opening_hours' or
    # 'periods' is treated as "hours unknown" instead of raising KeyError.
    periods = response.json().get('result', {}).get('opening_hours', {}).get('periods')
    if periods:
        open_close = [
            [
                time_format(period['open']['time']),
                time_format(period['close']['time']) if 'close' in period else 'none',
            ]
            for period in periods
        ]
        hours.append(open_close)
    else:
        # Unknown (or empty) hours: seven placeholder pairs, which the
        # averaging functions turn into NaN
        hours.append([['none', 'none']] * 7)
restaurants['Hours'] = hours

In [381]:
import math
# Defining function to calculate average close given a series of closing times for single restaurant
def avg_close(hours):
    """Average closing time of one restaurant, in decimal hours.

    Parameters
    ----------
    hours : list of [open, close] pairs
        Each value is an 'HH:MM' string or the placeholder 'none'.

    Returns
    -------
    float
        Mean closing time over all pairs. Closing times before 06:00 are
        assumed to be late-night closes and are counted as 24 + hour.
        Returns np.nan when `hours` is empty or a pair has neither an open nor
        a close time, and 30.0 when a pair has an open time but no close time.
    """
    if len(hours) == 0:
        return np.nan  # nothing to average; avoids dividing by zero
    running_sum = 0
    for period in hours:
        close = period[1]
        if close == 'none':
            if period[0] == 'none':
                return np.nan
            return 30.0 # This case represents all-night hours; for simplicity's sake replacing with an early morning close. Should not be an issue given opening time is not considered here.
        # Convert 'HH:MM' to decimal hours arithmetically. Building the number
        # from strings turned e.g. '21:03' into 21.5 instead of 21.05.
        close_time = int(close[:2]) + int(close[3:]) / 60
        if close_time < 6: # Assume closing late night
            close_time += 24
        running_sum += close_time
    return running_sum / len(hours)

# Defining function to calculate average open given a series of opening times for a single restaurant
def avg_open(hours):
    """Average opening time of one restaurant, in decimal hours.

    Parameters
    ----------
    hours : list of [open, close] pairs
        Each value is an 'HH:MM' string or the placeholder 'none'.

    Returns
    -------
    float
        Mean opening time over all pairs, or np.nan when `hours` is empty or
        any pair has no opening time.
    """
    if len(hours) == 0:
        return np.nan  # nothing to average; avoids dividing by zero
    running_sum = 0
    for period in hours:
        opening = period[0]  # not named `open`, which would shadow the builtin
        if opening == 'none':
            return np.nan
        # Convert 'HH:MM' to decimal hours arithmetically. Building the number
        # from strings turned e.g. '09:03' into 9.5 instead of 9.05.
        running_sum += int(opening[:2]) + int(opening[3:]) / 60
    return running_sum / len(hours)

In [405]:
# Attach the raw hours and derive one average closing/opening time per restaurant
restaurants['Hours'] = hours
for column, summarise in (('Avg_Close', avg_close), ('Avg_Open', avg_open)):
    restaurants[column] = restaurants['Hours'].map(summarise)
print(restaurants[['Name', 'Hours', 'Avg_Open', 'Avg_Close']][60:70])
print(restaurants.groupby('AscLoc').count()) # Introduced new cols which have missing values

                                          Name                                              Hours   Avg_Open  Avg_Close
60                  The Oceanaire Seafood Room  [[17:00, 21:00], [17:00, 21:00], [17:00, 21:00...  17.000000  21.285714
61                 Pearl Street Pizzeria & Pub  [[12:00, 02:00], [11:00, 02:00], [11:00, 02:00...  11.285714  26.000000
62                                    Tin Roof  [[11:00, 03:00], [16:00, 03:00], [16:00, 03:00...  14.333333  27.000000
63           O'Reilly's Irish Bar & Restaurant  [[11:00, 00:00], [11:00, 00:00], [11:00, 00:00...  11.000000  24.000000
64                                        Nada  [[17:00, 22:00], [17:00, 22:00], [17:00, 22:00...  17.000000  22.000000
65                          Buffalo Wild Wings  [[11:00, 22:00], [11:00, 22:00], [11:00, 22:00...  11.000000  22.285714
66                          Gallery Pastry Bar  [[08:00, 16:00], [08:00, 16:00], [08:00, 16:00...   8.000000  16.000000
67                 Prime 47- Indy's Stea

In [406]:
# Fill missing average hours with the mean of the same column within the restaurant's own location
for column in ('Avg_Close', 'Avg_Open'):
    restaurants[column] = restaurants.groupby('AscLoc')[column].transform(
        lambda values: values.fillna(values.mean())
    )
print(restaurants.groupby('AscLoc').count()) # All values present, can now analyze by location

                      Name  Rating  Num_Ratings  Price  Id  Status  BusTypes  Hours  Avg_Close  Avg_Open
AscLoc                                                                                                  
IUPUI                   60      60           60     60  60      60        60     60         60        60
Industrious Mass Ave    60      60           60     60  60      60        60     60         60        60
Launch Fishers          60      60           60     60  60      60        60     60         60        60
Launch Indy             60      60           60     60  60      60        60     60         60        60
The Speak Easy          60      60           60     60  60      60        60     60         60        60
zWORKS                  60      60           60     60  60      60        60     60         60        60


In [407]:
# Per-location mean opening and closing hour (decimal hours, 2 decimals).
# Values are attached by matching `locations.Name` against the groupby index
# rather than by row position, so row order cannot mis-assign them.
grouped_hours = restaurants.groupby('AscLoc')
locations['Avg Open'] = locations['Name'].map(grouped_hours.Avg_Open.mean().round(2))
locations['Avg Close'] = locations['Name'].map(grouped_hours.Avg_Close.mean().round(2))
print(locations[['Name', 'Avg Open', 'Avg Close']])
# As anticipated, among the coworking candidates Industrious Mass Ave and Launch Indy have the two latest average closing times, at roughly 9:08 PM and 9:30 PM respectively.
# Average open times show that Launch Fishers and zWORKS tend to have earlier hours, but this is likely due to the frequency of convenience stores and gas stations as opposed to restaurants and bars.

                   Name  Avg Open  Avg Close
0                 IUPUI      9.73      21.36
1  Industrious Mass Ave     11.86      21.14
2        Launch Fishers      9.71      21.00
3           Launch Indy     12.17      21.50
4        The Speak Easy     11.51      20.83
5                zWORKS     10.05      20.56


## Investigating Other Nearby Businesses

In [391]:
# For every site, collect all result pages of a distance-ranked
# tourist-attraction search. `nearby_tourism[k]` is the list of raw `requests`
# responses (one per page) for the k-th row of `locations`.
nearby_tourism = []
import time

tourism_search_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
for coords in locations.geometry:
    # Query parameters go through `params` so requests handles the URL encoding.
    response = requests.get(
        tourism_search_url,
        params={
            'location': '{},{}'.format(coords[0], coords[1]),
            'type': 'tourist_attraction',
            'rankby': 'distance',
            'key': G_API_KEY,
        },
        timeout=30,
    )
    response.raise_for_status()
    pages = [response]
    payload = response.json()  # parse each body once instead of on every check
    while 'next_page_token' in payload:
        time.sleep(2)  # Accounts for API's delay in processing next page tokens
        response = requests.get(
            tourism_search_url,
            params={'pagetoken': payload['next_page_token'], 'key': G_API_KEY},
            timeout=30,
        )
        response.raise_for_status()
        pages.append(response)
        payload = response.json()
    nearby_tourism.append(pages)

In [392]:
# Merge the 'results' of every page into one list of attractions per location
flattened_tourism = [
    [place for page in pages for place in page.json()['results']]
    for pages in nearby_tourism
]

## Logging and Loading Tourism Query Data

In [442]:
# Logging tourism queries as json
# One '<location name>_Tourism.json' file per site, holding the list of parsed
# page bodies. Sites and page lists are paired positionally, so the number of
# sites is no longer hard-coded, and the `with` block closes the file itself.
import json

for file_name, pages in zip(locations.Name, nearby_tourism):
    with open(file_name + '_Tourism.json', 'w') as fp:
        json.dump([page.json() for page in pages], fp)

In [None]:
# Read each location's saved tourism pages back from '<location name>_Tourism.json'
# and flatten their 'results' into a single list of attractions per location
import json

loaded_flattened_tourism = []
for site_name in locations.Name:
    with open(site_name + '_Tourism.json', 'r') as fp:
        saved_pages = json.load(fp)
        loaded_flattened_tourism.append(
            [place for page in saved_pages for place in page['results']]
        )

## Building Tourism Dataframe

In [410]:
# Build one row per tourist attraction. Rows are collected in a list and turned
# into a DataFrame in a single step: appending to a DataFrame inside the loop
# copies the whole frame on every iteration, and `DataFrame.append` no longer
# exists in pandas 2.x.
tourism_columns = ['Name', 'Rating', 'Num_Ratings', 'Price', 'Id', 'Status', 'AscLoc', 'BusTypes']
tourism_records = []
for idx, loc in enumerate(flattened_tourism):
    for place in loc:
        tourism_records.append({
            'Name': place['name'],
            # Optional fields fall back to NaN ('null' for the status) when absent
            'Rating': place.get('rating', np.nan),
            'Num_Ratings': place.get('user_ratings_total', np.nan),
            'Price': place.get('price_level', np.nan),
            'Id': place['place_id'],
            'Status': place.get('business_status', 'null'),
            'AscLoc': locations.Name[idx],  # site whose search returned this place
            'BusTypes': place['types'],
        })
tourism = pd.DataFrame(tourism_records, columns=tourism_columns)
print(tourism.head())

                                          Name  Rating Num_Ratings  Price                           Id              Status AscLoc                                           BusTypes
0                                  Ball Garden     4.7           3    NaN  ChIJ335SSTZXa4gRYDAKVmjGMD4         OPERATIONAL  IUPUI  [park, tourist_attraction, point_of_interest, ...
1                              Stringtown Park     4.1          77    NaN  ChIJuasM5UNXa4gRPY5xO4OAH8M         OPERATIONAL  IUPUI  [park, tourist_attraction, point_of_interest, ...
2  Simon Skjodt International Orangutan Center     4.8         172    NaN  ChIJOUBHmU5Xa4gRVKTCu4btNaI         OPERATIONAL  IUPUI  [tourist_attraction, point_of_interest, establ...
3                             Indianapolis Zoo     4.5       14979    NaN  ChIJK1-_KkxXa4gRazRkyTLNKBw         OPERATIONAL  IUPUI  [zoo, aquarium, tourist_attraction, point_of_i...
4                          White River Gardens     4.7         876    NaN  ChIJK1-_KkxXa4gRw-A9

In [411]:
# Non-null count of every column per associated location.
print(tourism.groupby('AscLoc').count()) # Is missing a lot of values. Also evident that this type parameter does not contain price.

                      Name  Rating  Num_Ratings  Price  Id  Status  BusTypes
AscLoc                                                                      
IUPUI                   60      57           57      0  60      60        60
Industrious Mass Ave    54      51           51      0  54      54        54
Launch Fishers          26      26           26      0  26      26        26
Launch Indy             49      46           46      0  49      49        49
The Speak Easy          38      38           38      0  38      38        38
zWORKS                  12      12           12      0  12      12        12


In [425]:
# Getting rid of empty price column
# A non-mutating drop with errors='ignore' keeps this cell re-runnable: the
# in-place version raised KeyError the second time it was executed.
tourism = tourism.drop(columns=['Price'], errors='ignore')

# Imputing missing values using column averages of each group
for column in ('Rating', 'Num_Ratings'):
    tourism[column] = tourism.groupby('AscLoc')[column].transform(
        lambda values: values.fillna(values.mean())
    )
print(tourism.groupby('AscLoc').count())

                      Name  Rating  Num_Ratings  Id  Status  BusTypes
AscLoc                                                               
IUPUI                   60      60           60  60      60        60
Industrious Mass Ave    54      54           54  54      54        54
Launch Fishers          26      26           26  26      26        26
Launch Indy             49      49           49  49      49        49
The Speak Easy          38      38           38  38      38        38
zWORKS                  12      12           12  12      12        12


In [439]:
# Per-location mean attraction rating (2 decimals) and number of attractions.
# Values are attached by matching `locations.Name` against the groupby index
# rather than by row position, so row order cannot mis-assign them.
tourism_ratings = tourism.groupby('AscLoc').Rating
locations['Avg Tourism Rating'] = locations['Name'].map(tourism_ratings.mean().round(2))
locations['Num Attractions'] = locations['Name'].map(tourism_ratings.count())
print(locations) # Average Rating doesn't say anything. It is likely dominated by the amount of parks. Instead we'll look at general diversity next.

                  Name              Address       Type             geometry  distance(km)  Avg Price  Price Variance  Avg Rating  Rating Variance  Avg Open  Avg Close  Avg Tourism Rating  \
0                IUPUI  415 Porto Alegre...    Housing  [39.77389, -86.1...          0.00       1.25            0.10        4.10             0.40      9.73      21.36                4.46   
1  Industrious Mass...  350 Massachusett...  Coworking  [39.765831, -86....          2.50       1.97            0.63        4.14             0.47     11.86      21.14                4.55   
2       Launch Fishers  12175 Visionary ...  Coworking  [39.9667636, -86...         26.19       1.48            0.22        4.06             0.46      9.71      21.00                4.57   
3          Launch Indy  525 S Meridian S...  Coworking  [39.7593486, -86...          2.81       1.90            0.54        4.12             0.42     12.17      21.50                4.53   
4       The Speak Easy  5255 Winthrop Av...  Cowor

### Investigating Types of Tourist Attractions and Relative Variety

In [419]:
# Descriptors to leave out of the tourism type lists (not of interest at the moment)
types_to_remove = ['point_of_interest', 'establishment', 'restaurant', 'food', 'tourist_attraction', 'local_government_office', 'electronics_store', 'home_goods_store', 'lawyer', 'bar', 'school', 'parking', 'funeral_home', 'cemetery', 'store']

def prune_types(existing):
    """Return the entries of `existing`, in order, that are not in `types_to_remove`."""
    return [entry for entry in existing if entry not in types_to_remove]

# Replace every attraction's type list with its pruned version
tourism['BusTypes'] = tourism['BusTypes'].map(prune_types)

In [421]:
# For each location, print (type, count) pairs of its attractions sorted by
# descending count, and collect every type seen anywhere in `all_unique`
# (first-seen order). The loop variable is named `bus_type` so the builtin
# `type` is not overwritten for the rest of the notebook session.
all_unique = []
for loc in tourism.AscLoc.unique():
    type_counts = {}
    for bus_types in tourism[tourism.AscLoc == loc].BusTypes:
        for bus_type in bus_types:
            type_counts[bus_type] = type_counts.get(bus_type, 0) + 1
            if bus_type not in all_unique:
                all_unique.append(bus_type)
    # Stable sort: types with equal counts keep the order in which they were first seen
    freqs = sorted(type_counts.items(), key=lambda pair: pair[1], reverse=True)
    print(loc, freqs, '\n')

IUPUI [('park', 27), ('museum', 11), ('church', 3), ('place_of_worship', 3), ('art_gallery', 2), ('library', 2), ('zoo', 1), ('aquarium', 1), ('travel_agency', 1)] 

Industrious Mass Ave [('park', 20), ('museum', 10), ('church', 5), ('place_of_worship', 5), ('art_gallery', 2), ('library', 2), ('travel_agency', 1)] 

Launch Fishers [('park', 20), ('museum', 2), ('amusement_park', 1)] 

Launch Indy [('park', 18), ('museum', 8), ('church', 5), ('place_of_worship', 5), ('art_gallery', 2), ('library', 2), ('travel_agency', 1)] 

The Speak Easy [('park', 34), ('museum', 2), ('amusement_park', 1), ('church', 1), ('place_of_worship', 1)] 

zWORKS [('park', 10), ('museum', 1)] 



# Final DataFrames: Locations, Restaurants, and Tourist Attractions

In [440]:
# Print the `locations` frame with all columns added by the cells above.
print(locations)

                  Name              Address       Type             geometry  distance(km)  Avg Price  Price Variance  Avg Rating  Rating Variance  Avg Open  Avg Close  Avg Tourism Rating  \
0                IUPUI  415 Porto Alegre...    Housing  [39.77389, -86.1...          0.00       1.25            0.10        4.10             0.40      9.73      21.36                4.46   
1  Industrious Mass...  350 Massachusett...  Coworking  [39.765831, -86....          2.50       1.97            0.63        4.14             0.47     11.86      21.14                4.55   
2       Launch Fishers  12175 Visionary ...  Coworking  [39.9667636, -86...         26.19       1.48            0.22        4.06             0.46      9.71      21.00                4.57   
3          Launch Indy  525 S Meridian S...  Coworking  [39.7593486, -86...          2.81       1.90            0.54        4.12             0.42     12.17      21.50                4.53   
4       The Speak Easy  5255 Winthrop Av...  Cowor

In [437]:
# Print the `restaurants` frame (one row per restaurant, tagged with its associated location).
print(restaurants)

                    Name    Rating  Num_Ratings     Price                   Id              Status  AscLoc         BusTypes                Hours  Avg_Close   Avg_Open
0    Café Soleil at E...  4.600000    37.000000  1.250000  ChIJ3Z7ruzdXa4gR...  CLOSED_TEMPORARILY   IUPUI               []  [[none, none], [...  21.363324   9.732601
1        Farmer's Fridge  4.500000     2.000000  1.250000  ChIJtXOEuipXa4gR...         OPERATIONAL   IUPUI               []      [[00:00, none]]  30.000000   0.000000
2    Ingram Micro Mob...  4.600000     7.000000  1.250000  ChIJoawzbTdXa4gR...         OPERATIONAL   IUPUI               []  [[06:00, 02:00],...  26.000000   6.000000
3        Farmer's Fridge  4.098113   286.943396  1.250000  ChIJXWWxhStXa4gR...         OPERATIONAL   IUPUI               []      [[00:00, none]]  30.000000   0.000000
4     Boar's Head Bistro  4.098113   286.943396  1.250000  ChIJHznwGgtXa4gR...         OPERATIONAL   IUPUI               []  [[none, none], [...  21.363324   9.73260

In [438]:
# Print the `tourism` frame (one row per attraction, tagged with its associated location).
print(tourism)

                    Name  Rating  Num_Ratings                   Id              Status  AscLoc         BusTypes
0            Ball Garden     4.7          3.0  ChIJ335SSTZXa4gR...         OPERATIONAL   IUPUI           [park]
1        Stringtown Park     4.1         77.0  ChIJuasM5UNXa4gR...         OPERATIONAL   IUPUI           [park]
2    Simon Skjodt Int...     4.8        172.0  ChIJOUBHmU5Xa4gR...         OPERATIONAL   IUPUI               []
3       Indianapolis Zoo     4.5      14979.0  ChIJK1-_KkxXa4gR...         OPERATIONAL   IUPUI  [zoo, aquarium]
4    White River Gardens     4.7        876.0  ChIJK1-_KkxXa4gR...  CLOSED_TEMPORARILY   IUPUI           [park]
..                   ...     ...          ...                  ...                 ...     ...              ...
234      Mulberry Fields     4.7        187.0  ChIJQcQU96dVE4gR...         OPERATIONAL  zWORKS           [park]
235  Jill Perelman Pa...     4.8         17.0  ChIJf0_cYIirFIgR...         OPERATIONAL  zWORKS          