## Tastescape Score

- Data Source:
  - `restaurants`
  - `coffee_shops`
  - `supermarkets`

In [11]:
import pandas as pd
import numpy as np

def _print_count_summary(label, counts):
    """Print the mean, median and max of a per-neighborhood count Series.

    Parameters
    ----------
    label : str
        Heading printed above the statistics (a colon is appended).
    counts : pandas.Series
        Number of rows per neighborhood, as produced by ``value_counts()``.
    """
    print(f"{label}:")
    print("  mean:", counts.mean())
    print("  median:", counts.median())
    print("  max:", counts.max())


# NOTE: value_counts() only lists neighborhoods that actually occur in a
# file, so the statistics below do not include neighborhoods with a count
# of zero for that category.

# restaurants: number of rows per neighborhood (`hood` column)
restaurants = pd.read_csv('../../data_cleaned/restaurants.csv')
restaurant_count = restaurants['hood'].value_counts()
_print_count_summary("restaurant", restaurant_count)

# coffee shops: number of rows per neighborhood (`hood` column)
coffee_shops = pd.read_csv('../../data_cleaned/coffee_shops.csv')
coffee_shop_count = coffee_shops['hood'].value_counts()
_print_count_summary("coffee shop", coffee_shop_count)

# supermarkets: number of rows per neighborhood (`hood` column)
supermarkets = pd.read_csv('../../data_cleaned/supermarkets.csv')
supermarket_count = supermarkets['hood'].value_counts()
_print_count_summary("supermarket", supermarket_count)

restaurant:
  mean: 20.373333333333335
  median: 7.0
  max: 245
coffee shop:
  mean: 3.2962962962962963
  median: 2.0
  max: 22
supermarket:
  mean: 1.4545454545454546
  median: 1.0
  max: 5


#### Tastescape Score Calculation

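- Based on the statistics above, the median numbers of restaurants, coffee shops, and supermarkets per neighborhood are in a ratio of roughly 7:2:1. To let the three categories contribute on a comparable scale, each count is weighted by the inverse of that ratio: 2, 7, and 14 (7 × 2 = 2 × 7 = 1 × 14).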
- The score is calculated as follows:
  - `Tastescape Score = log1p(restaurants * 2) + log1p(coffee_shops * 7) + log1p(supermarkets * 14)`
  - The score is then min-max normalized to the range 0–1: we find the minimum and maximum raw score in the dataset and compute `(score - min) / (max - min)` as the final score

In [14]:
# Map each output column to the per-neighborhood count Series that feeds it.
count_series_by_column = {
    'restaurant_count': restaurant_count,
    'coffee_shop_count': coffee_shop_count,
    'supermarket_count': supermarket_count,
}

# Collect every neighborhood that appears in at least one of the three
# count Series (restaurants first, then coffee shops, then supermarkets).
all_hoods = restaurant_count.index
for other_counts in (coffee_shop_count, supermarket_count):
    all_hoods = all_hoods.union(other_counts.index)

# One row per neighborhood; a neighborhood missing from a category gets 0.
aligned_counts = {
    column: counts.reindex(all_hoods, fill_value=0).values
    for column, counts in count_series_by_column.items()
}
hood_counts = pd.DataFrame({'hood': all_hoods, **aligned_counts})

# Show the first 10 neighborhoods of the combined table.
print(hood_counts.head(10))

                hood  restaurant_count  coffee_shop_count  supermarket_count
0   Allegheny Center                 5                  2                  0
1     Allegheny West                12                  2                  0
2          Allentown                 7                  1                  0
3          Arlington                 2                  0                  0
4         Banksville                 8                  2                  1
5  Bedford Dwellings                 1                  0                  0
6          Beechview                14                  0                  1
7        Beltzhoover                 6                  0                  0
8         Bloomfield                76                  4                  2
9              Bluff                20                  2                  0


In [16]:
# Weights applied to each category count before the log transform.
RESTAURANT_WEIGHT = 2
COFFEE_SHOP_WEIGHT = 7
SUPERMARKET_WEIGHT = 14

# Raw score for each neighborhood: sum of log1p(weighted count) per category.
# log1p(0) == 0, so a category with no entries contributes nothing.
raw_score = (
    np.log1p(hood_counts['restaurant_count'] * RESTAURANT_WEIGHT)
    + np.log1p(hood_counts['coffee_shop_count'] * COFFEE_SHOP_WEIGHT)
    + np.log1p(hood_counts['supermarket_count'] * SUPERMARKET_WEIGHT)
)

# Min-max normalize the score to be between 0 and 1.
score_min = raw_score.min()
score_range = raw_score.max() - score_min
centered_score = raw_score - score_min
# If every neighborhood has the same raw score the range is 0 and the
# division would produce NaN (0 / 0); in that case keep the all-zero
# centered values instead.
normalized_score = centered_score / score_range if score_range > 0 else centered_score

# Attach the score and sort the neighborhoods from highest to lowest score.
# assign() builds a new frame, so re-running this cell yields the same result.
hood_counts = (
    hood_counts
    .assign(score=normalized_score)
    .sort_values(by='score', ascending=False)
)
# Show the 10 highest-scoring neighborhoods with their counts.
print(hood_counts.head(10))

# Keep only the neighborhood name and its score, and save them to a csv file.
tastescape_scores = hood_counts[['hood', 'score']]
print(tastescape_scores.head(10))
tastescape_scores.to_csv('../../data_score/tastescape_scores.csv', index=False)

                         hood  restaurant_count  coffee_shop_count  \
57                  Shadyside                58                 11   
66        Squirrel Hill South                58                  7   
61           South Side Flats               132                  5   
8                  Bloomfield                76                  4   
67             Strip District                67                  1   
15  Central Business District               245                 22   
46              North Oakland                64                  4   
25               East Liberty                24                  1   
65        Squirrel Hill North                22                  2   
12                  Brookline                24                  1   

    supermarket_count     score  
57                  3  1.000000  
66                  2  0.928821  
61                  1  0.914375  
8                   2  0.905355  
67                  5  0.861419  
15                  0  0.