## Tastescape Score

- Data Source:
  - `restaurants`
  - `coffee_shops`
  - `supermarkets`

In [11]:
import pandas as pd
import numpy as np

def _load_and_summarize(csv_path, heading):
    """Read one cleaned venue CSV and report per-neighborhood counts.

    Loads ``csv_path``, counts rows per value of its ``hood`` column, and
    prints the mean, median and max of those counts under ``heading``.

    Returns:
        (frame, per_hood): the loaded DataFrame and the ``value_counts``
        Series of rows per neighborhood.
    """
    frame = pd.read_csv(csv_path)
    per_hood = frame['hood'].value_counts()
    print(heading)
    print("  mean:", per_hood.mean())
    print("  median:", per_hood.median())
    print("  max:", per_hood.max())
    return frame, per_hood


# Same three datasets, same order, same printed summary for each.
restaurants, restaurant_count = _load_and_summarize(
    '../../data_cleaned/restaurants.csv', "restaurant:")
coffee_shops, coffee_shop_count = _load_and_summarize(
    '../../data_cleaned/coffee_shops.csv', "coffee shop:")
supermarkets, supermarket_count = _load_and_summarize(
    '../../data_cleaned/supermarkets.csv', "supermarket:")

restaurant:
  mean: 20.373333333333335
  median: 7.0
  max: 245
coffee shop:
  mean: 3.2962962962962963
  median: 2.0
  max: 22
supermarket:
  mean: 1.4545454545454546
  median: 1.0
  max: 5


#### Tastescape Score Calculation

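- Based on the median counts above (7 restaurants, 2 coffee shops, and 1 supermarket per neighborhood), the three venue types occur in a ratio of roughly 7:2:1. Each type is therefore weighted inversely to how common it is (14/7 = 2, 14/2 = 7, 14/1 = 14), so that rarer venue types are not drowned out by restaurants.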
- The score is calculated as follows:
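  - `Tastescape Score = log1p(restaurants) * 2 + log1p(coffee_shops) * 7 + log1p(supermarkets) * 14`, where `log1p(x) = ln(1 + x)` dampens the effect of neighborhoods with very large counts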
  - The raw score is then min-max normalized to the range 0–1: `score = (raw - min) / (max - min)`, where `min` and `max` are the smallest and largest raw scores in the dataset

In [7]:
# Build one row per neighborhood ("hood") that appears in ANY of the three
# datasets. Keying on restaurant_count.index alone would silently drop
# neighborhoods that have coffee shops or supermarkets but no restaurants.
# sort=False asks pandas not to re-sort the combined labels.
all_hoods = (
    restaurant_count.index
    .union(coffee_shop_count.index, sort=False)
    .union(supermarket_count.index, sort=False)
)

# A neighborhood missing from a dataset has zero venues of that type,
# hence fill_value=0 when aligning each count Series to the full hood list.
hood_counts = pd.DataFrame({
    'hood': all_hoods,
    'restaurant_count': restaurant_count.reindex(all_hoods, fill_value=0).values,
    'coffee_shop_count': coffee_shop_count.reindex(all_hoods, fill_value=0).values,
    'supermarket_count': supermarket_count.reindex(all_hoods, fill_value=0).values
})
# preview the first 10 rows of the combined counts
print(hood_counts.head(10))

                        hood  restaurant_count  coffee_shop_count  \
0  Central Business District               245                 22   
1           South Side Flats               132                  5   
2                 Bloomfield                76                  4   
3                North Shore                74                  0   
4            Central Oakland                71                  4   
5             Strip District                67                  1   
6              North Oakland                64                  4   
7                  Troy Hill                59                  0   
8                  Shadyside                58                 11   
9        Squirrel Hill South                58                  7   

   supermarket_count  
0                  0  
1                  1  
2                  2  
3                  0  
4                  0  
5                  5  
6                  1  
7                  0  
8                  3  
9         

In [13]:
# Per-venue-type weights applied to the log-scaled counts. They are the
# inverse of the 7:2:1 restaurant:coffee-shop:supermarket ratio
# (14/7, 14/2, 14/1), so rarer venue types contribute more per venue.
RESTAURANT_WEIGHT = 2
COFFEE_SHOP_WEIGHT = 7
SUPERMARKET_WEIGHT = 14

# Raw score for each neighborhood; log1p(x) = ln(1 + x) maps a count of 0
# to 0 and compresses very large counts.
raw_score = (np.log1p(hood_counts['restaurant_count']) * RESTAURANT_WEIGHT +
             np.log1p(hood_counts['coffee_shop_count']) * COFFEE_SHOP_WEIGHT +
             np.log1p(hood_counts['supermarket_count']) * SUPERMARKET_WEIGHT)

# Min-max normalize the score to be between 0 and 1. If every neighborhood
# has the same raw score (or the frame is empty) the range is not positive;
# dividing would yield NaN, so fall back to a constant 0.0 instead.
score_min = raw_score.min()
score_range = raw_score.max() - score_min
if score_range > 0:
    hood_counts['score'] = (raw_score - score_min) / score_range
else:
    hood_counts['score'] = 0.0

# sort the dataframe by score in descending order
hood_counts = hood_counts.sort_values(by='score', ascending=False)
# preview the 10 highest-scoring neighborhoods with their counts
print(hood_counts.head(10))

# keep only the neighborhood name and its score, then save to a csv file
tastescape_scores = hood_counts[['hood', 'score']]
print(tastescape_scores.head(10))
tastescape_scores.to_csv('../../data_score/tastescape_scores.csv', index=False)

                         hood  restaurant_count  coffee_shop_count  \
8                   Shadyside                58                 11   
5              Strip District                67                  1   
9         Squirrel Hill South                58                  7   
2                  Bloomfield                76                  4   
0   Central Business District               245                 22   
1            South Side Flats               132                  5   
6               North Oakland                64                  4   
17               East Liberty                24                  1   
20        Squirrel Hill North                22                  2   
11                    Carrick                31                  0   

    supermarket_count     score  
8                   3  1.000000  
5                   5  0.848940  
9                   2  0.842424  
2                   2  0.779137  
0                   0  0.724625  
1                   1  0.