## Tastescape Score

- Data Source:
  - `restaurants`
  - `coffee_shops`
  - `supermarkets`

In [4]:
import pandas as pd

def _print_count_stats(heading, counts):
    """Print `heading`, then the mean, median and max of the count Series `counts`."""
    print(heading)
    for stat_label, stat_value in (
        ("  mean:", counts.mean()),
        ("  median:", counts.median()),
        ("  max:", counts.max()),
    ):
        print(stat_label, stat_value)


# Restaurants: load the cleaned table and tally rows per zip code.
restaurants = pd.read_csv('../../data_cleaned/assets/restaurants.csv')
restaurant_count = restaurants['zip_code'].value_counts()
_print_count_stats("restaurant:", restaurant_count)

# Coffee shops: same tally on the coffee-shop table.
coffee_shops = pd.read_csv('../../data_cleaned/assets/coffee_shops.csv')
coffee_shop_count = coffee_shops['zip_code'].value_counts()
_print_count_stats("coffee shop:", coffee_shop_count)

# Supermarkets: same tally on the supermarket table.
supermarkets = pd.read_csv('../../data_cleaned/assets/supermarkets.csv')
supermarket_count = supermarkets['zip_code'].value_counts()
_print_count_stats("supermarket:", supermarket_count)

restaurant:
  mean: 35.67272727272727
  median: 23.5
  max: 246
coffee shop:
  mean: 3.7083333333333335
  median: 3.0
  max: 18
supermarket:
  mean: 2.3793103448275863
  median: 2.0
  max: 8


#### Tastescape Score Calculation

- Based on the per-zip-code counts above, a typical community has roughly 30 restaurants, 3 coffee shops, and 2 supermarkets, i.e. a ratio of about 30:3:2. We weight each asset type inversely to this ratio so that all three contribute comparably to the score.
- The score is calculated as follows:
  - `Tastescape Score = (restaurants + coffee_shops * 10 + supermarkets * 15) / 30`
  - The raw score is then min-max normalized to the range 0–1: `final score = (score - min) / (max - min)`, where `min` and `max` are the smallest and largest raw scores across all zip codes in the dataset

In [5]:
# Create a new dataframe with the counts of each asset type per zip code.
#
# The zip-code list starts with the restaurant zip codes (in their existing
# value_counts order) and then appends any zip code that only appears in the
# coffee-shop or supermarket data. Keying on the restaurant index alone would
# silently drop such zip codes from the score table, even though the score
# formula is well defined when restaurant_count is 0.
all_zip_codes = (
    restaurant_count.index
    .append([coffee_shop_count.index, supermarket_count.index])
    .unique()  # keeps first-seen order, so restaurant zip codes stay first
)

# Missing (zip code, asset type) combinations are filled with a count of 0.
zip_code_counts = pd.DataFrame({
    'zip_code': all_zip_codes,
    'restaurant_count': restaurant_count.reindex(all_zip_codes, fill_value=0).values,
    'coffee_shop_count': coffee_shop_count.reindex(all_zip_codes, fill_value=0).values,
    'supermarket_count': supermarket_count.reindex(all_zip_codes, fill_value=0).values
})
# print head 10 of the new dataframe
print(zip_code_counts.head(10))

   zip_code  restaurant_count  coffee_shop_count  supermarket_count
0     15222               246                 18                  4
1     15212               213                  6                  3
2     15213               157                 10                  1
3     15219               153                  7                  0
4     15237               139                  9                  8
5     15203               133                  5                  1
6     15146               128                  4                  7
7     15205               125                  5                  6
8     15108               109                  6                  4
9     15201                87                  1                  2


In [6]:
# Weighted raw score per zip code: each restaurant counts 1, each coffee shop
# 10 and each supermarket 15; the weighted total is then divided by 30.
weighted_total = (zip_code_counts['restaurant_count'] +
                  zip_code_counts['coffee_shop_count'] * 10 +
                  zip_code_counts['supermarket_count'] * 15)
raw_score = weighted_total / 30

# Min-max rescale so the lowest raw score maps to 0 and the highest to 1.
lowest, highest = raw_score.min(), raw_score.max()
zip_code_counts['score'] = (raw_score - lowest) / (highest - lowest)

# Rank zip codes from highest to lowest score and preview the top 10.
zip_code_counts = zip_code_counts.sort_values('score', ascending=False)
print(zip_code_counts.head(10))

# Keep only the zip code and its score, preview them, and write them to disk.
tastescape_scores = zip_code_counts.loc[:, ['zip_code', 'score']]
print(tastescape_scores.head(10))
tastescape_scores.to_csv('../../data_score/tastescape_scores.csv', index=False)

    zip_code  restaurant_count  coffee_shop_count  supermarket_count     score
0      15222               246                 18                  4  1.000000
4      15237               139                  9                  8  0.717526
1      15212               213                  6                  3  0.653608
6      15146               128                  4                  7  0.560825
2      15213               157                 10                  1  0.558763
7      15205               125                  5                  6  0.544330
10     15206                83                 10                  4  0.498969
8      15108               109                  6                  4  0.470103
12     15217                76                  9                  4  0.463918
3      15219               153                  7                  0  0.457732
    zip_code     score
0      15222  1.000000
4      15237  0.717526
1      15212  0.653608
6      15146  0.560825
2      15213  0.