In [94]:
import pandas as pd
import numpy as np
import re

In [95]:
# Load the full Michelin dataset; the first CSV column is used as the row index.
data = pd.read_csv(filepath_or_buffer='Michelin_Details.csv', index_col=0)

In [96]:
# Keep only the U.S. rows as an independent frame, renumbered from zero.
df = data.loc[data['country_alpha2'] == 'US'].copy().reset_index(drop=True)
df.shape

(206, 23)

In [97]:
# Split the longitude range into two equal-width bins and store the integer
# bin code (0 = lower/more-western longitudes, 1 = higher/more-eastern).
df['coast'] = pd.cut(df['lon'], bins=2, labels=False)

# Find Restaurants by Subregion

In [98]:
def has_num(text):
    """Report whether ``text`` contains at least one digit.

    Args:
        text: String to scan.

    Returns:
        True when a run of one or more digits occurs anywhere in ``text``,
        otherwise False.
    """
    return re.search(r'[\d]+', text) is not None

def has_apt(text):
    """Report whether ``text`` contains a suite/unit designator.

    Recognizes the standalone words ``Ste`` (with or without a trailing
    period) and ``Unit``.  Matching is on whole words so that place names
    which merely start with those letters (e.g. "Steamboat Springs",
    "United States") are not mistaken for a suite or unit.

    Args:
        text: One comma-separated piece of an address.

    Returns:
        True when ``text`` contains ``Ste`` or ``Unit`` as a whole word,
        otherwise False.
    """
    # The earlier pattern 'Ste.|Unit' used an unescaped '.', which matches any
    # character, and had no word boundaries; \b fixes both while still
    # accepting "Ste." and "Ste 126".
    return re.search(r'\b(?:Ste|Unit)\b', text) is not None

In [99]:
# Scratch check: with `*` the pattern may match zero digits, so the search
# succeeds immediately at position 0 with an empty match instead of finding
# "1234".
re.search(r'[\d]*', 'does this work 1234?')

<re.Match object; span=(0, 0), match=''>

In [100]:
# Break every address string into its comma-separated pieces (whitespace
# around each piece is left untouched here).
split_address = df['address'].map(lambda addr: addr.split(','))
split_address

0      [16573 Ventura Blvd.,  Encino,  91436,  United...
1      [1320 E. 7th St.,  Ste. 126,  Los Angeles,  90...
2      [132 The Embarcadero,  San Francisco,  94101, ...
3                      [5600 CA-1,  Elk,  United States]
4      [5200 Grand Del Mar Way,  San Diego,  92130,  ...
                             ...                        
201    [123 N. Jefferson St.,  Chicago,  60661,  Unit...
202    [2419 W. 14th St.,  Chicago,  60618,  United S...
203    [1466 N. Ashland Ave.,  Chicago,  60662,  Unit...
204    [2610 N. Cannon Dr.,  Chicago,  60614,  United...
205    [933 N. Ashland Ave.,  Chicago,  60622,  Unite...
Name: address, Length: 206, dtype: object

In [101]:
# The city normally sits right after the street piece; when that slot holds a
# suite/unit designator instead, the city is one position further along.
cities = [
    (parts[2] if has_apt(parts[1]) else parts[1]).strip()
    for parts in split_address
]

set(cities)

{'Beverly Hills',
 'Brooklyn',
 'Burlingame',
 'Carmel',
 'Chicago',
 'Costa Mesa',
 'Culver City',
 'Elk',
 'Encino',
 'Forestville',
 'Healdsburg',
 'Hollywood',
 'Long Island City',
 'Los Angeles',
 'Los Gatos',
 'Menlo Park',
 'Mountain View',
 'Napa',
 'New York',
 'Oakland',
 'Palo Alto',
 'Rutherford',
 'Sacramento',
 'San Anselmo',
 'San Diego',
 'San Francisco',
 'San Mateo',
 'Santa Monica',
 'Saratoga',
 'St. Helena',
 'Tarrytown',
 'Temple City',
 'Washington',
 'West Los Angeles',
 'Woodside',
 'Yountville'}

In [107]:
# Attach the extracted city names as a new column, one per row in order.
df['city'] = pd.Series(cities, index=df.index)

# Sub-region

In [108]:
# Rows in the western longitude bin, split into two equal-width latitude
# bins: the lower-latitude half is labelled 'SC', the upper half 'NC'.
ca_list = df.loc[df['coast'] == 0]
ca_split = pd.cut(ca_list.lat, 2, labels=['SC', 'NC'])

In [109]:
# Rows in the eastern longitude bin, labelled by fixed longitude bands:
# (-90, -80] -> 'CHI', (-80, -75] -> 'DC', (-75, 0] -> 'NY'.
ea_list = df.loc[df['coast'] == 1]
ea_split = pd.cut(ea_list['lon'], bins=[-90, -80, -75, 0], labels=['CHI', 'DC', 'NY'])

In [110]:
# Raise the row display limit so the table below is not truncated.
pd.options.display.max_rows = 200

ea_list.loc[:, ['address', 'lon']]

Unnamed: 0,address,lon
88,"458 W. 17th St., New York, 10011, United States",-74.00633
89,"212 Greene Ave., Brooklyn, 11238, United States",-73.962976
90,"9 W. 53rd St., New York, 10019, United States",-73.97628
91,"295 Grand St., New York, 11211, United States",-73.9575
92,"73 Thompson St., New York, 10012, United States",-74.003006
93,"89 E. 42nd St., New York, 10017, United States",-73.977864
94,"11 Madison Ave., New York, 10010, United States",-73.98712
95,"47 E. Houston St., New York, 10012, United States",-73.9948
96,"381 Lenox Ave., New York, 10027, United States",-73.943726
97,"72 University Pl., New York, 10003, United States",-73.993515


In [111]:
# Put each eastern restaurant's region label beside its address to eyeball
# the assignment.
pd.concat([ea_split, ea_list['address']], axis='columns')

Unnamed: 0,lon,address
88,NY,"458 W. 17th St., New York, 10011, United States"
89,NY,"212 Greene Ave., Brooklyn, 11238, United States"
90,NY,"9 W. 53rd St., New York, 10019, United States"
91,NY,"295 Grand St., New York, 11211, United States"
92,NY,"73 Thompson St., New York, 10012, United States"
93,NY,"89 E. 42nd St., New York, 10017, United States"
94,NY,"11 Madison Ave., New York, 10010, United States"
95,NY,"47 E. Houston St., New York, 10012, United States"
96,NY,"381 Lenox Ave., New York, 10027, United States"
97,NY,"72 University Pl., New York, 10003, United States"


In [112]:
# Stack the western and eastern labels into one Series; the column assignment
# then lines the labels up with df rows by index.
region = pd.concat([ca_split, ea_split])
df['region'] = region

In [113]:
# List the distinct region labels that were assigned.
set(df['region'].unique())

{'CHI', 'DC', 'NC', 'NY', 'SC'}

In [114]:
# Persist the U.S. subset (row index included) for later use.
df.to_csv(path_or_buf='US_Michelin.csv')

# San Francisco

In [20]:
import re

In [21]:
# Boolean mask of rows whose address mentions "San Francisco" as a literal
# substring.  A missing address counts as "no match" rather than raising.
sf_data = df['address'].str.contains('San Francisco', regex=False, na=False)
# Index labels of the matching rows.
sf_index = sf_data[sf_data].index

In [22]:
# Pull the rows labelled by sf_index into an independent frame.
sf_data = df.loc[sf_index, :].copy()

In [23]:
# Display the San Francisco subset for inspection.
sf_data

Unnamed: 0,name,address,min_price,max_price,currency,cuisine,description,star,comfort,delightful,...,lon,url,country,country_alpha2,continent_code,USD_min_price,USD_max_price,aggregate_cuisine,coast,region
2,Angler,"132 The Embarcadero, San Francisco, 94101, Uni...",75.0,150.0,USD,CONTEMPORARY,Pitched as a more casual counterpart to the st...,1.0,3.0,0.0,...,-122.39213,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,CONTEMPORARY,0,NC
32,Bar Crenn,"3131 Fillmore St., San Francisco, 94101, Unite...",50.0,75.0,USD,FRENCH,It may be located next to Chef Dominique Crenn...,1.0,2.0,0.0,...,-122.43581,https://guide.michelin.com/en/california/san-f...,United States,US,,50.0,75.0,FRENCH,0,NC
34,Birdsong,"1085 Mission St., San Francisco, 94101, United...",75.0,150.0,USD,AMERICAN,"The front windows are stacked with logs, dried...",1.0,2.0,0.0,...,-122.41048,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,AMERICAN,0,NC
35,Nico,"710 Montgomery St., San Francisco, 94101, Unit...",50.0,75.0,USD,CONTEMPORARY,"After moving to the Financial District, Chef N...",1.0,2.0,0.0,...,-122.40333,https://guide.michelin.com/en/california/san-f...,United States,US,,50.0,75.0,CONTEMPORARY,0,NC
36,Sorrel,"3228 Sacramento St., San Francisco, 94101, Uni...",50.0,75.0,USD,CALIFORNIAN,"Perched on the edge of Pacific Heights, this s...",1.0,2.0,0.0,...,-122.44614,https://guide.michelin.com/en/california/san-f...,United States,US,,50.0,75.0,CALIFORNIAN,0,NC
38,Kinjo,"2206 Polk St., San Francisco, 94109, United St...",75.0,150.0,USD,JAPANESE,With Chef/co-owner Billy Kong and consultant c...,1.0,2.0,0.0,...,-122.422005,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,JAPANESE,0,NC
41,In Situ,"151 Third St., San Francisco, 94103, United St...",75.0,150.0,USD,INTERNATIONAL,Like the SF Museum of Modern Art in which it’s...,1.0,2.0,1.0,...,-122.40113,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,FUSION,0,NC
42,Hashiri,"4 Mint Plaza, San Francisco, 94103, United States",75.0,150.0,USD,JAPANESE,This omakase-only sparkler in Mint Plaza may b...,1.0,3.0,0.0,...,-122.40754,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,JAPANESE,0,NC
43,Mister Jiu's,"28 Waverly Pl., San Francisco, 94108, United S...",50.0,75.0,USD,CHINESE,Chef/owner Brandon Jew has brought some of the...,1.0,2.0,0.0,...,-122.406654,https://guide.michelin.com/en/california/san-f...,United States,US,,50.0,75.0,CHINESE,0,NC
44,jū-ni,"1335 Fulton St., San Francisco, 94117, United ...",75.0,150.0,USD,JAPANESE,"“J?-ni” is Japanese for “twelve,” which also h...",1.0,2.0,0.0,...,-122.43886,https://guide.michelin.com/en/california/san-f...,United States,US,,75.0,150.0,JAPANESE,0,NC


In [24]:
# Persist the San Francisco subset (row index included).
sf_data.to_csv(path_or_buf='SF_Michelin.csv')

# Chicago