# The Michelin Guide #

Import modules

In [1]:
import pandas as pd

Initial inspection

In [4]:
# Read the Michelin Guide export (relative path) into a DataFrame.
michelin = pd.read_csv('michelin_my_maps.csv')

# Show the column labels first, then a preview of the opening rows.
column_names = michelin.columns.tolist()
print(f"Columns: {column_names}\n")
preview = michelin.head()
print(f"Head:\n{preview}")

Columns: ['Name', 'Address', 'Location', 'Price', 'Cuisine', 'Longitude', 'Latitude', 'PhoneNumber', 'Url', 'WebsiteUrl', 'Award', 'FacilitiesAndServices']

Head:
            Name                                        Address  \
0         Quince    470 Pacific Ave., San Francisco, 94133, USA   
1  Atelier Crenn   3127 Fillmore St., San Francisco, 94123, USA   
2        Addison  5200 Grand Del Mar Way, San Diego, 92130, USA   
3        Manresa         320 Village Ln., Los Gatos, 95030, USA   
4           Benu    22 Hawthorne St., San Francisco, 94105, USA   

             Location Price                    Cuisine   Longitude   Latitude  \
0  San Francisco, USA  $$$$  Contemporary, Californian -122.403261  37.797505   
1  San Francisco, USA  $$$$       Contemporary, French -122.435860  37.798350   
2      San Diego, USA  $$$$  Contemporary, Californian -117.198891  32.941297   
3      Los Gatos, USA  $$$$  Contemporary, Californian -121.980710  37.227610   
4  San Francisco, USA  $$$$  

I will drop the 'FacilitiesAndServices' column.

'WebsiteUrl' doesn't seem particularly useful, but I will keep it for now.

In [5]:
# Columns to retain; 'FacilitiesAndServices' is deliberately left out.
kept_columns = [
    'Name', 'Address', 'Location', 'Price', 'Cuisine', 'Longitude',
    'Latitude', 'PhoneNumber', 'Url', 'WebsiteUrl', 'Award',
]
michelin = michelin[kept_columns]
trimmed_preview = michelin.head()
print(f"\nNew Head:\n{trimmed_preview}")


New Head:
            Name                                        Address  \
0         Quince    470 Pacific Ave., San Francisco, 94133, USA   
1  Atelier Crenn   3127 Fillmore St., San Francisco, 94123, USA   
2        Addison  5200 Grand Del Mar Way, San Diego, 92130, USA   
3        Manresa         320 Village Ln., Los Gatos, 95030, USA   
4           Benu    22 Hawthorne St., San Francisco, 94105, USA   

             Location Price                    Cuisine   Longitude   Latitude  \
0  San Francisco, USA  $$$$  Contemporary, Californian -122.403261  37.797505   
1  San Francisco, USA  $$$$       Contemporary, French -122.435860  37.798350   
2      San Diego, USA  $$$$  Contemporary, Californian -117.198891  32.941297   
3      Los Gatos, USA  $$$$  Contemporary, Californian -121.980710  37.227610   
4  San Francisco, USA  $$$$        Asian, Contemporary -122.399060  37.785376   

    PhoneNumber                                                Url  \
0  1.415776e+10  https://guid

I will convert all column names to lowercase.

In [6]:
# Normalise every column label to lowercase.
lowercase_labels = michelin.columns.str.lower()
michelin.columns = lowercase_labels
column_names = michelin.columns.tolist()
print(f"\nNew Columns: {column_names}\n")


New Columns: ['name', 'address', 'location', 'price', 'cuisine', 'longitude', 'latitude', 'phonenumber', 'url', 'websiteurl', 'award']



Rename

In [7]:
# Give the two run-together column labels snake_case names.
# The result is rebound instead of using inplace=True, so the cell does not
# mutate an existing object and the mapping is explicitly tied to columns.
michelin = michelin.rename(columns={'phonenumber': 'phone_number', 'websiteurl': 'website_url'})
print(f"\nNew Columns: {michelin.columns.tolist()}\n")


New Columns: ['name', 'address', 'location', 'price', 'cuisine', 'longitude', 'latitude', 'phone_number', 'url', 'website_url', 'award']



I'd like to see the range of unique values in each column and the number of those values.
Not interested in 'cuisine' for now.

In [9]:
# List the distinct values of every column except 'cuisine', with their count.
# .unique() reports NaN as a value of its own, so it is included in the total.
for column in michelin:
    if column == 'cuisine':
        continue  # skipped for now, as stated in the notes above this cell
    # Compute the distinct values once and reuse them for both the listing and the count.
    distinct_values = michelin[column].unique()
    print(f"\nUnique {column}s: {distinct_values}.\nTotal unique = {len(distinct_values)} values")


Unique names: ['Quince' 'Atelier Crenn' 'Addison' ... 'Zazie' 'Butchery & Wine'
 'alewino'].
Total unique = 6637 values

Unique addresss: ['470 Pacific Ave., San Francisco, 94133, USA'
 '3127 Fillmore St., San Francisco, 94123, USA'
 '5200 Grand Del Mar Way, San Diego, 92130, USA' ...
 'ul. Józefa 34, Cracow, 32 056, Poland'
 'ul. Żurawia 22, Warsaw, 00 515, Poland'
 'ul. Mokotowska 48, Warsaw, 00 543, Poland'].
Total unique = 6666 values

Unique locations: ['San Francisco, USA' 'San Diego, USA' 'Los Gatos, USA' ...
 'Almuñécar, Spain' 'Vitoria-Gasteiz, Spain' 'Warsaw, Poland'].
Total unique = 2668 values

Unique prices: ['$$$$' '€€€€' '¥¥¥¥' '₩₩₩₩' '¥¥¥' '££££' '$$$' '฿฿฿฿' '€€€' '₺₺₺₺' '$$'
 '¥¥' nan '฿฿฿' '฿฿' '€€' '¥' '₩₩₩' '₺₺' '₺₺₺' '$' '£££' '££' '€' '฿' '₩'
 '₩₩' '₺' '£'].
Total unique = 29 values

Unique longitudes: [-122.4032607 -122.43586   -117.1988908 ...   19.9469489   21.0154954
   21.022057 ].
Total unique = 6776 values

Unique latitudes: [37.7975054 37.79835   32.9412

#### Next task is to separate the 'location' column into: ####
- 'country' column. eg, USA
- 'city' column. eg, San Francisco
- Try and retain the postal code and attach it to the main address

This will be good practice for the regex principles that I have just learned (if nothing else).

In [10]:
# Break each 'location' string into its ", "-separated pieces.
location_parts = michelin['location'].str.split(', ')

# The first piece becomes the city and the second the country.
# NOTE(review): a location with no ", " has no second piece, so its country is NaN.
michelin['city'] = location_parts.str[0]
michelin['country'] = location_parts.str[1]

column_names = michelin.columns.tolist()
print(f"\nNew Columns: {column_names}\n")


New Columns: ['name', 'address', 'location', 'price', 'cuisine', 'longitude', 'latitude', 'phone_number', 'url', 'website_url', 'award', 'city', 'country']



In [11]:
# Final column order: identity and place first; the original 'location'
# column is not carried over.
ordered_columns = [
    'name', 'city', 'country', 'address', 'price', 'cuisine',
    'longitude', 'latitude', 'phone_number', 'url', 'website_url', 'award',
]
michelin = michelin[ordered_columns]
column_names = michelin.columns.tolist()
print(f"\nNew Columns: {column_names}\n")


New Columns: ['name', 'city', 'country', 'address', 'price', 'cuisine', 'longitude', 'latitude', 'phone_number', 'url', 'website_url', 'award']



In [12]:
# Distinct countries and their count (NaN, if present, counts as a value).
countries = michelin['country'].unique()
print(f"\nUnique countries: {countries}.\nTotal unique = {len(countries)} values\n")

# Distinct cities and their count.
cities = michelin['city'].unique()
print(f"\nUnique cities: {cities}.\nTotal unique = {len(cities)} values")


Unique countries: ['USA' 'Spain' 'Japan' 'Italy' 'Switzerland' 'South Korea'
 'Taipei & Taichung' nan 'Norway' 'Denmark' 'Sweden' 'Netherlands'
 'Belgium' 'France' 'Germany' 'United Kingdom' 'Austria' 'China Mainland'
 'Thailand' 'Portugal' 'Hungary' 'Türkiye' 'Greece' 'Canada' 'Slovenia'
 'Finland' 'Luxembourg' 'Ireland' 'Brazil' 'Malaysia' 'Andorra' 'Iceland'
 'Croatia' 'Estonia' 'Malta' 'Czech Republic' 'Poland' 'Serbia'].
Total unique = 38 values


Unique cities: ['San Francisco' 'San Diego' 'Los Gatos' ... 'Almuñécar' 'Vitoria-Gasteiz'
 'Warsaw'].
Total unique = 2665 values


In [29]:
# Keep only the rows whose country is 'United Kingdom'.
is_uk = michelin['country'] == 'United Kingdom'
michelin_uk = michelin[is_uk]

# Distinct UK cities and their count.
uk_cities = michelin_uk['city'].unique()
print(f"\nUnique cities: {uk_cities}.\nTotal unique = {len(uk_cities)} values\n")

# Preview of the first UK rows.
uk_preview = michelin_uk.head()
print(f"UK Head: {uk_preview}")


Unique cities: ['Cartmel' 'Bray' 'London' 'Machynlleth' 'Summerhouse' 'Auchterarder'
 'Great Milton' 'Nottingham' 'Cambridge' 'Aughton' 'Marlow' 'Hunstanton'
 'Anstruther' 'Menai Bridge' 'Lympstone' 'Whitebrook' 'Lower Beeding'
 'Ripley' 'Wall' 'Holt' 'Bridge' 'Dorking' 'Seasalter' 'Morston' 'Fence'
 'Castle Combe' 'Murcott' 'Birkenhead' 'Whatcote' 'South Dalton'
 'Mountsorrel' 'Hambleton' 'Malmesbury' 'Askham' 'Peat Inn' 'Colerne'
 'Skelton' 'Bagshot' 'Aberystwyth' 'Oldstead' 'Hetton' 'Tenbury Wells'
 'Fordwich' 'Saltwood' 'Bury St Edmunds' 'Stratford-on-Avon'
 'Winteringham' 'Ripon' 'Braithwaite' 'Langho' 'East Wallhouses' 'Torquay'
 'Saint Helier' 'Broadstairs' 'Bowness-on-Windermere' 'Knowstone'
 'Port Isaac' 'Harome' 'Llanddewi Skirrid' 'Grasmere' 'Penarth'
 'Isle of Skye' 'Newbury' 'Hampton in Arden' 'Lichfield' 'Amersham'
 'East Grinstead' 'Leith' 'Oxwich' 'Bath' 'Ascot' 'Manchester' 'York'
 'Padstow' 'Birmingham' 'Newcastle Upon Tyne' 'Ambleside' 'Leeds'
 'Belfast' 'Glasgow Ci