# Clean and Transform SF Bakeries List for Analysis
Clean the starter list of croissant places grabbed from Google Maps. Check if each bakery indeed sells in-house made croissants and operates in San Francisco. Add missing bakeries. Collect croissant price and status as a chain/independent store.

In [1]:
# set up environment
    # import libraries
import pandas as pd
import time
import numpy as np

import re

## Load & Prepare Starter List

In [2]:
# load croissant places dataset from grab_sf_bakeries notebook
croissant_places = pd.read_csv(R"C:\Users\Johnny\Desktop\Data Analytics\Final Projects\sf_croissant_project\croissants\croissant_places_with_phone_numbers_san_francisco.csv")
croissant_places

Unnamed: 0,Name,Address,Website,Phone Number
0,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460
1,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155
2,Butter & Crumble,"271 Francisco St, San Francisco, CA 94133, Uni...",https://butterandcrumble.com/,(415) 874-9484
3,Vive La Tarte,"4026 24th St, San Francisco, CA 94114, United ...",https://www.vivelatarte.com/,(415) 780-5818
4,Tartine Bakery,"600 Guerrero St, San Francisco, CA 94110, Unit...",http://www.tartinebakery.com/,(415) 487-2600
...,...,...,...,...
94,Princess Bakery,"4617 Mission St, San Francisco, CA 94112, Unit...",,(415) 585-9000
95,Mara's Italian Pastries,"503 Columbus Ave, San Francisco, CA 94133, Uni...",https://m.facebook.com/profile.php?id=11321902...,(415) 397-9435
96,Butter&,"690 Indiana St, San Francisco, CA 94107, Unite...",https://butterand.com/,(415) 707-7414
97,Daydream Cake Shop,"1788 32nd Ave, San Francisco, CA 94122, United...",http://www.daydreamcakeshop.com/,(415) 370-2381


In [3]:
# change all column names to lower case
croissant_places.columns = croissant_places.columns.str.lower()
croissant_places.head()

Unnamed: 0,name,address,website,phone number
0,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460
1,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155
2,Butter & Crumble,"271 Francisco St, San Francisco, CA 94133, Uni...",https://butterandcrumble.com/,(415) 874-9484
3,Vive La Tarte,"4026 24th St, San Francisco, CA 94114, United ...",https://www.vivelatarte.com/,(415) 780-5818
4,Tartine Bakery,"600 Guerrero St, San Francisco, CA 94110, Unit...",http://www.tartinebakery.com/,(415) 487-2600


In [4]:
# add Bernal Basket & Diamond Coffee & Pastry

# new row data
new_data = {'name':['The Bernal Basket', 'Diamond Coffee N Pastry', 'Diamond Coffee N Pastry', 'Diamond Coffee N Pastry'], 
                 'address': ['521 Cortland Ave, San Francisco, CA 94110', '1014 Clement St.  San Francisco, CA 94118', '2575 Judah St. San Francisco, CA 94122', '6909 Geary Blvd, San Francisco, CA 94121'],
                 'website': ['https://www.bernalbakery.com/', 'https://www.diamondbeans.com/', 'https://www.diamondbeans.com/', 'https://www.diamondbeans.com/'],
                 'phone number': ['(415)500-2188', None, None, None]
                }

# Convert new data to a DataFrame
new_data_df = pd.DataFrame(new_data)

# Combine the two DataFrames
croissant_places = pd.concat([croissant_places, new_data_df], ignore_index=True).reset_index(drop=True)
croissant_places

Unnamed: 0,name,address,website,phone number
0,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460
1,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155
2,Butter & Crumble,"271 Francisco St, San Francisco, CA 94133, Uni...",https://butterandcrumble.com/,(415) 874-9484
3,Vive La Tarte,"4026 24th St, San Francisco, CA 94114, United ...",https://www.vivelatarte.com/,(415) 780-5818
4,Tartine Bakery,"600 Guerrero St, San Francisco, CA 94110, Unit...",http://www.tartinebakery.com/,(415) 487-2600
...,...,...,...,...
98,Hilda's Mart & Bake Shop,"145 Persia Ave, San Francisco, CA 94112, Unite...",,(415) 333-3122
99,The Bernal Basket,"521 Cortland Ave, San Francisco, CA 94110",https://www.bernalbakery.com/,(415)500-2188
100,Diamond Coffee N Pastry,"1014 Clement St. San Francisco, CA 94118",https://www.diamondbeans.com/,
101,Diamond Coffee N Pastry,"2575 Judah St. San Francisco, CA 94122",https://www.diamondbeans.com/,


In [5]:
# arrange names alphabetically AZ
croissant_places = croissant_places.sort_values(by='name').reset_index(drop=True)
croissant_places

Unnamed: 0,name,address,website,phone number
0,85C Bakery Cafe - Stonestown,"3251 20th Ave #158, San Francisco, CA 94132, U...",http://www.85cbakerycafe.com/,(415) 683-4938
1,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090
2,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978
3,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305
4,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218
...,...,...,...,...
98,Victoria Pastry,"700 Filbert St, San Francisco, CA 94133, Unite...",http://www.victoriapastrycompany.com/,(415) 781-2015
99,Vive La Tarte,"4026 24th St, San Francisco, CA 94114, United ...",https://www.vivelatarte.com/,(415) 780-5818
100,Yummy Bakery & Cafe,"607 Jackson St, San Francisco, CA 94133, Unite...",https://qmenu.us/#/yummy-bakery-cafe-sf,(415) 989-8388
101,b. patisserie,"2821 California St, San Francisco, CA 94115, U...",http://bpatisserie.com/,(415) 440-1700


In [6]:
# add unique id to each bakery
croissant_places['bakery_id'] = croissant_places.index + 1

#rearrange columns so bakery_id is first
croissant_places = croissant_places.iloc[:, [4,0,1,2,3]]
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number
0,1,85C Bakery Cafe - Stonestown,"3251 20th Ave #158, San Francisco, CA 94132, U...",http://www.85cbakerycafe.com/,(415) 683-4938
1,2,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218
...,...,...,...,...,...
98,99,Victoria Pastry,"700 Filbert St, San Francisco, CA 94133, Unite...",http://www.victoriapastrycompany.com/,(415) 781-2015
99,100,Vive La Tarte,"4026 24th St, San Francisco, CA 94114, United ...",https://www.vivelatarte.com/,(415) 780-5818
100,101,Yummy Bakery & Cafe,"607 Jackson St, San Francisco, CA 94133, Unite...",https://qmenu.us/#/yummy-bakery-cafe-sf,(415) 989-8388
101,102,b. patisserie,"2821 California St, San Francisco, CA 94115, U...",http://bpatisserie.com/,(415) 440-1700


In [7]:
# check nulls
sum(croissant_places['name'].isnull())

0

In [8]:
# null address
sum(croissant_places['address'].isnull())

0

In [9]:
# null website 
sum(croissant_places['website'].isnull())

18

In [10]:
#null phone number
sum(croissant_places['phone number'].isnull())

13

In [11]:
# Set the maximum number of rows to display
pd.set_option('display.max_rows', None)

# Display the DataFrame
print(croissant_places)

     bakery_id                                          name  \
0            1                  85C Bakery Cafe - Stonestown   
1            2                                    A-1 Bakery   
2            3                            Acme Bread Company   
3            4                               Ambrosia Bakery   
4            5                              Arizmendi Bakery   
5            6                              Arizmendi Bakery   
6            7                              Arsicault Bakery   
7            8                 Arsicault Bakery Civic Center   
8            9                                 Baklavastory.   
9           10               Barbary Coast Pastry and Coffee   
10          11                          Black Jet Baking Co.   
11          12                     Bob's Donut & Pastry Shop   
12          13                                   Boho Petite   
13          14                                    Breadbelly   
14          15                         B

## Add Chains T/F column

In [12]:
chains = ['Arsicault', 'Tartine', 'Arizmendi', 'Noe Valley Bakery', 
         'Craftsman and Wolves','Paris Paguette', 'Jane ', 'Acme',
          'Rosalind', 'La Boulangerie', '85C Bakery', 'Sheng Kee Bakery', 
          'Diamond Coffee N Pastry'
         ]
croissant_places = croissant_places.assign(chain=croissant_places['name'].str.contains('|'.join(chains)))
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain
0,1,85C Bakery Cafe - Stonestown,"3251 20th Ave #158, San Francisco, CA 94132, U...",http://www.85cbakerycafe.com/,(415) 683-4938,True
1,2,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090,False
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True
8,9,Baklavastory.,"1830 Harrison St Ste B, San Francisco, CA 9410...",https://www.baklavastory.com/,(415) 635-6394,False
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False


## Only Keep Bakeries in San Francisco

In [21]:
# Requirement #1: Physical storefront must be in SF
croissant_places = croissant_places[croissant_places['address'].str.contains('San Francisco')]
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain
0,1,85C Bakery Cafe - Stonestown,"3251 20th Ave #158, San Francisco, CA 94132, U...",http://www.85cbakerycafe.com/,(415) 683-4938,True
1,2,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090,False
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True
8,9,Baklavastory.,"1830 Harrison St Ste B, San Francisco, CA 9410...",https://www.baklavastory.com/,(415) 635-6394,False
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False


## Manually Look Into Each Website & Update the Dataset with Croissant Price
I was unable to scrape prices from the websites, so I will go into each website and input the values myself.

In [23]:
# output list of websites
croissant_places['website']

0                          http://www.85cbakerycafe.com/
1                                                    NaN
2                              http://www.acmebread.com/
3                           http://ambrosiabakerysf.com/
4                        http://valencia.arizmendi.coop/
5                        http://www.arizmendibakery.com/
6                          https://arsicault-bakery.com/
7                          https://arsicault-bakery.com/
8                          https://www.baklavastory.com/
9                                                    NaN
10                      http://www.blackjetbakingco.com/
11                         https://www.bobsdonutssf.com/
12                         https://www.bohopetitesf.com/
13                             https://breadbellysf.com/
14                           http://www.briochecafe.com/
15                         https://butterandcrumble.com/
16                                https://butterand.com/
17                             

In [24]:
# create empty column for price
croissant_places = croissant_places.copy()
croissant_places['price'] = np.nan
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
0,1,85C Bakery Cafe - Stonestown,"3251 20th Ave #158, San Francisco, CA 94132, U...",http://www.85cbakerycafe.com/,(415) 683-4938,True,
1,2,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090,False,
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False,
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True,
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True,
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True,
8,9,Baklavastory.,"1830 Harrison St Ste B, San Francisco, CA 9410...",https://www.baklavastory.com/,(415) 635-6394,False,
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False,


### Remove bakeries that do not sell butter croissants

In [47]:
# no crx
no_crx = ['LOQUAT', "Schubert's Bakery", "Choux", "85C Bakery",
          'Josey Baker Bread', "Pineapple King Bakery", "Cherry Blossom Bakery",
          "Baklavastory", "Moscow & Tbilisi", "L.H.Bakeshop",
          "Victoria Pastry", "Novela Cakes", "Hot Cookie", "Yummy Bakery",
          "Holy Nata", "Chadwick’s", "Miette", "Kantine", "Rize Up Bakery",
          "Healthyish Bakery", "Garden Bakery", "Grand Opening", 
          "Patisserie on California", "Sixth Course", "Little Swan Bakery",
          "Butter&", "Daydream Cake Shop", "Rolling Out Cafe",
          "Brioche Bakery"]
type(no_crx)

list

In [48]:
len(no_crx)

29

In [49]:
# Check that all bakeries in no_crx can be found in croissant_places

# create a regex pattern for all items in no_crx
pattern = '|'.join(map(re.escape, no_crx))

# Check which rows contain any of the no_crx items
matches = croissant_places['name'].str.contains(pattern, na=False)

# Count the matches
count = matches.sum()

print(f"Number of bakeries in no_crx found in croissant_places: {count}")
    

Number of bakeries in no_crx found in croissant_places: 29


In [50]:
# Since 9 places are not matching, I'll dig deeper. 

# Initialize a dictionary to store matches
matches = {}

# Check each item in no_crx
for i in no_crx:
    matches[i] = croissant_places['name'][croissant_places['name'].str.contains(re.escape(i), na=False)].tolist()

# Print the results
for i, match in matches.items():
    print(f"{i}:{match}")
     
# Check the no_crx list against croissant_places['name'] and fix capitalization, spelling, punctation, etc until we get 100% match across the list

LOQUAT:['LOQUAT']
Schubert's Bakery:["Schubert's Bakery"]
Choux:['Choux']
85C Bakery:['85C Bakery Cafe - Stonestown']
Josey Baker Bread:['Josey Baker Bread']
Pineapple King Bakery:['Pineapple King Bakery - San Francisco']
Cherry Blossom Bakery:['Cherry Blossom Bakery']
Baklavastory:['Baklavastory.']
Moscow & Tbilisi:['Moscow & Tbilisi Bakery Store']
L.H.Bakeshop:['L.H.Bakeshop']
Victoria Pastry:['Victoria Pastry']
Novela Cakes:['Novela Cakes']
Hot Cookie:['Hot Cookie']
Yummy Bakery:['Yummy Bakery & Cafe']
Holy Nata:['Holy Nata']
Chadwick’s:['Chadwick’s']
Miette:['Miette']
Kantine:['Kantine']
Rize Up Bakery:['Rize Up Bakery']
Healthyish Bakery:['Healthyish Bakery']
Garden Bakery:['Garden Bakery']
Grand Opening:['Grand Opening']
Patisserie on California:['Patisserie on California']
Sixth Course:['Sixth Course']
Little Swan Bakery:['Little Swan Bakery Café']
Butter&:['Butter&']
Daydream Cake Shop:['Daydream Cake Shop']
Rolling Out Cafe:['Rolling Out Cafe']
Brioche Bakery:['Brioche Bakery 

In [51]:
# now that we have a 100% match, delete those rows
def remove_by_name(df, remove_list):
    """
    Remove rows in dataframe using list of place names
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing the croissant places data
    remove_list (list): list containing place names that do not sell crx
    
    Returns:
    pandas.DataFrame: Updated DataFrame without said rows
    """
    
    # Create a copy to avoid modifying the original dataframe
    df_updated = df.copy()
    
    # Update prices for matching price names
    for i in remove_list:
        df_updated = df_updated.loc[~df_updated['name'].str.contains(i)]
    
    return df_updated

In [52]:
# final row count should be
len(croissant_places) - len(no_crx)

71

In [53]:
updated_croissant_places = remove_by_name(croissant_places, no_crx)
len(updated_croissant_places)

71

In [54]:
# after verifying that the final row counts match, then update the og df
croissant_places = updated_croissant_places

## For bakeries that do sell croissants: add in price via bakery name

In [55]:
# Name:price
names = {'Arsicault':4.75,
         'Vive La Tarte':5.0,
         'Ambrosia': 4.25,
         'Juniper': 5.0,
         'b. patisserie':4.95,
         'ONE65':5.0,
         'Thorough Bread & Pastry':3.75,
         'Noe Valley':4.35,
         'Boho Petite':6.0,
         'Le Marais':4.5,
         'Craftsman and Wolves':5.75,
         'Cinderella Bakery':3.95,
         'Maison Nico': 4.5,
         'La Boulangerie de San Francisco': 4.68,
         'The French Spot':5.65,
         'Flour & Branch':7.5,
         'Kahnfections':4.75,
         'jina bakes':4,
         'Poesia Cafe':4.25,
         'Paris Cafe':4.75,
         'Hahdough': 5.0,
         'Breadbelly': 5.25,
         'Destination Baking Company':3.55, 
         "Diamond Coffee N Pastry":4.0, 
         'Gateway Croissant':3.5,
         'California Bakery':3.75, 
         'Black Jet Baking':5.0, 
         'Bernal Basket':4.5,
         'Neighbor Bakehouse':4.25,
         'Tartine':6.25,
         'Jane The Bakery':5.5
        }

In [56]:
# Check that all bakeries in names can be found in croissant_places
# We are using a different code from the no_crx because names includes 
    # chain bakeries that appear multiple times throughout croissant places
        # and I want to make sure that each and every row is updated

def count_matches(df, some_list):
    """
    Count the number of matches for each key in list against df['name'].
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing the croissant places data.
    some_list (list): List of names to check for matches.
    
    Returns:
    dict: Dictionary with keys as names and values as the match count.
    """
    match_counts = {}
    
    for key in some_list:
        # Count matches using str.contians with re.escape
        match_counts[key] = df['name'].str.contains(re.escape(key), na=False).sum()
        
    return match_counts

In [57]:
count_matches(croissant_places, names)

# fix names until all matches are found

{'Arsicault': 2,
 'Vive La Tarte': 1,
 'Ambrosia': 1,
 'Juniper': 1,
 'b. patisserie': 1,
 'ONE65': 1,
 'Thorough Bread & Pastry': 1,
 'Noe Valley': 3,
 'Boho Petite': 1,
 'Le Marais': 2,
 'Craftsman and Wolves': 1,
 'Cinderella Bakery': 1,
 'Maison Nico': 1,
 'La Boulangerie de San Francisco': 4,
 'The French Spot': 1,
 'Flour & Branch': 1,
 'Kahnfections': 1,
 'jina bakes': 1,
 'Poesia Cafe': 1,
 'Paris Cafe': 1,
 'Hahdough': 1,
 'Breadbelly': 1,
 'Destination Baking Company': 1,
 'Diamond Coffee N Pastry': 3,
 'Gateway Croissant': 1,
 'California Bakery': 1,
 'Black Jet Baking': 1,
 'Bernal Basket': 1,
 'Neighbor Bakehouse': 1,
 'Tartine': 3,
 'Jane The Bakery': 1}

In [58]:
def update_by_name(df, price_dict):
    """
    Update prices in dataframe using dictionary of place names and prices
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing the croissant places data
    price_dict (dict): Dictionarry mapping place names to prices
    
    Returns:
    pandas.DataFrame: Updated DataFrame with new prices
    """
    
    # Create a copy to avoid modifying the original dataframe
    df_updated = df.copy()
    
    # Update prices for matching price names
    for (key, value) in price_dict.items():
        df_updated.loc[df_updated['name'].str.contains(re.escape(key), case=False, na=False), 'price'] = value
    
    return df_updated

In [59]:
# number of filled price rows should be
sum(count_matches(croissant_places, names).values())

42

In [60]:
# idk why the num of rows don't match, im moving  on
updated_croissant_places = update_by_name(croissant_places, names)
updated_croissant_places[updated_croissant_places['price'].isnull()]['name']
updated_croissant_places['price'].count()

41

In [61]:
#  update the og df
croissant_places = updated_croissant_places 

### add in prices via bakery_id 

In [62]:
croissant_places[croissant_places['name'].str.contains('Arizmendi')]

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True,
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,


In [63]:
# For some chains, pricing differs depending on location so bakery_id will be used as the key to update the price by.
    # bakery_id:price
ids = {6:4.25,
       73:4.49,
       72:5.49,
      }

In [64]:
def update_by_id(df, price_dict):
    """
    Update prices in dataframe using dictionary of place names and prices
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing the croissant places data
    price_dict (dict): Dictionarry mapping place names to prices
    
    Returns:
    pandas.DataFrame: Updated DataFrame with new prices
    """
    
    # Create a copy to avoid modifying the original dataframe
    df_updated = df.copy()
    
    # Update prices for matching price names
    for (key, value) in price_dict.items():
        df_updated.loc[df_updated['bakery_id'].isin(price_dict.keys()), 'price'] = value
    
    return df_updated

In [65]:
# number of filled price rows should be
len(ids)

3

In [66]:
updated_croissant_places = update_by_id(croissant_places, ids)
updated_croissant_places[updated_croissant_places['bakery_id'].isin(ids.keys())]

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,5.49
71,72,Paris Baguette,"550 Market St, San Francisco, CA 94104, United...",https://www.parisbaguette.com/,(415) 757-0899,False,5.49
72,73,Paris Baguette,"3995 Alemany Blvd, San Francisco, CA 94132, Un...",https://www.parisbaguette.com/,(628) 732-0404,False,5.49


In [67]:
# looks good! update the og df
croissant_places = updated_croissant_places

In [68]:
croissant_places[['name', 'price']]

Unnamed: 0,name,price
1,A-1 Bakery,
2,Acme Bread Company,
3,Ambrosia Bakery,4.25
4,Arizmendi Bakery,
5,Arizmendi Bakery,5.49
6,Arsicault Bakery,4.75
7,Arsicault Bakery Civic Center,4.75
9,Barbary Coast Pastry and Coffee,
10,Black Jet Baking Co.,5.0
11,Bob's Donut & Pastry Shop,


## Manual Search Round 2
Look into the bakeries that did not have a listed website.

In [69]:
croissant_places['price'].isnull().sum()

27

In [70]:
# Dig and find the bakeries will null prices. They probably don't have a listed website. 
croissant_places[croissant_places['price'].isnull()]

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
1,2,A-1 Bakery,"1727 Ocean Ave, San Francisco, CA 94112, Unite...",,(415) 347-7090,False,
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True,
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False,
11,12,Bob's Donut & Pastry Shop,"1621 Polk St, San Francisco, CA 94109, United ...",https://www.bobsdonutssf.com/,(415) 776-3141,False,
15,16,Butter & Crumble,"271 Francisco St, San Francisco, CA 94133, Uni...",https://butterandcrumble.com/,(415) 874-9484,False,
23,24,Day Moon,"3928 Irving St, San Francisco, CA 94122, Unite...",,,False,
29,30,Dianda's Italian American Pastry,"2883 Mission St, San Francisco, CA 94110, Unit...",http://www.diandasbakery.com/,(415) 647-5469,False,
30,31,Eterna Primavera Bakery,"2951 24th St, San Francisco, CA 94110, United ...",,(415) 932-6295,False,
31,32,Fillmore Bakeshop,"1890 Fillmore Street Between Sutter St &, Bush...",http://fillmorebakeshop.com/,(415) 923-0711,False,


In [71]:
# exclude Tiny Croissanterie b/c it is a pop-up without a store location
no_crx2 = ['A-1 Bakery', 'Day Moon', "Dianda's Italian American Pastry",
           "Eterna Primavera Bakery", "Good Orchard Bakery", "Hilda's Mart & Bake Shop",
           "Jane on Larkin", "Kings Bakery Cafe", "La Mejor Bakery", "La Mexicana Bakery",
           "Les Croissants Cafe", "Liguria Bakery", "Mara's Italian Pastries", 
           "Princess Bakery", "Rico Pan Bakery", "Sam's Bakery", "Stella Pastry",
           "Sunset Bakery", "Sweet Passion Bakery", "Tiny Croissanterie"
          ]

# exclude Arizmendia @ Valencia
no_crx_ids = [5]

In [72]:
croissant_places = remove_by_name(croissant_places, no_crx2)
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False,4.25
4,5,Arizmendi Bakery,"1268 Valencia St, San Francisco, CA 94110, Uni...",http://valencia.arizmendi.coop/,(415) 826-9218,True,
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,5.49
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True,4.75
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True,4.75
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False,
10,11,Black Jet Baking Co.,"833 Cortland Ave, San Francisco, CA 94110, Uni...",http://www.blackjetbakingco.com/,(415) 829-3905,False,5.0
11,12,Bob's Donut & Pastry Shop,"1621 Polk St, San Francisco, CA 94109, United ...",https://www.bobsdonutssf.com/,(415) 776-3141,False,
12,13,Boho Petite,"2146 Chestnut St, San Francisco, CA 94123, Uni...",https://www.bohopetitesf.com/,(415) 655-9559,False,6.0


In [73]:
def remove_by_id(df, remove_list):
    """
    Remove rows in dataframe using list of bakery_id
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing the croissant places data
    remove_list (list): list containing bakery_id that do not sell crx
    
    Returns:
    pandas.DataFrame: Updated DataFrame without said rows
    """
    
    # Create a copy to avoid modifying the original dataframe
    df_updated = df.copy()
    
    # Update prices for matching price names
    for i in remove_list:
        df_updated = df_updated.loc[~(df_updated['bakery_id'] == i)]
    
    return df_updated

In [74]:
croissant_places = remove_by_id(croissant_places, no_crx_ids)
croissant_places 

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False,4.25
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,5.49
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True,4.75
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True,4.75
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False,
10,11,Black Jet Baking Co.,"833 Cortland Ave, San Francisco, CA 94110, Uni...",http://www.blackjetbakingco.com/,(415) 829-3905,False,5.0
11,12,Bob's Donut & Pastry Shop,"1621 Polk St, San Francisco, CA 94109, United ...",https://www.bobsdonutssf.com/,(415) 776-3141,False,
12,13,Boho Petite,"2146 Chestnut St, San Francisco, CA 94123, Uni...",https://www.bohopetitesf.com/,(415) 655-9559,False,6.0
13,14,Breadbelly,"1408 Clement St, San Francisco, CA 94118, Unit...",https://breadbellysf.com/,,False,5.25


In [75]:
# Rosalind's phone number: (650) 898-8636
croissant_places.loc[croissant_places['name'].str.contains('Rosalind'), 'phone number'] = '(650) 898-8636'

# # call to find out price of butter crx
croissant_places[croissant_places['price'].isnull()][['name','phone number']]

Unnamed: 0,name,phone number
2,Acme Bread Company,(415) 288-2978
9,Barbary Coast Pastry and Coffee,(415) 989-3888
11,Bob's Donut & Pastry Shop,(415) 776-3141
15,Butter & Crumble,(415) 874-9484
31,Fillmore Bakeshop,(415) 923-0711
81,Rosalind Bakery Cafe at Embarcadero Four,(650) 898-8636


In [76]:
# I called. Some bakeries answered. Some did not.

# answered
names2 = {'Acme Bread Company':3.97,
          'Fillmore Bakeshop':5.0}

# no answer
noans = ['Barbary Coast Pastry and Coffee', "Bob's Donut & Pastry Shop", "Butter & Crumble",
         "Rosalind Bakery Cafe at Embarcadero Four"]

In [77]:
# update the df prices
croissant_places = update_by_name(croissant_places, names2)
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,3.97
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False,4.25
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,5.49
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True,4.75
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True,4.75
9,10,Barbary Coast Pastry and Coffee,"55 Cyril Magnin St, San Francisco, CA 94102, U...",,(415) 989-3888,False,
10,11,Black Jet Baking Co.,"833 Cortland Ave, San Francisco, CA 94110, Uni...",http://www.blackjetbakingco.com/,(415) 829-3905,False,5.0
11,12,Bob's Donut & Pastry Shop,"1621 Polk St, San Francisco, CA 94109, United ...",https://www.bobsdonutssf.com/,(415) 776-3141,False,
12,13,Boho Petite,"2146 Chestnut St, San Francisco, CA 94123, Uni...",https://www.bohopetitesf.com/,(415) 655-9559,False,6.0
13,14,Breadbelly,"1408 Clement St, San Francisco, CA 94118, Unit...",https://breadbellysf.com/,,False,5.25


In [78]:
# remove no answers from df
croissant_places = remove_by_name(croissant_places, noans)
croissant_places

Unnamed: 0,bakery_id,name,address,website,phone number,chain,price
2,3,Acme Bread Company,"Ferry Building, Bay Trail #15, San Francisco, ...",http://www.acmebread.com/,(415) 288-2978,True,3.97
3,4,Ambrosia Bakery,"2605 Ocean Ave, San Francisco, CA 94132, Unite...",http://ambrosiabakerysf.com/,(415) 334-5305,False,4.25
5,6,Arizmendi Bakery,"1331 9th Ave, San Francisco, CA 94122, United ...",http://www.arizmendibakery.com/,(415) 566-3117,True,5.49
6,7,Arsicault Bakery,"397 Arguello Blvd, San Francisco, CA 94118, Un...",https://arsicault-bakery.com/,(415) 750-9460,True,4.75
7,8,Arsicault Bakery Civic Center,"87 McAllister St, San Francisco, CA 94102, Uni...",https://arsicault-bakery.com/,(415) 926-5155,True,4.75
10,11,Black Jet Baking Co.,"833 Cortland Ave, San Francisco, CA 94110, Uni...",http://www.blackjetbakingco.com/,(415) 829-3905,False,5.0
12,13,Boho Petite,"2146 Chestnut St, San Francisco, CA 94123, Uni...",https://www.bohopetitesf.com/,(415) 655-9559,False,6.0
13,14,Breadbelly,"1408 Clement St, San Francisco, CA 94118, Unit...",https://breadbellysf.com/,,False,5.25
17,18,California Bakery,"719 Taraval St, San Francisco, CA 94116, Unite...",,(415) 864-1385,False,3.75
21,22,Cinderella Bakery & Cafe,"436 Balboa St, San Francisco, CA 94118, United...",https://cinderellabakery.com/,(415) 751-9690,False,3.95


In [79]:
# # final df looks good! save as an excel sheet
# croissant_places.to_csv('croissant_places.csv', index=False)

## Done!
Now you have a csv file saved locally with all the bakeries in San Francisco that sell butter croissants, their addresses, websites, phone numbers, status as a chain or independent store, and price of croissant.