# Starbucks Stores Analysis

In [58]:
# Housekeeping
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [59]:
# Import datasets
popURL = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/cities/totals/sub-est2017_all.csv'

starbucks = pd.read_csv('data/directory.csv')
income = pd.read_csv('data/archive/kaggle_income.csv', encoding='cp1252')
population = pd.read_csv(popURL, encoding='cp1252')
demographic = pd.read_csv('data/demo.csv', encoding='cp1252')
cities = pd.read_csv('data/uscities.csv')

***

## Data Cleaning

Data Constraints:
- Both Starbucks and US datasets published in 2017.
- Starbucks store locations limited to US country. 
- Starbucks store limited to Starbucks brand (no Teavana)
- Exclude Puerto Rico from US datasets

### Starbucks Dataset

In [60]:
starbucks = starbucks.query("Brand == 'Starbucks'").query("Country == 'US'")
starbucks = starbucks.drop(columns=["Brand", "Store Name", "Ownership Type", "Street Address","Phone Number","Timezone", "Postcode", "Country"])
starbucks = starbucks.rename(columns={'State/Province' : 'State'})
starbucks

Unnamed: 0,Store Number,City,State,Longitude,Latitude
11964,3513-125945,Anchorage,AK,-149.78,61.21
11965,74352-84449,Anchorage,AK,-149.84,61.14
11966,12449-152385,Anchorage,AK,-149.85,61.11
11967,24936-233524,Anchorage,AK,-149.89,61.13
11968,8973-85630,Anchorage,AK,-149.86,61.14
...,...,...,...,...,...
25567,74385-87621,Laramie,WY,-105.59,41.32
25568,73320-24375,Laramie,WY,-105.56,41.31
25569,22425-219024,Laramie,WY,-105.56,41.31
25570,10849-103163,Rock Springs,WY,-109.25,41.58


### US City <--> County Mapping

In [61]:
cities = cities[["city", "state_id", "state_name", "county_name"]]
cities

Unnamed: 0,city,state_id,state_name,county_name
0,New York,NY,New York,New York
1,Los Angeles,CA,California,Los Angeles
2,Chicago,IL,Illinois,Cook
3,Miami,FL,Florida,Miami-Dade
4,Dallas,TX,Texas,Dallas
...,...,...,...,...
28333,Gross,NE,Nebraska,Boyd
28334,Lotsee,OK,Oklahoma,Tulsa
28335,The Ranch,MN,Minnesota,Mahnomen
28336,Shamrock,OK,Oklahoma,Creek


### US County Demographic Dataset

In [62]:
demographic = demographic[demographic['State'] != 'Puerto Rico']
demographic["County"] = demographic["County"].apply(lambda x: ' '.join(x.split()[0:-1]))
demographic

Unnamed: 0.1,Unnamed: 0,CountyId,State,County,TotalPop,Men,Women,Hispanic,White,Black,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,0,1001,Alabama,Autauga,55036,26899,28137,2.7,75.4,18.9,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
1,1,1003,Alabama,Baldwin,203360,99527,103833,4.4,83.1,9.5,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
2,2,1005,Alabama,Barbour,26201,13976,12225,4.2,45.7,47.8,...,2.2,1.7,1.3,23.4,8878,74.1,19.1,6.5,0.3,12.4
3,3,1007,Alabama,Bibb,22580,12251,10329,2.4,74.6,22.0,...,0.3,1.7,1.5,30.0,8171,76.0,17.4,6.3,0.3,8.2
4,4,1009,Alabama,Blount,57667,28490,29177,9.0,87.4,1.5,...,0.4,0.4,2.1,35.0,21380,83.9,11.9,4.0,0.1,4.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,3137,56037,Wyoming,Sweetwater,44527,22981,21546,16.0,79.6,0.8,...,2.8,1.3,1.5,20.5,22739,78.4,17.8,3.8,0.0,5.2
3138,3138,56039,Wyoming,Teton,22923,12169,10754,15.0,81.5,0.5,...,11.7,3.8,5.7,14.3,14492,82.1,11.4,6.5,0.0,1.3
3139,3139,56041,Wyoming,Uinta,20758,10593,10165,9.1,87.7,0.1,...,1.1,1.3,2.0,19.9,9528,71.5,21.5,6.6,0.4,6.4
3140,3140,56043,Wyoming,Washakie,8253,4118,4135,14.2,82.2,0.3,...,6.9,1.3,4.4,14.3,3833,69.8,22.0,8.1,0.2,6.1


### Merge Data 

In [72]:
mapping = pd.merge(demographic, cities, left_on=["County", "State"], right_on=["county_name", "state_name"]).drop(columns=["state_name", "county_name"])
mapping
data = pd.merge(starbucks, mapping, how="right", left_on=["State", "City"], right_on=["state_id","city"]).drop(columns=['state_id','city'])
data = data.rename(columns={"Longitude":"Starbucks Lon", "Latitude":"Starbucks Lat",\
                            "State_ab":"State"}).drop_duplicates()
data


Unnamed: 0.1,Store Number,City,State_x,Starbucks Lon,Starbucks Lat,Unnamed: 0,CountyId,State_y,County,TotalPop,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,20585-202066,Prattville,AL,-86.39,32.46,0,1001,Alabama,Autauga,55036,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
1,76796-108570,Prattville,AL,-86.43,32.47,0,1001,Alabama,Autauga,55036,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
2,,,,,,0,1001,Alabama,Autauga,55036,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
6,13211-104121,Daphne,AL,-87.91,30.65,1,1003,Alabama,Baldwin,203360,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
7,9971-96354,Foley,AL,-87.68,30.38,1,1003,Alabama,Baldwin,203360,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37340,16502-171825,Jackson,WY,-110.79,43.47,3138,56039,Wyoming,Teton,22923,...,11.7,3.8,5.7,14.3,14492,82.1,11.4,6.5,0.0,1.3
37341,,,,,,3138,56039,Wyoming,Teton,22923,...,11.7,3.8,5.7,14.3,14492,82.1,11.4,6.5,0.0,1.3
37349,,,,,,3139,56041,Wyoming,Uinta,20758,...,1.1,1.3,2.0,19.9,9528,71.5,21.5,6.6,0.4,6.4
37356,,,,,,3140,56043,Wyoming,Washakie,8253,...,6.9,1.3,4.4,14.3,3833,69.8,22.0,8.1,0.2,6.1


***



## Data Analysis

### Number of Starbucks per City

#### Cities with Starbucks (descending order)

#### Number of cities without Starbucks

***

## Data Visualization

### Distribution of Number of Starbucks Locations per County

### Starbucks Location vs. County Income

### Starbucks Location vs. County Population