# Starbucks Stores Analysis

In [1]:
# Housekeeping
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Import datasets
demoURL = 'https://storage.googleapis.com/kagglesdsdata/datasets/7001/312628/acs2017_county_data.csv?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20211112%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20211112T041604Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=36991b214fbd2eaa350a196e3ba92eedf89209b288630a3a84157a58d7ee14a3749d5b94748ee93465ebc5386cd6d5ebe9cd7a72bec9e9ddf03c7a7217e357aa7965b2060acd66834edd1147c111e61eb050a9f31f6382591ab9683434ec1949682b4703908fa4fd209def11513f8a5cde172ae68077f69131e2bdefc746ddcb0fa85c43d9c3391c76da17ae959169657cf0f833a0a3de80644ddf72c257f9a7b63a0a7b9849e579fab907b2629d36070d0eee2c17cfaa6a38c7c368846c1a5b9cf2c8c82a6a1e531a329d6176095e42098c7318901414afcc82423c7fb091ad7e172f212a59f2ec655e2f01bea8d0bdd0ab48447a81302de3f7c5379362ab4d'
popURL = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/cities/totals/sub-est2017_all.csv'

starbucks = pd.read_csv('data/directory.csv')
income = pd.read_csv('data/archive/kaggle_income.csv', encoding='cp1252')
population = pd.read_csv(popURL, encoding='cp1252')
demographic = pd.read_csv(demoURL, encoding='cp1252')

***

## Data Cleaning

Data Constraints:
- Both Starbucks and US datasets published in 2017.
- Starbucks store locations limited to US country. 
- Starbucks store limited to Starbucks brand (no Teavana)
- Exclude Puerto Rico from US datasets

### Starbucks Dataset

In [3]:
starbucks = starbucks.query("Brand == 'Starbucks'").query("Country == 'US'")
starbucks = starbucks.drop(columns=['Brand', 'Store Name', 'Ownership Type', 'Street Address','Phone Number','Timezone', 'Postcode', 'Country'])
starbucks = starbucks.rename(columns={'State/Province' : 'State'})
starbucks

Unnamed: 0,Store Number,City,State,Longitude,Latitude
11964,3513-125945,Anchorage,AK,-149.78,61.21
11965,74352-84449,Anchorage,AK,-149.84,61.14
11966,12449-152385,Anchorage,AK,-149.85,61.11
11967,24936-233524,Anchorage,AK,-149.89,61.13
11968,8973-85630,Anchorage,AK,-149.86,61.14
...,...,...,...,...,...
25567,74385-87621,Laramie,WY,-105.59,41.32
25568,73320-24375,Laramie,WY,-105.56,41.31
25569,22425-219024,Laramie,WY,-105.56,41.31
25570,10849-103163,Rock Springs,WY,-109.25,41.58


### US County Income Dataset

In [4]:
income = income[["State_Name","State_ab","County","City","Median"]]
income

Unnamed: 0,State_Name,State_ab,County,City,Median
0,Alabama,AL,Mobile County,Chickasaw,30506
1,Alabama,AL,Barbour County,Louisville,19528
2,Alabama,AL,Shelby County,Columbiana,31930
3,Alabama,AL,Mobile County,Satsuma,52814
4,Alabama,AL,Mobile County,Dauphin Island,67225
...,...,...,...,...,...
32521,Puerto Rico,PR,Adjuntas Municipio,Guaynabo,13729
32522,Puerto Rico,PR,Adjuntas Municipio,Aguada,9923
32523,Puerto Rico,PR,Adjuntas Municipio,Aguada,34054
32524,Puerto Rico,PR,Adjuntas Municipio,Aguada,0


### US County Population Dataset

In [5]:
population = population[["NAME", "STNAME", "POPESTIMATE2017"]]
population = population.loc[~population['NAME'].str.contains("County")]
population = population.loc[population['NAME'].str.contains(" ")]
population["NAME"] = population["NAME"].apply(lambda x: ' '.join(x.split()[0:-1]))
population
data = pd.merge(income, population, left_on=['State_Name','City'], right_on=['STNAME', 'NAME']).drop(columns=['NAME','STNAME'])
data = pd.merge(starbucks, data, how='right', left_on=["State", "City"], right_on=["State_ab","City"]).drop(columns=['State_Name','State'])
data = data.rename(columns={"Longitude":"Starbucks Lon", "Latitude":"Starbucks Lat"})
data

Unnamed: 0,Store Number,City,Starbucks Lon,Starbucks Lat,State_ab,County,Median,POPESTIMATE2017
0,3513-125945,Anchorage,-149.78,61.21,AK,Matanuska-Susitna Borough,84804,294356
1,74352-84449,Anchorage,-149.84,61.14,AK,Matanuska-Susitna Borough,84804,294356
2,12449-152385,Anchorage,-149.85,61.11,AK,Matanuska-Susitna Borough,84804,294356
3,24936-233524,Anchorage,-149.89,61.13,AK,Matanuska-Susitna Borough,84804,294356
4,8973-85630,Anchorage,-149.86,61.14,AK,Matanuska-Susitna Borough,84804,294356
...,...,...,...,...,...,...,...,...
870622,,Lyman,,,WY,Albany County,70155,2067
870623,,Worland,,,WY,Albany County,300000,5189
870624,,Worland,,,WY,Albany County,300000,5189
870625,,Diamondville,,,WY,Albany County,70752,758


### US County Demographic Dataset

In [6]:
demographic

Unnamed: 0,CountyId,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001,Alabama,Autauga County,55036,26899,28137,2.7,75.4,18.9,0.3,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
1,1003,Alabama,Baldwin County,203360,99527,103833,4.4,83.1,9.5,0.8,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
2,1005,Alabama,Barbour County,26201,13976,12225,4.2,45.7,47.8,0.2,...,2.2,1.7,1.3,23.4,8878,74.1,19.1,6.5,0.3,12.4
3,1007,Alabama,Bibb County,22580,12251,10329,2.4,74.6,22.0,0.4,...,0.3,1.7,1.5,30.0,8171,76.0,17.4,6.3,0.3,8.2
4,1009,Alabama,Blount County,57667,28490,29177,9.0,87.4,1.5,0.3,...,0.4,0.4,2.1,35.0,21380,83.9,11.9,4.0,0.1,4.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,72145,Puerto Rico,Vega Baja Municipio,54754,26269,28485,96.7,3.1,0.1,0.0,...,1.4,0.6,0.9,31.6,14234,76.2,19.3,4.3,0.2,16.8
3216,72147,Puerto Rico,Vieques Municipio,8931,4351,4580,95.7,4.0,0.0,0.0,...,5.0,0.0,1.7,14.9,2927,40.7,40.9,18.4,0.0,12.8
3217,72149,Puerto Rico,Villalba Municipio,23659,11510,12149,99.7,0.2,0.1,0.0,...,2.1,0.0,2.8,28.4,6873,59.2,30.2,10.4,0.2,24.8
3218,72151,Puerto Rico,Yabucoa Municipio,35025,16984,18041,99.9,0.1,0.0,0.0,...,1.4,1.8,0.1,30.5,7878,62.7,30.9,6.3,0.0,25.4


***



## Data Analysis

### Starbucks dataset

Number of Starbucks per State

In [7]:
storesPerState = starbucks.groupby('State')['Store Number'].count().to_frame().reset_index()
storesPerState = storesPerState.rename(columns={'Store Number' : 'Total Stores'}).sort_values('Total Stores', ascending=False)

State with the most Starbucks

In [8]:
storesPerState.loc[storesPerState['Total Stores'].idxmax()]

State             CA
Total Stores    2782
Name: 4, dtype: object

State with the least Starbucks

In [9]:
storesPerState.loc[storesPerState['Total Stores'].idxmin()]

State           VT
Total Stores     7
Name: 46, dtype: object

***

## Data Visualization

### Distribution of Number of Starbucks Locations per County

### Starbucks Location vs. County Income

### Starbucks Location vs. County Population