In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from config import  password, user
import psycopg2

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

In [2]:
# source reference
#https://blog.panoply.io/connecting-jupyter-notebook-with-postgresql-for-python-data-analysis
# Postgres username, password, and database name
POSTGRES_ADDRESS = 'localhost' ## INSERT YOUR DB ADDRESS IF IT'S NOT ON PANOPLY
POSTGRES_PORT = '5432'
POSTGRES_USERNAME = user ## CHANGE THIS TO YOUR PANOPLY/POSTGRES USERNAME
POSTGRES_PASSWORD = password ## CHANGE THIS TO YOUR PANOPLY/POSTGRES PASSWORD 
POSTGRES_DBNAME = 'starbucks_etl' ## CHANGE THIS TO YOUR DATABASE NAME
# A long string that contains the necessary Postgres login information
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'
.format(username=POSTGRES_USERNAME,password=POSTGRES_PASSWORD,ipaddress=POSTGRES_ADDRESS,port=POSTGRES_PORT,dbname=POSTGRES_DBNAME))
# Create the connection
conn = create_engine(postgres_str)

In [3]:
# Read Starbucks store locations into a dataframe
file = "Resources/locations.csv"
locations_df = pd.read_csv(file)
locations_df.head()

Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State/Province,Country,Postcode,Phone Number,Timezone,Longitude,Latitude
0,Starbucks,47370-257954,"Meritxell, 96",Licensed,"Av. Meritxell, 96",Andorra la Vella,7,AD,AD500,376818720.0,GMT+1:00 Europe/Andorra,1.53,42.51
1,Starbucks,22331-212325,Ajman Drive Thru,Licensed,"1 Street 69, Al Jarf",Ajman,AJ,AE,,,GMT+04:00 Asia/Dubai,55.47,25.42
2,Starbucks,47089-256771,Dana Mall,Licensed,Sheikh Khalifa Bin Zayed St.,Ajman,AJ,AE,,,GMT+04:00 Asia/Dubai,55.47,25.39
3,Starbucks,22126-218024,Twofour 54,Licensed,Al Salam Street,Abu Dhabi,AZ,AE,,,GMT+04:00 Asia/Dubai,54.38,24.48
4,Starbucks,17127-178586,Al Ain Tower,Licensed,"Khaldiya Area, Abu Dhabi Island",Abu Dhabi,AZ,AE,,,GMT+04:00 Asia/Dubai,54.54,24.51


In [4]:
# filter on US Starbucks locations
us_locations_df = locations_df[locations_df['Country']=='US']
us_locations_df.dropna()

Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State/Province,Country,Postcode,Phone Number,Timezone,Longitude,Latitude
11964,Starbucks,3513-125945,Safeway-Anchorage #1809,Licensed,5600 Debarr Rd Ste 9,Anchorage,AK,US,995042300,907-339-0900,GMT-09:00 America/Anchorage,-149.78,61.21
11965,Starbucks,74352-84449,Safeway-Anchorage #2628,Licensed,1725 Abbott Rd,Anchorage,AK,US,995073444,907-339-2800,GMT-09:00 America/Anchorage,-149.84,61.14
11966,Starbucks,12449-152385,Safeway - Anchorage #1813,Licensed,1501 Huffman Rd,Anchorage,AK,US,995153596,907-339-1300,GMT-09:00 America/Anchorage,-149.85,61.11
11967,Starbucks,24936-233524,100th & C St - Anchorage,Company Owned,"320 W. 100th Ave, 100, Southgate Shopping Ctr ...",Anchorage,AK,US,99515,(907) 227-9631,GMT-09:00 America/Anchorage,-149.89,61.13
11968,Starbucks,8973-85630,Old Seward & Diamond,Company Owned,1005 E Dimond Blvd,Anchorage,AK,US,995152050,907-344-4160,GMT-09:00 America/Anchorage,-149.86,61.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,Starbucks,74385-87621,Safeway-Laramie #2466,Licensed,554 N 3rd St,Laramie,WY,US,820723012,307-721-5107,GMT-07:00 America/Denver,-105.59,41.32
25568,Starbucks,73320-24375,Ridley's - Laramie #1131,Licensed,3112 E. Grand,Laramie,WY,US,820705141,307-742-8146,GMT-07:00 America/Denver,-105.56,41.31
25569,Starbucks,22425-219024,Laramie - Grand & 30th,Company Owned,3021 Grand Ave,Laramie,WY,US,82070,307-742-3262,GMT-07:00 America/Denver,-105.56,41.31
25570,Starbucks,10849-103163,I-80 & Dewar Dr-Rock Springs,Company Owned,118 Westland Way,Rock Springs,WY,US,829015751,307-362-7145,GMT-07:00 America/Denver,-109.25,41.58


In [5]:
# check dataframe counts
us_locations_df.count()

Brand             13608
Store Number      13608
Store Name        13608
Ownership Type    13608
Street Address    13608
City              13608
State/Province    13608
Country           13608
Postcode          13607
Phone Number      13122
Timezone          13608
Longitude         13608
Latitude          13608
dtype: int64

In [6]:
# Identify nearest city for each starbucks lat, lng combination
cities = []
store_number =[]
for index, row in us_locations_df.iterrows():
    city = citipy.nearest_city(row[11], row[12]).city_name
    cities.append(city)
    store_number.append(row[1])


In [7]:
# check the type of cities
type(cities)

list

In [8]:
# build dataframe
starbucks_city_df = pd.DataFrame(zip(store_number,cities),columns=['store_number', 'city'])

In [9]:
# check type to be sure we created dataframe
type(starbucks_city_df)

pandas.core.frame.DataFrame

In [10]:
# title case on city
starbucks_city_df['city'] = starbucks_city_df['city'].str.title()

In [11]:
# take a peak that the cleaned city names
starbucks_city_df

Unnamed: 0,store_number,city
0,3513-125945,Port Alfred
1,74352-84449,Port Alfred
2,12449-152385,Port Alfred
3,24936-233524,Port Alfred
4,8973-85630,Port Alfred
...,...,...
13603,74385-87621,Port Elizabeth
13604,73320-24375,Port Elizabeth
13605,22425-219024,Port Elizabeth
13606,10849-103163,Port Elizabeth


In [12]:
#Export the starbucks city data to csv
output_data_file = "Clean_Data/starbucks_cities.csv"
starbucks_city_df.to_csv(output_data_file, index = None, header=True)

In [13]:
starbucks_city_df.to_sql(name="store_city", con=conn,
                  if_exists="append", index=False)

IntegrityError: (psycopg2.errors.ForeignKeyViolation) insert or update on table "store_city" violates foreign key constraint "fk_store_number"
DETAIL:  Key (store_number)=(3513-125945) is not present in table "store_location".

[SQL: INSERT INTO store_city (store_number, city) VALUES (%(store_number)s, %(city)s)]
[parameters: ({'store_number': '3513-125945', 'city': 'Port Alfred'}, {'store_number': '74352-84449', 'city': 'Port Alfred'}, {'store_number': '12449-152385', 'city': 'Port Alfred'}, {'store_number': '24936-233524', 'city': 'Port Alfred'}, {'store_number': '8973-85630', 'city': 'Port Alfred'}, {'store_number': '72788-84447', 'city': 'Port Alfred'}, {'store_number': '79549-106150', 'city': 'Port Alfred'}, {'store_number': '75988-107245', 'city': 'Port Alfred'}  ... displaying 10 of 13608 total bound parameter sets ...  {'store_number': '10849-103163', 'city': 'Port Elizabeth'}, {'store_number': '10769-102454', 'city': 'Port Alfred'})]
(Background on this error at: http://sqlalche.me/e/gkpj)