## Importing dependencies and environment variables

In [3]:
# Dependencies
import csv as csv
import http.client 
import json
import numpy as np
import os
import pandas as pd
import pprint
import psycopg2
import requests
import sqlalchemy
from bs4 import BeautifulSoup as bs
from dotenv import load_dotenv
from selenium import webdriver
from sodapy import Socrata
from splinter import Browser
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Float, Date

In [4]:
# Load project3.env so its values can be read from the process environment.
# NOTE(review): python-dotenv does not replace variables that already exist in
# the environment unless asked to, so an OS-provided USERNAME (e.g. on Windows)
# may take precedence over the value in the file -- confirm.
load_dotenv("project3.env")

# Database connection settings (intended for the engine object URL).
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
host = os.getenv("HOST")
port = os.getenv("PORT")
database = os.getenv("DATABASE")

# API credentials; each is None when the variable is not set.
google_key = os.getenv('GOOGLE_API_KEY')
yelp_client_id = os.getenv('YELP_CLIENT_ID')
yelp_key = os.getenv('YELP_API_KEY')
sf_data_key = os.getenv('SFDATAAPPTOKEN')

In [5]:
# Raise pandas' display limits so long and wide frames are shown in full.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

## Querying the Seattle Registered Business API for Starbucks locations

In [4]:
# Socrata client pointed at the data.seattle.gov open-data portal.
# sf_data_key (read from the SFDATAAPPTOKEN environment variable) is passed as
# the second positional argument -- presumably the app token; confirm against
# the sodapy documentation.
client = Socrata("data.seattle.gov", sf_data_key)

In [31]:
# Fetch the rows of dataset "wnbq-64tb" whose business_legal_name is exactly
# 'STARBUCKS CORPORATION', then load the returned records into a DataFrame.
# NOTE(review): no row limit is passed here -- confirm the endpoint's default
# page size is not truncating the result.
starbucks_results = client.get("wnbq-64tb", where="business_legal_name ='STARBUCKS CORPORATION'")
starbucks_results_df = pd.DataFrame.from_records(starbucks_results)

## Querying the Seattle Registered Business API for Peet's Coffee locations

In Seattle, Peet's Coffee is sold at a coffee shop and bakery called Specialty's Cafe and Bakery. We will have to decide whether to include it in our data set.

In [53]:
# Fetch the rows of dataset "wnbq-64tb" whose trade_name contains
# 'SPECIALTYS CAFE', then load the returned records into a DataFrame.
# NOTE(review): this frame is not among the frames concatenated into
# complete_coffee_df further down -- confirm whether that is intentional.
specialtys_coffee_results = client.get("wnbq-64tb", where="trade_name LIKE'%SPECIALTYS CAFE%'")
specialtys_coffee_results_df = pd.DataFrame.from_records(specialtys_coffee_results)

## Querying the Seattle Registered Business API for bourgie coffee shops

In [104]:
# Upper-case name fragments of the independent coffee shops to look up;
# each one is matched as a substring of trade_name by the query loop.
bourgie_coffee_shop_list = [
    "PRESERVE AND GATHER",
    "MAKEDA AND MINGUS",
    "HERKIMER COFFEE",
    "ZOKA",
    "VIF",
    "LIGHTHOUSE ROASTERS",
    "MILSTEAD",
    "CAFFE LADRO",
    "ESPRESSO VIVACE",
    "LA MARZOCCO EXPERIENCE",
    "ANALOG COFFEE",
    "MR WEST",
    "VICTROLA",
    "CAFFE VITA",
    "TOUGO",
    "SEATTLE COFFEE WORKS",
    "ELM COFFEE",
    "CAFFE UMBRIA",
    "SOUND & FOG",
    "EMPIRE ESPRESSO",
]

In [107]:
# Query dataset "wnbq-64tb" once per shop name (substring match on trade_name)
# and gather the per-shop results first. Concatenating inside the loop would
# re-copy the accumulated frame on every iteration; a single concat at the end
# copies each result only once.
shop_frames = [
    pd.DataFrame.from_records(
        client.get("wnbq-64tb", where=f"trade_name LIKE '%{coffee_shop}%'")
    )
    for coffee_shop in bourgie_coffee_shop_list
]

if shop_frames:
    # sort_index(axis=1) keeps the columns alphabetically ordered even when
    # every result already shares the same column layout.
    bourgie_coffee_df = pd.concat(
        shop_frames, ignore_index=True, sort=True
    ).sort_index(axis=1)
else:
    # pd.concat raises on an empty list; an empty shop list yields an empty frame.
    bourgie_coffee_df = pd.DataFrame()

## Concatenating all coffee shop data frames together and cleaning them

In [108]:
# Stack the Starbucks rows and the independent-shop rows into one frame with a
# fresh 0..n-1 index and alphabetically sorted columns.
# NOTE(review): specialtys_coffee_results_df is not part of this list --
# confirm whether it should be.
complete_coffee_df = pd.concat(
    [starbucks_results_df, bourgie_coffee_df],
    ignore_index=True,
    sort=True,
)

In [110]:
# Persist the combined frame (with a header row, without the index column).
# The output directory is created first so the write does not fail when
# "data" does not exist yet.
os.makedirs("data", exist_ok=True)
complete_coffee_df.to_csv("data/seattle_coffee_shop_data.csv", index=False, header=True)

## Formatting all result CSVs to have the same headers so they can be concatenated

In [39]:
# Read the combined coffee-shop CSV back into a DataFrame (same path the
# combined frame was written to). Column dtypes are inferred by read_csv.
complete_coffee_df = pd.read_csv("data/seattle_coffee_shop_data.csv")

In [40]:
# Display the column labels of the loaded frame.
complete_coffee_df.columns

Index(['business_legal_name', 'business_phone', 'city_account_number', 'city_state_zip', 'license_start_date', 'naics_code', 'naics_description', 'ownership_type', 'state', 'street_address', 'trade_name', 'ubi', 'zip'], dtype='object')

In [41]:
# Add two placeholder columns holding None in every row -- presumably so the
# headers line up with the other result files; confirm.
for placeholder_column in ("location", "location_end_date"):
    complete_coffee_df[placeholder_column] = None

In [42]:
# Columns to keep, listed in the order they should appear in the output.
selected_columns = [
    "zip",
    "trade_name",
    "business_legal_name",
    "city_state_zip",
    "street_address",
    "location",
    "license_start_date",
    "location_end_date",
]
cleaned_complete_coffee_df = complete_coffee_df[selected_columns]

In [44]:
# Mapping from the Seattle dataset's column names to the shared header names.
column_renames = {
    "zip": "zip_code",
    "trade_name": "dba_name",
    "business_legal_name": "ownership_name",
    "city_state_zip": "city",
    "license_start_date": "location_start_date",
}
cleaned_complete_coffee_df = cleaned_complete_coffee_df.rename(columns=column_renames)

In [49]:
# Convert the upper-case text columns to Title Case, one column at a time.
for text_column in ("dba_name", "ownership_name", "city", "street_address"):
    cleaned_complete_coffee_df[text_column] = cleaned_complete_coffee_df[text_column].str.title()

In [None]:
# Reduce zip_code to its leading five digits as an integer
# (e.g. "98101-1234" -> 98101).
# This is done column-wise: the rows yielded by DataFrame.iterrows() are
# copies, so assigning to them leaves the frame unchanged.
# Values are converted to text first so the step works whether read_csv
# inferred strings, ints or floats; entries without five leading digits
# (including missing values) become <NA> in the nullable Int64 column.
zip_prefix = (
    cleaned_complete_coffee_df["zip_code"]
    .astype(str)
    .str.extract(r"^\s*(\d{5})", expand=False)
)
cleaned_complete_coffee_df["zip_code"] = pd.to_numeric(zip_prefix).astype("Int64")

In [60]:
# Write the cleaned frame to CSV with a header row and without the index column.
cleaned_complete_coffee_df.to_csv("data/cleaned_seattle_coffee_shop_data.csv", index=False, header=True)