## Importing dependencies and environment variables

In [1]:
# Dependencies
import csv as csv
import http.client 
import json
import numpy as np
import os
import pandas as pd
import pprint
import psycopg2
import requests
import sqlalchemy
from bs4 import BeautifulSoup as bs
from dotenv import load_dotenv
from selenium import webdriver
from sodapy import Socrata
from splinter import Browser
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Float, Date

In [2]:
# Read project3.env so its entries are available through the process environment
# (the database settings are meant to be used when building the engine URL).
# NOTE(review): load_dotenv does not overwrite variables that already exist by
# default, and USERNAME is often predefined by the OS/shell — confirm the value
# picked up below is the intended database user.
load_dotenv("project3.env")

# Database connection settings.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
host = os.getenv("HOST")
port = os.getenv("PORT")
database = os.getenv("DATABASE")

# Keys / tokens for the external APIs.
google_key = os.getenv('GOOGLE_API_KEY')
yelp_client_id = os.getenv('YELP_CLIENT_ID')
yelp_key = os.getenv('YELP_API_KEY')
sf_data_key = os.getenv('SFDATAAPPTOKEN')

In [118]:
# Raise pandas' display limits so wide / long frames are shown without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

## Flask app and Heroku database connection

In [None]:
# Flask and Flask-SQLAlchemy are imported here because nothing earlier in the
# notebook brings these names into scope.
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)

# The connection string is taken from the environment so the database password
# never lives in the notebook, and it is deliberately not printed.
# NOTE: a credential was previously hard-coded in this cell; it should be rotated.
uri = os.environ.get("URI")
if not uri:
    raise RuntimeError("The URI environment variable is not set; cannot connect to the database.")
# SQLAlchemy 1.4+ rejects the legacy 'postgres://' scheme, so normalise it.
if uri.startswith("postgres://"):
    uri = uri.replace("postgres://", "postgresql://", 1)

app.config["SQLALCHEMY_DATABASE_URI"] = uri
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# Binding the instance of flask_sqlalchemy.SQLAlchemy to this specific flask app
db = SQLAlchemy(app)
# Reflect an existing database into a new model
Base = automap_base()
# Reflect the tables
Base.prepare(db.engine, reflect=True)
# Initializing a variable with the created engine
engine = db.engine
# Initializing a variable with the connection resource
connection = engine.connect()
# Create our session (link) from Python to the DB
session = Session(engine)
# `inspect` is reached through the already-imported sqlalchemy module; the bare
# name was never imported.
inspector = sqlalchemy.inspect(engine)

## Querying the San Francisco Registered Business API for Starbucks locations

In [3]:
# Socrata client for the San Francisco open-data portal, created with the token
# read from the SFDATAAPPTOKEN environment variable.
sf_data_domain = "data.sfgov.org"
client = Socrata(sf_data_domain, sf_data_key)

In [149]:
# Fetch rows of dataset g8m3-pdis whose ownership_name is exactly 'Starbucks Corporation'.
starbucks_where = "ownership_name = 'Starbucks Corporation'"
starbucks_results = client.get("g8m3-pdis", where=starbucks_where)

In [150]:
# Turn the returned Starbucks records into a DataFrame.
starbucks_results_df = pd.DataFrame.from_records(data=starbucks_results)

## Querying the San Francisco Registered Business API for Peet's Coffee locations


In [158]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains "Peet's Coffee".
peets_where = 'dba_name LIKE "%Peet\'s Coffee%"'
peets_coffee_results = client.get("g8m3-pdis", where=peets_where)

In [159]:
# Turn the returned Peet's Coffee records into a DataFrame.
peets_coffee_results_df = pd.DataFrame.from_records(data=peets_coffee_results)

## Querying the San Francisco Registered Business API for Philz Coffee locations

In [63]:
# Fetch rows of dataset g8m3-pdis whose ownership_name contains 'Philz'.
philz_where = "ownership_name LIKE '%Philz%'"
philz_coffee_results = client.get("g8m3-pdis", where=philz_where)

In [64]:
# Turn the returned Philz records into a DataFrame.
philz_coffee_results_df = pd.DataFrame.from_records(data=philz_coffee_results)

## Querying the San Francisco Registered Business API for Blue Bottle Coffee locations

In [71]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains 'Blue Bottle'.
blue_btl_where = "dba_name LIKE '%Blue Bottle%'"
blue_btl_coffee_results = client.get("g8m3-pdis", where=blue_btl_where)

In [72]:
# Turn the returned Blue Bottle records into a DataFrame.
blue_btl_coffee_results_df = pd.DataFrame.from_records(data=blue_btl_coffee_results)

## Querying the San Francisco Registered Business API for Sightglass Coffee locations

In [77]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains 'Sightglass Coffee'.
sightglass_where = "dba_name LIKE '%Sightglass Coffee%'"
sightglass_coffee_results = client.get("g8m3-pdis", where=sightglass_where)

In [78]:
# Turn the returned Sightglass records into a DataFrame.
sightglass_coffee_results_df = pd.DataFrame.from_records(data=sightglass_coffee_results)

## Querying the San Francisco Registered Business API for Ritual Coffee locations

In [80]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains 'Ritual Coffee'.
ritual_where = "dba_name LIKE '%Ritual Coffee%'"
ritual_coffee_results = client.get("g8m3-pdis", where=ritual_where)

In [81]:
# Turn the returned Ritual Coffee records into a DataFrame.
ritual_coffee_results_df = pd.DataFrame.from_records(data=ritual_coffee_results)

## Querying the San Francisco Registered Business API for Four Barrel Coffee locations

In [90]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains 'Fourbarrel'.
four_barrel_where = "dba_name LIKE '%Fourbarrel%'"
four_barrel_coffee_results = client.get("g8m3-pdis", where=four_barrel_where)

In [91]:
# Turn the returned Four Barrel records into a DataFrame.
four_barrel_results_df = pd.DataFrame.from_records(data=four_barrel_coffee_results)

## Querying the San Francisco Registered Business API for bourgie coffee shops

In [143]:
# Name fragments of the smaller coffee shops; each one is matched against dba_name
# with a LIKE '%...%' filter, so spelling and whitespace (note the trailing space
# in "The Mill ") are significant.
bourgie_coffee_shop_list = [
    "Trouble Coffee",
    "Andytown Coffee",
    "Garden House Coffee",
    "Snowbird Coffee",
    "Flywheel Coffee",
    "Fifty/fifty",
    "The Mill ",
    "Wrecking Ball Coffee",
    "Lady Falcon Coffee",
    "Saint Frank",
    "Linea Caffe",
    "George and Lennie",
    "Coffee Cultures",
    "Beacon Coffee",
    "Sextant Coffee",
    "Equator Coffee",
    "Caffe Trieste",
    "Chapel Hill Coffee",
    "Mazarine Coffee",
    "La Capra Coffee",
    "Provender",
    "Farleys",
    "Reveille Coffee",
    "Caffe Puccini",
]

In [144]:
# Query the registry once per shop name, collect every result frame, and combine
# them with a single concat instead of re-copying a growing frame on each pass.
bourgie_coffee_frames = []
for coffee_shop in bourgie_coffee_shop_list:
    # SoQL string literals escape an embedded single quote by doubling it, so a
    # name containing an apostrophe cannot break out of the LIKE pattern.
    escaped_name = coffee_shop.replace("'", "''")
    query = client.get("g8m3-pdis", where=f"dba_name LIKE '%{escaped_name}%'")
    results_df = pd.DataFrame.from_records(query)
    bourgie_coffee_frames.append(results_df)

# pd.concat raises on an empty list, so fall back to an empty frame when there
# are no shop names. ignore_index=True yields a fresh 0..n-1 row index.
bourgie_coffee_df = (
    pd.concat(bourgie_coffee_frames, ignore_index=True, sort=True)
    if bourgie_coffee_frames
    else pd.DataFrame()
)

Trouble Coffee
Andytown Coffee
Garden House Coffee
Snowbird Coffee
Flywheel Coffee
Fifty/fifty
The Mill 
Wrecking Ball Coffee
Lady Falcon Coffee
Saint Frank
Linea Caffe
George and Lennie
Coffee Cultures
Beacon Coffee
Sextant Coffee
Equator Coffee
Caffe Trieste
Chapel Hill Coffee
Mazarine Coffee
La Capra Coffee
Provender
Farleys
Reveille Coffee
Caffe Puccini


## Finding the correct "The Mill" coffee shop

In [112]:
# Fetch rows of dataset g8m3-pdis whose dba_name contains 'Mill' and whose
# full_business_address contains 'Divis'.
the_mill_where = "dba_name LIKE '%Mill%' AND full_business_address LIKE '%Divis%'"
the_mill_coffee_results = client.get("g8m3-pdis", where=the_mill_where)

In [113]:
# Turn the returned "The Mill" records into a DataFrame.
the_mill_coffee_results_df = pd.DataFrame.from_records(data=the_mill_coffee_results)

## Cleaning bourgie_coffee_df

In [147]:
# Remove four rows from the combined frame by their row labels.
# NOTE(review): these labels are hard-coded and presumably correspond to unwanted
# matches in one particular API response — confirm they still point at the
# intended rows. Running this cell twice raises KeyError because the labels are
# already gone.
rows_to_discard = [7, 11, 12, 13]
bourgie_coffee_df = bourgie_coffee_df.drop(index=rows_to_discard)

## Concatenating all coffee shop data frames together and cleaning them

In [161]:
# Stack every per-chain frame plus the small-shop frame into one table with a
# fresh row index and alphabetically sorted columns.
all_coffee_frames = [
    starbucks_results_df,
    peets_coffee_results_df,
    philz_coffee_results_df,
    blue_btl_coffee_results_df,
    sightglass_coffee_results_df,
    ritual_coffee_results_df,
    four_barrel_results_df,
    the_mill_coffee_results_df,
    bourgie_coffee_df,
]
complete_coffee_df = pd.concat(all_coffee_frames, ignore_index=True, sort=True)

In [162]:
# Show the column labels of the combined frame.
complete_coffee_df.columns

Index(['business_zip', 'certificate_number', 'city', 'dba_end_date', 'dba_name', 'dba_start_date', 'full_business_address', 'lic', 'lic_code_description', 'location', 'location_end_date', 'location_start_date', 'mail_city', 'mail_state', 'mail_zipcode', 'mailing_address_1', 'naic_code', 'naic_code_description', 'neighborhoods_analysis_boundaries', 'ownership_name', 'parking_tax', 'state', 'supervisor_district', 'transient_occupancy_tax', 'ttxid'], dtype='object')

In [164]:
# Keep only the location-related columns of the combined frame.
columns_to_keep = [
    "business_zip",
    "city",
    "full_business_address",
    "neighborhoods_analysis_boundaries",
    "location",
    "location_start_date",
    "location_end_date",
    "lic_code_description",
]
cleaned_complete_coffee_df = complete_coffee_df[columns_to_keep]

In [165]:
# Normalise the mis-encoded city value "San+francisco" to "San Francisco".
# Writing to the row Series yielded by iterrows() is not guaranteed to reach the
# DataFrame, so the exact-match replacement is done on the whole column and the
# result is bound to a new frame (which also avoids SettingWithCopyWarning).
cleaned_complete_coffee_df = cleaned_complete_coffee_df.assign(
    city=cleaned_complete_coffee_df["city"].replace("San+francisco", "San Francisco")
)

In [169]:
# Write the cleaned table to disk with a header row and without the row index.
output_csv_name = "coffee_shop_data.csv"
cleaned_complete_coffee_df.to_csv(output_csv_name, header=True, index=False)