## Importing dependencies and environment variables

In [1]:
# Dependencies
import csv as csv
import http.client 
import json
import numpy as np
import os
import pandas as pd
import pprint
import psycopg2
import requests
import sqlalchemy
from bs4 import BeautifulSoup as bs
from dotenv import load_dotenv
from selenium import webdriver
from sodapy import Socrata
from splinter import Browser
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Float, Date

In [2]:
# Load project3.env so the settings it defines become available as
# environment variables, then copy each one into a module-level name.
# NOTE(review): load_dotenv is called without override, so a variable already
# present in the process environment (e.g. an OS-provided USERNAME) presumably
# takes precedence over the value in the file — confirm.
load_dotenv("project3.env")

# Database connection settings.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
host = os.getenv("HOST")
port = os.getenv("PORT")
database = os.getenv("DATABASE")

# API credentials / application tokens.
google_key = os.getenv("GOOGLE_API_KEY")
yelp_client_id = os.getenv("YELP_CLIENT_ID")
yelp_key = os.getenv("YELP_API_KEY")
sf_data_key = os.getenv("SFDATAAPPTOKEN")

In [3]:
# Raise pandas' display limits (row count, column count, and line width).
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

## Querying the Los Angeles Active Business API for Starbucks locations

In [16]:
# Socrata client for the data.lacity.org open-data portal, authenticated with
# the application token read from the SFDATAAPPTOKEN environment variable.
client = Socrata("data.lacity.org", app_token=sf_data_key)

In [17]:
# Fetch every record of dataset 6rrh-rzua whose business_name contains
# "STARBUCKS", then turn the returned records into a DataFrame.
starbucks_results = client.get(
    "6rrh-rzua",
    where="business_name LIKE '%STARBUCKS%'",
)
starbucks_results_df = pd.DataFrame.from_records(data=starbucks_results)

## Querying the Los Angeles Active Business API for bourgie coffee shops

In [70]:
# Upper-case name fragments of the independent coffee shops to look up; each
# entry is later substituted into a `dba_name LIKE '%...%'` filter.
bourgie_coffee_shop_list = [
    "ALFRED COFFEE",
    "BLUE BOTTLE COFFEE",
    "PEETS COFFEE",
    "COFFEE COMMISSARY",
    "COGNOSCENTI COFFEE",
    "ENDORFFEINE",
    "G&B COFFEE",
    "MENOTTI",
    "GO GET EM TIGER",
    "PARAMOUNT COFFEE PROJECT",
    "RUBIES AND DIAMONDS",
    "VERVE COFFEE ROASTERS",
    "FOUND COFFEE",
    "THE BOY & THE BEAR - COFFEE ROASTERY",
    "HILLTOP COFFEE + KITCHEN",
    "TACTILE COFFEE",
    "NOSSA FAMILIA COFFEE",
    "TIERRA MIA COFFEE COMPANY",
]

In [71]:
# Query dataset 6rrh-rzua once per shop name, matching on dba_name, and keep
# each response as its own frame. The frames are concatenated a single time
# after the loop: calling pd.concat inside the loop re-copies everything
# gathered so far on every iteration.
shop_frames = []
for coffee_shop in bourgie_coffee_shop_list:
    query = client.get("6rrh-rzua", where=f"dba_name LIKE '%{coffee_shop}%'")
    results_df = pd.DataFrame.from_records(query)
    shop_frames.append(results_df)

# pd.concat raises on an empty list, so fall back to an empty frame when there
# were no shop names to query (the result the loop-based version produced).
if shop_frames:
    bourgie_coffee_df = pd.concat(shop_frames, ignore_index=True, sort=True)
else:
    bourgie_coffee_df = pd.DataFrame()

## Concatenating all coffee shop data frames together and cleaning them

In [72]:
# Stack the Starbucks rows and the independent-shop rows into one frame with a
# fresh 0..n-1 index; sort=True orders the combined columns alphabetically.
# (No placeholder assignment is needed first: pd.concat returns a new frame.)
complete_coffee_df = pd.concat(
    [starbucks_results_df, bourgie_coffee_df],
    ignore_index=True,
    sort=True,
)

In [74]:
# Save the combined, uncleaned results (header row included, index omitted).
complete_coffee_df.to_csv(
    "data/los_angeles_coffee_shop_data.csv",
    header=True,
    index=False,
)

## Formatting all result CSVs to have the same headers so they can be concatenated

In [12]:
# Reload the combined results from the CSV written earlier.
complete_coffee_df = pd.read_csv(
    "data/los_angeles_coffee_shop_data.csv",
)

In [13]:
# Display the column labels of the reloaded frame.
complete_coffee_df.columns

Index(['business_name', 'city', 'council_district', 'dba_name', 'location_1', 'location_account', 'location_description', 'location_start_date', 'mailing_address', 'mailing_city', 'mailing_zip_code', 'naics', 'primary_naics_description', 'street_address', 'zip_code'], dtype='object')

In [14]:
# Add two columns filled with None.
# NOTE(review): the column selection that follows keeps "location_end_date"
# but takes its location from "location_1", not from this "location"
# placeholder — confirm the placeholder is still needed.
for placeholder_column in ("location", "location_end_date"):
    complete_coffee_df[placeholder_column] = None

In [15]:
# Keep only the columns wanted in the cleaned output, in this order.
columns_to_keep = [
    "zip_code",
    "dba_name",
    "business_name",
    "city",
    "street_address",
    "location_1",
    "location_start_date",
    "location_end_date",
]
cleaned_complete_coffee_df = complete_coffee_df[columns_to_keep]

In [18]:
# Rename two columns: business_name -> ownership_name, location_1 -> location.
cleaned_complete_coffee_df = cleaned_complete_coffee_df.rename(
    columns={"business_name": "ownership_name", "location_1": "location"}
)

In [21]:
# Convert the name and address text columns to Title Case.
for text_column in ("dba_name", "ownership_name", "city", "street_address"):
    cleaned_complete_coffee_df[text_column] = (
        cleaned_complete_coffee_df[text_column].str.title()
    )

In [24]:
# Truncate every ZIP code to its first five characters (values may be longer,
# presumably ZIP+4) and store it as an integer.
#
# This is assigned back to the column by name rather than written into the
# rows yielded by iterrows(): those per-row Series may be copies, so writes to
# them are not guaranteed to reach the DataFrame. Addressing "zip_code" by
# label also removes the reliance on it being the first column.
# A missing or non-numeric ZIP raises ValueError here instead of being
# silently kept.
cleaned_complete_coffee_df["zip_code"] = (
    cleaned_complete_coffee_df["zip_code"].astype(str).str[:5].astype(int)
)

In [26]:
# Save the cleaned frame (header row included, index omitted).
cleaned_complete_coffee_df.to_csv(
    "data/cleaned_los_angeles_coffee_shop_data.csv",
    header=True,
    index=False,
)