In [403]:
import numpy as np 
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')
import os

from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine

from psycopg2.extensions import register_adapter, AsIs

def addapt_numpy_float64(numpy_float64):
    """Wrap a numpy float64 value in psycopg2's ``AsIs`` and return the wrapper."""
    wrapped = AsIs(numpy_float64)
    return wrapped

def addapt_numpy_int64(numpy_int64):
    """Wrap a numpy int64 value in psycopg2's ``AsIs`` and return the wrapper."""
    wrapped = AsIs(numpy_int64)
    return wrapped

# Register the two wrappers above with psycopg2 so numpy float64 / int64 values
# (as produced when reading cells out of a DataFrame) can be passed as query parameters.
register_adapter(np.float64, addapt_numpy_float64)
register_adapter(np.int64, addapt_numpy_int64)

In [404]:
from config import yelp_api_key, darksky_api_key, PGHOST, PGDATABASE, PGUSER, PGPASSWORD
from restaurant_info import restaurantLocation
from weather import Weather

### Get Latitude & Longitude from Yelp API

In [405]:
# Business name and area handed to restaurantLocation for the coordinate lookup
search_business = 'The Counting Room' # Placeholder name, not the actual restaurant
location = 'Brooklyn, NY'

In [406]:
# Make Yelp API call to get latitude & longitude for the business.
# NOTE(review): restaurantLocation is a project-local helper (restaurant_info.py);
# presumably it reads yelp_api_key from config — confirm.
rest_loc = restaurantLocation(search_business, location)
lat, long = rest_loc.get_lat_long()

Weather Location: The Counting Room


### Import / Clean / Prep File

In [407]:
# Date range covered by the weather CSV; both values are interpolated into its file name
w_start = '2017-01-01'
w_end = '2019-06-30'

# Restaurant file: POS export with one row per check
sales_file = 'csv/rest_1_both_pos_by_check.csv'

# Complete daily file: one row per date with sales, cover/reservation counts and weather
resy_file = 'csv/rest_1_sales_weather_merged_through_2019-07-01.csv'

# Weather file for the w_start..w_end range
weather_csv_file = f'csv/weather_{w_start}_to_{w_end}.csv'

In [408]:
 # Read in the per-check sales file, using the parsed 'date' column as the index
data = pd.read_csv(sales_file, index_col = 'date', parse_dates=True)
# NOTE(review): read_csv already returns a DataFrame, so this wrap looks redundant — confirm before removing
sales_df = pd.DataFrame(data)

In [409]:
def prep_sales_df(df):
    """Reduce the per-check sales frame to dinner checks and add a ``check_id`` column.

    Steps, in order:
      1. keep rows whose ``day_part`` is ``'Dinner'``;
      2. replace NaN with 0 in every column;
      3. keep rows with ``net_sales`` greater than 0;
      4. build ``check_id`` as ``YYYYMMDD-HHMM-<check_no>-<first letter of
         revenue_center>-<factorize code of net_sales>``;
      5. drop ``covers``, ``day_part`` and ``check_no``;
      6. normalize the datetime index to midnight.

    Args:
        df: DataFrame with a datetime index and the columns ``day_part``,
            ``net_sales``, ``check_no``, ``revenue_center`` and ``covers``.
            It is not modified.

    Returns:
        A new DataFrame holding the remaining columns plus ``check_id``.
    """
    # Dinner only, NaN filled with 0. fillna returns a new frame, so nothing is
    # mutated in place on a filtered selection of the caller's data.
    dinner = df[df.day_part == 'Dinner'].fillna(0)

    # Filter out $0 sales; the explicit copy makes the column assignment below
    # operate on an independent frame rather than on a slice.
    dinner = dinner[dinner.net_sales > 0].copy()

    # Create the check ID.
    # NOTE(review): the trailing factorize code identifies the distinct net_sales
    # value, not the row, so two checks sharing timestamp, check number, revenue
    # center and amount would get the same check_id — confirm that cannot occur,
    # since checks.check_id is declared as a primary key.
    dinner['check_id'] = dinner.index.strftime('%Y%m%d') + '-' + dinner.index.strftime('%H%M') + '-' + \
        dinner['check_no'].astype(int).astype(str) + '-' + dinner['revenue_center'].str[0] + '-' + \
        pd.factorize(dinner['net_sales'])[0].astype(str)

    # Drop covers, day part and check number
    dinner = dinner.drop(['covers', 'day_part', 'check_no'], axis=1)

    # Keep only the date part of each timestamp in the index
    dinner.index = dinner.index.normalize()

    return dinner
    
# Prep from the untouched `data` frame rather than from sales_df itself so this cell
# can be re-run: prep_sales_df drops 'day_part', so feeding its own output back in fails.
sales_df = prep_sales_df(data)

In [410]:
sales_df.head()

Unnamed: 0_level_0,net_sales,revenue_center,check_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-02,7.34,Window,20170102-0511-3364-W-0
2017-01-02,30.0,Window,20170102-0525-3367-W-1
2017-01-02,111.0,PDR,20170102-0528-3369-P-2
2017-01-02,112.0,Dining Room,20170102-0532-3370-D-3
2017-01-02,148.0,Dining Room,20170102-0533-3371-D-4


In [411]:
# Write the prepped long-format frame (one row per dinner check) to CSV for Tableau
sales_df.to_csv('csv/rest_1_sales_by_check_long_0117_0619.csv')

In [412]:
# Net sales per year and revenue center. net_sales is selected explicitly: summing the
# whole frame would also try to aggregate the string check_id column.
sales_df.groupby([sales_df.index.year, 'revenue_center'])[['net_sales']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,net_sales
date,revenue_center,Unnamed: 2_level_1
2017,Bar,1080561.72
2017,Dining Room,2800349.31
2017,Outside,866862.75
2017,PDR,1210661.82
2017,Window,151524.71
2018,Bar,1074150.34
2018,Dining Room,2791638.47
2018,Outside,759544.82
2018,PDR,1225295.67
2018,Window,98340.87


In [413]:
# Load the weather CSV for the w_start..w_end range, indexed by the parsed 'date' column
dfw = pd.read_csv(weather_csv_file, index_col='date', parse_dates=True)

In [414]:
dfw.head()

Unnamed: 0_level_0,apparent_temperature,humidity,precip_intensity_max,precip_max_time,precip_prob,precip_type,pressure,summary,temperature,day_of_week,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-01-01,37.83,0.54,0.0,05:01AM,0.0,none,1028.26,clear-night,37.83,6,1
2017-01-02,35.58,0.92,0.0242,03:00PM,0.79,rain,1027.98,rain,39.06,0,1
2017-01-03,41.5,0.94,0.0913,05:00PM,0.77,rain,1000.08,rain,43.19,1,1
2017-01-04,36.81,0.4,0.0176,12:00AM,0.0,rain,1002.55,clear-night,42.29,2,1
2017-01-05,27.11,0.48,0.0,05:01AM,0.0,none,1014.61,cloudy,31.36,3,1


In [415]:
# Load the merged daily file (sales, covers/reservations and weather), indexed by parsed date
dfr = pd.read_csv(resy_file, index_col='date', parse_dates=True)
dfr.head()

Unnamed: 0_level_0,inside_sales,outside_sales,inside_covers,outside_covers,reserved_covers,walkin_covers,waitlist_covers,no_show_covers,no_show_parties,apparent_temperature,humidity,precip_intensity_max,precip_max_time,precip_prob,precip_type,pressure,summary,temperature
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-02,13159.84,0.0,174,0,106,26,42,17,6,35.58,0.92,0.0242,15:00:00,0.79,rain,1027.98,rain,39.06
2017-01-03,12442.11,0.0,181,0,119,31,31,14,4,41.5,0.94,0.0913,17:00:00,0.77,rain,1000.08,rain,43.19
2017-01-04,12927.64,0.0,174,0,131,17,26,5,2,36.81,0.4,0.0176,00:00:00,0.0,rain,1002.55,clear-night,42.29
2017-01-05,14457.79,0.0,191,0,138,25,28,4,2,27.11,0.48,0.0,05:01:00,0.0,none,1014.61,cloudy,31.36
2017-01-06,15331.97,0.0,200,0,130,16,54,6,3,24.74,0.48,0.0019,12:00:00,0.0,snow,1022.8,partly-cloudy-night,29.42


### Connect Notebook to AWS Postgres Instance

In [416]:
# Connect to RDS-Postgres DB

def connect():
    """Open a psycopg2 connection to the Postgres server configured in ``config``.

    Uses the module-level ``PGHOST``, ``PGDATABASE``, ``PGUSER`` and ``PGPASSWORD``
    values and port 5432, and prints "Connected!" once the connection is open.

    Returns:
        tuple: ``(conn, cursor)`` — the open connection and a cursor created from it.

    Raises:
        Whatever ``psycopg2.connect`` raises when the connection cannot be
        established (nothing is caught here).
    """
    # Pass the settings as keyword arguments instead of concatenating a conninfo
    # string: a hand-built "key=value ..." string breaks as soon as a value
    # (typically the password) contains a space, quote or backslash.
    conn = psycopg2.connect(
        host=PGHOST,
        port="5432",
        dbname=PGDATABASE,
        user=PGUSER,
        password=PGPASSWORD,
    )
    print("Connected!")

    # Create a cursor object
    cursor = conn.cursor()

    return conn, cursor

# Open the shared connection and cursor used by the table-creation and populate cells below
conn, cursor = connect()

Connected!


### Create Sales & Reservations Tables

In [376]:
# Lookup table of revenue centers (id -> name)
create_rev_center_table = """
    CREATE TABLE IF NOT EXISTS rev_center(
        id INTEGER PRIMARY KEY NOT NULL,
        name VARCHAR NOT NULL)
    """

# One row per check; rev_center_id is a foreign key into rev_center
create_check_table = """
    CREATE TABLE IF NOT EXISTS checks(
        check_id VARCHAR(36) PRIMARY KEY NOT NULL,
        date DATE,
        rev_center_id INTEGER REFERENCES rev_center(id),
        net_sales NUMERIC (7, 2)
    )
    """

# One row per date with the cover / reservation counts
create_resy_table = """
    CREATE TABLE IF NOT EXISTS reservations (
        id INTEGER PRIMARY KEY NOT NULL,
        date DATE,
        inside_covers INTEGER,
        outside_covers INTEGER,
        reserved_covers INTEGER,
        walkin_covers INTEGER,
        waitlist_covers INTEGER,
        no_show_covers INTEGER,
        no_show_parties INTEGER
    )
    """

# rev_center is created before checks because checks references it.
# IF NOT EXISTS makes re-running this cell a no-op for tables that already exist.
cursor.execute(create_rev_center_table)
cursor.execute(create_check_table)
cursor.execute(create_resy_table)
conn.commit()

In [108]:
# One row per date of weather data, keyed by date_id.
# The two temperature columns are NUMERIC (5, 2): NUMERIC (4, 2) tops out at 99.99,
# so any reading of 100 or more would be rejected with a numeric overflow.
# NOTE(review): IF NOT EXISTS does not alter a table that already exists — an
# existing weather table needs an ALTER TABLE to pick up the wider columns.
create_weather_table = """
        CREATE TABLE IF NOT EXISTS weather (
            DATE DATE,
            date_id INT PRIMARY KEY,
            apparent_temperature NUMERIC (5, 2),
            humidity NUMERIC (3, 2),
            precip_intensity_max NUMERIC (5, 4),
            precip_max_time TIME,
            precip_prob NUMERIC (3, 2),
            precip_type TEXT,
            pressure NUMERIC (6,2),
            summary TEXT,
            temperature NUMERIC (5, 2)
            )
            """

cursor.execute(create_weather_table)
conn.commit()

### Populate Database Tables

In [None]:
# Revenue-center name -> integer id; inserted into rev_center as (id, name) and used to
# translate the 'revenue_center' column into checks.rev_center_id
rev_centers = {'Dining Room': 1, 'Bar': 2, 'PDR': 3, 'Window': 4, 'Outside': 5}

In [80]:
def populate_rev_center(df=None):
    """Insert every entry of the module-level ``rev_centers`` mapping into ``rev_center``.

    All rows are written in a single transaction: one commit after every insert
    succeeded, or a rollback if any insert fails, so a failure neither leaves a
    partial load behind nor leaves the shared connection in an aborted transaction.

    Args:
        df: Unused; accepted only so existing calls that pass a DataFrame keep working.

    Raises:
        Re-raises whatever the cursor raises (after rolling back).
    """
    # rev_centers maps name -> id, while the table columns are (id, name)
    rows = [(rev_center_id, name) for name, rev_center_id in rev_centers.items()]

    try:
        cursor.executemany("INSERT INTO rev_center (id, name) VALUES (%s, %s)", rows)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    
populate_rev_center(sales_df)
                

In [377]:
def populate_checks(df):
    """Insert one ``checks`` row per row of ``df``.

    The rows are built column-wise (instead of ``df.iloc[row][col]`` per cell) and
    written in a single transaction: one commit at the end, or a rollback if any
    insert fails, so a failure leaves neither a partial load nor an aborted
    transaction on the shared connection.

    Args:
        df: DataFrame indexed by date with the columns ``check_id``,
            ``revenue_center`` and ``net_sales``.

    Raises:
        KeyError: if a ``revenue_center`` value is missing from ``rev_centers``.
        Re-raises whatever the cursor raises (after rolling back).
    """
    rows = [
        (check_id, check_date, rev_centers[revenue_center], net_sales)
        for check_id, check_date, revenue_center, net_sales in zip(
            df['check_id'],
            pd.to_datetime(df.index),
            df['revenue_center'],
            df['net_sales'],
        )
    ]

    try:
        cursor.executemany(
            "INSERT INTO checks (check_id, date, rev_center_id, net_sales) VALUES (%s, %s, %s, %s)",
            rows,
        )
        conn.commit()
    except Exception:
        conn.rollback()
        raise
        
populate_checks(sales_df)

In [420]:
def populate_reservations(df):
    """Insert one ``reservations`` row per row of ``df``.

    The row id is the index date formatted as ``YYYYMMDD`` and passed as an integer,
    matching the INTEGER ``id`` column. Rows are built column-wise and written in a
    single transaction: one commit at the end, or a rollback if any insert fails.

    Args:
        df: DataFrame with a datetime index and the columns ``inside_covers``,
            ``outside_covers``, ``reserved_covers``, ``walkin_covers``,
            ``waitlist_covers``, ``no_show_covers`` and ``no_show_parties``.
            Other columns are ignored.

    Raises:
        KeyError: if one of the expected columns is missing.
        Re-raises whatever the cursor raises (after rolling back).
    """
    cover_columns = [
        'inside_covers', 'outside_covers', 'reserved_covers', 'walkin_covers',
        'waitlist_covers', 'no_show_covers', 'no_show_parties',
    ]

    rows = [
        (int(day.strftime('%Y%m%d')), pd.to_datetime(day), *covers)
        for day, covers in zip(df.index, df[cover_columns].itertuples(index=False, name=None))
    ]

    try:
        cursor.executemany("""
            INSERT INTO reservations (id, date, inside_covers, outside_covers, reserved_covers,
            walkin_covers, waitlist_covers, no_show_covers, no_show_parties) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            rows)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
        
populate_reservations(dfr)

In [96]:
def populate_weather(df):
    """Insert one ``weather`` row per row of ``df``.

    ``date_id`` is taken from an ``id`` column when ``df`` has one. The weather CSV
    loaded in this notebook has no such column, so in that case the id is derived
    from the index date as the integer ``YYYYMMDD`` instead of raising ``KeyError``.
    NOTE(review): the derived format assumes date_id follows the same convention
    as reservations.id — confirm.

    Rows are built column-wise and written in a single transaction: one commit at
    the end, or a rollback if any insert fails.

    Args:
        df: DataFrame indexed by date with the columns ``apparent_temperature``,
            ``humidity``, ``precip_intensity_max``, ``precip_max_time``,
            ``precip_prob``, ``precip_type``, ``pressure``, ``summary`` and
            ``temperature`` (and optionally ``id``). Other columns are ignored.

    Raises:
        KeyError: if one of the required weather columns is missing.
        Re-raises whatever the cursor raises (after rolling back).
    """
    weather_columns = [
        'apparent_temperature', 'humidity', 'precip_intensity_max', 'precip_max_time',
        'precip_prob', 'precip_type', 'pressure', 'summary', 'temperature',
    ]

    dates = pd.to_datetime(df.index)
    if 'id' in df.columns:
        date_ids = list(df['id'])
    else:
        date_ids = [int(day.strftime('%Y%m%d')) for day in dates]

    rows = [
        (day, date_id, *values)
        for day, date_id, values in zip(
            dates, date_ids, df[weather_columns].itertuples(index=False, name=None))
    ]

    try:
        cursor.executemany("""
            INSERT INTO weather (date, date_id, apparent_temperature, humidity, precip_intensity_max, precip_max_time,
            precip_prob, precip_type, pressure, summary, temperature) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            rows)
        conn.commit()
    except Exception:
        conn.rollback()
        raise

populate_weather(dfw)
        

In [98]:
# Close the database connection opened by connect(); conn and cursor cannot be used after this
conn.close()