## Library Imports

In [1]:
# data
from pathlib import Path
import sqlite3
from datetime import datetime
import datetime as dt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

# offline charting
import plotly.io as pio

pio.renderers.default = "notebook"
pio.kaleido.scope.default_format = "svg"

# visuals

# date and time

# sql

# system

In [2]:
%%html
<style>
td { font-size: 18px; }
</style>

In [3]:
# Build (or refresh) the SQLite database that backs the analysis.
# Make sure the db file exists, then open a connection and a cursor on it.
Path("coffee.db").touch()
connection = sqlite3.connect("coffee.db")

# cursor - database iterator
c = connection.cursor()

# create a sample table to verify
# c.execute('''CREATE TABLE sample_table (u_id int, email text)''')

# load the raw CSV exports into dataframes
customers = pd.read_csv("customer.csv")
sales = pd.read_csv("2019_04_sales_reciepts.csv")

# Write each dataframe to its own table.
# if_exists="replace" keeps this cell idempotent: with "append", every re-run
# of the notebook added a second copy of all rows to both tables, which in
# turn multiplied the rows returned by the join below.
customers.to_sql(
    "customers", connection, if_exists="replace", index=False
)  # 2246 results
sales.to_sql(
    "sales", connection, if_exists="replace", index=False
)  # 49894 results

# verify
# fetchall returns an array of tuples
# c.execute('''SELECT *  FROM customers''').fetchall()

# verify second table
# c.execute('''SELECT *  FROM sales''').fetchall()

# join our two tables (every sale, with customer details where a match exists)
c.execute(
    """SELECT * FROM sales s LEFT JOIN customers c ON c.customer_id = s.customer_id"""
)
# c.fetchall()

<sqlite3.Cursor at 0x7f6cc2b21ab0>

In [4]:
# Pull the sales/customers LEFT JOIN straight into a dataframe.
join_query = """SELECT * FROM sales s LEFT JOIN customers c ON c.customer_id = s.customer_id"""
coffee = pd.read_sql(join_query, connection)

# SELECT * returns the join key from both tables; keep only the first
# occurrence of every column label.
repeated_label = coffee.columns.duplicated()
coffee = coffee.loc[:, ~repeated_label]

In [5]:
# Replace every missing value in the joined frame with 0 (all columns alike).
coffee = coffee.fillna(value=0)

In [6]:
# Shorter, analysis-friendly names for some of the raw column headers.
column_aliases = {
    "sales_outlet_id": "shop_id",
    "instore_yn": "instore_purchase",
    "promo_item_yn": "promo_item",
    "customer_first-name": "customer_name",
    "customer_email": "email",
    "loyalty_card_number": "loyalty_id",
}
coffee = coffee.rename(columns=column_aliases)
# coffee.columns

Index(['transaction_id', 'transaction_date', 'transaction_time', 'shop_id',
       'staff_id', 'customer_id', 'instore_purchase', 'order', 'line_item_id',
       'product_id', 'quantity', 'line_item_amount', 'unit_price',
       'promo_item', 'home_store', 'customer_name', 'email', 'customer_since',
       'loyalty_id', 'birthdate', 'gender', 'birth_year'],
      dtype='object')

In [7]:
# Recode the "Y"/"N" flag as booleans. Any value that is neither "Y" nor "N"
# is left as-is by the loop and only converted by the final astype(bool).
flag_column = "instore_purchase"
for raw_value, as_bool in (("Y", True), ("N", False)):
    coffee.loc[coffee[flag_column] == raw_value, flag_column] = as_bool
coffee[flag_column] = coffee[flag_column].astype(bool)

In [8]:
# Parse the date/time columns into pandas datetimes.
for date_column in ("transaction_date", "transaction_time", "birthdate"):
    coffee[date_column] = pd.to_datetime(coffee[date_column])

# special formatting for year: cast to int, swap the 0 filler for a
# placeholder year, then parse the number as a four-digit year
birth_year_int = coffee["birth_year"].astype(int)
birth_year_int = birth_year_int.where(
    birth_year_int != 0, 1900
)  # placeholder for Missing Values
coffee["birth_year"] = pd.to_datetime(birth_year_int, format="%Y")

# verify
#coffee[["transaction_date", "transaction_time",
#        "birthdate", "birth_year"]].head(5)

In [9]:
# Misc cleaning - store home_store as an integer column
coffee = coffee.astype({"home_store": int})

In [10]:
# Stores
# coffee["shop_id"].unique()

In [11]:
# Split the transactions by channel. instore_purchase was cast to bool in an
# earlier cell, so the column itself serves as the row mask.
is_instore = coffee["instore_purchase"]
coffee_inperson = coffee[is_instore]
coffee_online = coffee[~is_instore]


# Per-store subsets: all rows for the shop, then its in-person / online halves.
store3 = coffee[coffee["shop_id"] == 3]
store3_inperson = store3[store3["instore_purchase"]]
store3_online = store3[~store3["instore_purchase"]]

store5 = coffee[coffee["shop_id"] == 5]
store5_inperson = store5[store5["instore_purchase"]]
store5_online = store5[~store5["instore_purchase"]]

store8 = coffee[coffee["shop_id"] == 8]
store8_inperson = store8[store8["instore_purchase"]]
store8_online = store8[~store8["instore_purchase"]]

In [12]:
# inperson transaction_date
# 2x2 grid of weekday histograms for in-person transactions: one panel per
# store plus one for all stores combined.
dvolume_ip_fig = make_subplots(
    rows=2,
    cols=2,
    start_cell="top-left",
    subplot_titles=("Store 3", "Store 5", "Store 8", "All Stores"),
)

# (dataframe, grid row, grid column), listed in the same order as the titles
for inperson_sales, grid_row, grid_col in (
    (store3_inperson, 1, 1),
    (store5_inperson, 1, 2),
    (store8_inperson, 2, 1),
    (coffee_inperson, 2, 2),
):
    dvolume_ip_fig.add_trace(
        go.Histogram(x=inperson_sales["transaction_date"].dt.day_name()),
        row=grid_row,
        col=grid_col,
    )

# Title plus fixed y ranges, applied in a single call (the y ranges used to be
# set twice with identical values). The three store panels share 0-1800 and
# the combined panel uses 0-4500. Assigning the result keeps the cell from
# echoing the figure.
dvolume_ip_fig = dvolume_ip_fig.update_layout(
    title_text="Store In-Person Daily Volume",
    title_x=0.5,
    yaxis=dict(range=[0, 1800]),
    yaxis2=dict(range=[0, 1800]),
    yaxis3=dict(range=[0, 1800]),
    yaxis4=dict(range=[0, 4500]),
)

# dvolume_ip_fig.show("notebook")  # interactive nbviewer
# dvolume_ip_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [13]:
# online transaction_date
# 2x2 grid of weekday histograms for online transactions: one panel per store
# plus one for all stores combined.
dvolume_ol_fig = make_subplots(
    rows=2,
    cols=2,
    start_cell="top-left",
    subplot_titles=("Store 3", "Store 5", "Store 8", "All Stores"),
)

# (dataframe, grid row, grid column), listed in the same order as the titles
for online_sales, grid_row, grid_col in (
    (store3_online, 1, 1),
    (store5_online, 1, 2),
    (store8_online, 2, 1),
    (coffee_online, 2, 2),
):
    weekday_names = online_sales["transaction_date"].dt.day_name()
    dvolume_ol_fig.add_trace(
        go.Histogram(x=weekday_names), row=grid_row, col=grid_col
    )

# Title and fixed y ranges; assigning the result keeps the cell from echoing
# the figure.
dvolume_ol_fig = dvolume_ol_fig.update_layout(
    title_text="Store Online Daily Volume",
    title_x=0.5,
    yaxis=dict(range=[0, 1800]),
    yaxis2=dict(range=[0, 1800]),
    yaxis3=dict(range=[0, 1800]),
    yaxis4=dict(range=[0, 4500]),
)

# dvolume_ol_fig.show("notebook")  # interactive nbviewer
# dvolume_ol_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [14]:
# inperson transaction_time
# 2x2 grid of hour-of-day histograms for in-person transactions: one panel per
# store plus one for all stores combined.
hvolume_ip_fig = make_subplots(
    rows=2,
    cols=2,
    start_cell="top-left",
    subplot_titles=("Store 3", "Store 5", "Store 8", "All Stores"),
)

# (dataframe, grid row, grid column), listed in the same order as the titles
for inperson_sales, grid_row, grid_col in (
    (store3_inperson, 1, 1),
    (store5_inperson, 1, 2),
    (store8_inperson, 2, 1),
    (coffee_inperson, 2, 2),
):
    hvolume_ip_fig.add_trace(
        go.Histogram(x=inperson_sales["transaction_time"].dt.hour),
        row=grid_row,
        col=grid_col,
    )

# dtick=1 puts a tick on every hour. It is now set on all four x axes; the
# "All Stores" panel (xaxis4) was previously left on automatic ticks, unlike
# the three store panels it sits next to.
hvolume_ip_fig = hvolume_ip_fig.update_layout(
    title_text="Store In-Person Hourly Volume",
    title_x=0.5,
    xaxis={"dtick": 1},
    xaxis2={"dtick": 1},
    xaxis3={"dtick": 1},
    xaxis4={"dtick": 1},
    yaxis=dict(range=[0, 1200]),
    yaxis2=dict(range=[0, 1200]),
    yaxis3=dict(range=[0, 1200]),
    yaxis4=dict(range=[0, 3500]),
)


# hvolume_ip_fig.show("notebook")  # interactive nbviewer
# hvolume_ip_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [15]:
# online transaction_time
# 2x2 grid of hour-of-day histograms for online transactions: one panel per
# store plus one for all stores combined.
hvolume_ol_fig = make_subplots(
    rows=2,
    cols=2,
    start_cell="top-left",
    subplot_titles=("Store 3", "Store 5", "Store 8", "All Stores"),
)

# (dataframe, grid row, grid column), listed in the same order as the titles
for online_sales, grid_row, grid_col in (
    (store3_online, 1, 1),
    (store5_online, 1, 2),
    (store8_online, 2, 1),
    (coffee_online, 2, 2),
):
    hvolume_ol_fig.add_trace(
        go.Histogram(x=online_sales["transaction_time"].dt.hour),
        row=grid_row,
        col=grid_col,
    )

# dtick=1 puts a tick on every hour. It is now set on all four x axes; the
# "All Stores" panel (xaxis4) was previously left on automatic ticks, unlike
# the three store panels it sits next to.
hvolume_ol_fig = hvolume_ol_fig.update_layout(
    title_text="Store Online Hourly Volume",
    title_x=0.5,
    xaxis={"dtick": 1},
    xaxis2={"dtick": 1},
    xaxis3={"dtick": 1},
    xaxis4={"dtick": 1},
    yaxis=dict(range=[0, 1200]),
    yaxis2=dict(range=[0, 1200]),
    yaxis3=dict(range=[0, 1200]),
    yaxis4=dict(range=[0, 3500]),
)


# hvolume_ol_fig.show("notebook")  # interactive nbviewer
# hvolume_ol_fig.show(renderer="svg", width=1000, height=800) # svg static for github

## Single Store Comparison - Store 3

In [16]:
# store 3 all comparison
# Left column: hour-of-day histograms; right column: weekday histograms.
# Top row: in-person transactions; bottom row: online transactions.
store3_panel_titles = (
    "In-Person Hourly",
    "In-Person Daily Volume",
    "Online Hourly Volume",
    "Online Daily Volume",
)
store3_all = make_subplots(
    rows=2, cols=2, start_cell="top-left", subplot_titles=store3_panel_titles
)

# (x values, grid row, grid column), listed in the same order as the titles
store3_panels = (
    (store3_inperson["transaction_time"].dt.hour, 1, 1),
    (store3_inperson["transaction_date"].dt.day_name(), 1, 2),
    (store3_online["transaction_time"].dt.hour, 2, 1),
    (store3_online["transaction_date"].dt.day_name(), 2, 2),
)
for panel_values, grid_row, grid_col in store3_panels:
    store3_all.add_trace(
        go.Histogram(x=panel_values), row=grid_row, col=grid_col
    )

# Only the hourly panels (xaxis / xaxis3, yaxis / yaxis3) get hourly ticks
# and a fixed y range. Assigning the result keeps the cell from echoing the
# figure.
store3_all = store3_all.update_layout(
    title_text="Store 3",
    title_x=0.5,
    xaxis={"dtick": 1},
    xaxis3={"dtick": 1},
    yaxis=dict(range=[0, 1200]),
    yaxis3=dict(range=[0, 1200]),
)


# store3_all.show("notebook")  # interactive nbviewer
# store3_all.show(renderer="svg", width=1000, height=800) # svg static for github

## Staff contribution ranking
<a id='staff_rank'></a>

In [17]:
# Load the staff roster.
staff = pd.read_csv("staff.csv")

# The columns at positions 6 and 7 are malformed/empty in the CSV; drop them.
staff = staff.drop(columns=staff.columns[[6, 7]])

# Build a single "First Last" display name ...
staff = staff.assign(staff_name=staff["first_name"] + " " + staff["last_name"])
# ... then drop the columns at positions 1 and 2, which are now redundant
# (presumably first_name and last_name; confirm against staff.csv).
staff = staff.drop(columns=staff.columns[[1, 2]])

# Attach the staff details to every transaction. No `on=` is given, so pandas
# joins on whatever column names the two frames share, using its default
# inner join.
coffee = coffee.merge(staff)
# coffee.columns

In [18]:
# get all staff ids
# coffee["staff_id"].unique()

## Finding Total Sales 

In [19]:
# Total sales per staff member, computed as sum(quantity x unit_price).
# Results are kept in three shapes because later cells use them directly:
#   names / total_sales - parallel lists (one entry per staff id)
#   pie                 - mapping of staff name -> total sales
total_sales = []
names = []
pie = {}

# One pass over the frame instead of re-filtering it for every staff id.
# sort=False yields the staff ids in order of first appearance in `coffee`.
for _, staff_rows in coffee.groupby("staff_id", sort=False):
    # every row of a group carries the same staff id; take the name from the first
    staff_member = staff_rows["staff_name"].iloc[0]

    # revenue for this staff member
    staff_total = (staff_rows["unit_price"] * staff_rows["quantity"]).sum()

    names.append(staff_member)
    total_sales.append(staff_total)
    pie[staff_member] = staff_total


# convert into a dataframe
total_sales_by_staff = pd.DataFrame(
    {"staff_name": names, "total_sales": total_sales})
# verify
# total_sales_by_staff

In [20]:
# Bar chart of total sales for every staff member, coloured by the total.
all_staff_bar = go.Bar(
    x=total_sales_by_staff["staff_name"],
    y=total_sales_by_staff["total_sales"],
    marker=dict(
        color=total_sales_by_staff["total_sales"], colorscale="agsunset"
    ),
)

total_sales_fig = make_subplots(rows=1, cols=1)
total_sales_fig.add_trace(all_staff_bar)

# "total descending" orders the bars from the highest total to the lowest.
# Assigning the result keeps the cell from echoing the figure.
total_sales_fig = total_sales_fig.update_layout(
    title_text="Total Sales Per Staff Member",
    title_x=0.5,
    barmode="stack",
    xaxis={"categoryorder": "total descending"},
)

# total_sales_fig.show("notebook")  # interactive nbviewer
# total_sales_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [21]:
# Top five staff by total sales.
# total_sales_by_staff is in order of first appearance, not ranked, so it has
# to be sorted (highest first) before taking five rows; slicing the unsorted
# frame just returned the first five staff members encountered.
top_sales = total_sales_by_staff.sort_values(
    by="total_sales", ascending=False
).head(5)

top_staff_fig = make_subplots(rows=1, cols=1)

top_staff_fig.add_trace(
    go.Bar(
        x=top_sales["staff_name"],
        y=top_sales["total_sales"],
        marker={
            "color": top_sales["total_sales"],
            "colorscale": "agsunset",
        },
    )
)

# Assigning the result keeps the cell from echoing the figure.
top_staff_fig = top_staff_fig.update_layout(
    title_text="Top 5 Staff By Total Sales",
    title_x=0.5,
    barmode="stack",
    xaxis={"categoryorder": "total descending"},
)

# top_staff_fig.show("notebook")  # interactive nbviewer
# top_staff_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [22]:
# Bottom five staff by total sales: sort ascending, keep the first five rows.
last_sales = total_sales_by_staff.sort_values(by="total_sales")
bottom_names = last_sales["staff_name"].iloc[0:5]
bottom_totals = last_sales["total_sales"].iloc[0:5]

bottom_staff_bar = go.Bar(
    x=bottom_names,
    y=bottom_totals,
    marker=dict(color=bottom_totals, colorscale="agsunset"),
)

bottom_staff_fig = make_subplots(rows=1, cols=1)
bottom_staff_fig.add_trace(bottom_staff_bar)

# Assigning the result keeps the cell from echoing the figure.
bottom_staff_fig = bottom_staff_fig.update_layout(
    title_text="Bottom 5 Staff By Total Sales",
    title_x=0.5,
    barmode="stack",
    xaxis={"categoryorder": "total descending"},
)

# bottom_staff_fig.show("notebook")  # interactive nbviewer
# bottom_staff_fig.show(renderer="svg", width=1000, height=800) # svg static for github

In [23]:
# Pie chart of each staff member's share of total sales, built from the
# parallel `names` / `total_sales` lists.
# (plotly express equivalent, for reference: px.pie(df, values=..., names=..., title=...))

staff_percents_fig = make_subplots(rows=1, cols=1)

sales_share_pie = go.Pie(values=total_sales, labels=names)
staff_percents_fig.add_trace(sales_share_pie)

# slices are labelled with their percentage; hover shows the name and raw total
staff_percents_fig.update_traces(hoverinfo="label+value", textinfo="percent")

# Assigning the result keeps the cell from echoing the figure.
pie_layout = dict(
    title_text="Percentage of Sales By Staff Member",
    title_x=0.5,
    barmode="stack",
    xaxis={"categoryorder": "total descending"},
)
staff_percents_fig = staff_percents_fig.update_layout(**pie_layout)

# staff_percents_fig.show("notebook")  # interactive nbviewer