In [52]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sqlalchemy import create_engine, text

In [60]:
# The connection URL (which embeds the database password) is read from the
# DATABASE_URL environment variable instead of being written into the notebook.
# Expected form: postgresql://<user>:<password>@localhost:5432/final-project
database_url = os.environ.get('DATABASE_URL')
if not database_url:
    raise RuntimeError(
        'Set the DATABASE_URL environment variable, e.g. '
        'postgresql://<user>:<password>@localhost:5432/final-project'
    )
engine = create_engine(database_url)

# Load the whole sales table; the context manager returns the connection to
# the pool as soon as the read finishes.
with engine.connect() as conn:
    sales_df = pd.read_sql('SELECT * FROM sales', conn, parse_dates=['date'])

# "YYYY-MM" label used as the monthly grouping key in the cells below.
sales_df['month_year'] = sales_df['date'].dt.strftime("%Y-%m")

In [79]:
def runQuery(sql):
    """Run a raw SQL statement and return its rows as a DataFrame.

    Parameters
    ----------
    sql : str
        SQL text to execute against the module-level ``engine``.

    Returns
    -------
    pandas.DataFrame
        One row per result row; column names are taken from the result keys.
    """
    # Open the connection in a context manager so it is released after the
    # query; the rows and keys are read before the block exits.
    with engine.connect() as conn:
        result = conn.execute(text(sql))
        return pd.DataFrame(result.fetchall(), columns=result.keys())

def filter_df(df, start_date, end_date):
    """Return the rows of ``df`` whose ``date`` lies strictly between the bounds.

    Rows with ``date`` equal to ``start_date`` or ``end_date`` are excluded.
    The original index labels are kept.
    """
    in_window = (df['date'] > start_date) & (df['date'] < end_date)
    return df[in_window]

In [63]:
# Names of the offices that have at least one sale, indexed by office id.
# SELECT DISTINCT without ORDER BY returns rows in no guaranteed order, so a
# default positional index cannot be trusted to line up with office ids.
# Selecting the id and using it as the index makes
# office_names.loc[office_id, 'name'] a lookup by key.
office_names_query = '''
    SELECT DISTINCT locations.id, locations.name FROM locations
    JOIN sales ON sales.office_id = locations.id
    ORDER BY locations.id
'''
office_names = runQuery(office_names_query).set_index('id')

In [76]:
# Bounds of the analysis window, as pandas Timestamps.
start_date = pd.Timestamp('2015-10-26')
end_date = pd.Timestamp('2018-05-05')

In [80]:
# Sales restricted to the analysis window (bounds excluded by filter_df).
dff = filter_df(df=sales_df, start_date=start_date, end_date=end_date)

### Plotting stacked bar chart for number of sales per office

In [48]:
# Sorted month labels present in the filtered sales (x axis of the chart).
dates = dff['month_year'].sort_values().unique()
# Office ids present in the filtered sales.
office_ids = dff['office_id'].unique()
# Number of sales per (office_id, month_year).
sells = dff.groupby('office_id')['month_year'].value_counts()
# Preview: monthly sale counts for office 0, in month order.
sells.loc[0].sort_index().values

array([2407, 2233, 2190, 2356, 2289, 2496, 2406, 2673, 2459, 2517, 2517,
       2420, 2715, 2353, 2349, 2355, 2679, 2468, 2685, 2333, 2976, 2619,
       2358, 2484, 2530, 2743, 2608, 1907, 1816, 1796])

In [69]:
# One stacked bar trace per office. Each office's monthly counts are reindexed
# onto `dates` so every trace has exactly one value per x position; a month in
# which an office has no sales becomes 0 instead of shifting later bars.
fig = go.Figure(data=[
    go.Bar(
        name=office_names.loc[idx, 'name'],
        x=dates,
        y=sells[idx].reindex(dates, fill_value=0).values,
    )
    for idx in sorted(office_ids)
])
fig.update_layout(
    barmode='stack',
    title='Number of sales per office',
    xaxis_title='Month',
    yaxis_title='Number of sales',
)
fig.show()

### Plotting revenue per office

In [85]:
# Preview: total sale_amount per (office_id, month_year).
dff.groupby(by=['office_id', 'month_year'])['sale_amount'].agg('sum')

office_id  month_year
0          2015-11       297258301.0
           2015-12       291886847.0
           2016-01       288330546.0
           2016-02       324817246.0
           2016-03       360589217.0
                            ...     
3          2017-12       169163668.0
           2018-01        78680367.0
           2018-02        96149422.0
           2018-03        88538786.0
           2018-04        90432857.0
Name: sale_amount, Length: 120, dtype: float64

In [89]:
# Sorted month labels present in the filtered sales (x axis of the chart).
dates = dff['month_year'].sort_values().unique()
# Office ids present in the filtered sales.
office_ids = dff['office_id'].unique()
# Total sale_amount per (office_id, month_year).
revenue = dff.groupby(by=['office_id', 'month_year'])['sale_amount'].sum()
# Preview: monthly revenue for office 0, in month order.
revenue.loc[0].sort_index().values

array([2.97258301e+08, 2.91886847e+08, 2.88330546e+08, 3.24817246e+08,
       3.60589217e+08, 3.72525395e+08, 3.35436750e+08, 3.80056829e+08,
       3.22399278e+08, 3.89619815e+08, 3.89619815e+08, 3.47302312e+08,
       3.72794991e+08, 3.35921870e+08, 3.37689284e+08, 3.16948587e+08,
       4.46356376e+08, 3.39401876e+08, 4.18228725e+08, 3.16495319e+08,
       2.90981509e+08, 3.65174272e+08, 3.11467644e+08, 2.85386216e+08,
       3.25810725e+08, 4.20731522e+08, 4.72524731e+08, 3.82208715e+08,
       3.09866036e+08, 3.67908297e+08])

In [90]:
# One stacked bar trace per office. Each office's monthly revenue is reindexed
# onto `dates` so every trace has exactly one value per x position; a month in
# which an office has no sales becomes 0 instead of shifting later bars.
fig = go.Figure(data=[
    go.Bar(
        name=office_names.loc[idx, 'name'],
        x=dates,
        y=revenue[idx].reindex(dates, fill_value=0).values,
    )
    for idx in sorted(office_ids)
])
fig.update_layout(
    barmode='stack',
    title='Revenue per office',
    xaxis_title='Month',
    yaxis_title='Revenue (sum of sale_amount)',
)
fig.show()