In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Show the names of the matplotlib style sheets that can be passed to plt.style.use().
plt.style.available

In [None]:
# Select the 'bmh' style sheet for the figures drawn in this notebook.
plt.style.use('bmh')

In [None]:
from datetime import datetime

In [None]:
# Import from the public IPython.display module; pulling these names from
# IPython.core.display is deprecated.
from IPython.display import HTML, display

# Notebook-wide stylesheet: widens the page container, switches text and
# output areas to the Fira Mono web font, and restyles the HTML tables pandas
# renders (class `dataframe`).
# Header cells are split with the `:not(:empty)` / `:empty` pseudo-classes so
# labelled headers and blank corner cells can be styled differently.
CSS = """
@import url("https://fonts.googleapis.com/css?family=Fira Mono");

.container {
    width: 97% !important;
    font-size: 100%;
}

span {
    font-family: 'Fira Mono'; color: black;
}

div.output_area pre {
    font-family: 'Fira Mono'; font-size:100%; color:blue;
}

table.dataframe {
    border-collapse: collapse;
    border: none;
}
table.dataframe tr {
    border: none;
}
table.dataframe td, table.dataframe th {
    margin: 0;
    border: 2px solid #ccf;
    background-color: #f4f4ff;
    padding-left: 0.25em;
    padding-right: 0.25em;
}
table.dataframe th:not(:empty) {
    border: 2px solid #ccf;
    background-color: #f4f4ff;
    text-align: center;
    font-weight: normal;   
}
table.dataframe tr:nth-child(2) th:empty {
    border-left: none;
    border-right: 2px dashed #888;
}
table.dataframe td {
    border: 2px solid #ccf;
    background-color: #f4f4ff;
}
"""

# Last expression of the cell, so the <style> element is rendered into the page.
HTML(f'<style>{CSS}</style>')

In [None]:
# Date window for the generated transactions (handed to np.arange as start/stop).
start_date = '2010-01-01'
end_date = '2021-01-01'

# Products a transaction can refer to; sampled without explicit weights.
product_list = [
    'television',
    'book',
    'hoodie',
    'sandals',
    'kale chips',
    'rug',
    'cutlery set',
    'mouse pad',
    'toy train',
    'gravel',
]

# Locations and their sampling weights, paired by position.
location_list = ['DE', 'CT', 'NY', 'NJ']
location_prob = [0.18, 0.23, 0.25, 0.34]

# Employees and their sampling weights, paired by position.
employee_list = ['Sam', 'Leslie', 'Jes', 'Pat', 'Jo']
employee_prob = [0.3, 0.3, 0.05, 0.2, 0.15]

In [None]:
# Capture the run timestamp once; the data-building cell reuses it for the
# `doc_date` column and as the cutoff for the `actuals` flag.
current_date = datetime.now()
print(current_date)

In [None]:
# Number of synthetic rows to generate (used as `size` for every random draw).
N = 37_200

In [None]:
# Dedicated, seeded generator so the synthetic data is identical on every run.
# The generator has to be kept and drawn from: merely constructing a
# RandomState does not seed the module-level np.random.* functions.
rng = np.random.RandomState(seed=7)

# One row per synthetic transaction; every column is an independent draw of size N.
df = pd.DataFrame(
    {
        # Uniformly chosen day in [start_date, end_date).
        'trans_date': rng.choice(np.arange(start_date, end_date, dtype='datetime64[D]'), size=N),
        # Weighted categorical draws (weights pair with the lists by position).
        'location': rng.choice(location_list, size=N, p=location_prob),
        'employee': rng.choice(employee_list, size=N, p=employee_prob),
        # Unweighted draw over the product catalogue.
        'product': rng.choice(product_list, size=N),
        # Integer hours from 1 through 24 inclusive.
        'sales_hrs': rng.choice(np.arange(1, 25, 1), size=N),
        # Normally distributed totals centred on 100 with standard deviation 2.
        'sales_tot': rng.normal(loc=100, scale=2, size=N),
    }
)

# Stamp every row with the time the notebook was run (stored as text).
df['doc_date'] = current_date.strftime("%Y-%m-%d %H:%M:%S")

# The comparison already yields a boolean column; no np.where wrapper needed.
df['actuals'] = df['trans_date'] <= current_date

# Scale all 2015 totals down to 82% of their drawn value.
in_2015 = df['trans_date'].dt.year == 2015
df.loc[in_2015, 'sales_tot'] *= .82

# Zero out Jes's hours for transactions dated 2012-01-01 through 2014-06-01.
jes_window = (df['employee'] == 'Jes') & df['trans_date'].between("2012-01-01", "2014-06-01")
df.loc[jes_window, 'sales_hrs'] = 0

# Drop any row containing a missing value (rebinding instead of inplace=True
# keeps the cell re-runnable and chain-friendly).
df = df.dropna(axis=0)

In [None]:
# Overview of the generated frame: column dtypes and non-null counts.
df.info()

In [None]:
# Preview the first rows of the frame (head() defaults to five).
df.head()

In [None]:
# Show one randomly picked row (sample() defaults to n=1); no random_state is
# passed, so the row shown is not pinned by this cell.
df.sample()

In [None]:
# Descriptive statistics, using describe()'s default column selection.
df.describe()

In [None]:
# Count rows per value of the `actuals` flag; dropna=False keeps a bucket for
# missing values should any exist.
df['actuals'].value_counts(dropna=False)

In [None]:
# Share of rows per `actuals` value, drawn as a pie chart.
actuals_counts = df['actuals'].value_counts(dropna=False)
actuals_counts.plot(kind='pie');

In [None]:
# Number of rows per employee; title and axis labels make the figure
# readable on its own.
ax = df['employee'].value_counts(dropna=False).plot.bar(rot=0)
ax.set(title='Transactions per employee', xlabel='employee', ylabel='transactions');

In [None]:
# Number of rows per product; title and axis labels make the figure
# readable on its own.
ax = df['product'].value_counts(dropna=False).plot.bar(rot=0)
ax.set(title='Transactions per product', xlabel='product', ylabel='transactions');

In [None]:
# Histogram of the per-transaction sales totals; title and x label make the
# figure readable on its own.
ax = df['sales_tot'].plot.hist(grid=False)
ax.set(title='Distribution of sales_tot', xlabel='sales_tot');

In [None]:
# Box plot of `sales_hrs`, one box per employee.
df.boxplot(column='sales_hrs', by='employee');

In [None]:
# Daily totals of the numeric columns. numeric_only=True is explicit because
# pandas >= 2.0 defaults to numeric_only=False, which would also aggregate the
# text columns (location, employee, product, doc_date).
df.groupby('trans_date').sum(numeric_only=True)

In [None]:
# Plot the daily totals of the numeric columns on one axis. numeric_only=True
# keeps the text columns out of the aggregation on pandas >= 2.0, where the
# default became numeric_only=False.
df.groupby('trans_date').sum(numeric_only=True).plot();

In [None]:
# Daily totals again, one subplot per numeric column with a thin line.
# numeric_only=True keeps the text columns out of the aggregation on
# pandas >= 2.0, where the default became numeric_only=False.
df.groupby('trans_date').sum(numeric_only=True).plot(subplots=True, figsize=(8,10), lw=.5);

In [None]:
# Yearly totals of the numeric columns, one subplot per column.
# numeric_only=True keeps the text columns out of the aggregation on
# pandas >= 2.0, where the default became numeric_only=False.
# NOTE(review): newer pandas releases prefer the alias 'YE' over 'Y'; 'Y' is
# kept here so the cell still runs on older versions.
(
    df.groupby(pd.Grouper(key='trans_date', freq='Y'))
    .sum(numeric_only=True)
    .plot(subplots=True, marker='o', markerfacecolor='k', lw=3, ls='--', figsize=(8,10))
);

In [None]:
# Yearly `sales_tot` per product: group by (year, product), sum, pivot the
# products into columns, then draw one subplot per product.
(
    df
    .groupby([pd.Grouper(key='trans_date', freq='Y'), 'product'])['sales_tot']
    .sum()
    .unstack('product')
    .plot(
        subplots=True,
        marker='o',
        markerfacecolor='k',
        lw=3,
        ls='--',
        figsize=(8,16),
    )
);