In [1]:
from piper import piper
from piper.verbs import *
from piper.pandas import *

In [2]:
# https://youtu.be/FI4HkCzMaIk

# Data import

## Read multiple Excel sheets from a workbook

In [4]:
# Workbook to import, given as a path relative to the working directory.
xl_file = 'inputs/Current_Workbook_Append_XelPlus.xlsx'

# With return_type='list' this call yields the workbook's sheet names (see the
# logged list below); the next cell uses the same function to load the data.
sheets = read_excel_sheets(xl_file, return_type='list')
# NOTE(review): `logger` is not imported explicitly anywhere in this notebook;
# presumably it arrives through one of the star imports in the first cell -
# confirm.
logger.info(f'Sheets: {sheets}')

Sheets: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Report']


In [5]:
# Load every sheet except the last one in `sheets` ('Report' in the logged
# sheet list) into one DataFrame.
df = read_excel_sheets(
    xl_file,
    sheets=sheets[:-1],
    usecols='A:F',
    skiprows=2,
    include_sheet=False,
    info=False,
)
# Preview the first rows of the combined frame.
df.pipe(head)

329 rows, 6 columns


Unnamed: 0,Sales Document,Customer Name,Article Description,Document Date,Quantity,Sales Value
0,71102,Dellicia,Unisex tank top (white),2020-01-11,1,1458.4
1,71107,Dellicia,Men type T (Brown) simple,2020-01-01,341,4433.28
2,71108,Erma,Men type T simple (white),2020-01-21,265,3180.4
3,71110,Erma,Men dress shirt (black),2020-01-11,351,8775.34


## Filter and add calculated columns (assign)

In [6]:
# Same pipeline as the `%%piper` form, spelled as a pandas chain: each
# `>> verb(...)` step becomes `.pipe(verb, ...)`, i.e. verb(frame, ...).
df = (
    df
    # Column headings such as 'Sales Document' become 'sales_document'.
    .pipe(clean_columns)
    # Discard the 'Grand Total' rows.
    .pipe(where, "sales_document != 'Grand Total'")
    .pipe(
        assign,
        # Forward-fill missing document / customer values from the row above.
        sales_document=lambda x: x.sales_document.ffill(),
        customer_name=lambda x: x.customer_name.ffill(),
        # Year-month label (e.g. '2020-01') used by the monthly summaries.
        month=lambda x: x.document_date.dt.strftime('%Y-%m'),
    )
    # Renumber rows 0..n-1 after the filter.
    .reset_index(drop=True)
)

## Check for duplicates

In [7]:
# Flag rows that share the same customer / article combination (the result
# carries a `duplicate` column) and preview the first few.
(
    df
    .pipe(duplicated, ['customer_name', 'article_description'])
    .pipe(head)
)

322 rows, 8 columns


Unnamed: 0,duplicate,sales_document,customer_name,article_description,document_date,quantity,sales_value,month
20,True,71137,Dellicia,Laptop bag (black),2020-01-11,370,22200.2,2020-01
63,True,70330,Dellicia,Laptop bag (black),2020-02-22,326,19560.1,2020-02
69,True,70450,Dellicia,Laptop bag (black),2020-02-02,275,16500.4,2020-02
125,True,69706,Dellicia,Laptop bag (black),2020-03-13,397,23820.37,2020-03


## Look up values from a secondary table (lookup)

In [8]:
# Customer -> department cross-reference, used as the secondary table for the
# lookup on the sales data.
data = {
    'customer': ['Dellicia', 'Erma'],
    'department': ['Sales', 'IT'],
}
xref = pd.DataFrame.from_dict(data)
xref

Unnamed: 0,customer,department
0,Dellicia,Sales
1,Erma,IT


In [9]:
# Same pipeline as the `%%piper` form: each `>> verb(...)` step is written as
# `.pipe(verb, ...)`.
(
    df
    # Pull `department` from xref by matching customer_name to xref.customer.
    # NOTE(review): the right-hand key column `customer` stays in the result
    # next to customer_name (see the recorded output); drop it if unwanted.
    .pipe(lookup, df2=xref, left_on='customer_name', right_on='customer')
    # Show the looked-up column right after the customer it belongs to.
    .pipe(move_column, 'department', 'after', 'customer_name')
    .pipe(where, "month == '2020-02'")
    .pipe(head, 5)
)

merge function 'how' parameter defaulted to 'left'
46 rows, 9 columns


Unnamed: 0,sales_document,customer_name,department,article_description,document_date,quantity,sales_value,month,customer
46,70003,Erma,IT,Women crop top (black),2020-02-21,411,4110.08,2020-02,Erma
47,70007,Erma,IT,Women dress (black) long,2020-02-12,187,11594.4,2020-02,Erma
48,70043,Dellicia,Sales,Women crop top (black),2020-02-11,254,2540.19,2020-02,Dellicia
49,70049,Erma,IT,Smartphone case simple,2020-02-11,439,8780.27,2020-02,Erma
50,70055,Dellicia,Sales,Laptop bag (red),2020-02-12,377,22620.4,2020-02,Dellicia


## Summary: count the number of rows for each 'month'

In [10]:
# Row count per month; the recorded output ends with an 'All' row, which
# appears to come from adorn.
(
    df
    .pipe(counts, 'month', sort_values=True)
    .pipe(adorn, ignore_row_index=True)
)

Unnamed: 0,n
0,46
1,46
2,46
3,46
4,46
5,46
6,46
7,All


## Summary, count by customer

In [11]:
# Row count per customer with a 'Total' row appended.
# NOTE(review): the monthly summary cell calls `counts` while this one calls
# `count`; confirm both names are provided by the installed piper version.
df.pipe(count, 'customer_name', add_total=True)

Unnamed: 0,index,n
0,Dellicia,175
1,Erma,147
2,Total,322


## Summary: total sales value and unique sales document count by month

In [12]:
# Same pipeline as the `%%piper` form: each `>> verb(...)` step is written as
# `.pipe(verb, ...)`.
(
    df
    .groupby(['month'])
    .agg(
        total_sales=('sales_value', 'sum'),
        sales_doc_unique_count=('sales_document', 'nunique'),
    )
    # Turn the snake_case aggregate names into labels such as 'Total Sales'.
    .pipe(clean_columns, replace_char=('_', ' '), title=True)
    # Appears to add the 'All' total seen in the recorded output.
    # NOTE(review): for the unique-document row, 'All' matches the sum of the
    # monthly counts in the recorded output, so it looks like a sum rather
    # than a distinct count over all months - confirm this is intended.
    .pipe(adorn, axis='row')
    # Months across, measures down.
    .T
    # astype(int) truncates the fractional part instead of rounding, so the
    # displayed 'All' need not equal the sum of the displayed monthly values.
    .astype(int)
)

Unnamed: 0,2020-01,2020-02,2020-03,2020-04,2020-05,2020-06,2020-07,All
Total Sales,353193,344040,318961,319205,248480,381421,381421,2346723
Sales Doc Unique Count,36,45,44,44,45,36,36,286
