# Setup

In [1]:
# import pandas, numpy and additional dfxl functions
from piper import piper
from piper.verbs import *

piper version 0.0.7, last run: Friday, 26 February 2021 14:55:58


# Import CSV and 'clean'

In [2]:
f = 'inputs/JDE selected order data.tsv'
df = pd.read_csv(f, sep='\t', parse_dates=['orderdate'])
head(df, 2, info=False)

Unnamed: 0,co#,ordertype,ordertypedesc,order,orderdate,ordercount,linecount,shippedorders,shippedlines,invoicedorders,invoicedlines,sales_(eur)
0,8300,DC,Deemed Commission Invoice,20000387,2020-05-19,1,114,1,114,1,114,14973
1,8300,SA,Sample Order,20072588,2020-05-19,1,1,1,1,1,1,0


## Review column data meta information

In [3]:
%piper info(df) >> head()

Dataframe with (rows, cols) (8179, 12) consumes 1.64 Mb
12 rows, 6 columns


Unnamed: 0,columns,data_type,unique,isna,isnull,total_count
0,co#,int64,3,0,0,8179
1,ordertype,object,20,0,0,8179
2,ordertypedesc,object,20,0,0,8179
3,order,int64,8018,0,0,8179


## Cleaning column names

In [4]:
columns(df, astype='text')

"['co#', 'ordertype', 'ordertypedesc', 'order', 'orderdate', 'ordercount', 'linecount', 'shippedorders', 'shippedlines', 'invoicedorders', 'invoicedlines', 'sales_(eur)']"

In [5]:
f = lambda x: re.sub('(?=(desc|type|count|orders|lines|date))', '_', x)
# f = lambda x: re.sub('(?<=order)(?=[^s])', '_', x)

y = lambda x: re.sub(r'[()]', '', x)

new_cols = list(map(f, df.columns.tolist()))
new_cols = list(map(y, new_cols))
', '.join(new_cols)

'co#, order_type, order_type_desc, order, order_date, order_count, line_count, shipped_orders, shipped_lines, invoiced_orders, invoiced_lines, sales_eur'

## Create numeric company code, merge(link) with jde_countries

In [6]:
head(df, 2)

8179 rows, 12 columns


Unnamed: 0,co#,ordertype,ordertypedesc,order,orderdate,ordercount,linecount,shippedorders,shippedlines,invoicedorders,invoicedlines,sales_(eur)
0,8300,DC,Deemed Commission Invoice,20000387,2020-05-19,1,114,1,114,1,114,14973
1,8300,SA,Sample Order,20072588,2020-05-19,1,1,1,1,1,1,0


In [7]:
df.rename(columns={'co#': 'code'}, inplace=True)
head(df, 2)

8179 rows, 12 columns


Unnamed: 0,code,ordertype,ordertypedesc,order,orderdate,ordercount,linecount,shippedorders,shippedlines,invoicedorders,invoicedlines,sales_(eur)
0,8300,DC,Deemed Commission Invoice,20000387,2020-05-19,1,114,1,114,1,114,14973
1,8300,SA,Sample Order,20072588,2020-05-19,1,1,1,1,1,1,0


In [8]:
%%piper

df2 <- df 
>> assign(code=lambda x: x.code.astype(int))
# >> where("code == 8300")

In [9]:
companies = count(df2, 'code')
companies

Unnamed: 0,code,n
0,8300,4125
1,8450,3426
2,8480,628


In [10]:
jde_countries = pd.read_excel('inputs/jde_countries.xlsx')
jde_countries.rename(columns={'company': 'code', 'alpha_name': 'country'}, inplace=True)
head(jde_countries)

491 rows, 2 columns


Unnamed: 0,code,country
0,1,Item's restrictions
1,22,EURO VENDOR TO LINK EURO BASIS
2,36,***DO NOT USE*BHC PR MEDIVAC
3,47,BAXTER HCR CORP RENAL MOUNTAIN HOME


In [11]:
%%piper

companies <- companies >> inner_join(jde_countries, on='code')

# Export data in Excel

## Filter & create sheet, dataframes using dict comprehension

In [12]:
dataframes = {r['country']: r['code'] for _, r in companies.iterrows()}
for country, df in dataframes.items():
    logger.info(f'Country: {country}  \tshape (rows, cols): {df2.shape}')

dx = pd.DataFrame([dataframes])
dx

Country: BAXTER SAS (FRANCE)  	shape (rows, cols): (8179, 12)
Country: BAXTER SL SPAIN  	shape (rows, cols): (8179, 12)
Country: BAXTER MEDICO FARMACEUTICA LDA  	shape (rows, cols): (8179, 12)


Unnamed: 0,BAXTER SAS (FRANCE),BAXTER SL SPAIN,BAXTER MEDICO FARMACEUTICA LDA
0,8300,8450,8480


In [13]:
WorkBook('outputs/JDE Orders (May to July 2020)', date_format=False, sheets=dx);

Workbook: outputs/20210226_JDE Orders (May to July 2020).xlsx
<< mult-sheet mode >>
Sheet (range): sheet1 ($A$1:$C$2)
Completed.
