# Setup

In [1]:
from piper.defaults import *
from piper.utils import get_config

piper v0.1.2: Friday, 09 April 2021 18:49:31


## config details

In [2]:
# Read the project settings from config.json through piper's get_config helper
config = get_config('config.json')
# Bare last expression: the notebook echoes the loaded settings
config

{'meta': {'project': 'notebook project'},
 'folders': ['inputs', 'src', 'sql', 'docs', 'outputs', 'reports'],
 'variables': {'dummy_variable': 'dummy'},
 'connections': {'location': 'connections.json'},
 'excel': {'meta': 'xl_meta.json', 'formats': 'formats.json'},
 'mail_config': {'server': 'localhost',
  'port': 25,
  'sender': 'someone@acme.com'},
 'project': 'notebook project'}

# DataFrame - Transformations

In [3]:
# Workbook that feeds every example below (path is relative to the notebook)
xl_file = "inputs/Test excel workbook.xlsx"

# df_original stays untouched; all transformations are applied to the copy, df
df_original = pd.read_excel(xl_file)
df = df_original.copy()

# Quick look: shape goes to the log, then raw header names and the first 3 rows
logger.info(df.shape)
display(df.columns.to_numpy())
df.head(3)

(12, 10)


array(['Gropuing cde_', 'Order_NBR', 'This column name is too long',
       'Second column ', 'Quantity', 'Price', 'Effective', 'Expired',
       'TranSACTion DATE', 'A delimitted LIsT'], dtype=object)

Unnamed: 0,Gropuing cde_,Order_NBR,This column name is too long,Second column,Quantity,Price,Effective,Expired,TranSACTion DATE,A delimitted LIsT
0,A100,23899001,First row,"Scally, Aidan",14,123,21.10.2015,31.12.2019,20.08.2018,123;456;789
1,A101,23899002,SECOnd Row,"McAllister, Eoin",103,432,21.10.2016,31.12.2020,20.08.2017,ss;11;33
2,A101,23899003,Thrid Row,"Tarpey, Mike",1,"3 4,32",21.10.2017,31.12.2021,20.08.2020,A;b;C


## piper functions (clean)

In [4]:
%%piper

df
>> clean_names()
>> rename(columns={'gropuing_cde': 'grp',
                   'this_column_name_is_too_long': 'column_1',
                  'second_column': 'column_2',
                  'transaction_date': 'trans_dt'})
>> across(['column_1', 'column_2'], lambda x: x.str.replace('[\,\.\s]+', ' ', regex=True),
          series_obj=True)
>> across(['column_1', 'column_2'], lambda x: x.str.title(), series_obj=True)
>> across(['effective', 'expired', 'trans_dt'], pd.to_datetime)
>> str_split('a_delimitted_list', columns=['a', 'b', 'c'], pat=';', n=2, drop=True)
>> str_split('column_2', columns=['lastname', 'firstname'], pat=' ', drop=True)
>> str_join(['firstname', 'lastname'], sep=' ', column='name')
>> assign(quantity2=lambda x: str_clean_number(x.quantity, dtype='float'))
>> relocate('quantity2', 'after', 'quantity')
>> assign(price2=lambda x: str_clean_number(x.price, dtype='float'))
>> str_trim()
>> relocate('price2', 'after', 'price')

Unnamed: 0,grp,order_nbr,column_1,name,quantity,quantity2,price,price2,effective,expired,trans_dt,a,b,c
0,A100,23899001,First Row,Aidan Scally,14,14.0,123,123.0,2015-10-21,2019-12-31,2018-08-20,123,456,789
1,A101,23899002,Second Row,Eoin Mcallister,103,103.0,432,432.0,2016-10-21,2020-12-31,2017-08-20,ss,11,33
2,A101,23899003,Thrid Row,Mike Tarpey,1,1.0,"3 4,32",3432.0,2017-10-21,2021-12-31,2020-08-20,A,b,C
3,A102,23899004,Fourth Row,Alan Denton,13,13.0,49,49.0,2018-10-21,2022-12-31,2021-08-20,cat,dog,books
4,A103,23899005,Fifth Row,Theo Dallis,19,19.0,"45.7,98",45.798,2019-10-21,2023-12-31,2022-08-20,x,y,z
5,A103,23899006,Sixth Row,Derek Hunt,"5---32,14",-53214.0,63423,63423.0,2019-04-21,2020-05-07,2015-08-20,alpha,bravo,charlie
6,A103,23899007,Seventh Row,Tony Goddard,178.3035,178.3035,27.04502,27.04502,2019-10-21,2025-12-31,2024-08-20,24th May,18th July,5th August
7,A104,23899008,Eighth Row,Matthew Whitaker,"4-2-4,00",-42400.0,563.00,563.0,2019-10-21,2025-12-31,2025-08-20,cc,bb,aa
8,A105,23899009,Ninth Row,Carsten Seiffert,"2-4,00",-2400.0,"9,.8.00",98.0,2018-05-21,2025-12-10,2026-08-20,1304,53-02,6932
9,A105,23899010,Tenth Row,Craig Freer,1800.22,1800.22,563,563.0,2019-10-21,2025-12-31,2027-08-20,40a3,64g,023;


## Rename columns

In [5]:
# Mike's utility for manipulating dataframe columns
# astype='dict' gives a {name: name} mapping and astype='text' gives the
# column list as a single string (see the two outputs below)
display(names(df, regex='', astype='dict'))
display(names(df, astype='text'))

{'gropuing_cde': 'gropuing_cde',
 'order_nbr': 'order_nbr',
 'this_column_name_is_too_long': 'this_column_name_is_too_long',
 'second_column': 'second_column',
 'quantity': 'quantity',
 'price': 'price',
 'effective': 'effective',
 'expired': 'expired',
 'transaction_date': 'transaction_date',
 'a_delimitted_list': 'a_delimitted_list'}

"['gropuing_cde', 'order_nbr', 'this_column_name_is_too_long', 'second_column', 'quantity', 'price', 'effective', 'expired', 'transaction_date', 'a_delimitted_list']"

In [6]:
# Replacement headers, listed in the same left-to-right order as df's columns
columns_renamed = [
    'grouping', 'orders', 'description', 'second_col', 'qty',
    'price', 'effective', 'expired', 'date', 'delimitted_list',
]

# Positional rename: entry i becomes the name of column i
df.columns = columns_renamed
df.head(3)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,date,delimitted_list
0,A100,23899001,First row,"Scally, Aidan",14,123,21.10.2015,31.12.2019,20.08.2018,123;456;789
1,A101,23899002,SECOnd Row,"McAllister, Eoin",103,432,21.10.2016,31.12.2020,20.08.2017,ss;11;33
2,A101,23899003,Thrid Row,"Tarpey, Mike",1,"3 4,32",21.10.2017,31.12.2021,20.08.2020,A;b;C


## Chaining functions - clean strings example

In [7]:
# Normalise the free-text description in one left-to-right chain
df['description'] = (
    df['description']
    .str.lower()
    .str.strip()
    .str.title()
    # swap 'ri' -> 'ir' (turns 'Thrid' into 'Third' in the sample data)
    .str.replace('ri','ir', regex=True)
    # collapse whitespace between two word characters to a single space
    .str.replace(r'(\w)\s+(\w)', r'\1 \2', regex=True)
)
df.head(4)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,date,delimitted_list
0,A100,23899001,First Row,"Scally, Aidan",14,123,21.10.2015,31.12.2019,20.08.2018,123;456;789
1,A101,23899002,Second Row,"McAllister, Eoin",103,432,21.10.2016,31.12.2020,20.08.2017,ss;11;33
2,A101,23899003,Third Row,"Tarpey, Mike",1,"3 4,32",21.10.2017,31.12.2021,20.08.2020,A;b;C
3,A102,23899004,Fourth Row,"Denton, Alan",13,49,21.10.2018,31.12.2022,20.08.2021,cat;dog;books


In [8]:
# Tidy the names, then flip 'Lastname, Firstname' into 'Firstname Lastname'
df['second_col'] = (
    df['second_col']
    .str.strip()
    .str.title()
    .str.replace(r'(\w+),\s+(\w+)', r'\2 \1', regex=True)
)
df.head(4)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,date,delimitted_list
0,A100,23899001,First Row,Aidan Scally,14,123,21.10.2015,31.12.2019,20.08.2018,123;456;789
1,A101,23899002,Second Row,Eoin Mcallister,103,432,21.10.2016,31.12.2020,20.08.2017,ss;11;33
2,A101,23899003,Third Row,Mike Tarpey,1,"3 4,32",21.10.2017,31.12.2021,20.08.2020,A;b;C
3,A102,23899004,Fourth Row,Alan Denton,13,49,21.10.2018,31.12.2022,20.08.2021,cat;dog;books


## Clean-up

### Numbers

In [9]:
# Only cells holding text need repair. na=False makes the mask False for
# cells where .str.match cannot produce a result (non-string values),
# replacing the earlier `== True` comparison.
df.loc[df.qty.str.match(r'[\w]', na=False), 'qty'] = (
    df.qty.str.strip()
          .str.replace(r'[\s\-]', '', regex=True)          # drop whitespace and dashes
          .str.replace(r',(\d{2})$', r'.\1', regex=True)   # trailing ',dd' -> '.dd'
          .str.replace(r',', '', regex=True)               # remove remaining commas
)

# Same idea for price: strip whitespace, then delete any pair of dots
df.loc[df.price.str.match(r'[\w]', na=False), 'price'] = (
    df.price
      .str.replace(r'\s+', r'', regex=True)
      .str.replace(r'\.{2}', r'', regex=True)
)

In [10]:
# na=False keeps cells that .str.match cannot evaluate (non-string values)
# out of each mask, replacing the earlier `== True` comparisons.

# Prices that start with a non-word character: use '.' in place of ','
df.loc[df.price.str.match(r'[\W]', na=False), 'price'] = (
    df.price.str.replace(r',', r'.', regex=True)
)

# Prices that start with a word character: ',' -> '.', strip whitespace,
# then delete any pair of dots
df.loc[df.price.str.match(r'[\w]', na=False), 'price'] = (
    df.price
      .str.replace(r',', r'.', regex=True)
      .str.replace(r'\s+', r'', regex=True)
      .str.replace(r'\.{2}', r'', regex=True)
)

# Final pass: delete any pair of dots, then drop the dot that directly
# follows a '.<digit>' group
df.loc[df.price.str.match(r'[\w]', na=False), 'price'] = (
    df.price
      .str.replace(r'\.{2}', r'', regex=True)
      .str.replace(r'(\.\d)\.', r'\1', regex=True)
)

In [11]:
# Prices become floats rounded to two decimals; quantities are parsed as
# numbers and then cast to int
df['price'] = df['price'].astype(float).round(2)
df['qty'] = pd.to_numeric(df['qty']).astype(int)
df.head(4)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,date,delimitted_list
0,A100,23899001,First Row,Aidan Scally,14,1.23,21.10.2015,31.12.2019,20.08.2018,123;456;789
1,A101,23899002,Second Row,Eoin Mcallister,103,4.32,21.10.2016,31.12.2020,20.08.2017,ss;11;33
2,A101,23899003,Third Row,Mike Tarpey,1,34.32,21.10.2017,31.12.2021,20.08.2020,A;b;C
3,A102,23899004,Fourth Row,Alan Denton,13,49.0,21.10.2018,31.12.2022,20.08.2021,cat;dog;books


### Dates

In [12]:
# Build the date columns without in-place mutation: each step returns a new
# frame and df is rebound once at the end.
# NOTE(review): the source strings look day-first (e.g. '21.10.2015');
# consider dayfirst=True or an explicit format if ambiguous dates such as
# '07.05.2020' can occur - confirm against the workbook.
df = (
    df.assign(
        effective=lambda d: pd.to_datetime(d.effective),
        expired=lambda d: pd.to_datetime(d.expired),
    )
    # validity period in whole days
    .assign(duration=lambda d: (d.expired - d.effective).dt.days)
    .drop(columns=['date'])
    .sort_values(by='expired')
)

# assign() appended duration as the last column; move it to position 8
df.insert(8, 'duration', df.pop('duration'))
df.iloc[:4, 1:]

Unnamed: 0,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
0,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532,123;456;789
5,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382,alpha; bravo; charlie
1,23899002,Second Row,Eoin Mcallister,103,4.32,2016-10-21,2020-12-31,1532,ss;11;33
2,23899003,Third Row,Mike Tarpey,1,34.32,2017-10-21,2021-12-31,1532,A;b;C


### Delimited data expanded to separate columns

In [13]:
# Split 'a;b;c' on ';' at most twice (n=2), giving three columns that are
# named list_1..list_3 in the same expression
expanded_list = (
    df['delimitted_list']
    .str.split(';', n=2, expand=True)
    .set_axis(['list_1', 'list_2', 'list_3'], axis=1)
)

# Index-aligned, side-by-side concat appends the new columns to df
df = pd.concat([df, expanded_list], axis=1)
df

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3
0,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532,123;456;789,123,456,789
5,A103,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382,alpha; bravo; charlie,alpha,bravo,charlie
1,A101,23899002,Second Row,Eoin Mcallister,103,4.32,2016-10-21,2020-12-31,1532,ss;11;33,ss,11,33
2,A101,23899003,Third Row,Mike Tarpey,1,34.32,2017-10-21,2021-12-31,1532,A;b;C,A,b,C
3,A102,23899004,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532,cat;dog;books,cat,dog,books
10,A102,23899011,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532,d20;tg54;kde40,d20,tg54,kde40
4,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532,x;y;z,x,y,z
11,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532,5555;4444;3333,5555,4444,3333
8,A105,23899009,Ninth Row,Carsten Seiffert,24,98.0,2018-05-21,2025-12-10,2760,1304;53-02;6932,1304,53-02,6932
6,A103,23899007,Seventh Row,Tony Goddard,178,27.05,2019-10-21,2025-12-31,2263,24th May; 18th July; 5th August,24th May,18th July,5th August


# DataFrame - exploratory data analysis

## head()

In [14]:
# piper's head(): reports the frame's shape and shows the first 2 rows (see output)
head(df, 2)

12 rows, 13 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3
0,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532,123;456;789,123,456,789
5,A103,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382,alpha; bravo; charlie,alpha,bravo,charlie


## tail()

In [15]:
# piper's tail(): reports the frame's shape and shows the last 2 rows (see output)
tail(df, 2)

12 rows, 13 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3
7,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,2263,cc;bb;aa,cc,bb,aa
9,A105,23899010,Tenth Row,Craig Freer,1800,563.0,2019-10-21,2025-12-31,2263,40a3;64g;023;,40a3,64g,023;


## info()

In [16]:
# Blank out the 2263-day durations so the info()/count() examples that follow
# have missing values to report. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0.
df.loc[df.duration == 2263, 'duration'] = np.nan

In [17]:
# Columns handed to piper's info(); the list names thirteen columns
subset = [
    'grouping', 'orders', 'description', 'second_col', 'qty', 'price',
    'effective', 'expired', 'duration', 'delimitted_list', 'list_1', 'list_2',
    'list_3'
]
# Per-column summary (type, inferred type, n, isna, unique ...); n_dupes=True
# corresponds to the 'n dupes' column in the output below
info(df[subset], n_dupes=True, fillna=False)

Dataframe consumes 0.01 Mb


Unnamed: 0,columns,type,inferred,n,isna,isnull,unique,n dupes
0,grouping,object,string,12,0,0,6,10
1,orders,int64,integer,12,0,0,12,0
2,description,object,string,12,0,0,10,4
3,second_col,object,string,12,0,0,10,4
4,qty,int64,integer,12,0,0,10,4
5,price,float64,floating,12,0,0,10,4
6,effective,datetime64[ns],datetime64,12,0,0,7,7
7,expired,datetime64[ns],datetime64,12,0,0,8,7
8,duration,float64,floating,12,3,3,3,10
9,delimitted_list,object,string,12,0,0,12,0


In [18]:
# dropna=False keeps the NaN bucket in the frequency tally
df.duration.value_counts(dropna=False)

1532.0    7
NaN       3
2760.0    1
382.0     1
Name: duration, dtype: int64

In [19]:
# piper's count(): frequency table with n, % and cum %; with totals=True the
# output below ends in a 'Total' row
count(df, 'duration', totals=True)

5 rows, 3 columns


Unnamed: 0,n,%,cum %
1532.0,7.0,58.33,58.33
,3.0,25.0,83.33
382.0,1.0,8.33,91.67
2760.0,1.0,8.33,100.0
Total,12.0,99.99,


## names()

In [20]:
# With no pattern, names() returns every column name as a list
names(df)

['grouping',
 'orders',
 'description',
 'second_col',
 'qty',
 'price',
 'effective',
 'expired',
 'duration',
 'delimitted_list',
 'list_1',
 'list_2',
 'list_3']

In [21]:
# Filter column names with the pattern 'e' and return them as a dict.
# NOTE(review): the output lists only 'effective' and 'expired', so the
# pattern appears to be anchored at the start of the name - confirm in piper.
names(df, 'e', astype='dict')

{'effective': 'effective', 'expired': 'expired'}

In [22]:
# Use names() with an alternation pattern as a column selector, then preview 2 rows
head(df[names(df, 'list|e|qty|.+ers$')], 2)

12 rows, 7 columns


Unnamed: 0,orders,qty,effective,expired,list_1,list_2,list_3
0,23899001,14,2015-10-21,2019-12-31,123,456,789
5,23899006,532,2019-04-21,2020-05-07,alpha,bravo,charlie


In [23]:
# Column names that end in 'e'. Raw string, so that \w reaches the regex
# engine as written instead of being an invalid string escape.
names(df, r'\w+e$')

['price', 'effective']

## paste() - clipboard list of columns to a list

In [24]:
# Copy header row from Excel sheet then uncomment and run code below
# paste(source='horizontal_list')

## count() - Count column frequency

In [25]:
# Plain pandas frequency count, for comparison with piper's count() in the next cell
df.effective.value_counts()

2019-10-21    5
2018-10-21    2
2017-10-21    1
2015-10-21    1
2018-05-21    1
2016-10-21    1
2019-04-21    1
Name: effective, dtype: int64

In [26]:
# piper's count() returns a frame with n, % and cum % per distinct value
count_df = count(df, 'effective')
count_df

7 rows, 3 columns


Unnamed: 0_level_0,n,%,cum %
effective,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-10-21,5,41.67,41.67
2018-10-21,2,16.67,58.33
2015-10-21,1,8.33,66.67
2016-10-21,1,8.33,75.0
2017-10-21,1,8.33,83.33
2018-05-21,1,8.33,91.67
2019-04-21,1,8.33,100.0


## duplicated() - Check for duplicate data

In [27]:
# Columns that together define what counts as a repeated row
subset = ['description', 'second_col']

# piper's duplicated() adds a boolean 'duplicate' column (last column in the
# output, matching loc='last'); the index is then renumbered from 0
df = (duplicated(df, subset=subset, loc='last').reset_index(drop=True))
head(df)

12 rows, 14 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,duplicate
0,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,,cc;bb;aa,cc,bb,aa,False
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z,True
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333,True
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,False


In [28]:
subset = ['description']

# NOTE(review): the recorded output has 13 columns and no
# 'duplicate description' column, so ref_column does not appear to name the
# flag column here (the next cell uses column= for that) - confirm against
# piper's duplicated() signature.
(head(duplicated(df, subset=subset, loc='first', 
                 ref_column='duplicate description')
      .reset_index(drop=True)
      .drop(columns=['duplicate']), 4))

12 rows, 13 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3
0,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,,cc;bb;aa,cc,bb,aa
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789


In [29]:
subset=['description']

# column= names the new flag column ('duplicate description', shown first in
# the output); the older 'duplicate' flag is dropped from this preview
(head(duplicated(df, subset=subset, column='duplicate description')
      .reset_index(drop=True)
      .drop(columns=['duplicate']), 4))

12 rows, 14 columns


Unnamed: 0,duplicate description,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3
0,False,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,,cc;bb;aa,cc,bb,aa
1,True,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z
2,True,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333
3,False,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789


## Explode data

### combining columns into a single 'list' column

In [30]:
# Pack list_1..list_3 row by row into Python lists, one list per row
combined_listcol = pd.Series(
    df[['list_1', 'list_2', 'list_3']].to_numpy().tolist()
)
combined_listcol

0                            [cc, bb, aa]
1                               [x, y, z]
2                      [5555, 4444, 3333]
3                         [123, 456, 789]
4                       [cat, dog, books]
5                      [d20, tg54, kde40]
6                     [1304, 53-02, 6932]
7                            [ss, 11, 33]
8     [24th May,  18th July,  5th August]
9               [alpha,  bravo,  charlie]
10                      [40a3, 64g, 023;]
11                              [A, b, C]
dtype: object

In [31]:
# Place the list column at position 13 (just before 'duplicate' in the output).
# insert() mutates df in place, so re-running this cell raises because the
# column already exists.
df.insert(13, 'combined_listcol', combined_listcol)
head(df)

12 rows, 15 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate
0,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,,cc;bb;aa,cc,bb,aa,"[cc, bb, aa]",False
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z,"[x, y, z]",True
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333,"[5555, 4444, 3333]",True
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False


### explode (extract) values from a single 'list' column

In [32]:
# explode() emits one row per list element and repeats the original index
# label (12 rows become 36 in the output)
head(df[['second_col', 'combined_listcol']].explode('combined_listcol'), 10)

36 rows, 2 columns


Unnamed: 0,second_col,combined_listcol
0,Matthew Whitaker,cc
0,Matthew Whitaker,bb
0,Matthew Whitaker,aa
1,Theo Dallis,x
1,Theo Dallis,y
1,Theo Dallis,z
2,Theo Dallis,5555
2,Theo Dallis,4444
2,Theo Dallis,3333
3,Aidan Scally,123


## add_xl_formula() - Add Excel formula(s)

In [33]:
# {row} is a placeholder; in the output it is filled with 2, 3, 4 ... for
# successive rows
formula = '=CONCATENATE(A{row}, B{row}, C{row})'
# The output shows a new 'X8' column on df although the return value is not
# assigned, so the helper appears to modify df in place
add_xl_formula(df, column_name='X8', formula=formula)
head(df)

12 rows, 16 columns


Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
0,A104,23899008,Eighth Row,Matthew Whitaker,424,56.3,2019-10-21,2025-12-31,,cc;bb;aa,cc,bb,aa,"[cc, bb, aa]",False,"=CONCATENATE(A2, B2, C2)"
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z,"[x, y, z]",True,"=CONCATENATE(A3, B3, C3)"
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333,"[5555, 4444, 3333]",True,"=CONCATENATE(A4, B4, C4)"
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"


## pivot()

In [34]:
# dx = get_sample_data()

# pv = pivot(dx,
#            index=['countries', 'regions'],
#            sort_values=None,
#            percent=True,
#            cum_percent=True,
#            aggfunc=np.sum,
#            values='values_1')

# pv = pv.sort_values(['countries', 'regions', 'Totals'], ascending=[True, None, True])
# head(pv)

In [35]:
# index=['grouping', 'expired']
# values=['qty']

# p1 = (pivot(df, index=index, values=values,
#                   sort_values=None,
#                   percent=True, cum_percent=True,
#                   aggfunc=np.sum, round=3))

# # Add totals for all columns
# total_text = 'Grand Total'

# for col in p1.columns:
#     if col == 'cum %':
#         p1.loc[('', total_text), col] = np.nan
#     else:
#         p1.loc[('', total_text), col] = p1[col].sum()

# p1['Totals'] = p1['Totals'].astype('int32')
# p1.fillna('', inplace=True)

# p1 = p1.reset_index()

# # f = lambda x: print(type(x)) if pd.datetime else 'NOT' 
# # # p1.expired[:-1].dt.strftime('%Y-%m-%d')
# # p1.expired.apply(f)
# p1

## relocate()

In [36]:
# relocate(p1, ['expired', 'grouping'], loc='first')

## pivot_table()

In [37]:
# Frequency rule passed as freq= and the two grouping levels of the pivot
rule = 'D'
index = ['expired', 'grouping']

# piper's pivot_table() (a function, not DataFrame.pivot_table); the result
# is ordered by qty, largest first
x = (pivot_table(df, index=index, freq=rule, format_date=True, values='qty')
     .sort_values(by=['qty'], ascending=False))

# Add a total
x.loc[('', 'Grand Total'), 'qty'] = x.qty.sum()
# NOTE(review): presumably the row added above changes qty's dtype, hence the
# cast back to int32 - confirm. The recorded output does not list the
# 'Grand Total' row.
x.qty = x.qty.astype('int32')
x

Unnamed: 0_level_0,Unnamed: 1_level_0,qty
expired,grouping,Unnamed: 2_level_1
2025-12-31,A105,1800
2020-05-07,A103,532
2025-12-31,A104,424
2025-12-31,A103,178
2020-12-31,A101,103
2025-12-10,A105,24
2023-12-31,A103,19
2019-12-31,A100,14
2022-12-31,A102,13
2021-12-31,A101,1


### multi-index selection

In [38]:
# Keep rows whose first index level (level=0, 'expired') is one of the two dates
x.loc[x.index.isin(['2025-12-31', '2020-05-07'], level=0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,qty
expired,grouping,Unnamed: 2_level_1
2025-12-31,A105,1800
2020-05-07,A103,532
2025-12-31,A104,424
2025-12-31,A103,178


# Categorical data

In [39]:
# Current column list, including the columns added by the earlier examples
names(df)

['grouping',
 'orders',
 'description',
 'second_col',
 'qty',
 'price',
 'effective',
 'expired',
 'duration',
 'delimitted_list',
 'list_1',
 'list_2',
 'list_3',
 'combined_listcol',
 'duplicate',
 'X8']

In [40]:
# Frequency of each description while the column is still plain text
count(df, 'description')

10 rows, 3 columns


Unnamed: 0_level_0,n,%,cum %
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fifth Row,2,16.67,16.67
Fourth Row,2,16.67,33.33
Eighth Row,1,8.33,41.67
First Row,1,8.33,50.0
Ninth Row,1,8.33,58.33
Second Row,1,8.33,66.67
Seventh Row,1,8.33,75.0
Sixth Row,1,8.33,83.33
Tenth Row,1,8.33,91.67
Third Row,1,8.33,100.0


In [41]:
# Distinct descriptions in value_counts() order, as a plain Python list
df.description.value_counts().index.tolist()

['Fourth Row',
 'Fifth Row',
 'Sixth Row',
 'Eighth Row',
 'Ninth Row',
 'Tenth Row',
 'Third Row',
 'Second Row',
 'Seventh Row',
 'First Row']

In [42]:
# Required 'sequence' for categories
# (ordinal order, First .. Tenth, instead of alphabetical order)
categories = [
    'First Row', 'Second Row', 'Third Row', 'Fourth Row', 'Fifth Row',
    'Sixth Row', 'Seventh Row', 'Eighth Row', 'Ninth Row', 'Tenth Row'
]

# Make sure the column holds plain strings before it is made categorical
df.description = df.description.astype(str)

In [43]:
# Same count as before the astype(str) cast; the output is unchanged
count(df, 'description')

10 rows, 3 columns


Unnamed: 0_level_0,n,%,cum %
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fifth Row,2,16.67,16.67
Fourth Row,2,16.67,33.33
Eighth Row,1,8.33,41.67
First Row,1,8.33,50.0
Ninth Row,1,8.33,58.33
Second Row,1,8.33,66.67
Seventh Row,1,8.33,75.0
Sixth Row,1,8.33,83.33
Tenth Row,1,8.33,91.67
Third Row,1,8.33,100.0


In [44]:
# Turn description into an ordered categorical that follows `categories`
df.description = pd.Categorical(values=df.description,
                                categories=categories,
                                ordered=True)

# NOTE: the conversion above is applied to df itself, so this deep copy
# already carries the categorical column
df_categorical = df.copy(deep=True)
# Count orders per grouping x description; the columns come out in category
# order (First Row .. Tenth Row) and empty combinations are filled with 0
df_categorical.pivot_table(index=['grouping'],
                           columns='description',
                           values='orders',
                           fill_value=0,
                           aggfunc=np.size)

description,First Row,Second Row,Third Row,Fourth Row,Fifth Row,Sixth Row,Seventh Row,Eighth Row,Ninth Row,Tenth Row
grouping,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A100,1,0,0,0,0,0,0,0,0,0
A101,0,1,1,0,0,0,0,0,0,0
A102,0,0,0,2,0,0,0,0,0,0
A103,0,0,0,0,2,1,1,0,0,0
A104,0,0,0,0,0,0,0,1,0,0
A105,0,0,0,0,0,0,0,0,1,1


# DataFrame - Select/filter and extract

## Simple filter

In [45]:
# Rows for one person, limited to the first ten columns
df.loc[df['second_col'].eq('Theo Dallis')].iloc[:, :10]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333


## Multiple filter - AND

In [46]:
# Two independent boolean masks
filter_1 = df['grouping'].eq('A103')
filter_2 = df['second_col'].eq('Theo Dallis')

# Keep rows where both masks hold; show the first ten columns
df.loc[filter_1 & filter_2].iloc[:, :10]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333


## Multiple filter - OR

In [47]:
# Two independent boolean masks
filter_1 = df['grouping'].eq('A103')
filter_2 = df['second_col'].eq('Theo Dallis')

# Keep rows where at least one mask holds; show the first ten columns
df.loc[filter_1 | filter_2].iloc[:, :10]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333
8,A103,23899007,Seventh Row,Tony Goddard,178,27.05,2019-10-21,2025-12-31,,24th May; 18th July; 5th August
9,A103,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382.0,alpha; bravo; charlie


## str.extractall()

In [48]:
# Capture first name (Theo or Aidan) and the following word, ignoring case.
# Raw string, so that \w reaches the regex engine as written instead of being
# an invalid string escape.
df.second_col.str.extractall(r'(Theo|Aidan) (\w+)', flags=re.I)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0,Theo,Dallis
2,0,Theo,Dallis
3,0,Aidan,Scally


## str.findall()

In [49]:
# Case-insensitive search; each row yields a list of matches (empty when none)
df.description.str.findall('fourth', flags=re.I)

0           []
1           []
2           []
3           []
4     [Fourth]
5     [Fourth]
6           []
7           []
8           []
9           []
10          []
11          []
Name: description, dtype: object

## str.match()

In [50]:
# str.match() tests from the start of the string, so this keeps values
# beginning with '55'
df[df.delimitted_list.str.match('55')].iloc[:, :10]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333


In [51]:
# Descriptions that start with 'first' or 'fifth', ignoring case
df[df.description.str.match('(first|fifth)', flags=re.I)]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z,"[x, y, z]",True,"=CONCATENATE(A3, B3, C3)"
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333,"[5555, 4444, 3333]",True,"=CONCATENATE(A4, B4, C4)"
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"


## str.contains()

In [52]:
# str.contains() matches anywhere in the string; '|' separates the alternatives
df[df.second_col.str.contains('Tony|Derek|Theo')].iloc[:, :10]

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list
1,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z
2,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333
8,A103,23899007,Seventh Row,Tony Goddard,178,27.05,2019-10-21,2025-12-31,,24th May; 18th July; 5th August
9,A103,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382.0,alpha; bravo; charlie


## df.query()

### single selection

In [53]:
# The filter is written as a string expression over column names
query = """ grouping == 'A100' """
df.query(query)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"


### multiple OR

In [54]:
# Three alternatives joined with the keyword 'or'; a row passes if any one holds
query = """(grouping == 'A101') or (second_col.str.contains('Mike Tarpey')) or (orders < 23899004) """
df.query(query)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"
7,A101,23899002,Second Row,Eoin Mcallister,103,4.32,2016-10-21,2020-12-31,1532.0,ss;11;33,ss,11,33,"[ss, 11, 33]",False,"=CONCATENATE(A9, B9, C9)"
11,A101,23899003,Third Row,Mike Tarpey,1,34.32,2017-10-21,2021-12-31,1532.0,A;b;C,A,b,C,"[A, b, C]",False,"=CONCATENATE(A13, B13, C13)"


In [55]:
# Same filter written with '|' instead of 'or'; the output matches the previous cell
query = """(grouping == 'A101') | (second_col.str.contains('Mike Tarpey')) | (orders < 23899004) """
df.query(query)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
3,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"
7,A101,23899002,Second Row,Eoin Mcallister,103,4.32,2016-10-21,2020-12-31,1532.0,ss;11;33,ss,11,33,"[ss, 11, 33]",False,"=CONCATENATE(A9, B9, C9)"
11,A101,23899003,Third Row,Mike Tarpey,1,34.32,2017-10-21,2021-12-31,1532.0,A;b;C,A,b,C,"[A, b, C]",False,"=CONCATENATE(A13, B13, C13)"


### multiple AND / OR with variable substitution (@name)

In [56]:
# Python values referenced from inside the query string via the @ prefix
list_of_groups = ['A100', 'A101', 'A103']
duration_criteria = 380

# 'and' binds tighter than 'or', so the expression reads:
#   (grouping in list AND second_col does not start with 'Theo')
#   OR duration >= duration_criteria
# The trailing backslashes join the clauses into one logical line.
query = """(grouping in @list_of_groups) and\
           (~second_col.str.startswith('Theo')) or\
           (duration >= @duration_criteria) 
        """
df.query(query).reset_index(drop=True)

Unnamed: 0,grouping,orders,description,second_col,qty,price,effective,expired,duration,delimitted_list,list_1,list_2,list_3,combined_listcol,duplicate,X8
0,A103,23899005,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,x;y;z,x,y,z,"[x, y, z]",True,"=CONCATENATE(A3, B3, C3)"
1,A103,23899012,Fifth Row,Theo Dallis,19,45.8,2019-10-21,2023-12-31,1532.0,5555;4444;3333,5555,4444,3333,"[5555, 4444, 3333]",True,"=CONCATENATE(A4, B4, C4)"
2,A100,23899001,First Row,Aidan Scally,14,1.23,2015-10-21,2019-12-31,1532.0,123;456;789,123,456,789,"[123, 456, 789]",False,"=CONCATENATE(A5, B5, C5)"
3,A102,23899004,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532.0,cat;dog;books,cat,dog,books,"[cat, dog, books]",True,"=CONCATENATE(A6, B6, C6)"
4,A102,23899011,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532.0,d20;tg54;kde40,d20,tg54,kde40,"[d20, tg54, kde40]",True,"=CONCATENATE(A7, B7, C7)"
5,A105,23899009,Ninth Row,Carsten Seiffert,24,98.0,2018-05-21,2025-12-10,2760.0,1304;53-02;6932,1304,53-02,6932,"[1304, 53-02, 6932]",False,"=CONCATENATE(A8, B8, C8)"
6,A101,23899002,Second Row,Eoin Mcallister,103,4.32,2016-10-21,2020-12-31,1532.0,ss;11;33,ss,11,33,"[ss, 11, 33]",False,"=CONCATENATE(A9, B9, C9)"
7,A103,23899007,Seventh Row,Tony Goddard,178,27.05,2019-10-21,2025-12-31,,24th May; 18th July; 5th August,24th May,18th July,5th August,"[24th May, 18th July, 5th August]",False,"=CONCATENATE(A10, B10, C10)"
8,A103,23899006,Sixth Row,Derek Hunt,532,634.23,2019-04-21,2020-05-07,382.0,alpha; bravo; charlie,alpha,bravo,charlie,"[alpha, bravo, charlie]",False,"=CONCATENATE(A11, B11, C11)"
9,A101,23899003,Third Row,Mike Tarpey,1,34.32,2017-10-21,2021-12-31,1532.0,A;b;C,A,b,C,"[A, b, C]",False,"=CONCATENATE(A13, B13, C13)"


## df.loc[]

In [57]:
# Columns to return for the matching rows
subset_cols = ['second_col', 'effective', 'expired']
# Row mask first, column list second
df.loc[df.second_col.eq('Alan Denton'), subset_cols]

Unnamed: 0,second_col,effective,expired
4,Alan Denton,2018-10-21,2022-12-31
5,Alan Denton,2018-10-21,2022-12-31


In [58]:
# Label slices include both ends: every column from 'orders' through 'duration'
df.loc[df.second_col.eq('Alan Denton'), 'orders':'duration']

Unnamed: 0,orders,description,second_col,qty,price,effective,expired,duration
4,23899004,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532.0
5,23899011,Fourth Row,Alan Denton,13,49.0,2018-10-21,2022-12-31,1532.0


# DataFrame - Testing 

## selecting columns

In [59]:
# Summary statistics, transposed so that each column becomes a row
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
orders,12.0,23899010.0,3.605551,23899001.0,23899000.0,23899006.5,23899010.0,23899012.0
qty,12.0,261.6667,515.689535,1.0,13.75,21.5,239.5,1800.0
price,12.0,134.0042,218.971149,1.23,32.5025,47.4,66.725,634.23
duration,9.0,1540.667,594.748686,382.0,1532.0,1532.0,1532.0,2760.0


### select non-object columns (exclude object)

In [60]:
# Drops object-dtype columns only; the categorical, datetime and boolean
# columns stay alongside the numeric ones (see output)
df.select_dtypes(exclude='object')

Unnamed: 0,orders,description,qty,price,effective,expired,duration,duplicate
0,23899008,Eighth Row,424,56.3,2019-10-21,2025-12-31,,False
1,23899005,Fifth Row,19,45.8,2019-10-21,2023-12-31,1532.0,True
2,23899012,Fifth Row,19,45.8,2019-10-21,2023-12-31,1532.0,True
3,23899001,First Row,14,1.23,2015-10-21,2019-12-31,1532.0,False
4,23899004,Fourth Row,13,49.0,2018-10-21,2022-12-31,1532.0,True
5,23899011,Fourth Row,13,49.0,2018-10-21,2022-12-31,1532.0,True
6,23899009,Ninth Row,24,98.0,2018-05-21,2025-12-10,2760.0,False
7,23899002,Second Row,103,4.32,2016-10-21,2020-12-31,1532.0,False
8,23899007,Seventh Row,178,27.05,2019-10-21,2025-12-31,,False
9,23899006,Sixth Row,532,634.23,2019-04-21,2020-05-07,382.0,False


In [61]:
# Boolean mask over the column labels, applied directly on the column axis.
head(df.loc[:, df.columns.str.startswith('o')])

12 rows, 1 columns


Unnamed: 0,orders
0,23899008
1,23899005
2,23899012
3,23899001


In [62]:
# Same column-axis mask technique, this time with a regex character class.
head(df.loc[:, df.columns.str.contains('[a]')])

12 rows, 2 columns


Unnamed: 0,duration,duplicate
0,,False
1,1532.0,True
2,1532.0,True
3,1532.0,False


## Testing display_df

In [63]:
# qq = df.pivot_table(index=['grouping', 'orders', 'second_col']
#                 , values='qty'
#                 , margins=True
#                 , aggfunc=np.sum)

In [64]:
# %run ../lib/widget_utils.py
# %run ../lib/widget_utils_pd.py
# cols = select_cols(df.iloc[:3])

In [65]:
# print(cols.to_cols)

In [66]:
# %run ../lib/widget_utils.py
# %run ../lib/widget_utils_pd.py
# # c = "grouping.isin(['A105'])"
# # c = "second_col != 'Alan Denton' & orders != 23899004"
# c = "qty > 10 & qty < 20"
# # c = "expired > '2019-12-31' and grouping == 'A101'"
# # c = "index"
# mask = df.query(c)

# display_df(mask[cols.to_cols])

# Export (Excel)

In [67]:
# Project label interpolated into the file name of the first export below.
project = 'demo'

## Standard Pandas

In [68]:
# Plain pandas export: no index column, panes frozen below the first row,
# floats written with two decimals.
file_name = f'outputs/{project} example #1 Standard pandas.xlsx'
df.to_excel(
    file_name,
    index=False,
    freeze_panes=(1, 0),
    float_format="%0.2f",
)
print(file_name)

outputs/demo example #1 Standard pandas.xlsx


## Standard Pandas - multiple sheets

In [69]:
file_name = 'outputs/example #2 Standard pandas multi-sheets.xlsx'

# One XlsxWriter-backed writer shared by both sheets; the context manager
# saves the workbook when the block exits.
with pd.ExcelWriter(
    file_name,
    engine='xlsxwriter',
    date_format='YYYY-MM-DD',
    datetime_format='YYYY-MM-DD',
) as writer:
    # Untouched input first, transformed frame second.
    df_original.to_excel(
        writer, sheet_name='Sheet1', index=False, freeze_panes=(1, 0)
    )
    df.to_excel(
        writer, sheet_name='Sheet2', index=False, freeze_panes=(1, 0)
    )

## WorkBook class - single/multiple sheets

### Single sheet

In [70]:
# One-call export: a single DataFrame passed as `sheets` produces one worksheet
# (logged as "sheet1"). The logged workbook name shows a date prefix and an
# .xlsx extension added to file_name.
file_name = 'outputs/example #3 SaveXL single sheet'
WorkBook(file_name, sheets=df);  # trailing ';' suppresses the cell's result display

Workbook: outputs/20210409_example #3 SaveXL single sheet.xlsx


<< mult-sheet mode >>


Sheet (range): sheet1 ($A$1:$P$13)


Completed.


### Multi-sheet

In [71]:
# A list of DataFrames produces one worksheet per frame; the log shows the
# default names sheet1 and sheet2, in list order.
file_name = 'outputs/example #4 SaveXL multi sheet'
WorkBook(file_name, [df, df_original]);

Workbook: outputs/20210409_example #4 SaveXL multi sheet.xlsx


<< mult-sheet mode >>


Sheet (range): sheet1 ($A$1:$P$13)


Sheet (range): sheet2 ($A$1:$J$13)


Completed.


### Multi-sheet with sheet names

In [72]:
# A dict maps sheet name -> DataFrame; the log shows the keys used as the
# worksheet names. file_name already ends in .xlsx here and the logged name
# carries a single extension.
file_name = 'outputs/example #5 SaveXL multi sheet with sheet names.xlsx'
WorkBook(file_name, {'revised': df, 'original': df_original});

Workbook: outputs/20210409_example #5 SaveXL multi sheet with sheet names.xlsx


<< mult-sheet mode >>


Sheet (range): revised ($A$1:$P$13)


Sheet (range): original ($A$1:$J$13)


Completed.


## WorkBook class - advanced mode

### Basic usage

In [73]:
# Step-by-step usage: create the workbook, add a sheet, close it explicitly.
# With ts_prefix=False the logged workbook name has no date prefix.
wb = WorkBook('outputs/Basic workbook object', ts_prefix=False)
wb.add_sheet(df)
wb.close()

Workbook: outputs/Basic workbook object.xlsx


<< sheet mode >>


Sheet (range): sheet1 ($A$1:$P$13)


Completed.


In [74]:
# Same workbook as the previous cell, written with the context-manager form
# instead of an explicit close().
# NOTE(review): presumably the workbook is closed on exit - confirm in WorkBook.__exit__.
with WorkBook('outputs/Basic workbook object', ts_prefix=False) as wb:
    wb.add_sheet(df)

Workbook: outputs/Basic workbook object.xlsx


<< sheet mode >>


Sheet (range): sheet1 ($A$1:$P$13)


### Adding sheets

In [75]:
# add_sheet() takes per-sheet options: here a sheet name, a tab colour and,
# for the first sheet, a zoom level.
# Plain string literal: there is nothing to interpolate, so no f-prefix.
xl_file = 'example #6 WorkBook - Greater control.xlsx'
wb = WorkBook(join('outputs', xl_file), ts_prefix=None)
wb.add_sheet(df, sheet_name='revised data', tab_color='red', zoom=120)
wb.add_sheet(df_original, sheet_name='original', tab_color='red')
wb.close()

Workbook: outputs/example #6 WorkBook - Greater control.xlsx


<< sheet mode >>


Sheet (range): revised data ($A$1:$P$13)


Sheet (range): original ($A$1:$J$13)


Completed.


### demo accent colours

In [76]:
def demo_accents(ws, row=16, style_map=None):
    '''Write a grid of accent style names, each cell formatted in its own style.

    One row is written per accent (accent1 .. accent6): column 0 holds the
    base accent name and columns 1-3 hold its 20/40/60 shade variants
    (e.g. accent1_20, accent1_40, accent1_60).

    Parameters
    ----------
    ws
        Worksheet-like object exposing ``write(row, col, value, cell_format)``.
    row
        Row index passed to ``ws.write`` for the first accent; each following
        accent is written one row further down.
    style_map
        Mapping of style name -> cell format, queried with ``.get()`` so an
        unknown name yields ``None``. When omitted, the notebook-level
        ``styles`` variable is used, so existing calls keep working.

    Returns
    -------
    None
    '''
    if style_map is None:
        # Backward-compatible fallback to the notebook-level variable.
        style_map = styles

    shades = (20, 40, 60)
    accents = {
        f'accent{n}': [f'accent{n}_{shade}' for shade in shades]
        for n in range(1, 7)
    }
    logger.debug(f'{accents}')

    for offset, (accent, variants) in enumerate(accents.items()):
        target_row = row + offset
        ws.write(target_row, 0, accent, style_map.get(accent))

        # Shade variants sit to the right of the base accent, from column 1.
        for col, variant in enumerate(variants, start=1):
            ws.write(target_row, col, variant, style_map.get(variant))

### Complex conditional format

In [77]:
# Two-sheet workbook: plain column formatting on 'original', conditional
# formatting on 'revised data'.
# Plain string literal: there is nothing to interpolate, so no f-prefix.
xl_file = 'example #7 WorkBook - Multi sheet with conditional formatting.xlsx'
wb = WorkBook(join('outputs', xl_file), ts_prefix=None)
styles = wb.get_styles()  # notebook-level name read by demo_accents() at call time

sheet_name = 'original'
ws = wb.add_sheet(df_original, sheet_name=sheet_name, tab_color='yellow', zoom=175)

# Single-column formats: one dict per call.
wb.add_format(ws, column_attr={'column': 'A', 'width': 10, 'format': 'center_wrap'})
wb.add_format(ws, column_attr={'column': 'B', 'width': 11})
wb.add_format(ws, column_attr={'column': 'C', 'format': 'center', 'width': 25})

# add_format() also accepts a list of format dicts:
# here used to set the same width on a run of columns in one call.
cols = ['E', 'F', 'G', 'H', 'I']
formats = [{'column': f'{c}', 'width': 10} for c in cols]

wb.add_format(ws, column_attr=formats)

# From here on `ws` refers to the second sheet.
sheet_name = 'revised data'
ws = wb.add_sheet(df, sheet_name, tab_color='red', zoom=175)

selected_date = datetime.strptime('2018-01-01', "%Y-%m-%d")

# Conditional-format rules passed as a list of dicts. 'format' values are
# style names (see wb.show_styles()); 'range' is a column letter or span.
# NOTE(review): the first rule has no 'range' key - presumably a library
# default applies; confirm in WorkBook.add_condition.
c = [{'type': 'formula', 'criteria': '=$I2=2263', 'format': 'accent4'},
     {'type': 'cell', 'criteria': 'equal to', 'value': '"A103"', 'format': 'accent5', 'range': 'A'},
     {'type': 'cell', 'criteria': 'equal to', 'value': 23899003, 'format': 'accent6', 'range': 'B'},
     {'type': 'duplicate', 'format': 'accent1', 'range': 'C:D'},
     {'type': 'text', 'criteria': 'containing', 'value': 'Eoin', 'format': 'accent2', 'range': 'D'},
     {'type': 'data_bar', 'data_bar_2010': True, 'criteria': '=$F2>0', 'range': 'F'},
     {'type': 'date', 'criteria': 'less than', 'value': selected_date, 'format': 'accent3', 'range': 'G'},
     {'type': 'formula', 'criteria': '=$J2="cat;dog;books"', 'format': 'accent5', 'range': 'J'}]
wb.add_condition(ws, condition=c)

# A single rule may be passed as one dict instead of a list.
wb.add_condition(ws, condition={'type': '3_color_scale', 'range': 'H'})

wb.add_format(ws, column_attr={'column': 'F', 'format': 'price4'})

# Accent swatch grid, written on the 'revised data' sheet starting at row index 16.
demo_accents(ws, row=16)

wb.close()

Workbook: outputs/example #7 WorkBook - Multi sheet with conditional formatting.xlsx


<< sheet mode >>


Sheet (range): original ($A$1:$J$13)


Sheet (range): revised data ($A$1:$P$13)


Completed.


### Workbook - meta data

In [78]:
# wb.sheet_dict

In [79]:
# Lists the available style names (the recorded output is a dict_keys view).
wb.show_styles()

dict_keys(['error', 'good', 'yellow', 'neutral', 'input', 'explanatory', 'note', 'money', 'price2', 'price4', 'center', 'center_valign', 'center_wrap_valign', 'center_wrap', 'left_wrap', 'sql', 'accent1', 'accent2', 'accent3', 'accent4', 'accent5', 'accent6', 'accent1_20', 'accent1_40', 'accent1_60', 'accent2_20', 'accent2_40', 'accent2_60', 'accent3_20', 'accent3_40', 'accent3_60', 'accent4_20', 'accent4_40', 'accent4_60', 'accent5_20', 'accent5_40', 'accent5_60', 'accent6_20', 'accent6_40', 'accent6_60', 'eudc_hdr1', 'eudc_hdr2', 'sheet_default'])

In [80]:
# Shows the available theme names; the recorded output is one comma-separated string.
wb.show_themes()

'Light 1, Light 2, Light 3, Light 4, Light 5, Light 6, Light 7, Light 8, Light 9, Light 10, Light 11, Light 12, Light 13, Light 14, Light 15, Light 16, Light 17, Light 18, Light 19, Light 20, Light 21, Light 22, Light 23, Light 24, Light 25, Light 26, Light 27, Light 28, Medium 1, Medium 2, Medium 3, Medium 4, Medium 5, Medium 6, Medium 7, Medium 8, Medium 9, Medium 10, Medium 11, Medium 12, Medium 13, Medium 14, Medium 15, Medium 16, Medium 17, Medium 18, Medium 19, Medium 20, Medium 21, Medium 22, Medium 23, Medium 24, Medium 25, Medium 26, Medium 27, Medium 28, Dark 1, Dark 2, Dark 3, Dark 4, Dark 5, Dark 6, Dark 7, Dark 8, Dark 9, Dark 10, Dark 11, Dark 12'

In [81]:
# ?WorkBook

In [82]:
# ?WorkBook.add_condition

# Zip / compress data

In [83]:
# Dry run: with test_mode=True the log names the target archive and the number
# of matching files, and reports them as "not created".
# NOTE(review): mode='w' followed by mode='a' presumably mirrors zipfile's
# write/append modes (create, then add to the same archive) - confirm in zip_data.
zip_data(filter='example*.xlsx', ts_prefix='date', test_mode=True, mode='w', info=False)
zip_data(filter='*.csv', ts_prefix='date', test_mode=True, mode='a', info=False)

Source: outputs, filter: example*.xlsx


<TEST> Target: outputs/20210409_zip_data.zip - 3 files not created.


Source: outputs, filter: *.csv


<TEST> Target: outputs/20210409_zip_data.zip - 1 files not created.


<zipfile.ZipFile [closed]>