In [1]:
from piper import piper
from piper.defaults import *
from piper.verbs import *
from piper.factory import get_sample_data

piper version 0.0.9, last run: Tuesday, 09 March 2021 20:22:45


In [2]:
import pandas as pd
import numpy as np


def calc_weights(weights):
    '''
    Convert raw weights into proportions of their total.

    Parameters
    ----------
    weights : iterable of numbers
        Raw (relative) weights. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    list
        One value per input weight, each divided by the sum of all weights.
        An empty input yields an empty list.

    Raises
    ------
    ZeroDivisionError
        If non-empty plain Python numbers sum to zero.
    '''
    # Materialise once so an iterator is not drained by sum(), and compute
    # the total a single time instead of once per element.
    values = list(weights)
    total = sum(values)
    return [value / total for value in values]


def get_skus(astype='list'):
    '''
    Return the catalogue of sample phone SKUs.

    A random unit price (uniform between 400 and 800, rounded to two
    decimal places) is drawn for every SKU on each call, whichever output
    format is requested.

    Parameters
    ----------
    astype : str, default 'list'
        'list'      -> list of SKU names
        'dataframe' -> DataFrame with columns 'sku' and 'unit_price'
        anything else -> list of {'sku': ..., 'unit_price': ...} dicts
    '''
    skus = [
        # Apple
        'Apple iPhone 11', 'Apple iPhone 11 Pro', 'Apple iPhone 11 Pro Max',
        'Apple iPhone XR', 'Apple iPhone XS', 'Apple iPhone XS Max',
        # Galaxy
        'Galaxy S10+', 'Galaxy S10e',
        # Huawei
        'Huawei Mate 20', 'Huawei Mate 20 Pro', 'Huawei P20', 'Huawei P20 Pro',
        'Huawei P20 lite', 'Huawei P30', 'Huawei P30 Pro',
        # Samsung
        'Samsung Galaxy A10', 'Samsung Galaxy A20', 'Samsung Galaxy A50',
        'Samsung Galaxy J2 Core', 'Samsung Galaxy S10', 'Samsung Galaxy S9',
        'Samsung Galaxy S9+',
        # Xiaomi
        'Xiaomi Redmi Note 7', 'Xiaomi Redmi Note 7 Pro',
        'Xiaomi Redmi Note 8', 'Xiaomi Redmi Note 8 Pro',
    ]

    # Prices are drawn before the format is inspected, so the global numpy
    # random state advances by the same amount on every call.
    unit_prices = np.random.uniform(low=400, high=800, size=len(skus)).round(2)

    if astype == 'list':
        return skus

    records = [{'sku': name, 'unit_price': cost}
               for name, cost in zip(skus, unit_prices)]

    return pd.DataFrame(records) if astype == 'dataframe' else records

In [3]:
import pandas as pd
import numpy as np


def get_sample_data2(year='2020', freq='D', rows=1000, seed=None):
    '''
    Build a DataFrame of randomly generated sales-order records.

    Parameters
    ----------
    year : str or int, default '2020'
        Calendar year used for every order date.
    freq : str, default 'D'
        Currently unused; kept so existing callers keep working.
    rows : int, default 1000
        Number of records to generate.
    seed : int, optional
        When given, seeds numpy's global random state before any values
        are drawn.

    Returns
    -------
    pd.DataFrame
        One row per order with the columns: country, region, order,
        order_dt, invoice_dt, delivery_dt, rep, sku, qty.
    '''
    if seed is not None:
        np.random.seed(seed)

    # NOTE: the order of the np.random calls below determines the output for
    # a given seed; keep it stable when editing.

    # randint's upper bound is exclusive: months are 1..12 and days 1..27,
    # so every (month, day) combination is a valid calendar date.
    month = np.random.randint(1, 13, size=rows)
    day = np.random.randint(1, 28, size=rows)
    order_dates = pd.to_datetime(
        pd.DataFrame({'year': year, 'month': month, 'day': day}))

    # Invoice follows the order by 1-2 days; delivery follows the invoice by
    # 3-4 days. The integer offsets are converted in one vectorised call.
    invoice_lag = pd.Series(
        pd.to_timedelta(np.random.randint(1, 3, size=rows), unit='D'))
    invoice_dates = order_dates + invoice_lag
    delivery_lag = pd.Series(
        pd.to_timedelta(np.random.randint(3, 5, size=rows), unit='D'))
    delivery_dates = invoice_dates + delivery_lag

    orders = pd.Series(np.random.randint(low=4600000, high=4800000, size=rows))

    country_list = ['Germany', 'Italy', 'France', 'Spain', 'Sweden',
                    'Portugal', 'Norway', 'Switzerland']
    country_weights = calc_weights([3, 1, 2, 1, 1, 1, 1, 1])
    countries = pd.Series(
        np.random.choice(country_list, p=country_weights, size=rows))

    region_list = ['East', 'West', 'North', 'South']
    region_weights = calc_weights([4, 2, 4, 1])
    regions = pd.Series(
        np.random.choice(region_list, p=region_weights, size=rows))

    rep_list = ['Mr. D. Davis', 'Mr. B. Johnson', 'Mr S. Baker',
                'Mr K. Starmer', 'Mrs T. May', 'Mrs R. Johnson']
    rep_weights = calc_weights([1, 3, 2, 2, 2, 1])
    reps = pd.Series(np.random.choice(rep_list, p=rep_weights, size=rows))

    # Pick a random catalogue row per order and take its name (first column)
    # in a single positional lookup rather than row by row.
    df_skus = get_skus(astype='dataframe')
    sku_positions = np.random.randint(low=0, high=df_skus.shape[0], size=rows)
    skus = df_skus.iloc[sku_positions, 0].reset_index(drop=True)

    qty = pd.Series(np.random.randint(low=1, high=40, size=rows))

    return pd.DataFrame({
        'country': countries,
        'region': regions,
        'order': orders,
        'order_dt': order_dates,
        'invoice_dt': invoice_dates,
        'delivery_dt': delivery_dates,
        'rep': reps,
        'sku': skus,
        'qty': qty
    })

In [4]:
# Generate 10,000 sample orders for 2022, then derive the elapsed time from
# order to invoice and from order to delivery as timedelta columns.
df = get_sample_data2(year=2022, freq='D', rows=10000)
df['duration (order to cash)'] = df['invoice_dt'] - df['order_dt']
df['duration (order to delivery)'] = df['delivery_dt'] - df['order_dt']
df

Unnamed: 0,country,region,order,order_dt,invoice_dt,delivery_dt,rep,sku,qty,duration (order to cash),duration (order to delivery)
0,France,South,4763678,2022-01-06,2022-01-08,2022-01-11,Mr S. Baker,Samsung Galaxy J2 Core,15,2 days,5 days
1,Spain,East,4614000,2022-12-02,2022-12-04,2022-12-07,Mr. B. Johnson,Huawei P30,27,2 days,5 days
2,Portugal,North,4719580,2022-04-22,2022-04-23,2022-04-26,Mr K. Starmer,Apple iPhone 11 Pro,27,1 days,4 days
3,Germany,North,4602829,2022-01-11,2022-01-12,2022-01-15,Mr S. Baker,Xiaomi Redmi Note 8 Pro,2,1 days,4 days
4,Germany,West,4673724,2022-12-02,2022-12-03,2022-12-06,Mr S. Baker,Samsung Galaxy S9,11,1 days,4 days
...,...,...,...,...,...,...,...,...,...,...,...
9995,Spain,East,4626528,2022-06-10,2022-06-11,2022-06-15,Mr. B. Johnson,Huawei Mate 20,1,1 days,5 days
9996,Germany,North,4663709,2022-10-03,2022-10-05,2022-10-08,Mr K. Starmer,Xiaomi Redmi Note 8,35,2 days,5 days
9997,Portugal,North,4668094,2022-03-21,2022-03-22,2022-03-25,Mr S. Baker,Samsung Galaxy A50,23,1 days,4 days
9998,Germany,East,4660661,2022-06-23,2022-06-25,2022-06-29,Mr. D. Davis,Xiaomi Redmi Note 8,37,2 days,6 days


In [5]:
%%piper

df >> count('rep') 

Unnamed: 0_level_0,n,%,cum %
rep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mr. B. Johnson,2793,27.93,27.93
Mrs T. May,1867,18.67,46.6
Mr S. Baker,1783,17.83,64.43
Mr K. Starmer,1781,17.81,82.24
Mr. D. Davis,933,9.33,91.57
Mrs R. Johnson,843,8.43,100.0
