In [1]:
from piper.defaults import *

piper v0.1.0: Monday, 29 March 2021 19:36:09


In [2]:
import pandas as pd
import numpy as np


def calc_weights(weights):
    '''
    Normalise a collection of relative weights so that they sum to 1.

    Parameters
    ----------
    weights : iterable of numbers
        Relative weights, e.g. ``[3, 1, 2]``. Any iterable is accepted,
        including one-shot iterators and generators.

    Returns
    -------
    list
        Each weight divided by the total of all weights, in input order.
        An empty input yields an empty list.

    Raises
    ------
    ZeroDivisionError
        If the weights are plain Python numbers whose total is zero.
    '''
    # Materialise first: a generator would otherwise be drained by sum()
    # before its elements could be divided.
    values = list(weights)

    # Compute the total once rather than once per element.
    total = sum(values)

    return [value / total for value in values]


def get_skus(astype='list'):
    '''
    Return the sample phone SKU catalogue in the requested container.

    A random unit price (np.random.uniform with low=400, high=800, rounded
    to 2 decimals) is drawn for every SKU on each call, whichever output
    format is requested.

    Parameters
    ----------
    astype : str, default 'list'
        'list'      -> list of SKU names only
        'dataframe' -> DataFrame with 'sku' and 'unit_price' columns
        any other   -> list of {'sku': ..., 'unit_price': ...} dictionaries

    Returns
    -------
    list or pd.DataFrame
    '''
    sku_names = [
        'Apple iPhone 11', 'Apple iPhone 11 Pro', 'Apple iPhone 11 Pro Max',
        'Apple iPhone XR', 'Apple iPhone XS', 'Apple iPhone XS Max',
        'Galaxy S10+', 'Galaxy S10e', 'Huawei Mate 20', 'Huawei Mate 20 Pro',
        'Huawei P20', 'Huawei P20 Pro', 'Huawei P20 lite', 'Huawei P30',
        'Huawei P30 Pro', 'Samsung Galaxy A10', 'Samsung Galaxy A20',
        'Samsung Galaxy A50', 'Samsung Galaxy J2 Core', 'Samsung Galaxy S10',
        'Samsung Galaxy S9', 'Samsung Galaxy S9+', 'Xiaomi Redmi Note 7',
        'Xiaomi Redmi Note 7 Pro', 'Xiaomi Redmi Note 8', 'Xiaomi Redmi Note 8 Pro',
    ]

    # Drawn unconditionally, so the global NumPy RNG advances on every call.
    unit_prices = np.random.uniform(low=400, high=800, size=len(sku_names)).round(2)

    records = [{'sku': name, 'unit_price': cost}
               for name, cost in zip(sku_names, unit_prices)]

    if astype == 'dataframe':
        return pd.DataFrame(records)

    return sku_names if astype == 'list' else records

In [3]:
import pandas as pd
import numpy as np


def get_sample_data2(year='2020', freq='D', rows=1000, seed=None):
    '''
    Generate a random sample sales-order dataset.

    Every column is drawn independently from NumPy's global random
    generator, so the result differs between calls unless ``seed`` is given.

    Parameters
    ----------
    year : int or str, default '2020'
        Calendar year used for every order date.
    freq : str, default 'D'
        Currently unused; retained so existing callers keep working.
    rows : int, default 1000
        Number of order lines to generate.
    seed : int, optional
        When not None, passed to ``np.random.seed`` before any value is
        drawn. Note that this reseeds the *global* NumPy generator.

    Returns
    -------
    pd.DataFrame
        One row per order line with the columns 'country', 'region',
        'order', 'order_dt', 'invoice_dt', 'delivery_dt', 'rep', 'sku'
        and 'qty'.
    '''
    if seed is not None:
        np.random.seed(seed)

    # randint's upper bound is exclusive: months are 1-12 and days 1-27,
    # so every generated (month, day) pair is a valid date in any year.
    month = np.random.randint(1, 13, size=rows)
    day = np.random.randint(1, 28, size=rows)
    order_dates = pd.to_datetime(
        pd.DataFrame({'year': year, 'month': month, 'day': day}))

    # Random day offsets: invoice 1-2 days after the order, delivery a
    # further 3-4 days after the invoice. pd.to_timedelta converts the whole
    # integer array at once (and, unlike np.vectorize without otypes, does
    # not reject empty input).
    invoice_lag = pd.to_timedelta(np.random.randint(1, 3, size=rows), unit='D')
    invoice_dates = order_dates + pd.Series(invoice_lag)
    delivery_lag = pd.to_timedelta(np.random.randint(3, 5, size=rows), unit='D')
    delivery_dates = invoice_dates + pd.Series(delivery_lag)

    orders = pd.Series(np.random.randint(low=4600000, high=4800000, size=rows))

    country_list = ['Germany', 'Italy', 'France', 'Spain', 'Sweden',
                    'Portugal', 'Norway', 'Switzerland']
    weights = calc_weights([3, 1, 2, 1, 1, 1, 1, 1])
    countries = pd.Series(np.random.choice(country_list, p=weights, size=rows))

    region_list = ['East', 'West', 'North', 'South']
    weights = calc_weights([4, 2, 4, 1])
    regions = pd.Series(np.random.choice(region_list, p=weights, size=rows))

    rep_list = ['Mr. D. Davis', 'Mr. B. Johnson', 'Mr S. Baker',
                'Mr K. Starmer', 'Mrs T. May', 'Mrs R. Johnson']
    weights = calc_weights([1, 3, 2, 2, 2, 1])
    reps = pd.Series(np.random.choice(rep_list, p=weights, size=rows))

    # Pick a random catalogue row per order line, then take the first column
    # (the SKU name) for all rows in a single positional lookup.
    df_skus = get_skus(astype='dataframe')
    sku_positions = np.random.randint(low=0, high=df_skus.shape[0], size=rows)
    skus = df_skus.iloc[sku_positions, 0].reset_index(drop=True)

    qty = pd.Series(np.random.randint(low=1, high=40, size=rows))

    data_dictionary = {
        'country': countries,
        'region': regions,
        'order': orders,
        'order_dt': order_dates,
        'invoice_dt': invoice_dates,
        'delivery_dt': delivery_dates,
        'rep': reps,
        'sku': skus,
        'qty': qty
    }

    return pd.DataFrame(data_dictionary)

In [4]:
# Generate 10,000 random order lines dated within 2022.
df = get_sample_data2(year=2022, freq='D', rows=10000)

# Elapsed time from the order date to the invoice and to the delivery.
order_date = df['order_dt']
df['duration (order to cash)'] = df['invoice_dt'].sub(order_date)
df['duration (order to delivery)'] = df['delivery_dt'].sub(order_date)

df

Unnamed: 0,country,region,order,order_dt,invoice_dt,delivery_dt,rep,sku,qty,duration (order to cash),duration (order to delivery)
0,Switzerland,West,4629733,2022-07-11,2022-07-13,2022-07-17,Mr S. Baker,Samsung Galaxy S9,28,2 days,6 days
1,Germany,East,4726303,2022-08-13,2022-08-15,2022-08-19,Mr. B. Johnson,Xiaomi Redmi Note 8 Pro,10,2 days,6 days
2,Germany,North,4627912,2022-04-22,2022-04-23,2022-04-26,Mrs T. May,Samsung Galaxy S10,2,1 days,4 days
3,Germany,West,4777111,2022-03-05,2022-03-06,2022-03-10,Mr S. Baker,Samsung Galaxy A10,38,1 days,5 days
4,Germany,East,4685299,2022-05-13,2022-05-15,2022-05-18,Mr. B. Johnson,Huawei P30,14,2 days,5 days
...,...,...,...,...,...,...,...,...,...,...,...
9995,Germany,North,4735026,2022-01-18,2022-01-20,2022-01-24,Mr S. Baker,Samsung Galaxy A50,33,2 days,6 days
9996,Switzerland,South,4734171,2022-12-18,2022-12-20,2022-12-24,Mr. B. Johnson,Huawei P20 Pro,26,2 days,6 days
9997,Germany,East,4601778,2022-06-09,2022-06-11,2022-06-14,Mr. B. Johnson,Apple iPhone XR,4,2 days,5 days
9998,Germany,East,4759191,2022-09-10,2022-09-12,2022-09-16,Mr. B. Johnson,Galaxy S10+,17,2 days,6 days


In [5]:
%%piper

df >> count('rep') 

Unnamed: 0_level_0,n,%,cum %
rep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mr. B. Johnson,2741,27.41,27.41
Mrs T. May,1869,18.69,46.1
Mr S. Baker,1810,18.1,64.2
Mr K. Starmer,1760,17.6,81.8
Mr. D. Davis,932,9.32,91.12
Mrs R. Johnson,888,8.88,100.0
