In [185]:
import random
import string

import numpy as np
import pandas as pd

from IPython.display import display

In [282]:
def prefix_generator(size):
    """
    Build a random numeric phone prefix.

    Picks `size` characters from `string.digits` (sampling with replacement,
    so digits may repeat and the prefix may start with '0') and glues them
    together into a single string of length `size`.
    """
    picked_digits = random.choices(string.digits, k=size)
    return ''.join(picked_digits)


# Testing: show one sample prefix for each of several lengths
for sample_size in (1, 3, 5, 7):
    display(prefix_generator(sample_size))

'5'

'149'

'19684'

'6314760'

In [286]:
def price_list_generator(max_size, max_prefix_size, min_price=0.1, max_price=3.0):
    """
    Build a random, sorted price list of `[prefix, price]` pairs.

        - size : the number of entries is drawn uniformly between 1 and `max_size`
        - prefixes : each prefix is a digit string whose length is drawn between 1 and `max_prefix_size`
        - prices : each price is drawn uniformly from [min_price, max_price] and rounded to 2 decimals

    The returned list is sorted, i.e. ordered by prefix string first
    (lexicographically, not numerically) and by price on ties.
    """
    # The entry count is drawn before any entry so the random stream is
    # consumed in a fixed order: count, then (length, digits, price) per entry
    entry_count = random.randint(1, max_size)

    entries = []
    for _ in range(entry_count):
        prefix_length = random.randint(1, max_prefix_size)
        prefix = prefix_generator(prefix_length)
        price = np.round(random.uniform(min_price, max_price), 2)
        entries.append([prefix, price])

    # Lists compare element by element: prefix first, then price
    entries.sort()
    return entries


# Testing: sample price lists with increasing upper bounds on the list size
for size_cap in (3, 5, 7):
    display(price_list_generator(max_size=size_cap, max_prefix_size=5))

[['1187', 2.48], ['12999', 0.91], ['70', 1.34]]

[['18', 1.41], ['465', 1.74], ['5', 1.58], ['53', 2.27], ['75', 0.31]]

[['3', 0.48], ['42282', 1.9], ['627', 1.7], ['656', 2.28]]

In [342]:
def _operator_name(index):
    """
    Return the spreadsheet-style label for a zero-based operator index
    (0 -> 'A', 25 -> 'Z', 26 -> 'AA', 27 -> 'AB', ...).
    """
    letters = string.ascii_uppercase
    name = ""
    index += 1  # bijective base-26 is defined on 1-based values
    while index > 0:
        index, remainder = divmod(index - 1, len(letters))
        name = letters[remainder] + name
    return name


def create_prices_dataset(num_operators, max_size=10, max_prefix_size=5):
    """
    Helper function for creating a dataset holding the operators, the prefixes for
    each operator, and the price for each prefix

    Parameters:
        - num_operators : number of operators to generate (must be >= 0)
        - max_size : upper bound on the number of prefixes per operator
        - max_prefix_size : upper bound on the length of each prefix

    Returns a DataFrame with the columns "Prefix", "Price" and "Operator".
    Each operator's rows keep their own 0-based index, so index values repeat
    across operators. With `num_operators=0` an empty DataFrame with the same
    columns is returned.

    Raises ValueError if `num_operators` is negative.
    """
    if num_operators < 0:
        raise ValueError(f"num_operators must be >= 0, got {num_operators}")

    # Create one DataFrame per operator holding its prefixes and prices
    df_list = []

    for operator_index in range(num_operators):
        df = pd.DataFrame(
            price_list_generator(max_size=max_size, max_prefix_size=max_prefix_size),
            columns=["Prefix", "Price"],
        )
        # Since operator names are unknown, they are labelled 'A', 'B', ..., 'Z', 'AA', ...
        # so the number of operators is not capped by the alphabet size
        df["Operator"] = _operator_name(operator_index)

        df_list.append(df)

    # pd.concat refuses an empty list, so build the empty result explicitly
    if not df_list:
        return pd.DataFrame(columns=["Prefix", "Price", "Operator"])

    # Return a concatenated dataframe
    return pd.concat(df_list)


# Testing
# Remove duplicated prefixes: de-duplication runs over the whole dataset and
# keeps the first occurrence, so a prefix stays under at most one operator
price_df = create_prices_dataset(num_operators=3).drop_duplicates("Prefix")

# Group by the scalar column name: a one-element list key makes pandas emit a
# FutureWarning (1.5.x) or yield 1-tuple keys (2.x). The key is not needed here.
for _, prices in price_df.groupby("Operator"):
    display(prices)

price_df.to_csv("price-list.csv", header=True, index=False)

Unnamed: 0,Prefix,Price,Operator
0,2397,1.6,A
1,32,2.43,A
2,784,1.07,A
3,87,1.1,A
4,9655,2.03,A


Unnamed: 0,Prefix,Price,Operator
0,3,2.02,B
1,48,0.55,B
2,6776,0.21,B
4,9773,0.52,B


Unnamed: 0,Prefix,Price,Operator
0,1162,1.43,C
1,3476,0.37,C
2,50371,2.57,C
3,819,1.23,C
4,933,1.57,C
