In [401]:
import random
import string

import numpy as np
import pandas as pd

from IPython.display import display, HTML

# Helper Functions

In [282]:
def prefix_generator(size):
    """
    Helper function building a random string made of `size` decimal digits

    @arg size (int): how many digits to draw (with replacement)

    @return : the drawn digits, concatenated into a single string
    """
    drawn_digits = random.choices(string.digits, k=size)
    return "".join(drawn_digits)


# Testing: show one random prefix for each of a few lengths
for size in (1, 3, 5, 7):
    display(prefix_generator(size))

'5'

'149'

'19684'

'6314760'

In [286]:
def price_list_generator(max_size, max_prefix_size, min_price=0.1, max_price=3.0):
    """
    Helper function building a random price list, i.e. a sorted list of [prefix, price] pairs

    @arg max_size (int): the number of pairs is drawn between 1 and `max_size` (inclusive)
    @arg max_prefix_size (int): each prefix length is drawn between 1 and `max_prefix_size` (inclusive)
    @arg min_price (float): lower bound of the uniform distribution the prices are drawn from
    @arg max_price (float): upper bound of the uniform distribution the prices are drawn from

    @return : the list of [prefix, price] pairs, sorted by prefix (then by price), each price
              being rounded to 2 decimals
    """
    # The list size is drawn first, then for each entry: prefix length, prefix digits, price
    entry_count = random.randint(1, max_size)

    entries = []
    for _ in range(entry_count):
        prefix_length = random.randint(1, max_prefix_size)
        prefix = prefix_generator(prefix_length)
        price = np.round(random.uniform(min_price, max_price), 2)
        entries.append([prefix, price])

    entries.sort()
    return entries


# Testing: show one random price list for each of a few maximum sizes
for size in (3, 5, 7):
    display(price_list_generator(max_size=size, max_prefix_size=5))

[['1187', 2.48], ['12999', 0.91], ['70', 1.34]]

[['18', 1.41], ['465', 1.74], ['5', 1.58], ['53', 2.27], ['75', 0.31]]

[['3', 0.48], ['42282', 1.9], ['627', 1.7], ['656', 2.28]]

In [394]:
def _operator_name(position):
    """
    Helper function converting a 0-based position into a spreadsheet-style
    label: 0 -> 'A', 25 -> 'Z', 26 -> 'AA', 27 -> 'AB', ...
    """
    letters = string.ascii_uppercase
    label = ""
    remaining = position + 1

    while remaining > 0:
        remaining, offset = divmod(remaining - 1, len(letters))
        label = letters[offset] + label

    return label


def create_prices_dataset(num_operators, max_size, max_prefix_size=5):
    """
    Helper function for creating a dataset holding the operators, the prefixes for
    each operator, and the price for each prefix

    @arg num_operators (int): how many operators to generate; they are named 'A', 'B', ..., 'Z',
                              then 'AA', 'AB', ... so the count is not capped at 26
    @arg max_size (int): max number of prefixes in each operator's price list
    @arg max_prefix_size (int): max number of digits of each prefix

    @return : a pd.DataFrame with the columns Prefix / Price / Operator. The per-operator frames
              are concatenated as they are, so index labels restart at 0 for every operator.
              When `num_operators` is 0 (or negative), an empty frame with the same columns
              is returned.
    """
    columns = ["Prefix", "Price", "Operator"]

    # Since operator names are unknown, we generate N labels ['A', 'B', ...]
    operators = [_operator_name(position) for position in range(num_operators)]

    # One DataFrame per operator: its prefixes, the price of each prefix, and the operator name
    df_list = []

    for op in operators:
        df = pd.DataFrame(price_list_generator(max_size, max_prefix_size), columns=["Prefix", "Price"])
        df["Operator"] = op

        df_list.append(df)

    # `pd.concat` raises on an empty list, so the "no operator" case is handled explicitly
    if not df_list:
        return pd.DataFrame(columns=columns)

    # Return a concatenated dataframe
    return pd.concat(df_list)

### Generate a random dataset

In [422]:
NUM_OPERATORS = 3    # Number of operators
MAX_SIZE      = 1000 # Max list size for each operator
SEED          = 42   # Seed of the `random` module, so that re-running the notebook gives the same dataset

random.seed(SEED)
price_df = create_prices_dataset(num_operators=NUM_OPERATORS, max_size=MAX_SIZE)

# Remove the prefixes duplicated *within* one operator's list (the first occurrence is kept).
# The operator is part of the key on purpose: different operators may price the same prefix,
# and de-duplicating on "Prefix" alone would keep it for the first operator only.
price_df = price_df.drop_duplicates(subset=["Operator", "Prefix"])

###### Display a sub-dataset for each operator

In [418]:
# Show 5 randomly picked rows of each operator's price list, ordered by prefix.
# The grouping key is the plain column name rather than a 1-element list: with a list, pandas
# yields 1-tuples as group keys (and older versions emit a FutureWarning about it).
# The key itself is not needed here, hence `_`.
for _, prices in price_df.groupby("Operator"):
    display(prices.sample(frac=1).head(5).sort_values(by="Prefix"))

Unnamed: 0,Prefix,Price,Operator
297,275,1.23,A
491,48742,0.41,A
558,558,0.27,A
581,58277,2.2,A
625,627,1.94,A


Unnamed: 0,Prefix,Price,Operator
11,558,0.35,B
55,416,1.23,B
73,5674,2.72,B
91,7099,0.25,B
116,902,0.56,B


Unnamed: 0,Prefix,Price,Operator
126,177,0.42,C
380,53494,1.47,C
496,700,2.22,C
520,742,0.68,C
533,7618,0.58,C


### Adding save / load capabilities

##### Save to CSV format

In [None]:
# Write the price list to disk: column names as the first row, row index left out
price_df.to_csv(
    "price-list.csv",
    index=False,
    header=True,
)

##### Load from CSV

In [None]:
# Prefixes are digit strings: force the column to `str`. Left to type inference, pandas parses
# it as integers, which drops leading zeros ("0123" -> 123) and breaks the prefix matching
# (`str.startswith` only accepts strings).
price_df = pd.read_csv("price-list.csv", dtype={"Prefix": str})

# Extract Best Prefix Match for each Operator

#### Generating random numbers to call

In [414]:
CALL_SIZE = 100    # How many numbers to call
NUM_SIZE  = 10     # The size of each number (e.g. we'd want to call 0123456789, which has a size of 10 digits)

# A number to call is just a random string of digits, so `prefix_generator` is reused to build them
call_list = []
for _ in range(CALL_SIZE):
    call_list.append(prefix_generator(NUM_SIZE))

print("We'll be calling these numbers :")
display(call_list[:5])

We'll be calling these numbers :


['6349809787', '2217160023', '6843094149', '3958886792', '4930352079']

### Helper Functions

In [525]:
def get_operator_best_match(number, data):
    """
    Helper function to extract the best (i.e. longest) prefix match of each operator

    @arg number (string): the number to call
    @arg data (pd.DataFrame): the prices dataset, with the columns Operator / Prefix / Price

    @return : a pd.DataFrame holding the data (Operator / Prefix that matches best / Price),
              one row per operator having at least one matching prefix, sorted by operator,
              with a fresh 0..n-1 index. It is empty (same columns) when nothing matches.
    """
    number = str(number)

    # Prefixes are compared as text, so a dataset whose prefixes were parsed as integers
    # (e.g. after a CSV round-trip) does not make `startswith` raise
    prefix_text = data["Prefix"].astype(str)

    # Get all prefixes that match the number to call. The mask is a plain boolean array, so it
    # is applied by position and works whatever the index of `data` is (duplicated labels included)
    is_match = prefix_text.map(number.startswith).to_numpy(dtype=bool)

    candidates = data.loc[is_match, ["Operator", "Prefix", "Price"]].assign(
        _prefix_len=prefix_text.map(len).to_numpy()[is_match]
    )

    # Find the longest prefix for each operator: the length is compared explicitly instead of
    # relying on the order of the rows. The sort is stable, so when the same prefix appears
    # twice for an operator, the last row of the dataset wins.
    best = (
        candidates
        .sort_values(["Operator", "_prefix_len"], kind="mergesort")
        .drop_duplicates("Operator", keep="last")
        .drop(columns="_prefix_len")
        .reset_index(drop=True)
    )

    return best

In [526]:
def cheapest_operator(price_per_operator):
    """
    Helper function to find the cheapest operator for a given number

    @arg price_per_operator (pd.DataFrame): a DataFrame holding the data (Operator/Prefix that matches best/Price)

    @return: the operator name (the first one in row order when several share the lowest price),
             or None when `price_per_operator` is empty, i.e. no operator can route the number
    """
    if price_per_operator.empty:
        return None

    # `idxmin` returns an index *label*, not a position. On a fresh 0..n-1 index both coincide,
    # so the result can safely be used with `iloc`, whatever the index of the input frame is
    prices = price_per_operator["Price"].reset_index(drop=True)
    cheapest_position = prices.idxmin()

    return price_per_operator["Operator"].iloc[cheapest_position]

#### Unit Test

In [533]:
# The prices are fixed rather than drawn at random: the assertions below state which operator
# is the cheapest, which can only be known in advance with deterministic prices
test_df = pd.DataFrame.from_dict({
    "Prefix"   : ["1", "12", "123", "12", "123456", "1", "99"],
    "Price"    : [0.44, 2.93, 1.46, 2.33, 1.66, 1.37, 2.14],
    "Operator" : ["A", "A", "A", "B", "B", "C", "C"]
})

display(test_df)

num = "123456789"
match_df = get_operator_best_match(num, test_df)

display(HTML("<h3>We can see that the function behaves correctly by returning the expected "
             "values, which are '123' for 'A' and '123456' for 'B' and '1' for 'C'</h3>"))
display(match_df)

# Check the claim above instead of only displaying it
assert match_df["Operator"].tolist() == ["A", "B", "C"]
assert match_df["Prefix"].tolist() == ["123", "123456", "1"]

display(HTML("<h3>Furthermore, the assertion that the cheapest operator is 'C' is true.</h3>"))
display(HTML("<h4>cheapest_operator(match_df) == 'C'</h4>"))

# Best matches cost 1.46 (A), 1.66 (B) and 1.37 (C), hence 'C'
assert cheapest_operator(match_df) == "C"

Unnamed: 0,Prefix,Price,Operator
0,1,0.44,A
1,12,2.93,A
2,123,1.46,A
3,12,2.33,B
4,123456,1.66,B
5,1,1.37,C
6,99,2.14,C


Unnamed: 0,Operator,Prefix,Price
0,A,123,1.46
1,B,123456,1.66
2,C,1,1.37


In [None]:
# NOTE(review): never-executed scratch cell, nothing reads this list; presumably a leftover
# from drafting the unit test prices -- confirm and delete
[0.4, 2.9, 1.4]