In [2]:
import pandas as pd

# Read the workbook of baseline_st combinations and preview its first rows
st_combinations_path = "./EDA/Datasets/st_combinations.xlsx"
df = pd.read_excel(st_combinations_path)
df.head()

Unnamed: 0.1,Unnamed: 0,delivery_country,product_line,task_type_en,baseline_st
0,97,MY,Non-Auction Ads,Promote,0.54
1,131,TH,Non-Auction Ads,Promote,0.54
2,128,US,Non-Auction Ads,Promote,0.54
3,123,DE,Non-Auction Ads,Promote,0.54
4,121,TR,Non-Auction Ads,Promote,0.54


In [3]:
# Build a lookup keyed by (delivery_country, product_line, task_type_en).
# The columns are zipped together instead of walking the frame row by row:
# DataFrame.iterrows() creates a Series per row, which is slow and does not
# preserve column dtypes. If a key appears more than once, the last row wins.
st_dict = {
    (country, product_line, task_type): baseline
    for country, product_line, task_type, baseline in zip(
        df['delivery_country'],
        df['product_line'],
        df['task_type_en'],
        df['baseline_st'],
    )
}

# Check the first few entries in the dictionary
dict(list(st_dict.items())[:5])

{('MY', 'Non-Auction Ads', 'Promote'): 0.54,
 ('TH', 'Non-Auction Ads', 'Promote'): 0.54,
 ('US', 'Non-Auction Ads', 'Promote'): 0.54,
 ('DE', 'Non-Auction Ads', 'Promote'): 0.54,
 ('TR', 'Non-Auction Ads', 'Promote'): 0.54}

In [4]:
def get_baseline_st(delivery_country, product_line, task_type_en):
    """
    Look up the baseline_st stored in ``st_dict`` for one combination.

    Parameters:
    - delivery_country (str): The delivery country, e.g. "MY"
    - product_line (str): The product line, e.g. "Non-Auction Ads"
    - task_type_en (str): The task type in English, e.g. "Promote"

    Returns:
    - float: The stored baseline_st, or None when the combination has no entry.
    """
    lookup_key = (delivery_country, product_line, task_type_en)
    # dict.get already falls back to None for a missing key
    return st_dict.get(lookup_key)

# Smoke-check the lookup with a (country, product line, task type) sample
test_country, test_product_line, test_task_type_en = "MY", "Non-Auction Ads", "Promote"

get_baseline_st(test_country, test_product_line, test_task_type_en)


0.54

In [None]:
def _clipped_min_max(value, lower, upper):
    """Min-max scale ``value`` from [lower, upper] onto [0, 1], clipping anything outside the range."""
    if upper <= lower:
        raise ValueError(
            f"upper bound ({upper}) must be greater than lower bound ({lower})"
        )
    scaled = (value - lower) / (upper - lower)
    # Inputs beyond the reference range would otherwise land outside [0, 1]
    return min(max(scaled, 0.0), 1.0)


def compute_normalized_score(
    baseline_st,
    days_diff,
    *,
    min_baseline_st=0.54,
    max_baseline_st=7.59,
    min_days_diff=0,
    max_days_diff=37,
):
    """
    Computes the normalized score based on min-max normalization for the given inputs.

    Each input is scaled against its reference range and clipped to [0, 1], so
    the returned average is always within [0, 1] even when an input falls
    outside the reference range.

    Parameters:
    - baseline_st (float): The baseline_st value
    - days_diff (int): The days_diff value
    - min_baseline_st (float): Lower bound of the baseline_st range (default 0.54)
    - max_baseline_st (float): Upper bound of the baseline_st range (default 7.59)
    - min_days_diff (int): Lower bound of the days_diff range (default 0)
    - max_days_diff (int): Upper bound of the days_diff range (default 37)

    Returns:
    - float: The average normalized score between the two inputs in the range [0, 1]

    Raises:
    - ValueError: If an upper bound is not greater than its lower bound.
    """
    normalized_baseline_st = _clipped_min_max(
        baseline_st, min_baseline_st, max_baseline_st
    )
    normalized_days_diff = _clipped_min_max(days_diff, min_days_diff, max_days_diff)

    # Both components carry equal weight
    return (normalized_baseline_st + normalized_days_diff) / 2

# Try the scorer on one sample pair (baseline_st=1.0, days_diff=10)
test_baseline_st, test_days_diff = 1.0, 10

compute_normalized_score(test_baseline_st, test_days_diff)

In [None]:
def score(x):
    """
    Tent-shaped score that peaks at 0.4 and at 0.6.

    Within 0.1 of 0.4 the score falls linearly from 1 (at 0.4) towards 0; the
    same shape is applied around 0.6 for values the first check did not match.
    Anything further than 0.1 from both peaks (roughly x <= 0.3 or x >= 0.7)
    scores 0.

    NOTE(review): values well below 0.4 or well above 0.6 therefore score 0
    rather than staying high -- confirm this drop-off is intended.
    """
    # The peaks are tried in order, so 0.4 takes precedence over 0.6
    for peak in (0.4, 0.6):
        distance = abs(x - peak)
        if distance < 0.1:
            return 1 - distance * 10
    return 0

def calculate_confidence(A):
    """
    Collapse a collection of values into a single confidence figure.

    The first rule that matches decides the result:

    - every value <= 0.4: the mean of ``1 - x``;
    - any value >= 0.6: the largest value in ``A``;
    - every value in [0, 0.6): ``0.6 * mean(score(x))``;
    - otherwise (e.g. a negative value alongside values in (0.4, 0.6), or a
      NaN): ``None``.

    Parameters:
    - A (iterable of float): The values to aggregate. Any iterable is accepted.

    Returns:
    - float or None: The confidence, or None when ``A`` is empty or no rule applies.
    """
    # Materialise once so generators work and the values can be scanned repeatedly
    values = list(A)
    n = len(values)

    # An empty input has no defined confidence; bail out instead of dividing by zero
    if n == 0:
        return None

    if all(x <= 0.4 for x in values):
        return sum(1 - x for x in values) / n

    if any(x >= 0.6 for x in values):
        return max(values)

    if all(0 <= x < 0.6 for x in values):
        total_score = sum(score(x) for x in values)
        # Scale the mean score by 0.6 for this mixed / mid-range case
        return 0.6 * (total_score / n)

    # Reached when a value is negative or NaN and none of the rules above matched
    return None