In [None]:
# when rebalancing a dataset is required in a binary setup (e.g. 0 vs 1)
# this function can be used to calculate different class ratios

# for example, if a dataset contains 15% churners, we may want to test
# how the model behaves across a list of ratios from 15% up to 90%
def calculate_balance_rates(original_balance_rate,n_max=None,ranges=0.1):
    """Build a list of class ratios starting at the original one.

    The first entry is always ``original_balance_rate``. It is followed by
    every multiple of ``ranges`` that is strictly greater than the original
    rate and strictly below 1, in ascending order.

    Args:
        original_balance_rate: Ratio currently observed in the data
            (e.g. 0.15).
        n_max: Optional cap on the number of returned rates. When given and
            smaller than the number of rates, the list is cut with
            ``rates[0:n_max]``.
        ranges: Step between consecutive grid rates. Must be positive.

    Returns:
        A list of rates, e.g. ``[0.15, 0.2, 0.3, ..., 0.9]``.

    Raises:
        ValueError: If ``ranges`` is not positive (the grid could never be
            built and the loop would not terminate).
    """
    from math import ceil

    if ranges <= 0:
        raise ValueError("ranges must be a positive step size")

    # Decimal places used to strip binary floating point noise such as
    # 3 * 0.1 == 0.30000000000000004.
    precision = 10

    rates = [original_balance_rate]

    # Work with integer multiples of the step instead of accumulating a
    # running sum, so rounding errors cannot drift or stall the loop.
    step = ceil(round(original_balance_rate / ranges, precision))
    next_rate = round(step * ranges, precision)

    # If the original rate already lies on the grid, skip that grid point so
    # the same rate is not listed twice.
    if next_rate <= round(original_balance_rate, precision):
        step += 1
        next_rate = round(step * ranges, precision)

    while next_rate < 1:
        rates.append(next_rate)
        step += 1
        next_rate = round(step * ranges, precision)

    if n_max is not None and n_max < len(rates):
        rates = rates[0:n_max]

    return rates

In [None]:
# Lift tells how much better a classifier predicts compared to a random selection. It compares the precision to the overall churn rate in the test set
def lift_score(actuals, predictions):
    """Return the lift of ``predictions`` with respect to ``actuals``.

    Lift is computed as the precision of the predictions divided by
    ``actuals.sum() / actuals.size``.

    Args:
        actuals: True labels. Must expose ``.sum()`` and ``.size``.
            NOTE(review): assumes a 0/1 encoded numpy array or pandas
            Series, so that sum / size is the share of positives - confirm
            against callers.
        predictions: Predicted labels, passed to ``precision_score``
            together with ``actuals``.

    Returns:
        The ratio precision / (actuals.sum() / actuals.size). The divisor is
        not guarded against being zero.
    """
    from sklearn.metrics import precision_score

    hit_rate = precision_score(actuals, predictions)
    base_rate = actuals.sum() / actuals.size

    return hit_rate / base_rate

In [None]:
def get_eval_metrics(y, predictions):
    """Collect several evaluation metrics for ``predictions`` in one dict.

    Relies on ``accuracy_score``, ``recall_score``, ``precision_score``,
    ``f1_score``, ``roc_curve`` and ``auc`` being available in the enclosing
    scope (presumably imported from ``sklearn.metrics`` - verify), and on
    the ``lift_score`` helper defined in this file.

    Args:
        y: True labels.
        predictions: Predicted values; the same object is passed to every
            metric, including ``roc_curve``.
            NOTE(review): if these are hard class labels rather than scores,
            the resulting AUC may be coarse - confirm intent.

    Returns:
        A dict with the keys ``accuracy``, ``recall``, ``precision``,
        ``lift``, ``f1`` and ``auc_score``.
    """
    scores = {
        'accuracy': accuracy_score(y, predictions),
        'recall': recall_score(y, predictions),
        'precision': precision_score(y, predictions),
        'lift': lift_score(y, predictions),
        'f1': f1_score(y, predictions),
    }

    # The area under the ROC curve is derived from the curve's coordinates;
    # the thresholds returned by roc_curve are not needed.
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y, predictions)
    scores['auc_score'] = auc(false_pos_rate, true_pos_rate)

    return scores

In [None]:
def y_fn(y, desired_minority_rate, minority_class = 0):
    """Compute per-class sample counts that reach a desired minority rate.

    The minority class is detected from the data: class 1 is the minority
    when more than half of ``y`` equals 0, otherwise class 0 is. The
    majority class keeps its current count. The minority count is raised
    only when ``desired_minority_rate`` exceeds the current minority rate;
    otherwise the current minority count is returned unchanged.

    Args:
        y: Binary labels (0/1) supporting boolean-mask indexing such as
            ``y[y == 0]`` (e.g. a numpy array or pandas Series).
        desired_minority_rate: Target share of the minority class in the
            resampled data. Must be below 1.
        minority_class: Ignored. Kept only for backward compatibility; the
            minority class is always inferred from ``y``.

    Returns:
        A dict mapping each class label (0 and 1) to its target number of
        samples.

    Raises:
        ValueError: If ``desired_minority_rate`` is 1 or greater, because no
            finite, positive sample size can reach such a rate while keeping
            the majority samples.
    """
    from math import ceil

    if desired_minority_rate >= 1:
        raise ValueError("desired_minority_rate must be lower than 1")

    total = len(y)
    count0 = len(y[y==0])
    count1 = len(y[y==1])

    # Class 0 is the majority only when it covers strictly more than half
    # of the samples; on a tie class 1 is treated as the majority.
    if count0/total > 0.5:
        minority_class, majority_class = 1, 0
        y_min, y_maj = count1, count0
    else:
        minority_class, majority_class = 0, 1
        y_min, y_maj = count0, count1

    # Current minority share, taken as the complement of the majority share.
    min_rate = 1 - y_maj/total

    new_sample_split = {}
    new_sample_split[majority_class] = y_maj
    if desired_minority_rate > min_rate:
        # Total size at which the untouched majority count makes up
        # (1 - desired_minority_rate) of the data, then the minority share
        # of that total.
        sample_size_rs = ceil(y_maj/(1-desired_minority_rate))
        new_sample_split[minority_class] = ceil(sample_size_rs*desired_minority_rate)
    else:
        new_sample_split[minority_class] = y_min

    return new_sample_split
