In [2]:
# Helper Functions

def load_file(data_file_path):
    """Read the semicolon-separated file at *data_file_path* into a DataFrame."""
    return pd.read_csv(data_file_path, sep=";")
  
    
def plot_graph_new(results, max_calls):
    """Plot the 'expected' value of the first *max_calls* entries of *results*.

    results is indexed 0..max_calls-1 and each entry is read via
    ['expected']; entry k is drawn at x = k + 1. The figure is displayed
    with plt.show() and nothing is returned.
    """
    call_numbers = list(range(1, max_calls + 1))
    expected_rates = [results[call_no - 1]['expected'] for call_no in call_numbers]

    plt.title("Plot of Expected Success Per Call Rate")
    plt.plot(call_numbers, expected_rates)
    # Black reference lines at x = 0 and y = 0.
    plt.axvline(x=0, color="black")
    plt.axhline(y=0, color="black")
    # One tick per call number.
    plt.xticks(np.arange(1, max_calls + 1, 1))
    plt.show()
    

def div(a, b):
    """Return a / b, or 0.0 when the divisor is zero.

    The divisor is compared against zero directly rather than truncated
    with int(), so fractional divisors between -1 and 1 (e.g. 0.5) are
    divided normally instead of being mistaken for zero.
    """
    if b == 0:
        return 0.0
    return a / b
    

# Used for creating all possible combinations of the features.
def powerset(iterable):
    """Return an iterator over every combination of the items in *iterable*.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)

    Combinations are produced as tuples, shortest first. The result is a
    lazy itertools.chain; wrap it in list() to materialise it.
    """
    # Materialise first so one-shot iterables (generators, iterators), which
    # have no len(), are accepted as well as lists and tuples.
    items = list(iterable)
    return itertools.chain.from_iterable(
        itertools.combinations(items, size) for size in range(len(items) + 1)
    )


def construct_dict(feature_comb):
    """Label the positional entries of *feature_comb* with feature names.

    Positions are read as: 0 -> education, 1 -> marital, 2 -> job,
    3 -> default, 4 -> loan, 5 -> housing.
    """
    # (name, position) pairs listed in the order the keys are inserted;
    # note that job is taken from slot 2 and marital from slot 1.
    slots = (
        ('education', 0),
        ('job', 2),
        ('marital', 1),
        ('default', 3),
        ('loan', 4),
        ('housing', 5),
    )
    return {name: feature_comb[position] for name, position in slots}


# This was the old metric (reward per call rate).
# def compute_expected_reward_feature_set_new(fs_df, no_calls_considered):
#     expected_values_calls = []
#     len_df = len(fs_df)
#     for i in range(1, no_calls_considered + 1):
#         expected_values_calls.append({'neg_value':0.0, 'pos_value':0.0, 'count':0, 'expected':0.0})
#         for index, row in fs_df.iterrows():
#             no_calls = row['campaign']
#             if no_calls <= i:
#                 if row['y'] == "yes":
#                     expected_values_calls[i-1]['pos_value'] += ((no_calls_considered+1) - no_calls)
#                 else:
#                     expected_values_calls[i-1]['neg_value'] += (-no_calls)
#             else:
#                 expected_values_calls[i-1]['neg_value'] += (-i)
#             expected_values_calls[i-1]['count'] += 1
#     for loc, item in enumerate(expected_values_calls):
#         expected_values_calls[loc]['expected'] = (expected_values_calls[loc]['pos_value'] + expected_values_calls[loc]['neg_value'])/len_df
#     return expected_values_calls


# This is the new metric (success per call rate).
def compute_expected_succ_per_call_rate_feature_set(fs_df, no_calls_considered):
    """Compute successes per call for each call cap 1..no_calls_considered.

    For a cap of i calls, every row of fs_df contributes as follows:
      * 'campaign' <= i: the row costs 'campaign' calls, and counts as a
        success when its 'y' value equals "yes";
      * otherwise: the row costs i calls and is not a success.

    Returns a list with one dict per cap (index 0 is cap 1), each holding
    'succ', 'total_calls' and 'expected' (= succ / total_calls, or 0.0 when
    no calls were counted).
    """
    # Extract the two columns once instead of calling iterrows() for every cap.
    rows = list(zip(fs_df['campaign'].tolist(), (fs_df['y'] == "yes").tolist()))

    expected_values_call_nums = []
    for cap in range(1, no_calls_considered + 1):
        succ = 0
        total_calls = 0
        for no_calls, is_success in rows:
            if no_calls <= cap:
                # Calls to a client who declined within the cap were still
                # made, so they count toward the denominator too.
                total_calls += no_calls
                if is_success:
                    succ += 1
            else:
                total_calls += cap
        # Guard the division so an empty feature set yields 0.0.
        expected = succ / total_calls if total_calls else 0.0
        expected_values_call_nums.append(
            {'succ': succ, 'total_calls': total_calls, 'expected': expected}
        )
    return expected_values_call_nums


def compute_optimal_call_no(results):
    """Return the 0-based index of the entry with the largest 'expected' value.

    Ties go to the earliest entry. Returns -1 when even the best 'expected'
    value is negative.
    """
    best_index, best_entry = max(enumerate(results), key=lambda pair: pair[1]['expected'])
    if best_entry['expected'] < 0.0:
        return -1
    return best_index


# Given a dictionary of what attributes comprise a feature set, we can get all rows corresponding to this feature set.
def extract_rows_feature_set(fs_df, feature_labels=None):
    """Return the rows of fs_df whose columns match the allowed labels.

    feature_labels maps a column name to the labels accepted for that
    column. A row is kept only if, for every column listed, its value is
    one of the accepted labels. A bare string is treated as a single
    accepted label, and an empty label list matches no rows. When
    feature_labels is omitted, the default feature set defined below is used.
    """
    if feature_labels is None:
        # Built per call so no dict is shared between invocations.
        feature_labels = {
            'education': ['tertiary', 'unknown'],
            'job': ['management', 'technician', 'blue-collar'],
            'marital': ['single'],
            'default': ['no'],
            'housing': ['no'],
            'loan': ['no'],
        }
    for column, allowed in feature_labels.items():
        if isinstance(allowed, str):
            allowed = [allowed]
        # Membership mask instead of a string-built query expression, so
        # labels containing quotes and column names that are not valid
        # identifiers work as well.
        fs_df = fs_df[fs_df[column].isin(allowed)]
    return fs_df