In [2]:
import numpy as np
import pandas as pd
import math


import random
import scipy


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [22]:
# Load the historical bid table (columns are listed in the next cell's output).
data = pd.read_excel("../input.xlsx")

# Upper price bound of each candidate is COST_COEF times its cost.
COST_COEF = 5
# Number of candidate bids requested from candidateBids and number of
# decision variables handed to the optimizer.
DEAL_SIZE = 10
# Probability level subtracted (and clipped at 0) in the third objective.
threshold = 0.5
# Coefficients of the third objective: excess probability * X * exp(-Y * PDGW).
X = 1e4
Y= 1e3
# Passed through as the PDGW argument of the third objective.
# NOTE(review): the acronym is not explained anywhere in this notebook - document it.
PDGW = 0.5

In [4]:
# Show the available columns; "Outcome" is the label used by trainModel.
data.columns

Index(['NewOldCustomer', 'Continent', 'Region', 'Country', 'Sector', 'Price',
       'Duration', 'Complexity', 'BuyingBehavior', 'Competitors',
       'CoverageType', 'Outcome'],
      dtype='object')

In [5]:
def trainModel(data, size=0.25):
    """Fit a random-forest classifier that predicts the "Outcome" column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing an "Outcome" column (the label); every other
        column is used as a feature. The frame is not modified.
    size : float, default 0.25
        Passed as ``test_size`` to ``train_test_split``. The held-out part
        is discarded; only the training part is used to fit the model.

    Returns
    -------
    RandomForestClassifier
        The fitted classifier.
    """
    target = data["Outcome"]
    features = data.drop(columns="Outcome")

    # Fixed random_state keeps the split reproducible between runs.
    features_train, _, target_train, _ = train_test_split(
        features, target, test_size=size, random_state=0
    )

    forest = RandomForestClassifier(
        n_estimators=100,
        max_depth=20,
        min_samples_split=2,
        random_state=1,
    )
    return forest.fit(features_train, target_train)

In [6]:
# Fit the Outcome classifier on the loaded data (trainModel holds out 25% by
# default and trains on the remainder).
model = trainModel(data)

In [7]:
# ### Method to extract candidate bids based on given parameters. The optimizer will work on the subset of candidates generated by this step


def candidateBids(df, size):
    """Randomly pick up to ``size`` rows whose "Outcome" equals 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an "Outcome" column. It is not modified.
    size : int
        Maximum number of candidates to draw; fewer are returned when
        fewer rows with ``Outcome == 1`` exist.

    Returns
    -------
    tuple
        ``(rows, frame, labels)`` where ``labels`` is the list of sampled
        index labels (in sampling order), ``frame`` is the corresponding
        sub-frame without "Outcome", and ``rows`` is the same data as a
        list of row Series without "Outcome".
    """
    won = df.loc[df["Outcome"] == 1].copy()
    n_picks = min(len(won), size)

    # Sampling is done on index labels, so everything below uses .loc.
    picked = random.sample(list(won.index), n_picks)

    picked_df = won.loc[picked].drop(columns="Outcome")
    picked_rows = [won.loc[label].drop("Outcome") for label in picked]
    return picked_rows, picked_df, picked




# ### Selecting candidates group

# Draw up to DEAL_SIZE rows with Outcome == 1; candid_indexes holds their index
# labels in `data`, candid_df the matching feature rows without "Outcome".
all_candidate_lists, candid_df, candid_indexes = candidateBids(data, size=DEAL_SIZE)


In [13]:
# Generating random costs for the subset of candidates.

def randomCosts(df, deal_indexes):
    """Generate a random cost for each selected deal from its price.

    Each cost is ``Price / m`` with ``m`` drawn uniformly from
    ``[1.15, 1.25)``, rounded to two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "Price" column.
    deal_indexes : sequence
        Index *labels* of the selected rows (as returned by
        ``candidateBids``). Rows are looked up with ``.loc``; the previous
        positional ``.iloc`` lookup picked the wrong rows (or raised)
        whenever the frame's index was not the default 0..n-1 range.

    Returns
    -------
    list of float
        One cost per entry of ``deal_indexes``, in the same order.
    """
    costs = []
    for label in deal_indexes:
        # One np.random draw per deal, in order.
        markup = np.random.random() / 10 + 1.15
        costs.append(float("%.2f" % np.round(df.loc[label, "Price"] / markup, 2)))
    return costs

def getCandidatesOriginalPrices(df, deal_indexes):
    """Return the original "Price" of each selected deal as a float.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "Price" column.
    deal_indexes : sequence
        Index *labels* of the selected rows (as returned by
        ``candidateBids``). Label-based ``.loc`` is used so the lookup stays
        correct when the frame's index is not the default 0..n-1 range.

    Returns
    -------
    list of float
        Prices in the order of ``deal_indexes``.
    """
    return [float(df.loc[label, "Price"]) for label in deal_indexes]

# Random cost and original price for every sampled candidate, in the order of
# candid_indexes.
costs = randomCosts(data, candid_indexes)
prices = getCandidatesOriginalPrices(data, candid_indexes)

# One (lower, upper) row per candidate: lower bound is the cost, upper bound is
# COST_COEF * cost rounded to two decimals.
bounds = np.asarray([(c, float("%.2f" % round(COST_COEF * c, 2))) for c in costs])

In [96]:
# Use the trained model to calculate the probability of success

class probability_function():
    """Predict class-1 probabilities for a fixed set of rows at given prices.

    The feature rows are stored once; ``calculate`` temporarily writes a
    price vector into the "Price" column and queries the model.
    """

    def __init__(self, data, model):
        """Store a private copy of ``data`` and the model to query.

        Parameters
        ----------
        data : pandas.DataFrame
            Feature rows; copied, so the caller's frame is not modified.
        model : object
            Any object exposing ``predict_proba(frame)`` that returns a 2-D
            array whose column 1 is the probability of interest.
        """
        self.data = data.copy()
        # Placeholder value; the real prices are filled in by calculate().
        self.data["Price"] = 0
        self.model = model

    def calculate(self, prices):
        """Return the model's column-1 probability for each row at ``prices``.

        Parameters
        ----------
        prices : array-like
            One price per stored row; rounded to 4 decimals before use.

        Returns
        -------
        numpy.ndarray
            Column 1 of ``predict_proba`` for the stored rows.

        Raises
        ------
        ValueError
            If any price is NaN or infinite.
        """
        prices = np.round(prices, 4)
        if not np.isfinite(prices).all():
            raise ValueError("Value is too big")
        self.data["Price"] = prices
        try:
            # Use the model given to this instance, not a notebook global.
            probs = self.model.predict_proba(self.data)[:, 1]
        finally:
            # Restore the placeholder even if the prediction fails.
            self.data["Price"] = 0
        return probs


In [128]:
def firstObjective(prices, costs, probabilities):
    """Sum of ``(price - cost) * probability`` over all deals."""
    margin = prices - costs
    weighted_margin = margin * probabilities
    return np.sum(weighted_margin)


def secondObjective(probabilities):
    """Sum of the given probabilities."""
    total_probability = np.sum(probabilities)
    return total_probability

def thirdObjectiveConstDec(probabilities, PDGW, threshold, x, y):
    """Third objective with a linearly decreasing weight.

    Computes ``sum(max(p - threshold, 0) * (x - y * PDGW))``.

    Parameters
    ----------
    probabilities : numpy.ndarray
        Probabilities, one per deal.
    PDGW : float
        Factor multiplied by ``y`` and subtracted from ``x``.
    threshold : float
        Only the part of each probability above this value counts.
    x, y : float
        Weight coefficients.

    Returns
    -------
    float
        The summed weighted excess probability.
    """
    excess = np.clip(probabilities - threshold, 0, np.inf)
    # The original referenced an undefined lowercase `pdgw` here.
    return np.sum(excess * (x - y * PDGW))


def thirdObjectiveExpDec(probabilities, PDGW, threshold, x, y):
    """Third objective with an exponentially decaying weight.

    Computes ``sum(max(p - threshold, 0) * x * exp(-y * PDGW))``.
    """
    # Only the part of each probability above the threshold contributes.
    excess = np.clip(probabilities - threshold, 0, np.inf)
    decay = math.exp(-y * PDGW)
    weighted = excess * x * decay
    return np.sum(weighted)

class objectives():
    """Vector-valued objective evaluator for the price optimisation.

    For a price vector it returns the negated values of
    ``firstObjective``, ``secondObjective`` and ``thirdObjectiveExpDec``.
    """

    def __init__(self, data, costs, PDGW, threshold, x, y, probability_function):
        """Store the problem data.

        Parameters
        ----------
        data : object
            Stored on the instance; not read by ``evaluate``.
        costs : array-like
            Cost of each deal, aligned with the price vector.
        PDGW, threshold, x, y : float
            Parameters forwarded to ``thirdObjectiveExpDec``.
        probability_function : object
            Object exposing ``calculate(prices)`` returning one probability
            per deal.
        """
        self.data, self.costs, self.PDGW, self.threshold, self.x, self.y = data, costs, PDGW, threshold, x, y
        self.probability_function = probability_function

    def _evaluate_one(self, price):
        """Return the three (non-negated) objective values for one price vector."""
        probabilities = self.probability_function.calculate(price)
        return (
            firstObjective(price, self.costs, probabilities),
            secondObjective(probabilities),
            thirdObjectiveExpDec(probabilities, self.PDGW, self.threshold, self.x, self.y),
        )

    def evaluate(self, prices):
        """Evaluate the objectives for one price vector or a batch of them.

        Parameters
        ----------
        prices : array-like
            Either a 1-D price vector or a 2-D array with one price vector
            per row.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n_vectors, 3)`` holding the *negated*
            objective values (a single vector yields shape ``(1, 3)``).
        """
        # Treat a single vector as a batch of one; the former 1-D branch
        # referenced an undefined name `price` and raised NameError.
        price_rows = np.atleast_2d(prices)
        objs = np.zeros((price_rows.shape[0], 3))
        for i, price in enumerate(price_rows):
            objs[i] = self._evaluate_one(price)
        return -objs

In [134]:
from desdeo_problem import variable_builder, MOProblem, VectorObjective

from desdeo_emo.EAs import NSGAIII, RVEA

from desdeo_emo.utilities import animate_init_, animate_next_

In [135]:
# One decision variable (a price) per candidate bid. Each starts at its lower
# bound, which is the candidate's cost; the upper bound is COST_COEF * cost.
# NOTE(review): names are generated for DEAL_SIZE variables, but `bounds` has
# one row per sampled candidate, which can be fewer than DEAL_SIZE when fewer
# rows with Outcome == 1 exist - confirm the lengths match.
variables = variable_builder(
    names = [f"x{i}" for i in range(DEAL_SIZE)],
    initial_values=bounds[:, 0],
    lower_bounds=bounds[:, 0],
    upper_bounds=bounds[:, 1]
)

In [136]:
# Bundle the candidate rows, costs and objective parameters with a probability
# model wrapper; evaluate() returns the three objectives negated.
# NOTE(review): the negation presumably turns maximisation into the optimizer's
# minimisation convention - confirm against the desdeo documentation.
objs_instance = objectives(candid_df, costs, PDGW, threshold, X, Y, probability_function(candid_df, model))
objs = [VectorObjective(name=["f1", "f2", "f3"], evaluator=objs_instance.evaluate)]

In [137]:
# Combine the three-valued vector objective and the price variables into one
# multi-objective problem.
problem = MOProblem(objs, variables)

In [140]:
# Configure NSGA-III with 10 generations per iteration and 15 iterations.
evolver = NSGAIII(problem, n_gen_per_iter=10, n_iterations=15)


# NOTE(review): end() is called before any iterate(); presumably it returns the
# initial population's individuals and objective values - confirm in desdeo_emo.
individual, solutions = evolver.end()
# Start the HTML animation with these solutions; later frames are appended to
# the same file by animate_next_.
figure = animate_init_(solutions, filename="IBM.html")

Plot saved as:  IBM.html
View the plot by opening the file in browser.
To view the plot in Jupyter Notebook, use the IFrame command.


In [141]:
# Run the optimizer until it reports completion, adding one animation frame
# per iteration built from the current non-dominated objective vectors.
while evolver.continue_evolution():
    # NOTE(review): _iteration_counter is a private attribute of the evolver.
    print(f"Running iteration {evolver._iteration_counter+1}")
    evolver.iterate()
    non_dominated = evolver.population.non_dominated_fitness()
    figure = animate_next_(
        evolver.population.objectives[non_dominated],
        figure,
        filename="IBM.html",
        generation=evolver._iteration_counter,
    )

Running iteration 1
Running iteration 2
Running iteration 3
Running iteration 4
Running iteration 5
Running iteration 6
Running iteration 7
Running iteration 8
Running iteration 9
Running iteration 10
Running iteration 11
Running iteration 12
Running iteration 13
Running iteration 14
Running iteration 15
