# Implementation
## Data and Uncertainty


First obtain a matrix with the information of alternatives on attributes, including the uncertainty ranges

In [None]:
import pandas as pd
import numpy as np
import time
import itertools as it
from scipy.stats import norm
from warnings import warn
from matplotlib import pyplot as plt
import seaborn as sns
import math

class Dataset():
    """Accessor for the Excel workbook that holds the decision data.

    Nothing is read at construction time; every method below loads the
    requested worksheet from disk again each time it is called.

    Parameters
    ----------
    dataset : path (or anything ``pd.read_excel`` accepts) of the workbook.
    sheetname_matrix : name of the sheet holding the alternatives matrix.
    """

    def __init__(self, dataset, sheetname_matrix):
        self.dataset = dataset
        self.sheetname_matrix = sheetname_matrix

    def _load_sheet(self, sheet):
        """Read one worksheet of the workbook into a DataFrame."""
        return pd.read_excel(self.dataset, sheet)

    def read_dataset(self):
        """Return the alternatives matrix sheet as a DataFrame."""
        return self._load_sheet(self.sheetname_matrix)

    def isolate_alternatives(self):
        """Return the first matrix column as an int array.

        The array is also stored on ``self.alternatives_array``; that
        attribute does not exist before the first call.
        """
        first_column = self.read_dataset().values[:, 0]
        self.alternatives_array = first_column.astype(int)
        return self.alternatives_array

    def column_names(self):
        """Return the column labels of the matrix sheet as a list."""
        matrix = self.read_dataset()
        return list(matrix.columns)

    def criteria_weights(self, sheetname_criteria = "Criteria"):
        """Return the criteria sheet as a NumPy array."""
        criteria = self._load_sheet(sheetname_criteria)
        return criteria.values

    def uncertainty(self, sheetname_uncertainty = "Deviation"):
        """Return the deviation sheet as a DataFrame."""
        return self._load_sheet(sheetname_uncertainty)

    def metrics(self, sheetname_metrics =  "Units"):
        """Return the units sheet as a DataFrame."""
        return self._load_sheet(sheetname_metrics)

    def scales_gather(self, sheetname_metrics =  "Units"):
        """Return the rows at positions 1 and 2 of the units sheet."""
        units = self._load_sheet(sheetname_metrics)
        return units.iloc[[1, 2]]

    def interactions(self, sheetname_interactions =  "Interactions"):
        """Return the interactions sheet as a DataFrame."""
        return self._load_sheet(sheetname_interactions)
    
# Module-level dataset shared by the classes below (several use it as a default argument).
# NOTE: the workbook path is an absolute, machine-specific path.
# Dataset.__init__ only stores the path, so "Data loaded" is printed before any sheet is read.
Complete_Matrix = Dataset("C:/Users/paulu/Documents/Epa/Thesis/Complete_Matrix.xlsx", sheetname_matrix ='Sheet2')
print("Data loaded")

# Reads the "Criteria" sheet; as the last expression of a notebook cell its value is displayed.
Complete_Matrix.criteria_weights()


### Generating Box plot representation of the impacts of the alternatives using Uncertainty_Alternatives Class

In order to present the uncertainty of the impacts of an alternative the Class "Uncertainty_Alternatives()" has been written.
The distribution of the uncertainty can have different forms, but since that information is lacking a normal distribution is assumed. If another form of distribution is used this will be noted.
To generate the boxplots, the corresponding standard deviations have to be filled in in the Excel file. For this there is a sheet called "Deviation"; each standard deviation is entered at the same grid position as its mean in the matrix sheet. For now the standard deviation is often assumed to be 10% of the mean value.

From the mean and the deviation, the methods of the class draw random samples with np.random.normal after fixing the seed with numpy.random.seed (0 in boxplot(), 1 in the other methods), so the results are reproducible. The outcome is a set of data used to represent the uncertainty in the boxplots. However, if an alternative generates no impact (== 0) on an attribute, no boxplot is generated for clarity's sake (it would just be a line on the x-axis).

To plot a single alternative on a single attribute, use the boxplot() method. Here alt is the alternative to be represented (an integer, starting at 1), n is the number of randomly generated samples used for the boxplot (default 100000) and attribute is the name of the attribute to be represented, as a string.

The process is the same for the other methods, "alternatives()" and "all_alternatives()", which present all the alternatives on one attribute and all alternatives on all attributes, respectively.

In [None]:
class Uncertainty_Alternatives():
    """Sample and plot the uncertainty of single alternatives on an attribute.

    Means are read from the matrix sheet (``dataset.read_dataset()``) and
    spreads from the deviation sheet (``dataset.uncertainty()``); row
    ``alt - 1`` of both frames is used for alternative number ``alt``.

    An alternative is sampled from a normal distribution unless its number is
    listed in ``lognormal_list_alt`` or ``uniform_list_alt``.  If a number is
    in both lists the log-normal distribution is used.
    """

    def __init__(self, dataset = Complete_Matrix):
        self.dataset = dataset
        self.uncertainty_df = dataset.uncertainty()
        self.alternatives_df = dataset.read_dataset()
        self.temp_df = pd.DataFrame()
        sns.set(rc={"figure.figsize":(7, 5)})
        sns.set_style('whitegrid')

        # Alternative numbers listed here are drawn from a log-normal
        # distribution instead of a normal one.  The mean from the matrix
        # sheet is passed through math.log() by the code, so it must NOT be
        # log-transformed in the Excel file.
        # NOTE(review): sigma is currently hard-coded (1.2 in boxplot(), 0.2
        # in distributions()); the "Deviation" sheet is not read for
        # log-normal alternatives - confirm this is intended.
        self.lognormal_list_alt = []
        # Alternative numbers listed here are drawn uniformly from
        # [mean - deviation, mean + deviation].
        self.uniform_list_alt = []

    def _distribution_kind(self, alt):
        """Return "lognormal", "uniform" or "normal" for alternative ``alt``."""
        if alt in self.lognormal_list_alt:
            return "lognormal"
        if alt in self.uniform_list_alt:
            return "uniform"
        return "normal"

    def _draw(self, kind, alt, attribute, n, sigma):
        """Draw ``n`` samples for ``alt`` on ``attribute`` from NumPy's global RNG.

        Seeding is left to the caller.  Side effects kept from the original
        code: ``self.sigma`` is set for log-normal draws and ``self.low`` /
        ``self.high`` for uniform draws.
        """
        mean = self.alternatives_df[attribute].values[alt-1]
        spread = self.uncertainty_df[attribute].values[alt-1]
        if kind == "lognormal":
            self.sigma = sigma
            if mean == 0:
                # math.log(0) raises; a zero mean is treated as "no impact".
                return np.zeros(n)
            return np.random.lognormal(mean=math.log(mean), sigma=sigma, size = n)
        if kind == "uniform":
            self.low = mean - spread
            self.high = mean + spread
            return np.random.uniform(low=self.low, high = self.high, size = n)
        return np.random.normal(loc=mean, scale=spread, size = n)

    def boxplot(self, alt, n = 100000, attribute = "Attribute_3"):
        """Show a boxplot of ``n`` random draws for one alternative on one attribute.

        Parameters
        ----------
        alt : 1-based alternative number.
        n : number of random draws.
        attribute : column name of the attribute in the matrix sheet.

        Returns
        -------
        A message string when ``alt < 1`` (kept for backward compatibility),
        otherwise None.  Nothing is plotted when every draw is zero.
        """
        if alt < 1:
            return "alternatives number must be greater than zero, otherwise things go wrong, trust me, than it starts from the other side of the matrix"
        kind = self._distribution_kind(alt)
        found_label = {"normal": "Normal", "lognormal": "Log-normal", "uniform": "Uniform"}[kind]
        name = str(self.alternatives_df["Name"].values[alt-1])

        np.random.seed(0)  # fixed seed so the figure is reproducible
        print(found_label + " distribution found for alternative:", str(alt))
        samples = self._draw(kind, alt, attribute, n, sigma=1.2)
        # "No impact" means every draw is zero; a sum could also cancel to
        # zero for non-zero samples.
        if not np.any(samples):
            print("Impact of Alternative ", name, "is zero on the selected attribute")
            return

        ax = sns.boxplot(y = samples, showfliers = False)
        ax.set_title("Impact on " + attribute)
        ax.set_xlabel("Alternative " + str(alt) + ": "+ name)
        # The first row of the units sheet is used as the axis label.
        ax.set_ylabel(self.dataset.metrics()[attribute].values[0])
        if kind == "normal":
            print("Scale = " + str(self.uncertainty_df[attribute].values[alt-1]))
            print("Loc = "+ str(self.alternatives_df[attribute].values[alt-1]))
        elif kind == "lognormal":
            print("Sigma = " + str(self.sigma))
            print("Mean = "+ str(self.alternatives_df[attribute].values[alt-1]))
        else:
            print("Low = " + str(self.low))
            print("High = "+ str(self.high))
        plt.show()
        return

    def distributions(self, alt = 3, n = 100000, attribute = "Attribute_3"):
        """Add the draws for ``alt`` as column ``"Alt_<alt>"`` of ``self.temp_df``.

        Returns ``self.temp_df``, or None (without adding a column) when
        ``alt < 1`` or when every draw is zero.
        """
        if alt < 1:
            return
        kind = self._distribution_kind(alt)
        np.random.seed(1)  # fixed seed so the figure is reproducible
        samples = self._draw(kind, alt, attribute, n, sigma=0.2)
        if not np.any(samples):
            return
        self.temp_df["Alt_" + str(alt)] = samples
        return self.temp_df

    def alternatives(self, attribute = "Attribute_3"):
        """Show boxplots of every alternative with a non-zero impact on ``attribute``."""
        self.temp_df = pd.DataFrame()
        sns.set(rc={"figure.figsize":(7, 5.5)})
        sns.set_style('whitegrid')
        # Alternative numbers are 1-based: row i belongs to alternative i + 1.
        for row in range(self.alternatives_df.shape[0]):
            self.distributions(alt = row+1, attribute = attribute)
        if self.temp_df.empty:
            # Every alternative was skipped as zero; there is nothing to plot.
            print("No alternative has an impact on attribute: " + str(attribute))
            return
        ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.temp_df), showfliers = False, palette = "Blues")
        ax.set_title("Impact on: " + str(attribute))
        ax.set_xlabel("Alternatives")
        ax.set_ylabel(self.dataset.metrics()[attribute].values[0])

        return plt.show()

    def all_alternatives(self):
        """Call ``alternatives()`` for every matrix column from the third one on."""
        # Read the column names once; column_names() re-reads the workbook.
        # alternatives() already shows each figure itself.
        for attribute in self.dataset.column_names()[2:]:
            self.alternatives(attribute = attribute)
        return

# Instantiate with the default dataset (Complete_Matrix); the constructor reads
# the deviation sheet and the matrix sheet and sets the seaborn figure style.
Uncertainty_Data_Alternatives = Uncertainty_Alternatives()
# Example calls, left disabled:
#Uncertainty_Data_Alternatives.alternatives_df.values.shape[0]
#Uncertainty_Data_Alternatives.boxplot(alt = 4, attribute = 'Chrome_concentration_in_Crops')
#Uncertainty_Data_Alternatives.boxplot(alt = 5, attribute = 'Chrome_concentration_in_Crops')
#Uncertainty_Data_Alternatives.boxplot(alt = 8, attribute = 'Chrome_concentration_in_Crops')
#test = Uncertainty_Data_Alternatives.boxplot(alt = 2, attribute = "Attribute_8")

### Generate the Solution Space
To generate the solution space, all combinations of alternatives are generated. Some alternatives are mutually exclusive or dependent on each other; this information is then also added. For example, alternative 1 is the BAU case and is mutually exclusive with all other alternatives. Therefore these constraints are registered before the solution space ("def solspace()") is generated.

In [None]:
class Explicit_Space():
    """Enumerate every admissible combination ("portfolio") of alternatives.

    Parameters
    ----------
    choices : maximum number of alternatives in one combination.
    alternative_set : object whose ``isolate_alternatives()`` returns the
        alternative identifiers to combine.

    Attributes
    ----------
    subseq : list of ``[parent, sub]`` dependency pairs.
    mut_exc : list of ``[a, b]`` pairs that may not occur together.
    solution_space : result of the last ``solspace()`` call (a placeholder
        string until then).
    """

    def __init__(self, choices, alternative_set = Complete_Matrix):
        self.n = choices
        self.set = alternative_set.isolate_alternatives()
        self.subseq = []  # subsequent activities
        self.mut_exc = []  # mutually exclusive
        self.solution_space = "Not yet generated"

    def add_mutually_exclusive(self, a, b):
        """Register that ``a`` and ``b`` may not be in the same combination.

        The pair is unordered: a warning is issued and nothing is stored when
        it was already registered in either order.
        """
        # The original condition used "or" between the two "not in" tests, so
        # a repeated pair was appended again instead of triggering the warning.
        if [a, b] in self.mut_exc or [b, a] in self.mut_exc:
            warn('Mutualy exclusive activity already added')
            return
        self.mut_exc.append([a, b])

    def add_subsequent(self, parent, sub):
        """Register the dependency pair ``[parent, sub]`` (warns on duplicates)."""
        pair = [parent, sub]
        if pair in self.subseq:
            warn('Subsequent activity already added')
            return
        self.subseq.append(pair)

    def generate(self):
        """Yield every combination of 0..``n`` alternatives that obeys the rules.

        Combinations are yielded as tuples, ordered by size; the first one is
        the empty tuple.
        """
        for size in range(self.n+1):
            for item in it.combinations(self.set, size):
                # NOTE(review): a combination is rejected when it contains
                # `parent` without `sub`.  If "sub requires parent" is meant,
                # this test is the wrong way round - confirm with the author.
                if any(parent in item and sub not in item
                       for parent, sub in self.subseq):
                    continue
                if any(first in item and second in item
                       for first, second in self.mut_exc):
                    continue
                yield item

    def solspace(self):
        """Materialise ``generate()`` into a list, store it and return it."""
        self.solution_space = list(self.generate())
        return self.solution_space

    def size(self):
        """Return the number of admissible combinations (regenerates the space)."""
        return len(self.solspace())
    
# Progress marker for the notebook cell; only the class is defined here, no instance is created.
print("Explicit Class created")

### Data Acquisition
From the generated solution space the corresponding information of each alternative on each attribute is collected in the class "Data_Acquisition"

In [None]:
class Data_Acquisition():
    """Collect and aggregate attribute data for every portfolio.

    A portfolio is one combination from ``Explicit_Space``.  Position 0 of
    ``list_combinations`` is the empty combination, so portfolio numbers start
    at 1.

    Parameters
    ----------
    dataset : ``Dataset``-like object providing the matrix and units sheets.
    n : maximum number of alternatives per portfolio.
    """

    def __init__(self, dataset = Complete_Matrix, n = 1,):
        self.dataset = dataset
        self.n = n
        self.stop = False
        self.collected_data = "No Data Generated as of yet"
        self.temp = Explicit_Space(self.n, self.dataset)

        # Alternative 1 represents the BAU case and excludes every other
        # alternative.  The original loop, range(2, number_of_alternatives),
        # skipped the last alternative; identifiers are 1-based, so iterate
        # over the identifiers themselves.
        for alternative in self.temp.set:
            if alternative != 1:
                self.temp.add_mutually_exclusive(1, int(alternative))
        # Further project-specific exclusions.
        self.temp.add_mutually_exclusive(2,9)
        self.temp.add_mutually_exclusive(3,9)
        self.temp.add_mutually_exclusive(4,9)
        self.list_combinations = self.temp.solspace()
        self.collected_df = "No Data Generated as of yet"
        self.df_aggregated = "No Data Generated as of yet"

    def collect(self):
        """Gather the matrix row of every alternative of every combination.

        Returns a list with one entry per combination; each entry is a list
        of row value lists (first value cast to int).  The result is stored
        in ``self.collected_data``.

        If ``self.stop`` is True and an attribute ``portfolio_set`` has been
        set on the instance, collection ends early once more than
        ``portfolio_set`` combinations are gathered; that partial list is
        returned without being stored and ``self.stop`` is reset.
        """
        decision_matrix = self.dataset.read_dataset()
        # First column holds the alternative identifier.  Taken from the frame
        # already loaded instead of re-reading the workbook per lookup.
        id_column = decision_matrix.columns[0]
        # `portfolio_set` is never assigned inside this class; without this
        # guard a second run() raised AttributeError.
        limit = getattr(self, "portfolio_set", None)

        collected_sets = []
        for combination in self.list_combinations:
            if self.stop and limit is not None and len(collected_sets) > limit:
                self.stop = False
                return collected_sets
            rows = []
            for alternative in combination:
                selected = decision_matrix.loc[decision_matrix[id_column] == alternative]
                # .tolist() gives a list of rows; exactly one row is expected.
                row = selected.values.tolist()[0]
                row[0] = int(row[0])
                rows.append(row)
            collected_sets.append(rows)
        self.collected_data = collected_sets
        return self.collected_data

    def collect_dataframe(self):
        """Turn ``collected_data`` into one long DataFrame.

        Each portfolio (numbers 1 and up; entry 0 is the empty combination)
        contributes its rows plus a ``Portfolio_set`` column.  The per-portfolio
        frames are kept in ``self.collected_df_seperated`` and the concatenation
        in ``self.collected_df``, which is returned.  Sets ``self.stop`` to True.
        """
        if isinstance(self.collected_data, str):
            # Still the placeholder: collect() has not been run yet.
            self.collect()
        columns = self.dataset.column_names()
        frames = []
        for number in range(1, len(self.collected_data)):
            frame = pd.DataFrame(self.collected_data[number], columns = columns)
            frame["Portfolio_set"] = number
            frames.append(frame)
        self.collected_df_seperated = frames
        if frames:
            # Concatenate once instead of once per portfolio.
            self.collected_df = pd.concat(frames)
        self.stop = True
        return self.collected_df

    def aggregate(self):
        """Aggregate the alternatives of every portfolio into one row each.

        Attributes are summed over the portfolio's alternatives.  Attributes
        whose unit (first row of the units sheet) is "%" are combined as
        ``sum + product`` of the fractions instead.  For attributes marked
        'global' (second row of the units sheet) the result is clamped to the
        bounds given in the third row.

        Returns ``self.df_aggregated`` with an added ``Strategic_Portfolio``
        column numbering the rows from 1.
        """
        # Load the units sheet once, from this instance's own dataset, instead
        # of re-reading it from the module-level Complete_Matrix per attribute.
        metrics = self.dataset.metrics()

        self.sp_temp = []
        aggregated_rows = []
        for number in range(1, len(self.list_combinations)):
            members = self.collected_df.loc[self.collected_df['Portfolio_set'] == number]
            self.sp = members.drop(columns=["Portfolio_set", "Alternative", "Name"])
            self.sp_temp.append(self.sp)

            totals = self.sp.sum(axis = 0).to_frame().T
            for attribute in self.sp.columns:
                if metrics[str(attribute)].iloc[0] == "%":
                    fractions = np.asarray(self.sp[attribute]/100)
                    # NOTE(review): sum + product equals the compounded value
                    # (1+a)(1+b)-1 only for exactly two alternatives; for one
                    # alternative it yields twice its value.  Formula kept
                    # unchanged - confirm the intended rule.
                    combined = fractions.sum() + np.prod(fractions)
                    totals[attribute] = combined*100
            aggregated_rows.append(totals)

        self.df_aggregated = pd.concat(aggregated_rows)
        self.df_aggregated["Strategic_Portfolio"] = list(range(1, self.df_aggregated.shape[0]+1))
        for attribute in self.df_aggregated.columns:
            if attribute == "Strategic_Portfolio":
                continue
            if metrics[attribute].iloc[1] == 'global':
                # SECURITY: eval() executes the text of a workbook cell; only
                # use workbooks from a trusted source (ast.literal_eval would
                # be the safe alternative for plain tuples/lists).
                bounds = eval(metrics[attribute].iloc[2])
                lower, upper = bounds[0], bounds[1]
                self.df_aggregated.loc[self.df_aggregated[attribute] > upper, attribute] = upper
                self.df_aggregated.loc[self.df_aggregated[attribute] < lower, attribute] = lower

        return self.df_aggregated

    def _aggregated_frame(self):
        """Return the aggregated frame, computing it on first use."""
        if isinstance(self.df_aggregated, pd.DataFrame):
            return self.df_aggregated
        return self.aggregate()

    def single_portfolio(self, portfolio_set = 1):
        """Return the collected rows that belong to portfolio ``portfolio_set``."""
        if not self.stop:
            self.collect_dataframe()
        return self.collected_df.loc[self.collected_df['Portfolio_set'] == portfolio_set]

    def aggregated_single_portfolio(self, portfolio_set = 2):
        """Return the aggregated row of portfolio ``portfolio_set`` as a one-row frame."""
        # The original called a non-existent aggregate_old().
        return self._aggregated_frame()[portfolio_set-1:portfolio_set]

    def one_value(self, weight_1 = 0.5 , weight_2 = (1/3), weight_3 = (1/6)):
        """Return the weighted sum of columns Crit_1..Crit_3 as column ``Crit_total``."""
        # The original called a non-existent aggregate_old().
        store = self._aggregated_frame()
        weighted = store["Crit_1"]*weight_1 + store["Crit_2"]*weight_2 + store["Crit_3"]*weight_3
        return pd.DataFrame(weighted, columns = ["Crit_total"])

    def run(self):
        """Run collect(), collect_dataframe() and aggregate() in that order."""
        self.collect()
        self.collect_dataframe()
        self.aggregate()


# Build the portfolios of at most 5 alternatives from Complete_Matrix, then
# collect, tabulate and aggregate their data (run() calls the three steps).
Portfolio_Data = Data_Acquisition(Complete_Matrix, 5)
#Portfolio_Data.dataset
Portfolio_Data.run()
print("Portfolio Data Collected")

# Inspection helpers, left disabled:
#Portfolio_Data.collected_df
#Portfolio_Data.single_portfolio(91)
#Portfolio_Data.df_aggregated

In [None]:
#runtime_plot.drop(runtime_plot.tail(2).index,inplace=True)
#sns.lineplot(data = runtime_plot, x = "Amount of Portfolios", y = "Run time (s)").set(title='Runtime Portfolio Creation [14 Alternatives]')

#plt.show()

### Uncertainty in Portfolios

In order to analyse the performances of the portfolios the uncertainty of the performance has to be taken into account.
There are two ways to approach this problem, but for both approaches the uncertainty distribution of the alternatives themselves have to be available for the attributes. Before aggregation of the alternative values normalisation of both the values and the uncertainty has to take place.

Since a portfolio is a set of alternatives, the uncertainties have to be propagated. One way is to add the uncertainties analytically. The difficulty of this approach lies in situations where uncertainties that are distributed differently have to be combined.
The other way is computationally, which is the way taken here. A Monte Carlo simulation is done for the portfolios. This encompasses selecting a value within each distribution of each alternative. These uncertainty distributions have many forms.
Then these, for example 5 values are then aggregated as the portfolio value. This process is then iterated many times to approach the uncertainty distribution of the portfolio. From these aggregated values the boxplot performance of a portfolio is then presented.

Graphically presenting the performances of all portfolios simultaneously is difficult, since 257 portfolios would have to be shown. So from the analysis best performing

In [None]:
class Uncertainty_Portfolios():
    """Monte Carlo propagation of alternative uncertainty to portfolios.

    For one attribute, ``n`` random draws are made for every alternative in a
    portfolio and summed draw by draw; the resulting sample describes the
    uncertainty of the portfolio on that attribute.

    Parameters
    ----------
    dataset_portfolio : object providing ``list_combinations`` (the
        portfolios) and ``dataset.metrics()`` (axis labels).
    dataset_alternatives : object providing the matrix sheet, the deviation
        sheet and the column names.
    """

    def __init__(self, dataset_portfolio = Portfolio_Data, dataset_alternatives = Complete_Matrix):
        self.dataset = dataset_portfolio
        self.uncertainty_df = dataset_alternatives.uncertainty()
        self.alternatives_df = dataset_alternatives.read_dataset()
        self.dataset_alternatives_col_names = dataset_alternatives.column_names()
        self.temp_df = pd.DataFrame()
        sns.set(rc={"figure.figsize":(4, 4)})
        sns.set_style('whitegrid')
        self.list_combination = dataset_portfolio.list_combinations
        self.temp_df_2 = []
        self.attribute = "not defined"
        # (attribute, n, exclude) the cached temp_df_2 was computed for.
        self._cached_key = None

        # Alternative numbers listed here are drawn from a log-normal
        # distribution; the mean from the matrix sheet is passed through
        # math.log() by the code, so it must NOT be log-transformed in Excel.
        # NOTE(review): sigma is hard-coded to 0.2; the "Deviation" sheet is
        # not read for log-normal alternatives - confirm this is intended.
        self.lognormal_list_alt = []
        # Alternative numbers listed here are drawn uniformly from
        # [mean - deviation, mean + deviation].
        self.uniform_list_alt = []

    def alternative_uncertainty(self, attribute, alt, n =100000, reseed = True):
        """Draw ``n`` samples for alternative ``alt`` and store them in ``self.temp_df``.

        Parameters
        ----------
        attribute : column name of the attribute.
        alt : 1-based alternative number.
        n : number of draws.
        reseed : when True (default, the original behaviour) the global NumPy
            RNG is seeded with 1 before drawing.  ``portfolio_uncertainty``
            passes False so the alternatives of one portfolio get independent
            draws.

        Returns ``self.temp_df`` with the samples in column ``str(alt)``.
        """
        mean = self.alternatives_df[attribute].values[alt-1]
        spread = self.uncertainty_df[attribute].values[alt-1]
        if reseed:
            np.random.seed(1)

        if alt in self.lognormal_list_alt:
            self.sigma = 0.2
            print("log-normal distribution found for alternative:", str(alt))
            if mean == 0:
                # math.log(0) raises; a zero mean is treated as "no impact".
                samples = np.zeros(n)
            else:
                samples = np.random.lognormal(mean=math.log(mean), sigma=self.sigma, size = n)
        elif alt in self.uniform_list_alt:
            # The original message here wrongly said "log-normal".
            print("Uniform distribution found for alternative:", str(alt))
            self.low = mean - spread
            self.high = mean + spread
            samples = np.random.uniform(low=self.low, high = self.high, size = n)
        else:
            samples = np.random.normal(loc=mean, scale=spread, size = n)

        # Same column key for every distribution; an all-zero sample is kept
        # so a portfolio always has one column per alternative.
        self.temp_df[str(alt)] = samples
        return self.temp_df

    def portfolio_uncertainty(self, attribute = "Time_underperforming_due_to_electricity_shortages", n = 100000, portfolio_number = 5):
        """Return the summed draws of portfolio ``portfolio_number`` on ``attribute``.

        The result is a one-column DataFrame named
        ``"Portfolio number: <portfolio_number>"``; it is also stored in
        ``self.portfolio``.

        The RNG is seeded once per portfolio.  Previously it was re-seeded
        with the same value before every alternative, which gave all
        alternatives identical underlying random numbers (perfectly
        correlated draws) and overstated the spread of the sum.  The first
        alternative's draws are unchanged by this fix.
        """
        np.random.seed(1)
        self.temp_df = pd.DataFrame()
        for alt in self.list_combination[portfolio_number]:
            self.alternative_uncertainty(attribute = attribute, alt = alt, n = n, reseed = False)
        summed = self.temp_df.values.sum(1)
        self.portfolio = pd.DataFrame(summed, columns = ["Portfolio number: " + str(portfolio_number)])
        return self.portfolio

    def plot_portfolio(self, portfolio_number = 1, attribute = "Time_underperforming_due_to_electricity_shortages", n = 100000):
        """Draw a boxplot of one portfolio on one attribute (the caller shows it)."""
        last_portfolio = len(self.list_combination)-1
        if portfolio_number > last_portfolio:
            # Report this instance's own portfolio count, not the global one.
            print("The requested Portfolio does not exist in this dataset, look at how many portfolios you have generated:" + str(last_portfolio))
            return
        self.portfolio_uncertainty(portfolio_number = portfolio_number, attribute = attribute, n = n)
        ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.portfolio), showfliers = False, palette = "Greens")
        ax.set_title("Impact on: " + str(attribute))
        ax.set_xlabel(None)
        ax.set_ylabel(self.dataset.dataset.metrics()[attribute].values[0])

    def plot_all_portfolios(self, attribute = "Time_underperforming_due_to_electricity_shortages", n = 100000, exclude = True, plot = True):
        """Simulate every portfolio on ``attribute`` and optionally plot them.

        Parameters
        ----------
        exclude : skip portfolios whose draws are all zero.
        plot : draw the boxplots (the caller shows the figure).

        The samples are cached in ``self.temp_df_2`` (one column per portfolio
        number) and reused only when attribute, n and exclude are unchanged;
        the original reused them for any later attribute as well.
        """
        self.attribute = attribute
        sns.set(rc={"figure.figsize":(11, 25)})
        sns.set_style('whitegrid')
        cache_key = (attribute, n, exclude)
        if len(self.temp_df_2) != 0 and self._cached_key == cache_key:
            print("Distributions already created")
        else:
            # Collect columns in a dict and build the frame once; each
            # portfolio is simulated a single time.
            samples_by_portfolio = {}
            for number in range(1, len(self.list_combination)):
                samples = self.portfolio_uncertainty(attribute = attribute, n = n, portfolio_number = number)["Portfolio number: " + str(number)]
                if exclude and not np.any(samples.values):
                    continue
                samples_by_portfolio[str(number)] = samples
            self.temp_df_2 = pd.DataFrame(samples_by_portfolio)
            self._cached_key = cache_key
        if plot:
            ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.temp_df_2), showfliers = False)
            ax.set_title("Impact on: " + str(attribute))
            ax.set_xlabel("Portfolios")
            ax.set_ylabel(self.dataset.dataset.metrics()[attribute].values[0])
        return

    def plot_all_portfolios_all_attributes(self, n = 100000):
        """Plot all portfolios for every matrix column from the third one on."""
        for attribute in self.dataset_alternatives_col_names[2:]:
            # n was accepted but not forwarded before.
            self.plot_all_portfolios(attribute = attribute, n = n)
            plt.show()
        return

    def plot_core_portfolios(self, n = 100000, core = (3, 5), attribute = "Time_underperforming_due_to_electricity_shortages"):
        """Plot the portfolios whose numbers are listed in ``core`` on ``attribute``.

        Portfolios whose draws are all zero are left out.  The samples are
        stored in ``self.temp_df_core``.
        """
        sns.set(rc={"figure.figsize":(11, 7)})
        sns.set_style('whitegrid')
        samples_by_portfolio = {}
        for number in core:
            samples = self.portfolio_uncertainty(attribute = attribute, n = n, portfolio_number = number)["Portfolio number: " + str(number)]
            if not np.any(samples.values):
                continue
            samples_by_portfolio[str(number)] = samples
        self.temp_df_core = pd.DataFrame(samples_by_portfolio)
        ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.temp_df_core), showfliers = False)
        ax.set_title("Impact on: " + str(attribute))
        ax.set_xlabel("Portfolios")
        ax.set_ylabel(self.dataset.dataset.metrics()[attribute].values[0])
        return

    def plot_core_portfolio_all_attributes(self, core = (3, 5)):
        """Plot the ``core`` portfolios for every matrix column from the third one on."""
        for attribute in self.dataset_alternatives_col_names[2:]:
            self.plot_core_portfolios(core = core, attribute = attribute)
            plt.show()
        return

    def plot_one_portfolio_all_attributes(self, n = 100000, portfolio_number = 2):
        """Plot one portfolio for every matrix column from the third one on."""
        for attribute in self.dataset_alternatives_col_names[2:]:
            # n was accepted but not forwarded before.
            self.plot_portfolio(portfolio_number = portfolio_number, attribute = attribute, n = n)
            plt.show()

# Instantiate with the defaults (Portfolio_Data and Complete_Matrix).
Boxplots = Uncertainty_Portfolios()
# Example calls, left disabled; Portfolios_with_Synergy is not defined in the code above.
#Boxplots.plot_core_portfolios(core = Portfolios_with_Synergy.portfolios_with_synergy["Strategic_Portfolio"].values)
#Boxplots.plot_core_portfolio_all_attributes(core = Portfolios_with_Synergy.portfolios_with_synergy["Strategic_Portfolio"].values)

## Value Functions
From the obtained portfolio the mean performances are converted to values:
The first step to obtain these values is to create a value function. The value functions presented here are linear, with one partial value function per attribute. The scale of each value function is determined either locally or globally, depending on the attribute.
Secondly, the value functions are created: The portfolios generate a corresponding value per partial value function.

The third step is to aggregate the values of the partial value functions.
The fourth step is to repeat this for the portfolio uncertainty distributions generated by the Uncertainty_Portfolios class, to obtain the PDF of the values per attribute and then per aggregated portfolio. 

In [None]:
class Values():
    """Convert simulated portfolio performances into values and rank portfolios.

    Performances are mapped onto a linear 0-1 value scale per attribute,
    weighted and summed per portfolio, and ranked for many sampled weight
    vectors to obtain rank acceptability indices.

    Parameters
    ----------
    dataset :
        Portfolio data object. This class reads its ``df_aggregated``,
        ``dataset`` (for ``scales_gather()`` and ``criteria_weights()``),
        ``single_portfolio()`` and ``list_combinations`` members.
    dataset_2 :
        Data source providing ``scales_gather()``, ``criteria_weights()`` and
        ``column_names()``.

    Notes
    -----
    ``self.F`` (filled by ``xi()`` or ``all_attributes()``) stacks one value
    array per attribute, in the order of ``self.info.columns``. Each array has
    one column per portfolio; the rows are presumably the Monte Carlo samples
    produced by ``Uncertainty_Portfolios`` (confirm against that class).
    """

    def __init__(self, dataset = Portfolio_Data, dataset_2 = Complete_Matrix):
        self.dataset_2 = dataset_2
        self.portfolio_data = dataset
        self.dataset = dataset.df_aggregated
        #For days attribute the global scales are minimal 0 days, maximal 365 days.
        #Per attribute these have to be defined in the Excel file.
        self.scl = []
        self.attribute = "Profits_from_hides_processing"
        self.info = self.portfolio_data.dataset.scales_gather()
        #Simulated performances of all portfolios, and the attribute they were
        #simulated for. Both stay None until distributions are generated.
        self.temp = None
        self._temp_attribute = None

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _attribute_type(self, attribute):
        """Return the entry of row 1 of the criteria sheet that belongs to *attribute*."""
        return self.dataset_2.criteria_weights()[1][self.info.columns.tolist().index(attribute)]

    def _is_cost(self):
        """Tell whether the current ``self.a_type`` marks a cost criterion."""
        return self.a_type == "cost" or self.a_type == "Cost"

    def _generate_distributions(self, attribute):
        """Simulate all portfolios on *attribute* and take the scale from the samples.

        Sets ``self.attribute``, ``self.temp`` and ``self.scl`` (lowest and
        highest simulated data point).
        """
        self.attribute = attribute
        Boxplots_values = Uncertainty_Portfolios()
        Boxplots_values.plot_all_portfolios(self.attribute, exclude = False, plot = False)
        self.temp = Boxplots_values.temp_df_2
        self._temp_attribute = attribute
        self.scl = [np.min(self.temp.values), np.max(self.temp.values)]

    def _distributions_are_stale(self, attribute):
        """Tell whether ``self.temp`` is missing or belongs to another attribute."""
        return attribute != self.attribute or self._temp_attribute != attribute

    def _linear_value(self, performance):
        """Map *performance* (scalar or array) onto the linear value scale ``self.scl``.

        For cost criteria the value is mirrored (``1 - value``).
        """
        low, high = min(self.scl), max(self.scl)
        value = (1/(high-low))*performance + ((low*-1)/(high-low))
        if self._is_cost():
            return 1 - value
        return value

    def _value_samples(self, attribute, portfolio):
        """Return the stored values of one (1-based) portfolio on one attribute."""
        return self.F.T[portfolio-1].T[self.info.columns.tolist().index(attribute)]

    # ------------------------------------------------------------------
    # scales and value functions
    # ------------------------------------------------------------------
    def scales(self):
        """Load the scale of ``self.attribute`` into ``self.scl``.

        When the first scale row of the attribute reads ``"global"`` the range
        is taken from the second scale row of the Excel file. Otherwise the
        portfolios are simulated and the lowest and highest simulated data
        points are used as a local range.
        """
        if self.info[self.attribute].iloc[0] == "global":
            print("A global scale has been identified for attribute: " + self.attribute)
            # NOTE(review): eval() executes whatever text the spreadsheet cell
            # contains. Only use workbooks from a trusted source, or switch to
            # ast.literal_eval if the cell always holds a plain list.
            self.scl = eval(self.dataset_2.scales_gather()[self.attribute].iloc[1])
        #Since the ranges are dealing with point data and the portfolio sets have a PDF the highest and lowest found datapoints
        #from the Monte Carlo simulations are taken as min and max values.
        else:
            print("A Local scale has been identified for attribute: " + self.attribute)
            self._generate_distributions(self.attribute)
        return

    def value_function(self, attribute = "Space_requirement_for_Waste_Water_Treatment", p = 10):
        """Return the linear value of performance *p* on *attribute*.

        Returns
        -------
        tuple or None
            ``(value, attribute_type)``; ``None`` (after printing a message)
            when *p* lies outside the scale of the attribute.

        The cost branch now returns ``1 - value`` like the other methods of
        this class; it previously added the offset term instead of
        subtracting it, which was wrong for scales not starting at zero.
        """
        self.a_type = self._attribute_type(attribute)
        #Load the scales when a new attribute is chosen or none were loaded yet.
        if attribute != self.attribute or len(self.scl) == 0:
            self.attribute = attribute
            self.scales()
        if p< min(self.scl) or p>max(self.scl):
            print("Performance value, p, exceeds value function scale: p =", str(p), "when range is"+str(self.scl))
            return
        return self._linear_value(p), self.a_type

    def Single_Portfolio_Single_Attribute(self, attribute = "Space_requirement_for_Waste_Water_Treatment", Portfolio = 1, a_type = "benefit",  plot = True):
        """Return (and optionally box-plot) the value distribution of one portfolio.

        The simulated performances are regenerated when they are missing or
        belong to another attribute; in that case the local (sample min/max)
        scale is used. ``a_type`` is ignored: the attribute type is always
        read from the criteria sheet.
        """
        self.a_type = self._attribute_type(attribute)
        if self._distributions_are_stale(attribute):
            print("Uncertainty Distributions are first generated")
            self._generate_distributions(attribute)
        self.df = self._linear_value(self.temp[str(Portfolio)].values)
        if plot:
            ax = sns.boxplot(data = self.df, showfliers = False)
            ax.set_title("Impact on: " + str(attribute))
            ax.set_xlabel("Portfolio:"+str(Portfolio))
            ax.set_ylabel("Value")
        return self.df

    def collect_attributes_portfolios(self, attribute = "Space_requirement_for_Waste_Water_Treatment", a_type = "benefit"):
        """Return the value distributions of all portfolios on *attribute*.

        The result has the layout of ``self.temp.values`` (one column per
        portfolio). Simulated performances are regenerated when missing or
        stale. ``a_type`` is ignored: the type is read from the criteria sheet.
        """
        self.Portfolios_n = np.linspace(1,self.dataset.values.shape[0], self.dataset.values.shape[0])
        self.a_type = self._attribute_type(attribute)
        if self._distributions_are_stale(attribute):
            self._generate_distributions(attribute)
        self.df_all = self._linear_value(self.temp.values)
        return self.df_all

    # ------------------------------------------------------------------
    # aggregation over attributes
    # ------------------------------------------------------------------
    def all_attributes(self):
        """Weight and sum the value distributions of every attribute.

        Uses the weights in row 0 of the criteria sheet, fills ``self.F`` and
        returns a DataFrame with one column per portfolio (numbered from 1).
        """
        self.summed_over_attributes = 0
        self.generated_attribute_stored = []
        weight = self.portfolio_data.dataset.criteria_weights()[0]
        for position, attribute in enumerate(self.info.columns):
            w = weight[position]
            print(attribute + " with weight:" + str(w))
            self.generated_attribute = self.collect_attributes_portfolios(attribute)
            self.generated_attribute_stored.append(self.generated_attribute)
            self.summed_over_attributes += (self.generated_attribute*w)
        self.F = np.asarray(self.generated_attribute_stored)
        self.portfolio_values_df = pd.DataFrame(self.summed_over_attributes, columns = list(range(1,self.dataset.values.shape[0]+1)))
        return self.portfolio_values_df

    def xi(self):
        """Generate the value distributions of every attribute and store them in ``self.F``."""
        self.generated_attribute_stored = []
        for attribute in self.info.columns:
            self.generated_attribute = self.collect_attributes_portfolios(attribute)
            self.generated_attribute_stored.append(self.generated_attribute)
        self.F = np.asarray(self.generated_attribute_stored)
        print("Finished")
        return

    def all_attributes_2(self, weight = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])):
        """Return the weighted sum of the mean values per portfolio.

        Quicker than ``all_attributes`` because it reuses ``self.F`` instead
        of regenerating the distributions; ``xi()`` or ``all_attributes()``
        must have been run first. ``weight`` holds one weight per attribute in
        the order of ``self.info.columns``. The global NumPy seed is reset to
        9 on every call.
        """
        np.random.seed(9)
        self.summed_over_attributes_2 = 0
        self.weight = weight
        for position in range(len(self.info.columns)):
            self.summed_over_attributes_2 += np.mean(self.F[position], axis = 0)*self.weight[position]
        return self.summed_over_attributes_2

    # ------------------------------------------------------------------
    # ranking
    # ------------------------------------------------------------------
    def rank_portfolios_average(self):
        """Return the portfolio numbers ordered from highest to lowest summed mean value.

        Uses ``self.summed_over_attributes_2`` as left by ``all_attributes_2``.
        """
        portfolio_numbers = list(range(1,self.dataset.values.shape[0]+1))
        self.sorted_series =  pd.DataFrame([self.summed_over_attributes_2], columns = portfolio_numbers).mean().sort_values(ascending = False).index
        return self.sorted_series.values

    def ranks(self, samples = 5):
        """Return the best-ranked portfolio for each sampled weight vector.

        ``self.r_1`` holds the full rankings: one row per rank position and
        one column per sampled weight vector.
        """
        self.r_1 = []
        weight = self.sample_weight_space(samples)
        portfolio_numbers = list(range(1,self.dataset.values.shape[0]+1))
        for i in weight:
            self.r_1.append(pd.DataFrame([self.all_attributes_2(weight = i)], columns = portfolio_numbers).mean().sort_values(ascending = False).index)
        self.r_1 = np.asarray(self.r_1)
        self.r_1 = np.transpose(self.r_1)
        return self.r_1[0]

    def sample_weight_space(self, samples, n=10):
        """Return ``samples`` weight vectors of ``n + 1`` weights each.

        Two random sorted partitions of [0, 1] are drawn and interpolated in
        ``samples`` steps; the gaps of each interpolated partition are the
        weights. Every row is sorted from largest to smallest weight.
        """
        lower = np.append(np.insert(arr = np.sort(np.random.uniform(size = n)), obj=0, values = [0]), 1)
        upper = np.append(np.insert(arr = np.sort(np.random.uniform(size = n)), obj=0, values = [0]), 1)
        a = np.linspace(lower, upper, samples)*-1
        return np.sort(np.diff(a[::-1]))*-1

    def ranks_2(self, samples = 5):
        """Rank all portfolios for ``samples`` sampled weight vectors at once.

        Builds ``self.matrix`` (portfolios on the rows, sampled weight vectors
        on the columns), sorts every column into ``self.matrix_sorted`` and
        counts per rank position how often each portfolio occurs. The counts
        are stored in ``self.dsb`` under the keys ``"rank 1"``, ``"rank 2"``,
        and so on. ``xi()`` or ``all_attributes()`` must have been run first.
        """
        start = time.time()
        self.samples = samples
        np.random.seed(9)
        self.summed_over_attributes_2 =  0
        weight = self.sample_weight_space(samples)
        self.matrix = 0
        self.dsb = {}
        self.r_1 = []
        attribute_names = self.info.columns.tolist()
        #Attributes ordered by ascending entry in row 0 of the criteria sheet;
        #read once instead of once per attribute.
        self.weight_rank = self.info.columns.values[np.argsort(self.dataset_2.criteria_weights()[0])]
        weight_per_position = weight.T

        for position, attribute in enumerate(self.weight_rank):
            #Outer product: mean value of every portfolio on this attribute
            #times the sampled weights that belong to this attribute's position.
            self.matrix += np.outer(np.mean(self.F[attribute_names.index(attribute)], axis = 0), weight_per_position[position])
        #Row r of matrix_sorted holds, per weight sample, the portfolio number at rank r+1.
        self.matrix_sorted = np.argsort(self.matrix, axis = 0)[::-1]+1

        for i, portfolios_at_rank in enumerate(self.matrix_sorted, start = 1):
            unique, counts = np.unique(portfolios_at_rank, return_counts=True)
            self.dsa = dict(zip(unique, counts))
            self.dsb["rank " + str(i)] = self.dsa

        end = time.time()
        elaped_time = end-start 
        print("Elaped time was: ", elaped_time, " for: ", samples, 'samples')
        return

    def acceptability_index(self, rank = "rank 2"):
        """Store in ``self.dsc`` the share of weight samples per portfolio at *rank*.

        Each share is wrapped in a one-element set, which ``dict_df`` unpacks.
        """
        self.dsc = {}
        for x in self.dsb[rank]:
            self.dsc[x] = {self.dsb[rank][x]/self.samples}
        return

    def acceptability_indeces(self, amount_of_ranks_included = 10, port_amount = 10):
        """Return the portfolios that occur in the best rank positions.

        Fills ``self.dsx`` with the acceptability indices of every rank, then
        collects the portfolios seen at the first ``amount_of_ranks_included``
        ranks (limited to the ranks that exist) and keeps the first
        ``port_amount`` distinct ones in ``self.best``.
        """
        self.dsx = {}
        for j in list(self.dsb.keys()):
            self.acceptability_index(rank = j)
            self.dsx[j] = self.dsc
        ranks_available = min(amount_of_ranks_included, len(self.dsx))
        list_best = [list(self.dsx['rank ' + str(i)].keys()) for i in range(1, ranks_available+1)]
        flat_list = list(it.chain(*list_best))
        self.best = pd.DataFrame(pd.unique(np.asarray(flat_list)), columns = ["Strategic_Portfolio"])[0:port_amount]
        return self.best

    def dict_df(self):
        """Return ``self.dsx`` as a long DataFrame (portfolio, index, rank).

        ``acceptability_indeces()`` must have been run first.
        """
        frames = [pd.DataFrame(columns = ["Strategic Portfolio", "Acceptibility Index", "rank"])]
        for i in list(self.dsx.keys()):
            flat_index = [item for k in self.dsx[i].keys() for item in self.dsx[i][k]]
            df = pd.DataFrame(list(self.dsx[i].keys()), columns = ["Strategic Portfolio"])
            df["Acceptibility Index"] = flat_index
            df["rank"] = i
            frames.append(df)
        self.acceptability = pd.concat(frames)
        return self.acceptability

    #Attribute = on which the portfolios are ranked as best performing.
    #Amount = How many of top performing are returned, maximum would be the amount of portfolios generated ranked from low to high.
    def heuristic(self, attribute = 'Not Specified', amount_of_ranks_included = 10):
        """Return the portfolios with the highest mean value on a single attribute.

        Raises
        ------
        ValueError
            If *attribute* is not among the attribute columns of ``dataset_2``.
        """
        #The first two columns are not attributes but name and such.
        attribute_array = np.asarray(self.dataset_2.column_names())[2:]
        self.index_value = np.where(attribute_array == attribute)
        if self.index_value[0].size == 0:
            raise ValueError("Attribute '" + str(attribute) + "' is not one of the attribute columns")
        #plus one is to count the first portfolio as portfolio 1 and not as portfolio 0
        self.heuristic_outcome = pd.DataFrame(self.F.mean(axis = 1)[self.index_value].argsort().T+1, columns = ["Portfolios"]).iloc[-amount_of_ranks_included:][::-1]
        self.heuristic_outcome["Rank"] = list(range(1,amount_of_ranks_included +1))
        self.heuristic_outcome.set_index("Rank", inplace = True)
        return self.heuristic_outcome

    def return_info_portfolio(self, portfolio = 1):
        """Return ``single_portfolio(portfolio)`` of the portfolio data object."""
        return self.portfolio_data.single_portfolio(portfolio)

    def return_info_partial_values(self):
        """Return the mean value per portfolio (rows) and attribute (columns).

        Columns are labelled ``"Attribute 1"`` up to the number of attributes
        stored in ``self.F``.
        """
        mean_values = self.F.mean(axis = 1).T
        labels = ["Attribute " + str(k) for k in range(1, mean_values.shape[1]+1)]
        self.partial_values = pd.DataFrame(mean_values, columns = labels)
        return self.partial_values

    def core(self, actor = "Not Specified"):
        """Count how often every alternative occurs in the portfolios of ``self.best``.

        Stores the counts, the actor label and the core index (count divided
        by the number of best portfolios) in ``self.core_index``, writes them
        to a CSV file, reads that file back and writes a second CSV holding
        the core index per alternative (also kept in ``self.CI``).
        ``acceptability_indeces()`` must have been run first.
        """
        core_array = []
        for i in self.best.values.flatten():
            core_array.append(self.return_info_portfolio(i)["Alternative"].to_list())
        core_array_flat = list(it.chain(*core_array))
        core_df = pd.DataFrame(core_array_flat, columns = ["Core Alternative"])
        self.core_index = core_df.value_counts().to_frame(name = "Alternative Count")
        self.core_index["Actor"] = actor
        self.core_index["Core Index"] = self.core_index.values[:, 0]/self.best.shape[0]
        self.core_index.to_csv("C:/Users/paulu/Documents/Epa/Thesis/Portfolios " + str(actor) + ".csv")
        #Reading the file back turns the index into a regular "Core Alternative" column.
        Core_actor = pd.read_csv("C:/Users/paulu/Documents/Epa/Thesis/Portfolios " + str(actor) + ".csv")
        self.CI = Core_actor[["Core Alternative", "Core Index"]].rename(columns={"Core Index": "Actor " + str(actor)})
        self.CI.to_csv("C:/Users/paulu/Documents/Epa/Thesis/CI " + str(actor) + ".csv", index = False)
        return self.core_index

    #When wanting to search for all the portfolio that contain specific alternatives one can run this search_alt method.
    #Additional ques can be set in the if statement if more alternatives have to be included.
    def search_alt(self, alt_1 = 5, alt_2 = 8):
        """Return the positions in ``list_combinations`` that contain both alternatives.

        Position 0 is not inspected. The combinations are read from
        ``self.portfolio_data``; an undefined global was referenced before.
        """
        combinations = self.portfolio_data.list_combinations
        Portfolio_including = []
        for elements in range(1, len(combinations)):
            if alt_1 in combinations[elements] and alt_2 in combinations[elements]:
                Portfolio_including.append(elements)
        return Portfolio_including

    # ------------------------------------------------------------------
    # value distributions of a single portfolio
    # ------------------------------------------------------------------
    def portfolio_value_dist(self, attribute = "Attribute_1", portfolio = 118):
        """Return the stored values of one portfolio on one attribute as a DataFrame."""
        self.portfolio_dist = pd.DataFrame(self._value_samples(attribute, portfolio), columns = ["Portfolio number: " + str(portfolio)])
        return self.portfolio_dist

    def portfolio_value_dist_all(self, portfolio = 118):
        """Return the stored values of one portfolio with one column per attribute."""
        self.portfolio_dist_merged = pd.DataFrame()
        for attribute in self.info.columns:
            self.attribute_temp = attribute
            self.portfolio_value_dist(attribute = attribute, portfolio = portfolio)
            self.portfolio_dist_merged[attribute] = self.portfolio_dist["Portfolio number: " + str(portfolio)].values
        return self.portfolio_dist_merged

    def boxplot_portfolio_values(self, attribute = "Attribute_1", portfolio = 118):
        """Box-plot the stored values of one portfolio on one attribute."""
        sns.set(rc={"figure.figsize":(4, 5)})
        sns.set_style('whitegrid')
        self.portfolio_test = pd.DataFrame(self._value_samples(attribute, portfolio), columns = ["Portfolio number: " + str(portfolio)])
        ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.portfolio_test), showfliers = False, palette = "Blues")
        ax.set_title("Impact on: " + str(attribute))
        ax.set_xlabel(None)
        return

    def boxplot_portfolio_values_all(self, portfolio = 118):
        """Box-plot the stored values of one portfolio on every attribute and show the figure."""
        sns.set(rc={"figure.figsize":(7.5, 6)})
        sns.set_style('whitegrid')
        self.portfolio_all = self.portfolio_value_dist_all(portfolio = portfolio)
        ax = sns.boxplot(x = 'variable', y = 'value', data = pd.melt(self.portfolio_all), showfliers = False, palette = "Blues")
        ax.set_title("Portfolio " + str(portfolio) + ": Impact on all attributes")
        ax.set_xlabel(None)
        plt.setp(ax.get_xticklabels(), rotation=40, horizontalalignment='right')
        plt.show()
        return 

        
    
# Build the value model with its default data sources, generate the value
# distributions of every attribute (V.xi), rank the portfolios for 100000
# sampled weight vectors and tabulate the resulting acceptability indices.
V = Values()
V.xi()
V.ranks_2(samples = 100000)
V.acceptability_indeces()
V.dict_df()
#V.return_info_partial_values()
#V.boxplot_portfolio_values(attribute = "Attribute_5", portfolio = 118)
#V.info.columns

In [None]:
# Count the alternatives in the best portfolios under the label "Y";
# this call also writes the "Portfolios Y" and "CI Y" CSV files.
V.core("Y")


In [None]:
# Keep the core table of actor Y; barplot_data() merges other actors into it.
Core_Y = V.core_index

In [None]:
# Expose the index as a regular "Alternative" column; barplot_data() reads
# that column after merging.
Core_Y["Alternative"] = Core_Y.index
# NOTE(review): Core_Merged is not assigned anywhere above this line in the
# visible file - confirm it exists before this cell is run.
Core_Merged

In [None]:
#To Produce the Barplot this code is used to generate the countings of the barplots
#Be AWARE that Core_Y first needs to be generated unto which you do the first merger. After this you can merge the Core_Merger with Core_X
#Replace the old Core_Merger with the new one.
def barplot_data(actor = "X"):
    """Return the core table of actor Y merged with the core table of *actor*.

    Runs ``V.core(actor)``, copies the index of the resulting table into an
    "Alternative" column, outer-merges it with the module-level ``Core_Y`` and
    replaces every "Alternative" entry by its first element.
    """
    #V.core(actor) refreshes V.core_index for the requested actor.
    V.core(actor)
    actor_core = V.core_index
    actor_core["Alternative"] = actor_core.index
    Core_Merged = Core_Y.merge(actor_core, how = "outer")

    #The alternatives arrive as tuples; keep only the first element of each.
    Core_Merged["Alternative"] = [entry[0] for entry in Core_Merged["Alternative"].values]
    return Core_Merged

#Plot the Barplot
# Build the merged actor table here, under a single name, so that both plots
# below use a variable that is actually defined in this cell. The original
# mixed "Core_merged" and "Core_Merged", neither of which was assigned at
# module level.
# NOTE(review): confirm "X" is the actor to compare against Core_Y.
Core_Merged = barplot_data(actor = "X")
Core_Merged = Core_Merged.replace(np.nan,0)
#Core_Merged["Combined CI"] = Core_Merged["Actor Y"]*Core_Merged["Actor X"]
sns.set(rc={"figure.figsize":(7, 5)})
sns.set_style('whitegrid')
sns.barplot(data =Core_Merged, x = "Alternative", y = "Alternative Count", hue = "Actor", palette = "Blues")
plt.show()


#Plot the Heatmap
# One row per alternative, one column per actor, filled with the core index.
df_wide = Core_Merged.pivot_table(index='Alternative', columns='Actor', values='Core Index')
rdgn = sns.diverging_palette(h_neg=10, h_pos=133, s=99, l=55, sep=3, as_cmap=True)
sns.set(rc={"figure.figsize":(8, 5.5)})
sns.heatmap(df_wide , center=0.5, cmap=rdgn)

## Interaction Values
The selection of best performing portfolios can be fine-tuned by selecting, from the already best-ranking portfolios, only the portfolios with synergies (so, the highest values with synergy). In this second situation it is also assumed that the best performing portfolios have similar outcomes, so that the existence of a synergy can decide between very similar portfolios.

First all the alternative combinations with positive synergy values have to be loaded in.
Secondly, the "top" performing portfolios are screened for whether these interactions occur. This happens after a general MAUT ranking which already includes attribute preferences. Then the rankings are compared for the 5 best performing portfolios and the core projects in these portfolios.

In [None]:
class Interaction():
    """Select, among the top-ranked portfolios, those containing a synergy pair.

    Parameters
    ----------
    dataset_1 :
        Data source whose ``interactions()`` sheet lists the synergy pairs
        (the first two columns of each row are read).
    dataset_2 : pandas.DataFrame
        Portfolio table; portfolio number ``j`` is taken from row ``j - 1``.
    top :
        Ranking object providing ``acceptability_indeces()`` and
        ``return_info_portfolio()``.
    """
    def __init__(self, dataset_1 = Complete_Matrix, dataset_2 = Portfolio_Data.df_aggregated, top = V):
        self.interaction_sets = dataset_1.interactions()
        self.portfolios = dataset_2
        self.top_ranking = top
        #Portfolios occurring in the first 10 rank positions are screened for synergies.
        self.top = np.unique(np.asarray(top.acceptability_indeces(10)))

    def check(self):
        """Return the numbers of the top portfolios that contain a synergy pair.

        The matching rows of ``self.portfolios`` are kept in
        ``self.portfolios_with_synergy``; a portfolio appears once per
        matching pair. An empty array is returned when nothing matches.
        """
        #Use the interaction sheet loaded in __init__ instead of re-reading
        #the Excel file for every pair.
        matches = []
        for pair in self.interaction_sets.values:
            first, second = pair[0], pair[1]
            for j in self.top:
                # NOTE(review): the combinations come from the module-level
                # Portfolio_Data, as before, not from a constructor argument.
                members = Portfolio_Data.list_combinations[j]
                if first in members and second in members:
                    matches.append(self.portfolios.iloc[[j-1]])
        if not matches:
            self.portfolios_with_synergy = pd.DataFrame()
            return np.array([])
        #DataFrame.append no longer exists in pandas 2.x; concatenate once.
        self.portfolios_with_synergy = pd.concat(matches)
        return np.unique(self.portfolios_with_synergy["Strategic_Portfolio"].values)
    #to obtain only the strategic portfolio number add [["Strategic_Portfolio"]].values to the above returned method.

    def core(self, amount_of_ranks_included = 6, actor = "Not Specified"):
        """Count how often every alternative occurs in the synergy portfolios.

        Refreshes ``self.top`` for ``amount_of_ranks_included`` rank
        positions, runs ``check()`` and writes the counts to a CSV file named
        after *actor*. The counts are returned and kept in ``self.core_index``.
        """
        self.top = np.unique(np.asarray(self.top_ranking.acceptability_indeces(amount_of_ranks_included = amount_of_ranks_included)))
        core_array = []
        for i in self.check().flatten():
            core_array.append(self.top_ranking.return_info_portfolio(i)["Alternative"].to_list())
        core_array_flat = list(it.chain(*core_array))
        core_df = pd.DataFrame(core_array_flat, columns = ["Core Alternative"])
        self.core_index = core_df.value_counts().to_frame(name = "Alternative Count")
        self.core_index.to_csv("C:/Users/paulu/Documents/Epa/Thesis/Portfolios " + str(actor) + ".csv")
        return self.core_index

Core_test = Interaction()
Core = Core_test.core(2, actor = "Y")

#Portfolios_with_Synergy = Interaction()
#a = Portfolios_with_Synergy.check()
#a = pd.DataFrame(a, columns = ["Best Performing Portfolios for Actor Y"])

#Portfolios_with_Synergy.portfolios

## Normalisation Techniques

Mainly used for exploratory purposes

In [None]:
class Normalisation():
    """Vector-normalise the aggregated portfolio matrix and apply criteria weights.

    Parameters
    ----------
    dataset :
        Portfolio data object; its ``df_aggregated`` frame is normalised and
        its ``dataset.criteria_weights()`` supplies the weights.

    Until the matching step has run, ``z``, ``z_df``, ``zw`` and ``zw_df``
    hold placeholder strings.
    """
    def __init__(self,  dataset = Portfolio_Data):
        self.inhereted_class = dataset
        self.dataset = dataset.df_aggregated
        self.x = self.dataset.values
        #Indexes of the criteria that are deemed 'cost criteria'
        self.cost = [6, 7, 8, 10]
        self.z = "Dataset not yet normalised"
        self.z_df = "Dataframe not yet constructed"
        self.raise_norm = False
        self.raise_mul = False
        self.zw = "Normalised Data not yet weighted, run weights_added() first"
        self.zw_df = "Dataframe not yet constructed"

    def v_norm(self):
        """Divide every column by its Euclidean magnitude, rounded to 3 decimals."""
        x = np.asarray(self.x, dtype=float)
        #Magnitude of each column: root of the summed squares over all rows.
        column_magnitude = np.sqrt(np.sum(x**2, axis=0))
        self.z = np.round(x / column_magnitude, 3)
        self.raise_norm = True
        return self.z

    def v_norm_df(self):
        """Return the normalised matrix as a DataFrame, normalising first if needed.

        The "Strategic_Portfolio" column is overwritten with 1..number of rows.
        """
        if self.raise_norm == False:
            self.v_norm()
        self.z_df = pd.DataFrame(self.z, columns = list(self.dataset.columns))
        self.z_df["Strategic_Portfolio"] = np.array(range(self.z.shape[0])) + 1
        return self.z_df

    def norm_cost(self):
        """Replace the columns listed in ``self.cost`` by ``1 - column``, in place.

        Returns ``None`` (after printing a hint) when ``v_norm()`` has not run.
        Calling it twice restores the original columns.
        """
        if self.raise_norm == False:
            print("First run v_norm()")
            return
        print('For criteria in indexes of self.cost, namely: ', self.cost, " the values are transformed to cost criteria, altering the normalisation")
        for i in self.cost:
            #Slicing (not plain indexing) keeps an index beyond the last column a silent no-op.
            self.z[:, i:(i+1)] = 1 - self.z[:, i:(i+1)]
        return self.z

    def weights_added(self):
        """Multiply every normalised column by its criteria weight.

        Normalises first if that has not happened yet, like ``v_norm_df``.
        Returns ``None`` (after printing a message) when the number of weights
        differs from the number of columns.
        """
        if self.raise_norm == False:
            self.v_norm()
        self.weights = self.inhereted_class.dataset.criteria_weights()[0]
        if self.weights.shape[0] != self.z.shape[1]:
            print("The amount of criteria do not match the amount of weights in the excel files; they have to be equal.")
            return
        #Broadcasting multiplies column j of every row by weight j.
        self.zw = self.z * np.asarray(self.weights, dtype=float)
        self.raise_mul = True
        return self.zw

    def weights_added_df(self):
        """Return the weighted matrix as a DataFrame, weighting first if needed.

        The "Strategic_Portfolio" column is overwritten with 1..number of rows.
        """
        if self.raise_mul == False:
            self.weights_added()
        self.zw_df = pd.DataFrame(self.zw, columns = list(self.dataset.columns))
        self.zw_df["Strategic_Portfolio"] = np.array(range(self.zw.shape[0])) + 1
        return self.zw_df

    def run(self):
        """Run normalisation, cost transformation and weighting in order."""
        self.v_norm()
        self.norm_cost()
        self.v_norm_df()
        self.weights_added()
        self.weights_added_df()
        print("The Portfolio Matrix has been normalised.")
        return

Portfolio_Normalised = Normalisation()
Portfolio_Normalised.run()


## Topsis Implementation
Used for a first, exploratory understanding of MCDA ranking techniques. It is deemed less adequate than MAVT.

In [None]:
class Topsis():
    """Rank the portfolios with TOPSIS on the weighted normalised matrix.

    Every row of ``self.zw`` (an alternative / portfolio) is scored by its
    Euclidean distance to an ideal point (zenith) and to an anti-ideal point
    (nadir). The closeness coefficient ``d- / (d+ + d-)`` is highest for the
    preferred portfolio.

    Attributes set by ``__init__``:
        inhereted_class: ``dataset.inhereted_class``; ``single_portfolio``
            delegates to it.
        dataset: ``dataset.z``, the normalised decision matrix.
        zw: ``dataset.zw``, the weighted normalised decision matrix
            (rows are portfolios, columns are criteria).
        raise_zenith: True once ``zenith_nadir()`` has filled
            ``self.zenith`` and ``self.nadir``.
        normalisation, nadir_method: options of the running ``topsis()``
            call; ``"Not Specified"`` between calls.
    """

    def __init__(self,  dataset = Portfolio_Normalised):
        self.inhereted_class = dataset.inhereted_class
        self.dataset = dataset.z
        self.zw = dataset.zw
        self.raise_zenith = False
        self.normalisation = "Not Specified"
        self.nadir_method = "Not Specified"

    def zenith_nadir(self):
        """Compute the ideal (zenith) and anti-ideal (nadir) points.

        Reads ``self.nadir_method``: ``'m'`` takes the column-wise maximum
        and minimum of ``self.zw``; any other value uses the absolute
        points, a vector of ones and a vector of zeros with one entry per
        criterion. The result is stored in ``self.zenith`` / ``self.nadir``
        and ``self.raise_zenith`` is set to True.

        Returns:
            tuple: ``(self.zenith, self.nadir)``.
        """
        u = self.zw
        if self.nadir_method == 'm':
            # Best and worst observed value per criterion (column).
            self.zenith = np.amax(u, axis=0)
            self.nadir = np.amin(u, axis=0)
        else:
            self.zenith = np.ones(u.shape[1])
            self.nadir = np.zeros(u.shape[1])
        self.raise_zenith = True
        return (self.zenith, self.nadir)

    def distance(self):
        """Return every portfolio's distance to the zenith and to the nadir.

        ``zenith_nadir()`` must have been run first, because the method
        reads ``self.zenith`` and ``self.nadir``.

        Returns:
            tuple: two 1-D arrays with one entry per row of ``self.zw``:
            the Euclidean distance to the ideal point (di+) and to the
            anti-ideal point (di-).
        """
        u = np.asarray(self.zw, dtype=float)
        to_ideal = np.sqrt(((u - self.zenith) ** 2).sum(axis=1))
        to_anti_ideal = np.sqrt(((u - self.nadir) ** 2).sum(axis=1))
        return to_ideal, to_anti_ideal

    def single_portfolio(self, portfolio_set = 1):
        """Remember ``portfolio_set`` and delegate to ``inhereted_class.single_portfolio``."""
        self.portfolio_set = portfolio_set
        return self.inhereted_class.single_portfolio(portfolio_set)

    def aggregated_single_portfolio(self, portfolio_set = 1):
        """Delegate to ``self.dataset.aggregated_single_portfolio``."""
        # NOTE(review): self.dataset is ``dataset.z``, which the rest of the
        # notebook treats as a plain array, so this call presumably raises
        # AttributeError. It probably should delegate to self.inhereted_class
        # like single_portfolio does -- confirm which object defines
        # aggregated_single_portfolio before changing it.
        return self.dataset.aggregated_single_portfolio(portfolio_set)

    def _plot_results(self, to_ideal, to_anti_ideal, closeness):
        """Plot both distances and the closeness coefficient per portfolio."""
        q = [i + 1 for i in range(self.zw.shape[0])]
        plt.plot(q, to_ideal, 'p--', color = 'red',
            markeredgewidth = 1, markersize = 3)
        plt.plot(q, to_anti_ideal, '*--',  color = 'blue',
            markeredgewidth = 1, markersize = 3)
        plt.plot(q, closeness, 'o--', color = 'green',
            markeredgewidth = 1, markersize = 3)
        plt.title('TOPSIS results')
        plt.legend(['Distance from the ideal',
            'Distance from the anti-ideal',
            'Closeness coefficient'])
        if self.nadir_method == "m":
            plt.axis([0, self.zw.shape[0] + 1, 0, 1.2])
        else:
            plt.axis([0, self.zw.shape[0] + 1, 0, 5])
        plt.xlabel('Portfolios')
        plt.grid(True)
        plt.show()

    def topsis(self, pl = 'no', normalisation = "v", nadir_method = "a"):
        """Run TOPSIS and return the best-scoring portfolio.

        Args:
            pl: ``'yes'`` plots the distances and closeness coefficients;
                any other value skips the plot.
            normalisation: label stored in ``self.normalisation`` for the
                duration of the call; it is not used in the computation.
            nadir_method: ``'m'`` for the min/max zenith and nadir, any
                other value (default ``'a'``) for the absolute ones.

        Returns:
            Whatever ``single_portfolio`` returns for the 1-based number of
            the portfolio with the highest closeness coefficient. When
            several portfolios tie, the first one is used.

        Raises:
            ValueError: when no closeness coefficient can be computed (no
                portfolios, or every portfolio coincides with both the
                zenith and the nadir).
        """
        self.normalisation = normalisation
        self.nadir_method = nadir_method
        # Always recompute: a zenith/nadir cached by an earlier
        # zenith_nadir() call may belong to a different nadir_method.
        self.zenith_nadir()

        p, n = self.distance()
        # 0/0 occurs when a portfolio coincides with both reference points;
        # the resulting NaN is handled explicitly below.
        with np.errstate(invalid = "ignore"):
            final_s = n / (p + n)

        if pl == 'yes':
            self._plot_results(p, n, final_s)

        if np.isnan(final_s).all():
            raise ValueError(
                "No closeness coefficient could be computed: the decision "
                "matrix is empty or every portfolio coincides with both the "
                "ideal and the anti-ideal point."
            )
        best_score = np.nanmax(final_s)
        # 1-based numbers of all portfolios that reach the maximum.
        place = np.flatnonzero(final_s == best_score) + 1
        print("The maximum value of the closeness coefficients is: {}, which corresponds to portfolio {} from the decision matrix.".format(best_score, place))

        # Reset the per-call state so the next call starts clean.
        self.raise_mul = False
        self.raise_norm = False
        self.raise_zenith = False
        self.normalisation = "Not Specified"
        self.nadir_method = "Not Specified"
        # `place` is an array; take its first entry so ties do not break the
        # conversion to int.
        return self.single_portfolio(int(place[0]))


# Instantiate TOPSIS on its default dataset (Portfolio_Normalised). The
# constructor only stores references; nothing is ranked until topsis() is
# called.
Topsis_Method = Topsis()
