In [None]:
from numba import cuda
import random 
import copy
import numpy as np
import pygmo as pg
import pandas as pd
#from scipy.io import arff
import pickle
from multiprocessing import Pool
import os
from collections import OrderedDict
#from sklearn.metrics import confusion_matrix

In [None]:
def sequential_predict_rules_outcome(rules_bodies,rules_classes,data,bounderies,predictions):
    """
    CPU reference implementation of rule prediction; fills ``predictions`` in place.

    :param rules_bodies: indexable as [rule][feature][0|1], the lower/upper bound
        of each feature interval of each rule
    :param rules_classes: the class a rule predicts for rows it covers
    :param data: indexable as [row][feature]
    :param bounderies: indexable as [feature, 0], the value marking an unused feature
    :param predictions: output indexable as [rule, row]; receives the rule class
        when every used interval contains the row, its negation otherwise
    """
    for rule_pos, intervals in enumerate(rules_bodies):
        covered_label = rules_classes[rule_pos]
        for row_pos, sample in enumerate(data):
            verdict = covered_label
            for feature_pos, bounds in enumerate(intervals):
                low = bounds[0]
                high = bounds[1]
                # A collapsed interval sitting on the feature's lower boundary
                # means "this feature is not constrained by the rule".
                if (low == high) and low == bounderies[feature_pos,0]:
                    continue
                if (sample[feature_pos] > high) or (sample[feature_pos] < low):
                    verdict = not(covered_label)
                    break
            predictions[rule_pos,row_pos] = verdict
            



In [None]:

def is_pareto_efficient(costs, return_mask = False):
    """
    Find the pareto-efficient points, treating larger values as better.

    A point is discarded as soon as a surviving pivot point is strictly
    greater than it in every column; ties in any column keep the point.

    :param costs: An (n_points, n_costs) array
    :param return_mask: True to return a mask
    :return: An array of indices of pareto-efficient points.
        If return_mask is True, this will be an (n_points, ) boolean array
        Otherwise it will be a (n_efficient_points, ) integer array of indices.
    """
    total_points = costs.shape[0]
    survivors = np.arange(total_points)
    pivot = 0  # position, inside the shrinking `costs`, of the point being tested
    while pivot < len(costs):
        keep = (costs >= costs[pivot]).any(axis=1)
        keep[pivot] = True
        survivors = survivors[keep]  # original indices of the rows still alive
        costs = costs[keep]
        # Rows removed before the pivot shift it left; move on to the row after it.
        pivot = np.count_nonzero(keep[:pivot]) + 1
    if not return_mask:
        return survivors
    efficient_mask = np.zeros(total_points, dtype = bool)
    efficient_mask[survivors] = True
    return efficient_mask

In [None]:
@cuda.jit
def CUDA1D_predict_rules_outcome(rules_bodies,rules_classes,data,bounderies,predictions,confusion_matricies,): 
    """
    CUDA kernel: the thread with flattened index ``pos`` evaluates rule ``pos``
    against every row of ``data`` and writes the outcome to ``predictions[pos, row]``.

    :param rules_bodies: indexed as [rule][feature][0|1] (lower / upper bound)
    :param rules_classes: class written when a row satisfies the whole rule
    :param data: indexed as [row][feature]
    :param bounderies: indexed as [feature, 0]; value marking an unused feature
    :param predictions: output, indexed as [rule, row]
    :param confusion_matricies: not read or written by this kernel
    """
    tx = cuda.threadIdx.x
    # Block id in a 1D grid
    ty = cuda.blockIdx.x
    # Block width, i.e. number of threads per block
    bw = cuda.blockDim.x
    # Compute flattened index inside the array
    pos = tx + ty * bw
    if pos < len(rules_bodies):  # Check array boundaries
        rule = rules_bodies[pos]
        for data_index, data_item in enumerate(data):
            # Start from "row is covered by the rule"; flipped below on the first violated interval.
            predictions[pos,data_index] = rules_classes[pos]
            for item_index,item in enumerate(rule) : 
                # Interval collapsed onto the feature's boundary value: feature is ignored.
                if (item[0] == item[1]) and item[0] == bounderies[item_index,0]: 
                    continue 
                # Row value outside [item[0], item[1]]: the rule does not cover this row.
                if (data_item[item_index] > item[1]) or (data_item[item_index] < item[0]) : 
                    predictions[pos,data_index] = not(rules_classes[pos])
                    break
            

In [None]:
@cuda.jit
def CUDA1D_compute_rules_confusion_matricies(rules_bodies,rules_classes,data,y,bounderies,predictions,objectives):
    """
    CUDA kernel: the thread with flattened index ``pos`` evaluates rule ``pos`` on
    every row of ``data``, counts TP/TN/FP/FN against ``y`` and stores two rates.

    :param rules_bodies: indexed as [rule][feature][0|1] (lower / upper bound)
    :param rules_classes: class written when a row satisfies the whole rule
    :param data: indexed as [row][feature]
    :param y: expected outcome per row, compared against True / False
    :param bounderies: indexed as [feature, 0]; value marking an unused feature
    :param predictions: output, indexed as [rule, row]
    :param objectives: output; [pos, 0] = TP/(TP+FN), [pos, 1] = TN/(TN+FP)
    """
    tx = cuda.threadIdx.x
    # Block id in a 1D grid
    ty = cuda.blockIdx.x
    # Block width, i.e. number of threads per block
    bw = cuda.blockDim.x
    # Compute flattened index inside the array
    pos = tx + ty * bw
    # Per-thread confusion-matrix counters for rule `pos`.
    TP = 0
    TN = 0 
    FP = 0 
    FN = 0
    if pos < len(rules_bodies):  # Check array boundaries
        rule = rules_bodies[pos]
        for data_index, data_item in enumerate(data):
            # Start from "row is covered by the rule"; flipped below on the first violated interval.
            predictions[pos,data_index] = rules_classes[pos]
            for item_index,item in enumerate(rule) : 
                # Interval collapsed onto the feature's boundary value: feature is ignored.
                if (item[0] == item[1]) and item[0] == bounderies[item_index,0]: 
                    continue 
                if (data_item[item_index] > item[1]) or (data_item[item_index] < item[0]) : 
                    predictions[pos,data_index] = not(rules_classes[pos])
                    break
            # Chained comparison: prediction equals y AND y is True.
            if predictions[pos,data_index] == y[data_index] == True: 
                TP = TP + 1 

            # Chained comparison: prediction equals y AND y is False.
            if predictions[pos,data_index] == y[data_index] == False : 
                TN = TN + 1 

            if (predictions[pos,data_index] == True) and (predictions[pos,data_index] != y[data_index]):
                FP = FP + 1

            if predictions[pos,data_index] == False and predictions[pos,data_index] != y[data_index]: 
                FN =FN+ 1
        # TP + FN is the number of rows with y True, TN + FP the number with y False.
        # NOTE(review): neither denominator is guarded; if y contains only one
        # class one of these divisions has a zero denominator - confirm inputs.
        objectives[pos,0] = TP*1.0/(TP + FN)
        objectives[pos,1] = TN*1.0/(TN + FP)

In [None]:
class AbstractProblem:
    """Base class for optimisation problems; the hooks below do nothing here."""

    def __init__(self,nb_objectives,nb_variables):
        self.nb_variables = nb_variables
        self.nb_objectives = nb_objectives

    def evaluate_solution(self,solution):
        """Hook for scoring ``solution``; the base implementation returns None."""
        return None

    def new_solution(self):
        """Hook for creating a solution; the base implementation returns None."""
        return None

    def new_velocity(self):
        """Hook for creating a velocity; the base implementation returns None."""
        return None
class DefectPrediction(AbstractProblem):
    """
    Two-objective rule-learning problem built from a pandas DataFrame.

    Every column except the outcome column is described as a numerical feature
    with its observed min/max. ``bounderies[i]`` holds the (min, max) of the
    i-th column of ``X`` / ``X_as_np``.
    """

    def __init__(self,data,outcome_name = 'bug'):
        """
        :param data: DataFrame holding the feature columns and the outcome column
        :param outcome_name: name of the outcome column (cast to bool for ``y``)
        """
        super().__init__(2,1)
        self.data = data
        self.feature_description = {}
        self.outcome_variable = outcome_name
        self.y = np.array(data[self.outcome_variable].astype(bool))
        self.X = data.drop(columns = [self.outcome_variable])
        self.X_as_np = self.X.to_numpy(dtype = 'float64')
        self.bounderies = np.zeros((len(self.X.columns),2),dtype = 'float64')
        # Enumerate the feature columns only, so that row i of `bounderies`
        # always lines up with column i of `X_as_np` wherever the outcome
        # column sits in `data` (enumerating `data.columns` misaligned the rows
        # or overflowed when the outcome column was not the last one).
        for index,column in enumerate(self.X.columns):
            column_min = min(data[column])
            column_max = max(data[column])
            self.feature_description[column] = {'type': 'numerical', 'min' : column_min, 'max': column_max}
            self.bounderies[index] = np.array([column_min,column_max])

        # (name, description) pairs, in the same order as the columns of X.
        self.features_description_as_list = list(self.feature_description.items())
        self.rule_encoding = RuleEncoding(self.feature_description,empty_prob=0.1)

    def evaluate_solution(self,solution):
        """
        Evaluate the rule in ``solution.variables[0]`` on every row of ``X`` and
        store the two negated rates in ``solution.objectives``.
        """
        rule = solution.variables[0]
        y_prediction = []
        for index,row in self.X.iterrows():
            y_prediction.append(rule.evaluate_rule(row))

        # NOTE(review): perf_measure is not defined in the visible part of this
        # file; it is assumed to return (tp, fp, tn, fn) - confirm.
        tp,fp,tn,fn = perf_measure(self.y,y_prediction)
        solution.objectives = [-1*tp*1.0/(tp + fn),-1*tn*1.0/(tn + fp)]

    def new_solution(self):
        """Return a Solution whose single variable is a freshly randomised rule."""
        new_solution = Solution(1,2)
        new_solution.variables[0] = RuleVariable(self.rule_encoding)
        new_solution.variables[0].initialize()
        return new_solution

    def new_velocity(self):
        """Return a random velocity, one entry per feature of the encoding."""
        return self.rule_encoding.randomize_velocities()

      
class MOPSO:
    """
    Multi-objective particle swarm optimiser working on Solution objects.

    The swarm holds ``particles_per_class`` particles for each rule class
    (True and False). ``repository`` holds the current non-dominated solutions,
    and each particle follows the repository member with the closest sigma value.
    """

    def __init__(self,configuration,optimization_problem) :
        """
        :param configuration: overrides for ``default_config``; a mapping or an
            iterable of (name, value) pairs. Empty / None keeps the defaults.
        :param optimization_problem: object providing evaluate_solution,
            new_solution, new_velocity and rule_encoding
        """
        self.optimization_problem = optimization_problem
        self.default_config = {
            'particles_per_class': 500,
            'generation_number': 100,
            'c1': 1,
            'c2':1,
            'empty_prob': 0.1,
            'w': [0,0.8],
            'ph1': [0,4],
            'ph2': [0,4],
        }
        self.current_config = copy.deepcopy(self.default_config)
        self.repository = None

        # dict() accepts both a mapping and an iterable of pairs. Iterating a
        # dict directly (as before) yields only its keys, which cannot be
        # unpacked into (name, value).
        self.current_config.update(dict(configuration or {}))

    def evolve(self):
        """
        Run the optimisation for ``generation_number`` generations, leaving the
        last non-dominated set in ``self.repository``.

        NOTE(review): non_dominated_sorting is not defined in the visible part
        of this file; it is assumed to accept a list of solutions and return
        (ndf, dl, dc, ndr) with ndf[0] the indices of the first front - confirm.
        """
        #initialization
        current_population = self.create_initial_population()
        current_velocities = self.initialize_velocities()

        #evaluate solutions
        for solution in current_population:
            self.optimization_problem.evaluate_solution(solution)
        current_best_positions = copy.deepcopy(current_population)

        ndf, dl, dc, ndr = non_dominated_sorting(current_population)
        self.repository = [current_population[index] for index in ndf[0]]
        leader_assignment = self.assign_local_leader(current_population)

        for generation_count in range(self.current_config['generation_number']) :
            if (generation_count % 20 == 0):
                print('generation:',generation_count)
            current_velocities = self.update_velocities(current_velocities,current_population,current_best_positions,leader_assignment)
            current_population = self.update_position(current_population,current_velocities)
            for index,solution in enumerate(current_population):
                self.optimization_problem.evaluate_solution(solution)
                # The personal best is drawn at random among whichever of
                # (new position, previous best) is non-dominated.
                best_position_update = [solution,current_best_positions[index]]
                ndf, dl, dc, ndr = non_dominated_sorting(best_position_update)
                current_best_positions[index] = random.choice([best_position_update[idx] for idx in ndf[0]])
            ndf, dl, dc, ndr = non_dominated_sorting(current_population)
            self.repository = [current_population[index] for index in ndf[0]]
            leader_assignment = self.assign_local_leader(current_population)

    def update_position(self,current_position,new_velocities) :
        """
        Return deep copies of the particles moved by their velocities.

        :param current_position: list of solutions; not modified
        :param new_velocities: per particle, a dict feature name -> velocity
        :return: list of new solutions, bounds wrapped by the encoding's mod_operator
        """
        rule_encoding = self.optimization_problem.rule_encoding
        new_positions = []
        for position_index, position in enumerate(current_position) :
            new_position = copy.deepcopy(position)
            for feature_name,feature_velocity in new_velocities[position_index].items():
                new_position.variables[0].rule_body[feature_name] = rule_encoding.mod_operator(feature_name,position.variables[0].rule_body[feature_name],feature_velocity)
            new_positions.append(new_position)

        return new_positions

    def update_velocities(self,current_velocities,current_population,current_best_positions,leader_assignment) :
        """
        Return new velocities: inertia + pull towards the personal best + pull
        towards the assigned repository leader, with fresh random weights drawn
        for every feature of every particle.

        :param leader_assignment: per particle, an index into ``self.repository``
        :return: list of dicts feature name -> velocity (inputs are not modified)
        """
        w_low,w_high = self.current_config['w'][0],self.current_config['w'][1]
        ph1_low,ph1_high = self.current_config['ph1'][0],self.current_config['ph1'][1]
        ph2_low,ph2_high = self.current_config['ph2'][0],self.current_config['ph2'][1]
        c1 = self.current_config['c1']
        c2 = self.current_config['c2']
        new_velocities = []
        for index,velocity in enumerate(current_velocities):
            new_velocity = copy.deepcopy(velocity)
            own_body = current_population[index].variables[0].rule_body
            best_body = current_best_positions[index].variables[0].rule_body
            leader_body = self.repository[leader_assignment[index]].variables[0].rule_body
            for feature_name in new_velocity:
                omega = random.uniform(w_low,w_high)
                ph1 = random.uniform(ph1_low,ph1_high)
                ph2 = random.uniform(ph2_low,ph2_high)
                new_velocity[feature_name] = new_velocity[feature_name]*omega \
                +c1*ph1*(best_body[feature_name] - own_body[feature_name])\
                +c2*ph2*(leader_body[feature_name] - own_body[feature_name])
            new_velocities.append(new_velocity)
        return new_velocities

    def create_initial_population(self):
        """Return ``particles_per_class`` new solutions for class True, then for class False."""
        new_population = []
        for rule_class in [True,False]:
            for _ in range(self.current_config['particles_per_class']):
                new_solution = self.optimization_problem.new_solution()
                new_solution.variables[0].set_class(rule_class)
                new_population.append(new_solution)

        return new_population

    def assign_local_leader(self,population) :
        """
        For each solution return the index (into ``self.repository``) of the
        member whose sigma value is closest; the first one wins on ties.
        An entry stays None when the repository is empty.
        """
        repository_sigmas = [member.get_sigma(0,1) for member in self.repository]
        leader_assignment = [None]*len(population)
        for index,solution in enumerate(population):
            solution_sigma = solution.get_sigma(0,1)
            best_sigma_dist = float('inf')
            for repository_solution_index,repository_sigma in enumerate(repository_sigmas):
                sigma_dist = abs(repository_sigma - solution_sigma)
                if sigma_dist < best_sigma_dist:
                    best_sigma_dist = sigma_dist
                    leader_assignment[index] = repository_solution_index
        return leader_assignment


    def initialize_velocities(self):
        """Return one random velocity per particle (2 * particles_per_class)."""
        return [self.optimization_problem.new_velocity() for _ in range(2*self.current_config['particles_per_class'])]


    

class RuleVariable:
    """A classification rule: one condition per feature plus the class it predicts."""

    def __init__(self,encoding):
        self.rule_body = None
        self.rule_encoding = encoding

    def initialize(self):
        """Replace the rule body with a random one drawn from the encoding."""
        self.rule_body = self.rule_encoding.randomize()

    def set_class(self,new_class):
        """Set the class returned for inputs that satisfy every condition."""
        self.rule_class = new_class

    def evaluate_rule(self,env):
        """
        Return ``rule_class`` when ``env`` satisfies every condition of the
        rule, and ``not rule_class`` on the first condition it violates.

        :param env: mapping feature name -> observed value
        """
        for name, condition in self.rule_body.items():
            observed = env[name]
            if self.rule_encoding.features_description[name]['type'] == 'CATEGORICAL':
                # An observed value of -1 is never tested against the condition.
                if observed == -1:
                    continue
                if condition != observed:
                    return not(self.rule_class)
                continue
            # Numerical feature: condition is a [lower, upper] interval.
            if condition[0] == condition[1] and condition[0] == self.rule_encoding.features_description[name]['min']:
                # Interval collapsed onto the feature minimum: feature is unused.
                continue
            if observed < condition[0] or (observed > condition[1]):
                return not(self.rule_class)
        return self.rule_class


class Solution:
    """Container for the decision variables and objective values of one particle."""

    def __init__(self,nb_variables,nb_objectives):
        self.nb_variables = nb_variables
        self.nb_objectives = nb_objectives
        self.objectives = [None for _ in range(nb_objectives)]
        self.variables = [None for _ in range(nb_variables)]
        self.attributes = dict()

    def get_sigma(self,obj1,obj2):
        """
        Return (a^2 - b^2) / (a^2 + b^2) for the objectives at positions
        ``obj1`` (a) and ``obj2`` (b).
        """
        first_squared = self.objectives[obj1]**2
        second_squared = self.objectives[obj2]**2
        return (first_squared - second_squared)*1.0 / (first_squared + second_squared)
class RuleEncoding:
    """
    Random generation and bounded update of rule bodies and velocities.

    ``features_description`` maps a feature name to a dict with a ``'type'``
    key. Features of type ``'CATEGORICAL'`` carry ``'values'``; every other
    type is treated as numerical and carries ``'min'`` and ``'max'``.
    A numerical interval equal to [min, min] encodes "feature not used".
    """

    def __init__(self,features_description,empty_prob):
        """
        :param features_description: dict feature name -> description dict
        :param empty_prob: probability that a generated condition is the empty one
        """
        self.features_description = features_description
        self.empty_prob = empty_prob

    def mod_operator(self,feature_name,values,velocity) :
        """
        Add ``velocity`` to the interval ``values`` and wrap a bound that leaves
        [min, max] around to the opposite side of the range.

        :param values: numpy array [lower, upper]
        :param velocity: numpy array of the same shape
        :return: sorted numpy array [lower, upper]; [min, min] (the empty
            condition) when both bounds overflow
        """
        limits = self.features_description[feature_name]
        update = values + velocity
        max_overflow = None
        min_overflow = None
        if update[1] > limits['max'] :
            max_overflow = update[1] - limits['max']
        if update[0] < limits['min'] :
            # Distance below the minimum. Measuring it from 'max' (as before)
            # made the wrap-around below a no-op, leaving the bound out of range.
            min_overflow = limits['min'] - update[0]
        if max_overflow is not None and min_overflow is not None :
            return np.array([limits['min'],limits['min']])
        if max_overflow is not None :
            update[1] = limits['min'] + max_overflow
        if min_overflow is not None :
            update[0] = limits['max'] - min_overflow
        update.sort()
        return update

    def get_features_number(self):
        """Return the number of described features."""
        return len(self.features_description)

    def randomize(self):
        """Return a random rule body as a dict feature name -> numpy array."""
        new_rule = {}
        for feature_name,feature_description in self.features_description.items() :
            if feature_description['type'] == 'CATEGORICAL':
                empty_rule_update = np.array([-1])
                # Pick one of the values; choice([values]) always returned the whole list.
                rule_update = np.array([random.choice(feature_description['values'])])
            else:
                empty_rule_update = np.array([feature_description['min'],feature_description['min']])
                rule_update = np.sort(np.array([random.uniform(feature_description['min'], feature_description['max']),random.uniform(feature_description['min'], feature_description['max'])]))

            if random.random() <= self.empty_prob:
                new_rule[feature_name] = empty_rule_update
            else :
                new_rule[feature_name] = rule_update
        return new_rule

    def randomize_rule_body_np(self):
        """
        Return a random rule body as a (n_features, 2) float64 array, rows in
        the iteration order of ``features_description``.
        """
        # float64 so that the empty condition [min, min] stays exactly equal to
        # the float64 feature minimum it is later compared with; float32
        # rounding broke that equality for minima not representable in float32.
        new_rule = np.zeros((self.get_features_number(),2),dtype='float64')
        for index,(feature_name,feature_description) in enumerate(self.features_description.items()) :
            if feature_description['type'] == 'CATEGORICAL':
                empty_rule_update = np.array([-1,-1])
                rule_update = np.array([random.choice(feature_description['values']),-1])
            else:
                empty_rule_update = np.array([feature_description['min'],feature_description['min']])
                rule_update = np.sort(np.array([random.uniform(feature_description['min'], feature_description['max']),random.uniform(feature_description['min'], feature_description['max'])]))
            if random.random() <= self.empty_prob:
                new_rule[index,:] = empty_rule_update
            else :
                new_rule[index,:] = rule_update
        return new_rule

    def randomize_velocities(self):
        """Return a random velocity as a dict feature name -> numpy array."""
        new_velocity = {}
        for feature_name,feature_description in self.features_description.items() :
            if feature_description['type'] == 'CATEGORICAL':
                rule_update = np.array([random.choice(feature_description['values'])])
            else:
                rule_update = np.array([random.random(),random.random()])

            new_velocity[feature_name] = rule_update
        return new_velocity

    def randomize_velocity_np(self):
        """Return a random velocity as a (n_features, 2) array."""
        # The shape must be one tuple: np.zeros(n, 2) passes 2 as the dtype.
        new_velocity = np.zeros((self.get_features_number(),2))
        for index,(feature_name,feature_description) in enumerate(self.features_description.items()) :
            if feature_description['type'] == 'CATEGORICAL':
                rule_update = np.array([random.choice(feature_description['values'])])
            else:
                rule_update = np.array([random.random(),random.random()])

            new_velocity[index,:] = rule_update
        return new_velocity

       
class GPUMOPSO(MOPSO):
    """
    Array-based variant of MOPSO: the swarm is one (n_rules, n_features, 2)
    array of interval bounds plus a boolean array of rule classes, and the
    rules are scored by the CUDA kernel CUDA1D_compute_rules_confusion_matricies.
    """

    def evolve(self):
        """
        Run the optimisation and return the final swarm.

        :return: dict with keys 'rules' (rule bodies), 'rules_classes',
            'objectives' (as returned by evaluate_rules, i.e. negated rates)
            and 'no_dominated_rules_indicies' (indices of the first front)
        """
        rules_bodies,rules_classes = self.create_new_population()
        current_velocities = self.initialize_velocities()
        #evaluate solutions
        objectives = self.evaluate_rules(rules_bodies,rules_classes)
        current_best_positions = np.copy(rules_bodies)
        current_best_positions_objectives =  np.copy(objectives)
        ndf, dl, dc, ndr = pg.fast_non_dominated_sorting(objectives)
        self.repository = ndf[0]
        leader_assignment = self.assign_local_leader(objectives)

        for generation_count in range(self.current_config['generation_number']) :
            if (generation_count % 20 == 0):
                print('generation:',generation_count)
            current_velocities = self.update_velocities(current_velocities,rules_bodies,current_best_positions,leader_assignment)
            rules_bodies = self.update_position(rules_bodies,current_velocities)
            objectives = self.evaluate_rules(rules_bodies,rules_classes)
            for index,solution in enumerate(rules_bodies):
                # The personal best is drawn at random among whichever of
                # (new position, previous best) is non-dominated.
                best_position_update = [solution,current_best_positions[index]]
                best_position_objective_update = np.array([objectives[index],current_best_positions_objectives[index]])
                ndf, dl, dc, ndr = pg.fast_non_dominated_sorting(best_position_objective_update)
                new_best_position_choice = random.choice(ndf[0])
                current_best_positions[index] = best_position_update[new_best_position_choice]
                current_best_positions_objectives[index] = best_position_objective_update[new_best_position_choice]
            ndf, dl, dc, ndr = pg.fast_non_dominated_sorting(objectives)
            self.repository = ndf[0]
            leader_assignment = self.assign_local_leader(objectives)
        for index in self.repository:
            print(objectives[index])
        return {
            'rules':rules_bodies,
            'rules_classes':rules_classes,
            'objectives':objectives,
            'no_dominated_rules_indicies': self.repository
        }

    def evaluate_rules(self,rules_bodies,rules_classes) :
        """
        Score every rule on the problem's data with the CUDA kernel.

        :return: (n_rules, 2) float32 array holding the kernel's two rates
            multiplied by -1
        """
        predictions = np.empty(shape = (len(rules_bodies),len(self.optimization_problem.X)),dtype='bool')
        objectives = np.empty(shape = (len(rules_bodies),2),dtype='float32')
        threadsperblock = 8
        # Ceiling division: enough blocks for one thread per rule.
        blockspergrid = (len(predictions) + (threadsperblock - 1)) // threadsperblock

        rules_bodies_cuda = cuda.to_device(rules_bodies)
        rules_classes_cuda =  cuda.to_device(rules_classes)
        data_cuda = cuda.to_device(self.optimization_problem.X_as_np)
        y_cuda = cuda.to_device(self.optimization_problem.y)
        bounderies_cuda = cuda.to_device(self.optimization_problem.bounderies)
        predictions_cuda = cuda.to_device(predictions)
        objectives_cuda = cuda.to_device(objectives)

        CUDA1D_compute_rules_confusion_matricies[blockspergrid, threadsperblock](rules_bodies_cuda,rules_classes_cuda,data_cuda,
                                                                                 y_cuda,bounderies_cuda,predictions_cuda,objectives_cuda)
        objectives = objectives_cuda.copy_to_host()

        return objectives*-1

    def assign_local_leader(self,objectives):
        """
        For each particle return the population index of the repository member
        whose sigma value is closest (-1 when the repository is empty).
        """
        sigmas = self.get_sigmas(objectives)
        leader_assignment = []
        for sigma in sigmas :
            least_sigma_dist = np.inf
            leader_index = -1
            for no_dominated_solution_index in self.repository:
                new_sigma_dist = abs(sigma - sigmas[no_dominated_solution_index])
                if ( new_sigma_dist < least_sigma_dist ):
                    least_sigma_dist = new_sigma_dist
                    leader_index = no_dominated_solution_index
            leader_assignment.append(leader_index)

        return leader_assignment

    def initialize_velocities(self):
        """Return uniform [0, 1) velocities of shape (2 * particles_per_class, n_features, 2)."""
        return np.random.rand(self.current_config['particles_per_class']*2,self.optimization_problem.rule_encoding.get_features_number(),2)

    def create_new_population(self):
        """
        Return (rules_bodies, rules_classes): random rule bodies, the first
        ``particles_per_class`` of class True and the rest of class False.
        """
        index = 0
        rules_bodies = np.zeros((self.current_config['particles_per_class']*2,self.optimization_problem.rule_encoding.get_features_number(),2),dtype='float64')
        rules_classes = []
        for rule_class in [True,False]:
            for _ in range(self.current_config['particles_per_class']):
                rules_bodies[index,:,:] = self.optimization_problem.rule_encoding.randomize_rule_body_np()
                rules_classes.append(rule_class)
                index += 1

        return rules_bodies, np.array(rules_classes,dtype = 'bool')

    def predict_rules_outcome(self,rules_bodies,rules_classes,data) :
        """
        CPU prediction of every rule on every row of ``data``.

        :return: (n_rules, n_rows) boolean array; the rule class where the row
            satisfies every used interval, its negation otherwise
        """
        predictions = np.empty((len(rules_classes),len(data)), dtype=bool)
        # (name, description) pairs, one per feature.
        features = self.optimization_problem.features_description_as_list
        for rule_index,rule in enumerate(rules_bodies):
            for data_index, data_item in enumerate(data):
                # The default is set BEFORE the checks: assigning it after the
                # inner loop (as before) overwrote every detected mismatch.
                predictions[rule_index,data_index] = rules_classes[rule_index]
                for item_index,item in enumerate(rule) :
                    if (item[0] == item[1]) and item[0] == features[item_index][1]['min']:
                        continue
                    if (data_item[item_index] > item[1]) or (data_item[item_index] < item[0]) :
                        predictions[rule_index,data_index] = not(rules_classes[rule_index])
                        break

        return predictions

    def update_velocities(self,current_velocities,rules_bodies,current_best_positions,leader_assignment) :
        """
        Update ``current_velocities`` in place (and return it): inertia + pull
        towards the personal best + pull towards the leader's current position,
        with fresh random weights per particle and per bound.

        :param leader_assignment: per particle, the population index of its leader
        """
        for index in range(len(current_velocities)):
            omega = np.random.uniform(low=self.current_config['w'][0],high=self.current_config['w'][1],size=(rules_bodies.shape[1],rules_bodies.shape[2]))
            ph1 = np.random.uniform(low=self.current_config['ph1'][0],high=self.current_config['ph1'][1],size=(rules_bodies.shape[1],rules_bodies.shape[2]))
            ph2 = np.random.uniform(low=self.current_config['ph2'][0],high=self.current_config['ph2'][1],size=(rules_bodies.shape[1],rules_bodies.shape[2]))
            current_velocities[index] = current_velocities[index]*omega \
                +self.current_config['c1']*ph1*(current_best_positions[index] - rules_bodies[index])\
                +self.current_config['c2']*ph2*(rules_bodies[leader_assignment[index]] - rules_bodies[index])
        return current_velocities

    def update_position(self,current_position,new_velocities) :
        """Move every interval of every rule in place by its velocity and return the array."""
        for position_index, position in enumerate(current_position) :
            for velocity_item_index,velocity_item in enumerate(new_velocities[position_index]):
                position[velocity_item_index] = self.mod_operator(self.optimization_problem.features_description_as_list[velocity_item_index][1],position[velocity_item_index],velocity_item)
        return current_position


    def mod_operator(self,feature_limits,values,velocity) :
        """
        Add ``velocity`` to the interval ``values`` and wrap a bound that leaves
        [min, max] around to the opposite side of the range.

        :param feature_limits: dict with 'min' and 'max'
        :return: sorted [lower, upper] array; [min, min] when both bounds overflow
        """
        update = values + velocity
        max_overflow = None
        min_overflow = None
        if update[1] > feature_limits['max'] :
            max_overflow = update[1] - feature_limits['max']

        if update[0] < feature_limits['min'] :
            min_overflow = feature_limits['min'] - update[0]

        if not (max_overflow is None) and not (min_overflow is None) :
            return np.array([feature_limits['min'],feature_limits['min']])
        if not(max_overflow is None) :
            update[1] = feature_limits['min'] +  max_overflow
        if not(min_overflow is None) :
            update[0] = feature_limits['max'] -  min_overflow
        update.sort()
        return update

    def get_sigmas(self,sol_objectives) :
        """Return (a^2 - b^2) / (a^2 + b^2) per row, a and b being columns 0 and 1."""
        first_component_square = (sol_objectives[:,0]**2)*1.0
        second_component_square = (sol_objectives[:,1]**2)*1.0
        return (first_component_square - second_component_square)/(first_component_square + second_component_square)

    def compute_objectives(self,predictions) :
        """
        Return a (n_rules, 2) array of [TP/(TP+FN), TN/(TN+FP)] computed from
        boolean ``predictions`` (one row per rule) against the problem's ``y``.

        The counts are taken with numpy because ``confusion_matrix`` is not
        imported in this file. A rate whose denominator is zero (``y`` lacks
        that class) is reported as 0.0.
        """
        expected = np.asarray(self.optimization_problem.y, dtype=bool)
        objectives = np.empty((len(predictions),2))
        for index,prediction in enumerate(predictions):
            predicted = np.asarray(prediction, dtype=bool)
            tp = np.count_nonzero(predicted & expected)
            tn = np.count_nonzero(~predicted & ~expected)
            fp = np.count_nonzero(predicted & ~expected)
            fn = np.count_nonzero(~predicted & expected)
            true_positive_rate = tp*1.0/(tp + fn) if (tp + fn) else 0.0
            true_negative_rate = tn*1.0/(tn + fp) if (tn + fp) else 0.0
            objectives[index,:] = np.array([true_positive_rate,true_negative_rate])

        return objectives

    
class HybridGPUMOPSO(MOPSO) :
    """
    MOPSO on Solution objects (positions and velocities handled by the parent
    class) whose rules are scored in bulk by the CUDA kernel
    CUDA1D_compute_rules_confusion_matricies.
    """

    def evolve(self):
        """Run the optimisation, then print the objectives of the final repository."""

        def pareto_members(candidates):
            # Solutions of `candidates` selected by is_pareto_efficient.
            scores = np.array([candidate.objectives for candidate in candidates])
            return [candidates[position] for position in is_pareto_efficient(scores)]

        # Initialisation and first evaluation.
        swarm = self.create_initial_population()
        velocities = self.initialize_velocities()
        self.evaluate_rules(swarm)
        personal_bests = copy.deepcopy(swarm)
        self.repository = pareto_members(swarm)
        leaders = self.assign_local_leader(swarm)

        total_generations = self.current_config['generation_number']
        for generation in range(total_generations):
            if generation % 20 == 0:
                print('generation:',generation)
            velocities = self.update_velocities(velocities,swarm,personal_bests,leaders)
            swarm = self.update_position(swarm,velocities)
            self.evaluate_rules(swarm)
            for slot,particle in enumerate(swarm):
                # Keep, at random, one of the efficient members of
                # (new position, previous personal best).
                personal_bests[slot] = random.choice(pareto_members([particle,personal_bests[slot]]))
            self.repository = pareto_members(swarm)
            leaders = self.assign_local_leader(swarm)

        for member in self.repository:
            print(member.objectives)

    def evaluate_rules(self,solutions) :
        """
        Score all ``solutions`` with the CUDA kernel, store each row of the
        result in ``solution.objectives`` and return the whole array.

        :return: (n_solutions, 2) float32 array copied back from the device
        """
        problem = self.optimization_problem
        rules_bodies = np.array([list(sol.variables[0].rule_body.values()) for sol in solutions])
        rules_classes = np.array([sol.variables[0].rule_class for sol in solutions])

        rule_count = len(rules_bodies)
        predictions = np.empty(shape = (rule_count,len(problem.X)),dtype='bool')
        objectives = np.empty(shape = (rule_count,2),dtype='float32')

        # One thread per rule, rounded up to whole blocks.
        threadsperblock = 8
        blockspergrid = (len(predictions) + threadsperblock - 1) // threadsperblock

        rules_bodies_cuda = cuda.to_device(rules_bodies)
        rules_classes_cuda = cuda.to_device(rules_classes)
        data_cuda = cuda.to_device(problem.X_as_np)
        y_cuda = cuda.to_device(problem.y)
        bounderies_cuda = cuda.to_device(problem.bounderies)
        predictions_cuda = cuda.to_device(predictions)
        objectives_cuda = cuda.to_device(objectives)

        kernel = CUDA1D_compute_rules_confusion_matricies[blockspergrid, threadsperblock]
        kernel(rules_bodies_cuda,rules_classes_cuda,data_cuda,y_cuda,bounderies_cuda,predictions_cuda,objectives_cuda)
        objectives = objectives_cuda.copy_to_host()

        for position,sol in enumerate(solutions):
            sol.objectives = objectives[position,:]
        return objectives

In [None]:
#main
#globals
# Dataset family -> project names belonging to it.
projects = {
    "ambros": ["mylyn", "pde"],
    "eclipse": ["eclipse"],
    "ck": ['ant', 'velocity', "camel", "poi", "prop", "synapse", "xalan", "xerces", "lucene"],
}

# Dataset family -> feature columns read from its CSV files.
projects_features = {
    "ambros": [
        "numberOfVersionsUntil:", "numberOfFixesUntil:", "numberOfRefactoringsUntil:",
        "numberOfAuthorsUntil:", "linesAddedUntil:", "maxLinesAddedUntil:",
        "avgLinesAddedUntil:", "linesRemovedUntil:", "maxLinesRemovedUntil:",
        "avgLinesRemovedUntil:", "codeChurnUntil:", "maxCodeChurnUntil:",
        "avgCodeChurnUntil:", "ageWithRespectTo:", "weightedAgeWithRespectTo:",
    ],
    "ck": [
        "wmc", "dit", "noc", "cbo", "rfc", "lcom", "ca", "ce", "npm", "lcom3",
        "loc", "dam", "moa", "mfa", "cam", "ic", "cbm", "amc", "max_cc", "avg_cc",
    ],
    "eclipse": [
        "pre", "ACD",
        "FOUT_avg", "FOUT_max", "FOUT_sum",
        "MLOC_avg", "MLOC_max", "MLOC_sum",
        "NBD_avg", "NBD_max", "NBD_sum",
        "NOF_avg", "NOF_max", "NOF_sum",
        "NOI",
        "NOM_avg", "NOM_max", "NOM_sum",
        "NOT",
        "NSF_avg", "NSF_max", "NSF_sum",
        "NSM_avg", "NSM_max", "NSM_sum",
        "PAR_avg", "PAR_max", "PAR_sum",
        "TLOC",
        "VG_avg", "VG_max", "VG_sum",
    ],
}

# Dataset family -> name of the outcome column.
outcome = {
    "ck": "bug",
    "ambros": "bugs",
    "eclipse": "post",
}

K = 2
# Directory listed for training CSV files / directory receiving the pickled results.
DATA_PATH = "C:/Users/Motaz/Desktop/work/TSE_R3/MOCRDP/data/CV_data/eclipse"
RESULTS_PATH = './MOPSO results_final_CRDP/results_pkl'

In [None]:
#main
# For every training CSV in DATA_PATH, run GPUMOPSO n_runs times and pickle each
# run's result to RESULTS_PATH/<file id>-run<i>.pkl. Runs whose result file
# already exists are skipped, so an interrupted execution can be resumed.
n_runs = 100
os.makedirs(RESULTS_PATH,exist_ok=True)
for file_name in os.listdir(DATA_PATH) :
    # The file filter does not depend on the run index: test it once per file.
    if not ('train' in file_name) or not ('.csv' in file_name) :
        continue

    file_id = file_name.replace('.csv','')
    project_name = file_id.split("_")[0]

    # Resolve the dataset family from the file name. Starting from None makes
    # an unknown file explicit instead of silently reusing the family found
    # for a previous file (or raising NameError on the first one).
    project_id = None
    for project,project_names in projects.items() :
        if any(pname in project_name for pname in project_names) :
            project_id = project
    if project_id is None :
        print(file_name, ' skipped: no known project matches ' + project_name)
        continue

    features = projects_features[project_id]
    output_variable = outcome[project_id]
    train_data = None  # read lazily, and only once per file

    for i in range(n_runs):
        # Check the exact file this run writes; checking '<file>.pkl' (as
        # before) never matched because results are saved with a '-run<i>' suffix.
        final_file_name = file_id + f'-run{i}.pkl'
        result_path = os.path.join(RESULTS_PATH,final_file_name)
        if os.path.exists(result_path) :
            print(final_file_name, ' already trained')
            continue

        if train_data is None :
            train_data = pd.read_csv(os.path.join(DATA_PATH,file_name))

        print('learning for '+ file_name + " started")
        sdp_problem = DefectPrediction(train_data[features + [output_variable]],outcome_name=output_variable)
        MOPSO_instance = GPUMOPSO(configuration = {},optimization_problem=sdp_problem)
        run_results = MOPSO_instance.evolve()
        print('learning for '+ file_name +  ' done')
        with open(result_path, "wb") as f:
            pickle.dump(run_results, f)