In [14]:
from typing import *
import itertools
import random
import numpy as np 


class GenPopulation:
    def __init__(self): 
        self.population = []

    def generate(self, num_features: int, max_pop_size: int, max_features=0, verbose=False) -> np.array: 
        """
            Params: 
                number_of_features: is used to encode the actual data into genotype.
                max_pop_size:       is used to restrict number of individual generation.
                max_featrures:      how many features needed to be in the subset. 
                                      if max_features is 0, then maximum subset size is number of number 
        """
        
        for _ in range(max_pop_size):
            individual = ''
            for col in range(num_features):
                if individual.count('1') == max_features:
                    individual += '0'
                    continue

                individual += str(random.randint(0, 1))

            if verbose: print(f'Genrated a new indivudal: {individual}')
            self.population.append(individual)
            
        if verbose: print(f'Generated list of {num_individuals} individuals: {individuals}')
        self.population = [self.population[i] for i in range(len(self.population)) if self.population[i] not in self.population[i+1: ]]
        return np.array(self.population)

In [16]:
g = GenPopulation()

g.generate(2, 4, 2)

array(['10', '00', '01'], dtype='<U2')

In [2]:
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf 
import tensorflow.keras as keras  


class ANN(keras.Model): 
    """
        SubClassing way of building the Keras Model.
    """
    def __init__(self, fc1, fc2, fc3, in_dims, out_dims): 
        super(ANN, self).__init__()
        
        #self.input = Input(input_shape=(in_dims, ))
        self.out_dims = out_dims
        self.fc1 = Dense(fc1,  activation="relu", input_shape=(in_dims, ))
        self.fc2 = Dense(fc1,  activation="relu")
        self.fc3 = Dense(fc1,  activation="relu")

        self.output_sigmoid = Dense(out_dims, activation="sigmoid")
        self.output_softmax = Dense(out_dims, activation="softmax")
    
    def call(self, input_): 
     #   x = self.input(input)
        x = self.fc1(input_)
      #  x = self.fc2(x)
        x = self.fc3(x)

        if self.out_dims > 1: 
            x = self.output_softmax(x)
            return x 
        
        x = self.output_sigmoid(x)
        return x


In [3]:
import numpy as np 
from sklearn.metrics import *
from math import sqrt


def get_metrics_value(y_true, y_pred, regression): 
    """
        this function is used to calculate the different metrics score for both regression and classification.
        Params:
            y_true: test y value.
            y_pred: prediction for test x from model.
            regression: bool value to represent whether it is regression problem or not.
    """
    if regression: 
        rmse = sqrt(mean_squared_error(y_true, y_pred))
        adj_r2 = r2_score(y_true, y_pred)
        return rmse, adj_r2
    
    else: 
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)

        return accuracy, precision, recall, f1

In [9]:
import random 
import numpy as np 
import sklearn
#from metrics import *
#from neural_net import *
from tensorflow.keras.optimizers import Adam
import tensorflow as tf 
import math 

class FitnessFunction: 
    def __init__(self, population, train_X, train_y, test_X, test_y, feature_names, scoring_criteria): 
        '''
            Params:
                population: Population array from generate_pop method.
                model: Model from build model method.
                train_X, train_y: training data for our model.
                test_X, test_y: testing data for our model.
                scoring_criteria: On what basis scoring, needs to happend eg: Accuracy, f1-score, recall, precision.
        '''

        self.population = population
        self.scores = []
        self.train_X = train_X 
        self.train_y = train_y
        self.test_X = test_X 
        self.test_y = test_y
        self.scoring_criteria = scoring_criteria
        self.feature_names = feature_names
    
    def compile_model(self, fc1, fc2, fc3, input_dims, out_dims): 
        """
            This method is used to compile the neural network model using adam and binary crossentropy.
            Params:
                fc1          : Number of hidden unit for fully connected layer 1.
                fc2          : Number of hidden unit for fully connected layer 2.
                fc3          : Number of hidden unit for fully connected layer 3.
                input_dims   : Input Dimension of the Neural Network model, which number of features in the dataset.
                out_dims     : Output Dimension of the Neural Network model, by default it is 1.
        """
        model = ANN(fc1, fc2, fc3, input_dims, out_dims)
        model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy")
        return model

    def convert_to_nparray(self, individual): 
        """
            This method, will convert the data of type pandas dataframe into numpy array, to train a tensorflow model.
            Params: 
                genotype     : Genotype is the encoding of the features(individual in population).
        """
        col_numbers = [_ for _ in range(len(individual)) if individual[_] == 1]
        col_names = [self.feature_names[i] for i in range(len(self.feature_names)) if i in col_numbers]
        
        train_X = np.asarray(self.train_X[col_names])
        train_y = np.asarray(self.train_y)
        test_X = np.asarray(self.test_X[col_names])
        test_y = np.asarray(self.test_y)
        return train_X, train_y, test_X, test_y

    def train_model(self, train_X, train_y, input_dims):
        """
            This method, will train a neural net model, with the specific individual.
            Params:
                train_X       : Independent variable to the model.
                train_y       : Dependent variable to the model.
                input_dims    : Input dimension of the model, len(individual).
        """
        model = self.compile_model(32, 16, 16, input_dims, 1)
        model.fit(train_X, train_y, epochs=10, verbose=False)
        col_numbers = []
        return model

    def test_model(self, model, test_X, train_X): 
        """
            This method, will return a prediction for the prediction data.
            Params:
                test_X        : Independent Variable of testing data.
                model         : fitted model returned by the train_model method.
                train_X       : Independent Variable of training data.
        """
        pred_y = model.predict(test_X, verbose=False)
        pred_train_y = model.predict(train_X, verbose=False)
        pred_y = np.where(pred_y >= 0.5, 1, 0)
        return pred_y
    
    def get_metrics_score(self, y_true, y_pred, regression): 
        """
            This method, will calculate the metrics value for the prediction done by the model.
            Params: 
                y_true        : Ground Truth value of the testing data.
                y_pred        : Prediction data of the model.
                regression    : To specify, whether it is regression task or classfication task.
        """
        results = get_metrics_value(y_true, y_pred, regression)

        if not regression:
            acc, precision, recall, f1_score = results 
            return acc

        else: rmse, adj_r2 = results

        if self.scoring_criteria == "rmse" and regression: 
            return ((0.9 * rmse) + (0.10 * adj_r2) )

        if self.scoring_criteria == "adj_r2" and regression: 
            return ((0.9 * adj_r2) + (0.1 * rmse))

        if self.scoring_criteria == "acc": 
            return ((0.88 * acc) + (0.03 * recall) + (0.03 * precision) + (0.03 * f1_score))
        
        if self.scoring_criteria == "recall": 
            return ((0.03 * acc) + (0.88 * recall) + (0.03 * precision) + (0.03 * f1_score))
        
        if self.scoring_criteria == "precision": 
            return ((0.03 * acc) + (0.03 * recall) + (0.88 * precision) + (0.03 * f1_score))

        if self.scoring_criteria == "f1": 
            return ((0.03 * acc) + (0.03 * recall) + (0.03 * precision) + (0.88 * f1_score))
        

    def get_fitness_score(self, regression, verbose=True): 
        """
            This method calculates the fitness score.
            Params:
                regression       : To specify, whether it is regression task or classfication task.
        """
        for i, individual in enumerate(self.population):
            if np.any(individual): 
                train_X, train_y, test_X, test_y = self.convert_to_nparray(individual)
                model = self.train_model(train_X, train_y, len(individual))
                y_pred = self.test_model(model, test_X, train_X)
                score = self.get_metrics_score(test_y, y_pred, regression)

                score = round(score, 5)
                print(f"[+] Successfully calculated fitness score for individual: {i}")
                self.scores.append(score)

        return self.scores / sum(self.scores), self.scores

In [5]:
from sklearn.model_selection import train_test_split 
import imblearn
from imblearn.over_sampling import ADASYN
from sklearn.preprocessing import LabelEncoder

def preprocess_dataframe(df): 
    """
        This Function, will be used to drop unwanted cols, and it converts the categorical data into numerical.
        Params:
            df: dataframe object that needed to be processed
    """
    df.drop(["customerID"], inplace=True, axis=1)
    le = LabelEncoder()
    le.fit(df.Churn)
    churn = le.transform(df.Churn)
    df.Churn = churn
    
    df[df.TotalCharges == " "] = 0
    df.TotalCharges = df.TotalCharges.apply(lambda x: float(x))
    df.MonthlyCharges = df.MonthlyCharges.astype("float")
    df = pd.get_dummies(df)
    return df

def handle_imbalance(train_X, train_y): 
    """
        This function, does handle a imbalance data, by doing the oversampling using ADASYN method from imblearn.
        Params:
            train_X: training x, that needed to be resampled.
            train_y: training y, that is of train x
    """
    ada = ADASYN(random_state=42)
    train_X, train_y = ada.fit_resample(train_X, train_y)
    return train_X, train_y

def get_training_testing_data(dataframe, out_col, train_size): 
    """
        Params:
            dataframe: datafram, that needed to be processed.
            out_col: output column for the y(label).
            test_split: split size.
    """
    y = dataframe[out_col]
    dataframe.drop([out_col], inplace=True, axis=1)
    X = dataframe
    train_X, test_X, train_y, test_y = train_test_split(X, y, train_size=train_size, stratify=y)
    train_y = train_y.astype("int")
    test_y = test_y.astype("int")

    return train_X, train_y, test_X, test_y

In [6]:
import random 
import numpy as np

def get_next_generation_population(population, fitness_score, mutation_rate): 
    """
        This function used to generate a new individauls by combining genes of the parensts from old population.
        Params:
            population      : Array of individuals created from GenPopulation Class.
            fitness_score   : Score calculated for each individual in the population by the fitness class.
            mutation_rate   : Amount of mutation needed to be done.
    """
    new_generation_population = []
    while len(new_generation_population) <= len(population)-1: 
        parent_1, parent_2 = pick_parents(population, fitness_score)
        child_1, child_2 = reproduce(parent_1, parent_2)
        mutated_child1 = mutate(child_1, mutation_rate)
        mutated_child2 = mutate(child_2, mutation_rate)
        
        new_generation_population.append(mutated_child1)
        new_generation_population.append(mutated_child2)
    
    print("[+] Successfully generated population for new generation.")
    return new_generation_population
    
    
def pick_parents(population, fitness_score): 
    """
        This function, will pick two parent chromosomes from the population.
        Params:
            population      : Array of individuals created from GenPopulation Class.
            fitness_score   : Score calculated for each individual in the population by the fitness class.
    """
    parent_1, parent_2 = random.choices(population, fitness_score, k=2)
    return parent_1, parent_2
        
    
def reproduce(parent_1, parent_2):
    """
        This function will generate a new childrens by combining two parents gene.
        Params:
            parent_1       : parent_1 array that is picked by the pick_parent function.
            parent_2       : parent_2 array that is picked by the pick_parent function.
    """
    chromosome_breakage_point = random.randint(1, len(parent_1)-1) 
    child_1 = parent_1[:chromosome_breakage_point] + parent_2[chromosome_breakage_point:]
    child_2 = parent_2[:chromosome_breakage_point], parent_1[chromosome_breakage_point:]
    return child_1, child_2


def mutate(individual, mutation_rate):
    """
        This function is used to mutate the childrens based randomly.
        Params:
            mutation_rate     : It is probability value, that decides whether to mutate or not.
    """
    random_index = random.randint(0, len(individual)-1)
    
    if random.random() < mutation_rate: 
        random_index_val = individual[random_index]
        inverse_random_index_val = int(not random_index_val)
        individual[: random_index] + inverse_random_index_val + individual[random_index+1: ]
        
    return individual

In [10]:
import pandas as pd 
import numpy as np 
import sklearn
#from train_preprocessing import *
#from neural_net import *
#from generate_population import *
#from fitness_score import *

def main(dataframe_path, number_of_generation=10, mutation_rate=0.1, n_individual=100, max_features=25): 
    """
        This method is the combines all the genetic algo pieces into one, and evolve over generation,
        to provide a fittest population.
        Params:
            dataframe_path         : Path for the csv file that contains the dataset.
            number_of_generation   : Number of generation to evolve.
            mutation_rate          : Mutation Rate, decides whether to mutate the children or not, based on probability.
            n_individual           : Number of individuals, needed to be created in a population, if the value is 0, 
                                     it will create a power(len(fearures), 2)
            max_features           : Subset size, if value is 0, then the subset size will be len(features).
    """
    
    dataframe = pd.read_csv(dataframe_path)
    dataframe = preprocess_dataframe(dataframe)
    
    train_X, train_y, test_X, test_y = get_training_testing_data(dataframe, "Churn", 0.8)
    print("Before Resampling", train_X.shape)

    train_X, train_y = handle_imbalance(train_X, train_y)
    print("After Resampling", train_X.shape)

    num_cols = len(train_X.columns)
    generator = GenPopulation()
    population = generator.generate(num_cols, n_individual, max_features)
    
    for i in range(number_of_generation): 
        print(f"Generation: {i}")
        fitness_function = FitnessFunction(
                                    population,
                                    train_X,
                                    train_y,
                                    test_X,
                                    test_y,
                                    train_X.columns,
                                    "acc"
                                )
        fitness_scores , prediction_scores = fitness_function.get_fitness_score(regression=False, verbose=False)
        
        new_generation_population = get_next_generation_population(population, fitness_scores, mutation_rate)
        population = new_generation_population
        print(f"Generation: {i}, Max Prediction Score: {max(prediction_scores)}")
            
    return population, fitness_scores, prediction_scores

df_path = "/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
pop, fitness_score, _ = main(df_path, 20, 0.1, 50, 10)

Before Resampling (5634, 60)
After Resampling (8318, 60)


  _warn_prf(average, modifier, msg_start, len(result))


[0 0 1 0 1 0 0 0 0 0] [0 1 0 0 1 1 0 1 1 0] parents
[1 0 0 0 0 1 0 0 1 1] [1 0 1 0 0 1 1 1 1 1] parents
[0 1 0 1 0 1 0 0 1 1] [0 1 0 1 0 1 0 0 1 1] parents
[1 0 1 0 1 1 1 0 1 1] [1 1 0 1 0 0 1 0 1 1] parents
[1 1 1 0 0 1 1 1 1 0] [1 1 0 1 0 0 1 0 1 1] parents
[0 1 0 1 0 1 0 0 1 1] [0 1 1 1 1 0 0 0 0 1] parents
[0 1 1 0 0 0 0 0 0 0] [0 1 1 0 0 1 1 0 0 0] parents
[1 0 0 0 0 1 0 0 0 1] [1 0 0 1 1 1 1 1 1 0] parents
[1 0 1 0 0 1 1 1 1 1] [1 1 1 0 1 1 0 0 0 0] parents
[1 0 0 1 1 1 1 1 1 0] [0 0 0 0 0 0 1 1 0 1] parents
[1 0 0 1 0 1 1 1 0 0] [1 1 1 1 0 1 0 1 0 0] parents
[1 1 1 0 0 0 0 1 1 0] [0 1 0 1 0 0 0 1 0 1] parents
[1 1 0 1 1 1 1 0 0 0] [0 0 1 0 1 0 0 0 0 0] parents
[0 1 0 0 0 0 1 0 1 0] [0 1 1 0 0 1 1 0 0 0] parents
[0 1 0 1 1 0 1 0 0 1] [1 1 1 0 0 1 1 1 1 0] parents
[1 1 0 1 0 1 0 0 1 1] [1 0 1 1 0 1 1 0 0 1] parents
[0 1 1 1 1 0 0 1 0 0] [1 0 1 1 0 1 1 0 0 1] parents
[0 1 0 1 0 1 0 0 1 1] [0 1 0 1 1 0 1 0 0 1] parents
[0 1 1 1 1 0 0 0 0 1] [0 1 0 0 0 0 1 0 1 0] parents
[1 0 0 0 0 1

  _warn_prf(average, modifier, msg_start, len(result))


[1 1 0 1 0 0 1 0 0 1] [0 1 0 0 0 0 1 1 1 0] parents
[1 0 0 0 0 1 0 1 0 0] [0 1 0 1 1 0 0 1 0 0] parents
[1 1 1 0 0 1 0 0 1 1] [0 1 0 1 0 1 1 0 0 1] parents
[1 0 1 1 0 1 0 0 1 1] [1 1 1 0 0 1 1 0 0 0] parents
[1 0 1 1 1 0 1 0 1 1] [0 1 0 0 0 1 1 0 0 0] parents
[0 1 0 1 1 0 0 1 0 0] [0 1 1 1 0 0 0 1 0 1] parents
[1 0 1 1 0 1 0 0 1 1] [0 0 0 0 0 1 0 0 1 1] parents
[0 1 1 0 1 0 1 0 1 1] [0 1 1 1 0 1 1 0 0 1] parents
[1 1 0 1 0 0 1 0 0 1] [1 0 1 0 0 1 1 1 1 1] parents
[1 1 1 0 0 1 1 0 0 0] [0 1 1 1 0 1 1 0 0 1] parents
[0 1 1 0 0 1 1 0 0 1] [1 1 1 0 0 0 1 0 0 0] parents
[1 0 1 1 0 1 0 0 1 1] [1 1 1 0 0 1 1 1 1 1] parents
[1 1 0 1 1 1 0 0 1 1] [1 1 0 1 0 0 1 0 0 1] parents
[0 1 0 1 0 0 1 1 0 0] [1 1 1 1 0 1 1 0 0 1] parents
[0 1 0 1 0 1 1 0 0 1] [1 1 1 0 0 1 1 0 0 0] parents
[1 1 1 0 0 1 0 0 1 1] [1 1 0 1 0 1 1 0 0 1] parents
[0 1 0 1 1 0 0 1 0 0] [0 1 1 0 1 0 1 1 1 0] parents
[0 1 0 1 1 1 1 0 0 1] [1 1 1 0 1 1 1 1 1 0] parents
[0 1 1 1 0 1 1 0 0 1] [1 0 0 0 0 0 1 1 0 1] parents
[1 0 0 0 0 1

  _warn_prf(average, modifier, msg_start, len(result))


[0 1 1 0 0 1 0 1 0 0] [1 1 1 1 0 1 0 0 1 1] parents
[0 1 1 0 0 1 0 1 0 0] [0 1 1 0 0 1 0 1 0 0] parents
[1 1 0 1 0 1 1 0 1 1] [0 1 1 0 1 1 1 0 0 1] parents
[0 1 1 1 0 1 0 0 0 0] [1 1 0 1 0 1 1 0 1 1] parents
[0 1 0 1 0 1 0 0 1 0] [1 1 1 1 0 1 0 1 0 0] parents
[0 1 1 1 0 1 0 0 1 1] [0 1 1 1 0 1 1 1 0 0] parents
[0 1 1 0 1 1 1 1 0 0] [0 1 1 1 0 1 0 0 1 0] parents
[0 1 1 1 0 1 1 0 0 0] [1 1 1 1 0 0 1 1 0 1] parents
[0 1 1 0 0 1 1 1 1 0] [1 1 1 1 0 1 0 1 0 0] parents
[1 1 1 0 0 1 0 1 0 0] [0 1 1 0 1 1 1 1 1 1] parents
[1 1 0 1 0 0 1 1 0 0] [0 1 0 1 0 1 0 0 1 0] parents
[0 1 1 0 0 1 0 1 0 0] [0 0 0 0 0 1 1 1 0 1] parents
[0 1 0 1 0 1 1 0 0 0] [1 1 0 1 0 0 1 1 0 0] parents
[1 1 1 1 0 1 0 0 0 0] [1 1 1 1 0 0 0 0 1 1] parents
[0 1 1 1 1 0 1 1 0 1] [1 1 1 1 0 1 1 0 0 0] parents
[1 1 1 0 0 1 0 1 0 0] [1 1 1 1 0 1 0 0 0 0] parents
[0 1 1 1 0 1 1 0 0 0] [0 1 0 1 0 1 1 0 0 0] parents
[1 1 1 1 0 0 0 0 1 1] [0 1 1 1 0 1 1 0 0 0] parents
[1 1 0 0 0 1 1 1 0 1] [1 1 1 1 0 1 0 1 0 0] parents
[0 1 1 0 0 1

KeyboardInterrupt: 

In [17]:
s = "shilas"
s[1]

'h'

In [19]:
s[:2]

'sh'

In [20]:
s[2:]

'ilas'