<a href="https://colab.research.google.com/github/tintinap/Genetic-Algorithm/blob/main/genetic_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from itertools import combinations
import pandas as pd
import numpy as np
import random
import ast

import mysql.connector

In [2]:
url = 'https://github.com/tintinap/Genetic-Algorithm/blob/main/dataset/diabetes.csv?raw=true'
# # https://github.com/tintinap/Genetic-Algorithm/blob/main/dataset/diabetes.csv
# drive = '/content/drive/MyDrive/ClassRoom/Machine learning/dataset/diabetes.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
features = df.columns.tolist()[0:-1]
features

['Pregnancies',
 'Glucose',
 'BloodPressure',
 'SkinThickness',
 'Insulin',
 'BMI',
 'DiabetesPedigreeFunction',
 'Age']

In [4]:
X = df[features]
label = 'Outcome'
y = df[label]

# normalize
X = X.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2


<h1> Fitness Function

<h2> Manhattan Score Table

In [5]:
df_md = {'Outcome': []}

for f in features:
    df_md['Outcome'].append(np.mean([np.sqrt(abs(a[0]-a[1])) for a in zip(X[f], y)]))

df_md = pd.DataFrame.from_dict(df_md)
df_md.index = features
df_md.T

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,0.537556,0.658338,0.69739,0.547161,0.452641,0.670003,0.541908,0.518116


<h1>Function Tools

In [6]:
def convertZeroOne(features, zeroOneList):
    """
    convert list of 0 and 1 to be list of string feature
    """
    return [features[i] for i in range(len(zeroOneList)) if zeroOneList[i] == 1]

<h1>Set up db

In [7]:
def checkTableExists(dbcon, tablename):
    dbcur = dbcon.cursor()
    dbcur.execute("""
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_name = '{0}'
        """.format(tablename.replace('\'', '\'\'')))
    if dbcur.fetchone()[0] == 1:
        dbcur.close()
        return True

    dbcur.close()
    return False

In [8]:
def createTable(dbcon, tablename, create_sql):
    if checkTableExists(dbcon, tablename):
        mycursor.execute('drop TABLE', tablename)      
    mycursor.execute(create_sql)


In [9]:
mydb = mysql.connector.connect(
  host="localhost",
  user="admin",
  password="admin",
  database='ml_db'
)
mycursor = mydb.cursor(buffered=True)

print(mydb)

<mysql.connector.connection_cext.CMySQLConnection object at 0x00000233BC4F1248>


In [10]:
#population
population_create_sql = 'create table POPULATION ('+ \
                features[0]+ ' int(1) ,'+ \
                features[1]+ ' int(1) ,'+ \
                features[2]+ ' int(1) ,'+ \
                features[3]+ ' int(1) ,'+ \
                features[4]+ ' int(1) ,'+ \
                features[5]+ ' int(1) ,'+ \
                features[6]+ ' int(1) ,'+ \
                features[7]+ ' int(1) '+');'
try:
    createTable(mydb, 'population', population_create_sql)
    print('population created')
except:
    mycursor.execute('drop table population')
    createTable(mydb, 'population', population_create_sql)
    print('population created')

#selection
selection_create_sql = 'create table selection ('+ \
                features[0]+ ' int(1) ,'+ \
                features[1]+ ' int(1) ,'+ \
                features[2]+ ' int(1) ,'+ \
                features[3]+ ' int(1) ,'+ \
                features[4]+ ' int(1) ,'+ \
                features[5]+ ' int(1) ,'+ \
                features[6]+ ' int(1) ,'+ \
                features[7]+ ' int(1) '+');'
try:
    createTable(mydb, 'selection', selection_create_sql)
    print('selection created')
except:
    mycursor.execute('drop table selection')
    createTable(mydb, 'selection', selection_create_sql)
    print('selection created')

#crossover
crossover_create_sql = 'create table crossover ('+ \
                features[0]+ ' int(1) ,'+ \
                features[1]+ ' int(1) ,'+ \
                features[2]+ ' int(1) ,'+ \
                features[3]+ ' int(1) ,'+ \
                features[4]+ ' int(1) ,'+ \
                features[5]+ ' int(1) ,'+ \
                features[6]+ ' int(1) ,'+ \
                features[7]+ ' int(1) '+');'
try:
    createTable(mydb, 'crossover', crossover_create_sql)
    print('crossover created')
except:
    mycursor.execute('drop table crossover')
    createTable(mydb, 'crossover', crossover_create_sql)
    print('crossover created')

#mutation
mutation_create_sql = 'create table mutation ('+ \
                features[0]+ ' int(1) ,'+ \
                features[1]+ ' int(1) ,'+ \
                features[2]+ ' int(1) ,'+ \
                features[3]+ ' int(1) ,'+ \
                features[4]+ ' int(1) ,'+ \
                features[5]+ ' int(1) ,'+ \
                features[6]+ ' int(1) ,'+ \
                features[7]+ ' int(1) '+');'
try:
    createTable(mydb, 'mutation', mutation_create_sql)
    print('mutation created')
except:
    mycursor.execute('drop table mutation')
    createTable(mydb, 'mutation', mutation_create_sql)
    print('mutation created')

population created
selection created
crossover created
mutation created


<h1> Initialise Population

In [11]:
# from compiler.ast import flatten
all_population = [list(combinations(features, i)) for i in range(1, len(features)+1)]
temp = list()
for lst in all_population:
    temp.extend(lst)
all_population = temp

all_population[:10]

[('Pregnancies',),
 ('Glucose',),
 ('BloodPressure',),
 ('SkinThickness',),
 ('Insulin',),
 ('BMI',),
 ('DiabetesPedigreeFunction',),
 ('Age',),
 ('Pregnancies', 'Glucose'),
 ('Pregnancies', 'BloodPressure')]

In [12]:
len(all_population)

255

In [13]:
init_pop = list()
eight_zeros = [0, 0, 0, 0, 0, 0, 0, 0]

for pop in all_population:
    for attr in pop:
        eight_zeros[features.index(attr)] = 1
    init_pop.append(eight_zeros)
    eight_zeros = [0, 0, 0, 0, 0, 0, 0, 0]
init_pop[:10]

[[1, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 1],
 [1, 1, 0, 0, 0, 0, 0, 0],
 [1, 0, 1, 0, 0, 0, 0, 0]]

In [14]:
columns = str(features).replace('[', '').replace(']', '').replace("'", '')
columns

'Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age'

In [15]:
len(init_pop)

255

<h2>Insert Population Table

In [16]:
#insert init_pop to db
insert_pop_sql = 'insert into population ('+columns+') values (%s, %s, %s, %s, %s, %s, %s, %s)'
mycursor.executemany(insert_pop_sql, init_pop)
try:
    mydb.commit()
    print(mycursor.rowcount, 'was inserted.')
except:
    mydb.rollback()
    print('rollbacked')

255 was inserted.


<h1> Evaluate and Selection

In [17]:
#get top 3/4 selection population
#the criteria to pass to the next gen is 0.5 bcuz the max value is 1 after the normalization
pass_criteria = 0.5
pass_criteria

0.5

In [18]:
history_fitness_funct = dict() # dict of zeroOneList and its score that have been used fitness_func
def fitness_func(features, zeroOneList):
    """
    mean of mahattan distance of all selected features
    """
    if str(zeroOneList) in history_fitness_funct:
        return history_fitness_funct[str(zeroOneList)]

    if zeroOneList == [0, 0, 0, 0, 0, 0, 0, 0]:
        return 0

    history_fitness_funct[str(zeroOneList)] = np.mean([df_md.T[f] for f in convertZeroOne(features, zeroOneList)])
    return history_fitness_funct[str(zeroOneList)]


In [19]:
selection_pop = [i for i in init_pop if fitness_func(features, i) >= 0.5]
print('Strong generation(>=',str(pass_criteria)+') :', len(selection_pop))
selection_pop

Strong generation(>= 0.5) : 250


[[1, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 1],
 [1, 1, 0, 0, 0, 0, 0, 0],
 [1, 0, 1, 0, 0, 0, 0, 0],
 [1, 0, 0, 1, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 1, 0, 0],
 [1, 0, 0, 0, 0, 0, 1, 0],
 [1, 0, 0, 0, 0, 0, 0, 1],
 [0, 1, 1, 0, 0, 0, 0, 0],
 [0, 1, 0, 1, 0, 0, 0, 0],
 [0, 1, 0, 0, 1, 0, 0, 0],
 [0, 1, 0, 0, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 0, 1, 0],
 [0, 1, 0, 0, 0, 0, 0, 1],
 [0, 0, 1, 1, 0, 0, 0, 0],
 [0, 0, 1, 0, 1, 0, 0, 0],
 [0, 0, 1, 0, 0, 1, 0, 0],
 [0, 0, 1, 0, 0, 0, 1, 0],
 [0, 0, 1, 0, 0, 0, 0, 1],
 [0, 0, 0, 1, 0, 1, 0, 0],
 [0, 0, 0, 1, 0, 0, 1, 0],
 [0, 0, 0, 1, 0, 0, 0, 1],
 [0, 0, 0, 0, 1, 1, 0, 0],
 [0, 0, 0, 0, 0, 1, 1, 0],
 [0, 0, 0, 0, 0, 1, 0, 1],
 [0, 0, 0, 0, 0, 0, 1, 1],
 [1, 1, 1, 0, 0, 0, 0, 0],
 [1, 1, 0, 1, 0, 0, 0, 0],
 [1, 1, 0, 0, 1, 0, 0, 0],
 [1, 1, 0, 0, 0, 1, 0, 0],
 [1, 1, 0, 0, 0, 0, 1, 0],
 [1, 1, 0, 0, 0, 0, 0, 1],
 

<h2>Insert Selection Table

In [20]:
#insert selection_pop to db
insert_sel_sql = 'insert into selection ('+columns+') values (%s, %s, %s, %s, %s, %s, %s, %s)'
mycursor.executemany(insert_sel_sql, selection_pop)
try:
    mydb.commit()
    print(mycursor.rowcount, 'was inserted.')
except:
    mydb.rollback()
    print('rollbacked')

250 was inserted.


<h1> Crossover

In [21]:
#random pair of parent
#random where to split the range of spliting withh be in range [round(2/8*len(features)), round(6/8*len(features))]

In [22]:
def crossover(zeroOneList1, zeroOneList2, split_point):
    """
    Crossover zeroOnelist
    """
    return [
        zeroOneList1[:split_point] + zeroOneList2[split_point:],
        zeroOneList2[:split_point] + zeroOneList1[split_point:]
    ]

In [23]:
def np_crossover(np_zeroOneList1, np_zeroOneList2, split_point):
    """
    Crossover numpy array zeroOnelist
    support np array
    """
    return np.array([
        np.concatenate([np_zeroOneList1[:split_point], np_zeroOneList2[split_point:]]),
        np.concatenate([np_zeroOneList2[:split_point], np_zeroOneList1[split_point:]])
    ])

In [24]:
len(selection_pop)

250

In [25]:
split_point = random.randrange(round(0.3*len(features)), round(0.7*len(features)))
split_point

5

In [30]:
#normal list
import time
start = time.time()

crossover_pop = list()
# selection_pop = np.array(selection_pop)
# cross = list()
for pop in selection_pop:
    for p in selection_pop:
        cross = crossover(pop, p, split_point)
        crossover_pop.append(cross[0])
        crossover_pop.append(cross[1])
crossover_pop = list(set([str(i) for i in crossover_pop]))
crossover_pop = [ast.literal_eval(i) for i in crossover_pop]
# crossover_pop = [list(i) for i in np.unique(np.array(crossover_pop), axis=0)]
print(len(crossover_pop))



end = time.time()
print('used time :', end - start)
crossover_pop[:10]

256
used time : 0.2910349369049072


[[1, 1, 0, 0, 1, 0, 1, 0],
 [1, 0, 0, 1, 0, 1, 0, 1],
 [0, 1, 1, 1, 1, 1, 0, 1],
 [0, 1, 1, 1, 1, 0, 0, 1],
 [1, 0, 1, 1, 0, 1, 0, 1],
 [1, 1, 1, 1, 0, 0, 1, 0],
 [0, 0, 1, 1, 0, 1, 1, 0],
 [1, 1, 0, 1, 1, 1, 1, 0],
 [0, 0, 1, 1, 1, 1, 0, 1],
 [1, 0, 1, 0, 0, 0, 0, 1]]

In [25]:
#match all population for all prob
# TODO other except binary split

# #shuffle list
# random.shuffle(selection_pop)
# #will pair by even and odd number
# if len(selection_pop)%2 == 0: #len is even
#     crossover_pop = [crossover(selection_pop[i], selection_pop[i+1], split_point) for i in range(len(selection_pop)) if i%2 == 0]
#     print('even length')
# else: #len is odd
#     crossover_pop = [crossover(selection_pop[i], selection_pop[i+1], split_point) for i in range(len(selection_pop[:-1])) if i%2 == 0]
#     crossover_pop.append(crossover(selection_pop[-2], selection_pop[-1], split_point))
#     print('odd length')
# crossover_pop[:10]

even length


[[0, 1, 0, 1, 0, 0, 1, 0],
 [0, 1, 0, 0, 0, 0, 1, 1],
 [1, 0, 0, 1, 1, 0, 1, 0],
 [0, 1, 0, 0, 0, 1, 0, 0],
 [1, 0, 1, 0, 1, 1, 0, 1],
 [0, 1, 1, 0, 1, 1, 0, 0],
 [0, 0, 1, 1, 1, 0, 0, 1],
 [0, 0, 0, 0, 1, 0, 0, 1],
 [0, 1, 1, 0, 1, 1, 0, 1],
 [1, 1, 0, 1, 0, 1, 0, 1]]

<h2>Insert Crossover Table

In [31]:
#insert selection_pop to db
insert_cross_sql = 'insert into crossover ('+columns+') values (%s, %s, %s, %s, %s, %s, %s, %s)'
mycursor.executemany(insert_cross_sql, crossover_pop)
try:
    mydb.commit()
    print(mycursor.rowcount, 'was inserted.')
except:
    mydb.rollback()
    print('rollbacked')

256 was inserted.


<h1> Mutation

In [32]:
def mutation(zeroOneList, muta_bit_amount):
    """
    Random flip bits of zeroOneList
    """
    flip_index_bit = list()
    while len(flip_index_bit) != muta_bit_amount:
        index = random.randrange(8)
        flip_index_bit.append(index)
        flip_index_bit = list(set(flip_index_bit))
        #got random bit index

    for i in flip_index_bit:
        if zeroOneList[i] == 1: #1
            zeroOneList[i] = 0
        else: # 0
            zeroOneList[i] = 1
    return zeroOneList

In [33]:
#mutation rate = 0.3
muta_bit_amount = random.randrange(1, round(len(features)*0.3+0.5))
muta_bit_amount

1

In [34]:
mutation_pop = [mutation(i, muta_bit_amount) for i in crossover_pop]
print(len(mutation_pop))
mutation_pop[:10]

256


[[1, 1, 0, 0, 1, 0, 1, 1],
 [1, 0, 1, 1, 0, 1, 0, 1],
 [0, 1, 1, 0, 1, 1, 0, 1],
 [0, 1, 1, 1, 1, 0, 0, 0],
 [0, 0, 1, 1, 0, 1, 0, 1],
 [0, 1, 1, 1, 0, 0, 1, 0],
 [0, 1, 1, 1, 0, 1, 1, 0],
 [1, 1, 0, 0, 1, 1, 1, 0],
 [0, 0, 1, 1, 1, 1, 0, 0],
 [1, 1, 1, 0, 0, 0, 0, 1]]

<h2>Insert Mutation Table

In [35]:
#insert selection_pop to db
insert_muta_sql = 'insert into mutation ('+columns+') values (%s, %s, %s, %s, %s, %s, %s, %s)'
mycursor.executemany(insert_muta_sql, mutation_pop)
try:
    mydb.commit()
    print(mycursor.rowcount, 'was inserted.')
except:
    mydb.rollback()
    print('rollbacked')

256 was inserted.


<h1>Evaluate the top Best 3/4 of Population

In [36]:
selection_crossover_mutation = selection_pop + crossover_pop + mutation_pop
selection_crossover_mutation = list(set([str(i) for i in selection_crossover_mutation]))
selection_crossover_mutation = [ast.literal_eval(i) for i in selection_crossover_mutation]
selection_crossover_mutation[:10]

[[1, 1, 0, 0, 1, 0, 1, 0],
 [1, 0, 0, 1, 0, 1, 0, 1],
 [0, 1, 1, 1, 1, 1, 0, 1],
 [0, 1, 1, 1, 1, 0, 0, 1],
 [1, 0, 1, 1, 0, 1, 0, 1],
 [1, 1, 1, 1, 0, 0, 1, 0],
 [0, 0, 1, 1, 0, 1, 1, 0],
 [1, 1, 0, 1, 1, 1, 1, 0],
 [0, 0, 1, 1, 1, 1, 0, 1],
 [1, 0, 1, 0, 0, 0, 0, 1]]

In [37]:
scm_dict = dict()
for i in selection_crossover_mutation:
    scm_dict[str(i)] = fitness_func(features=features, zeroOneList=i)
#top 75% of all pop
top_75_chromosome = [ast.literal_eval(i[0]) for i in sorted(scm_dict.items(), key=lambda x: x[1], reverse=True)[:round(0.75*(len(selection_crossover_mutation)))]]
del scm_dict
print(len(top_75_chromosome))
top_75_chromosome

191


[[0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 1, 0, 0],
 [0, 1, 1, 0, 0, 0, 0, 0],
 [0, 1, 1, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0],
 [0, 1, 1, 1, 0, 1, 0, 0],
 [0, 1, 1, 0, 0, 1, 1, 0],
 [1, 1, 1, 0, 0, 1, 0, 0],
 [0, 0, 1, 1, 0, 1, 0, 0],
 [0, 0, 1, 0, 0, 1, 1, 0],
 [0, 1, 1, 0, 0, 1, 0, 1],
 [1, 0, 1, 0, 0, 1, 0, 0],
 [0, 1, 1, 1, 0, 0, 0, 0],
 [0, 1, 1, 0, 0, 0, 1, 0],
 [1, 1, 1, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 1, 0, 1],
 [0, 1, 0, 1, 0, 1, 0, 0],
 [0, 1, 1, 0, 0, 0, 0, 1],
 [0, 1, 0, 0, 0, 1, 1, 0],
 [0, 1, 1, 1, 0, 1, 1, 0],
 [0, 0, 1, 1, 0, 0, 0, 0],
 [1, 1, 1, 1, 0, 1, 0, 0],
 [1, 1, 0, 0, 0, 1, 0, 0],
 [1, 1, 1, 0, 0, 1, 1, 0],
 [0, 0, 1, 0, 0, 0, 1, 0],
 [0, 1, 1, 0, 1, 1, 0, 0],
 [0, 1, 1, 1, 0, 1, 0, 1],
 [1, 0, 1, 0, 0, 0, 0, 0],
 [0, 1, 1, 0, 0, 1, 1, 1],
 [1, 1, 1, 0, 0, 1, 0, 1],
 [0, 1, 0, 0, 0, 1, 0, 1],
 [0, 0, 1, 1, 0, 1, 1, 0],
 [1, 0, 1, 1, 0, 1, 0, 0],
 [1, 0, 1, 0, 0, 1, 1, 0],
 [0, 1, 1, 1, 0, 0, 1, 0],
 