In [36]:
import pandas as pd
import random as rd
from collections import Counter
import time
import numpy as np

In [2]:
df = pd.read_csv('male_applicants.csv')

In [3]:
df

Unnamed: 0,names,category,weight
0,rqtttt,Ot,0.5
1,zhuztd,Ot,0.5
2,hqxfqg,Ot,0.5
3,sanjtt,Fy,0.5
4,mjbgpk,Ot,0.5
...,...,...,...
7361,bpgonl,Fr,0.5
7362,cvflwj,Fy,0.5
7363,carssw,Ot,0.5
7364,vtofrz,St,1.0


In [4]:
def generate_random_solution():
    category = df.category.values.tolist()
    category_copy = category.copy()
    hard_constrait = []
    for cand in category_copy:
        if cand == 'Ht' or cand == 'St':
            hard_constrait.append(cand)
            category.remove(cand)

    empty_space = 2072 - len(hard_constrait)

    soft_constrait = []
    for cand in range(empty_space):
        soft_constrait.append(rd.choice(category))

    solution = hard_constrait + soft_constrait
    
    return solution, hard_constrait, category

In [5]:
solution, _ , _= generate_random_solution()
frequency = Counter(solution)

show = pd.DataFrame()
for i in frequency: show[i] = [frequency[i]]
show.head()

Unnamed: 0,St,Ht,Ot,Fr,Fy,Sp
0,260,60,894,430,360,68


In [6]:
data = {'Fy': 1360, 'Fr':1660, 'Ht':60, 'Sp':290, 'St':260, 'Ot':3736}

soft_constr_weight = 1368 + 1660 + 290 + 3736
weight = {'Fy': 1 - (1360/soft_constr_weight), 'Fr': 1-(1660/soft_constr_weight), 'Ht':1, 'Sp':1- (290/soft_constr_weight), 'St':1, 'Ot': 1- (3736/soft_constr_weight)}

In [7]:
print(weight)

{'Fy': 0.8072015877516303, 'Fr': 0.7646725262262546, 'Ht': 1, 'Sp': 0.9588885738588036, 'St': 1, 'Ot': 0.47037142047065494}


In [8]:
def evaluate_solution(solution, weight):
    frequency = Counter(solution)
    loss = 0
    for category in solution: loss += weight[category] * frequency[category]
    return loss

In [9]:
def mutate_solution(hard_constrait, category):
#   mutate the solution by generating new soft constrait solution
    empty_space = 2072 - len(hard_constrait)

    soft_constrait = []
    for cand in range(empty_space):
        soft_constrait.append(rd.choice(category))

    solution = hard_constrait + soft_constrait
    
    return solution

In [10]:
def hill_climbing_CA(num_itr = 200):
    start_time = time.time()
    solution, hard_sol, soft_cat = generate_random_solution()
    cost = evaluate_solution(solution, weight)
    
    best_solution = solution
    cost_best_solution = cost
    print("Iteration 0 ------ cost:- {}".format(cost))
    
    for itr in range(1, num_itr+1):
        solution = mutate_solution(hard_sol, soft_cat)
        cost = evaluate_solution(solution, weight)
        
        if cost > cost_best_solution:
            print("Iteration {} ------ Found a better solution with cost:- {}".format(itr, cost))
            best_solution = solution
            cost_best_solution = cost
            
    end_time = time.time()
    
    print("\n Time taken - {}seconds".format(end_time-start_time))
    return best_solution, cost_best_solution

In [153]:
best_solution_CA, _ = hill_climbing_CA(200)

frequency = Counter(best_solution_CA)

show = pd.DataFrame()
for i in frequency: show[i] = [frequency[i]]
show.head()

Iteration 1 ------ cost:- 710411.969379095
Iteration 0 ------ Found a better solution with cost:- 711838.8185426573
Iteration 7 ------ Found a better solution with cost:- 724499.969379102
Iteration 102 ------ Found a better solution with cost:- 730937.3178338665

 Time taken - 0.6491367816925049seconds


Unnamed: 0,St,Ht,Ot,Fy,Sp,Fr
0,260,60,982,304,56,410


In [190]:
HA_data = {'Akindeko': 625, 'Abiola':356, 'Adeniyi':195, 'New Male Hostel':792, 'Bisi Balogun':104}
order = ['Fy','Fr','Ht','Sp','St','Ot']

def generate_random_solution_HA(CA, HA_stat):
    rd.shuffle(CA)
    generated = {}
    CA_copy = CA.copy()
    for hall in HA_stat:
        candidates = []
        for count in range(HA_stat[hall]):
            student = rd.choice(CA_copy)
            candidates.append(student)
            CA_copy.remove(student)
        generated[hall] = candidates
    
    generated = list(generated.values())
    
    matrix_solution = []
    for hall in generated:
        solution = []
        
        frequency = Counter(hall) #count each category in each hall
        for category in order:
            solution.append(frequency[category]) #append count of each category in each hall
        
        matrix_solution.append(solution) # concatenate solutions of all categories in all halls together
    
#     transpose solution to allow rows represent each category and column represent each hall
    matrix_solution = np.array(matrix_solution).transpose()
    return matrix_solution

In [191]:
solution_HA = generate_random_solution_HA(best_solution, HA_data)

show = pd.DataFrame(solution_HA,
                    columns = ['Akindeko', 'Abiola', 'Adeniyi', 'New Male Hostel', 'Bisi Balogun'],
                    index = ['Fy','Fr','Ht','Sp','St','Ot'])

show

Unnamed: 0,Akindeko,Abiola,Adeniyi,New Male Hostel,Bisi Balogun
Fy,101,52,32,125,18
Fr,106,62,36,154,11
Ht,23,8,4,21,4
Sp,11,11,2,32,2
St,85,42,25,93,15
Ot,299,181,96,367,54


In [192]:
def evaluate_solution_FA(solution, data):
    solution = {i:j for i,j in zip(data, solution)}
    cost = 0
    for cat in solution: #loop through each category 
        proportion = data[cat] - (max(solution[cat]) - min(solution[cat]))
        N = sum(solution[cat] > 0 ) #number of halls that have some students of i-th category
        cost += proportion * N
    return cost

In [193]:
def hill_climbing_HA(num_itr = 200):
    start_time = time.time()
    solution = generate_random_solution_HA(best_solution_CA, HA_data)
    cost = evaluate_solution_FA(solution, data)
    
    best_solution = solution
    cost_best_solution = cost
    print("Iteration 0 ------ cost:- {}".format(cost))
    
    for itr in range(1, num_itr+ 1):
        solution = generate_random_solution_HA(best_solution_CA, HA_data)
        cost = evaluate_solution_FA(solution, data)
        
        if cost > cost_best_solution:
            print("Iteration {} ------ Found a better solution with cost:- {}".format(itr, cost))
            best_solution = solution
            cost_best_solution = cost
            
    end_time = time.time()
    
    print("\n Time taken - {}seconds".format(end_time-start_time))
    return best_solution

In [194]:
hill_climbing_HA()

Iteration 0 ------ cost:- 33380
Iteration 1 ------ Found a better solution with cost:- 33390

 Time taken - 1.3448424339294434seconds


array([[ 82,  53,  31, 118,  20],
       [125,  61,  46, 155,  23],
       [ 18,  13,   4,  22,   3],
       [ 16,   9,   8,  22,   1],
       [ 80,  42,  24, 105,   9],
       [304, 178,  82, 370,  48]])