In [14]:
import pandas as pd
import random as rd
from collections import Counter
import time
import numpy as np

In [15]:
df = pd.read_csv('male_applicants.csv')

In [16]:
df

Unnamed: 0,names,category,weight
0,rqtttt,Ot,0.5
1,zhuztd,Ot,0.5
2,hqxfqg,Ot,0.5
3,sanjtt,Fy,0.5
4,mjbgpk,Ot,0.5
...,...,...,...
7361,bpgonl,Fr,0.5
7362,cvflwj,Fy,0.5
7363,carssw,Ot,0.5
7364,vtofrz,St,1.0


In [17]:
def generate_random_solution():
    category = df.category.values.tolist()
    category_copy = category.copy()
    hard_constrait = []
    for cand in category_copy:
        if cand == 'Ht' or cand == 'St':
            hard_constrait.append(cand)
            category.remove(cand)

    empty_space = 2072 - len(hard_constrait)

    soft_constrait = []
    for cand in range(empty_space):
        soft_constrait.append(np.random.choice(category))

    solution = hard_constrait + soft_constrait
    
    return solution, hard_constrait, category

In [18]:
solution, _ , _= generate_random_solution()
frequency = Counter(solution)

show = pd.DataFrame()
for i in frequency: show[i] = [frequency[i]]
show.head()

Unnamed: 0,St,Ht,Ot,Fy,Sp,Fr
0,260,60,919,350,58,425


In [19]:
data = {'Fy': 1360, 'Fr':1660, 'Ht':60, 'Sp':290, 'St':260, 'Ot':3736}

soft_constr_weight = 1368 + 1660 + 290 + 3736
weight = {'Fy': 1 - (1360/soft_constr_weight), 'Fr': 1-(1660/soft_constr_weight), 'Ht':1, 'Sp':1- (290/soft_constr_weight), 'St':1, 'Ot': 1- (3736/soft_constr_weight)}

In [20]:
print(weight)

{'Fy': 0.8072015877516303, 'Fr': 0.7646725262262546, 'Ht': 1, 'Sp': 0.9588885738588036, 'St': 1, 'Ot': 0.47037142047065494}


In [21]:
def evaluate_solution(solution, weight):
    frequency = Counter(solution)
    loss = 0
    for category in solution: loss += weight[category] * frequency[category]
    return loss

In [25]:
def mutate_solution(hard_constrait, category):
#   mutate the solution by generating new soft constrait solution
    empty_space = 2072 - len(hard_constrait)

    soft_constrait = []
    for cand in range(empty_space):
        soft_constrait.append(np.random.choice(np.array(category)))

    solution = hard_constrait + soft_constrait
    
    return solution

In [26]:
def hill_climbing_CA(num_itr = 200):
    start_time = time.time()
    solution, hard_sol, soft_cat = generate_random_solution()
    cost = evaluate_solution(solution, weight)
    
    best_solution = solution
    cost_best_solution = cost
    print("Iteration 0 ------ cost:- {}".format(cost))
    
    for itr in range(1, num_itr+1):
        solution = mutate_solution(hard_sol, soft_cat)
        cost = evaluate_solution(solution, weight)
        
        if cost > cost_best_solution:
            print("Iteration {} ------ Found a better solution with cost:- {}".format(itr, cost))
            best_solution = solution
            cost_best_solution = cost
            
    end_time = time.time()
    
    print("\n Time taken - {}seconds".format(end_time-start_time))
    return best_solution, cost_best_solution

In [27]:
best_solution_CA, _ = hill_climbing_CA(200)

frequency = Counter(best_solution_CA)

best_solution_CA_dict = {i:j for i,j in frequency.items()}

show = pd.DataFrame()
for i in frequency: show[i] = [frequency[i]]
show.head()

Iteration 0 ------ cost:- 710345.087893367
Iteration 4 ------ Found a better solution with cost:- 714532.7306492835
Iteration 9 ------ Found a better solution with cost:- 721631.4105471976
Iteration 53 ------ Found a better solution with cost:- 722938.4077119324
Iteration 62 ------ Found a better solution with cost:- 735965.6878366881

 Time taken - 201.37776398658752seconds


Unnamed: 0,St,Ht,Ot,Fy,Fr,Sp
0,260,60,986,303,414,49


In [28]:
# cc = best_solution_CA.copy()
# stop = False
# generated = {i:[] for i in HA_data.keys()}
# while stop == False:
#     if len(cc) == 0: stop = True
#     for hall in HA_data:
#         if len(generated[hall]) < HA_data[hall]:
#             choice = rd.choice(cc)
#             generated[hall].append(choice)
#             cc.remove(choice)

In [34]:
HA_data = {'Akindeko': 625, 'Abiola':356, 'Adeniyi':195, 'New Male Hostel':792, 'Bisi Balogun':104}
order = ['Fy','Fr','Ht','Sp','St','Ot']

def generate_random_solution_HA(CA, HA_stat):
    generated = {}
    CA_copy = CA.copy()
    for hall in HA_stat:
        candidates = []
        for count in range(HA_stat[hall]):
            student = np.random.choice(CA_copy)
            candidates.append(student)
            CA_copy.remove(student)
        generated[hall] = candidates
    
    generated = list(generated.values())

#     CA_copy = CA.copy()
#     stop = False
#     generated = {i:[] for i in HA_stat.keys()}
#     while stop == False:
#         if len(CA_copy) == 0: stop = True
#         for hall in HA_stat:
#             if len(generated[hall]) < HA_stat[hall]:
#                 choice = np.random.choice(CA_copy)
#                 generated[hall].append(choice)
#                 CA_copy.remove(choice)
                
#     generated = list(generated.values())
    
    matrix_solution = []
    for hall in generated:
        solution = []
        
        frequency = Counter(hall) #count each category in each hall
        for category in order:
            solution.append(frequency[category]) #append count of each category in each hall
        
        matrix_solution.append(solution) # concatenate solutions of all categories in all halls together
    
#     transpose solution to allow rows represent each category and column represent each hall
    matrix_solution = np.array(matrix_solution).transpose()
    return matrix_solution

In [35]:
solution_HA = generate_random_solution_HA(best_solution_CA, HA_data)

show = pd.DataFrame(solution_HA,
                    columns = ['Akindeko', 'Abiola', 'Adeniyi', 'New Male Hostel', 'Bisi Balogun'],
                    index = ['Fy','Fr','Ht','Sp','St','Ot'])

show

Unnamed: 0,Akindeko,Abiola,Adeniyi,New Male Hostel,Bisi Balogun
Fy,102,48,31,103,19
Fr,125,72,31,167,19
Ht,17,13,8,20,2
Sp,10,7,8,22,2
St,71,43,27,102,17
Ot,300,173,90,378,45


In [36]:
def evaluate_solution_FA(solution, data):
    solution = {i:j for i,j in zip(data, solution)}
    cost = 0
    for cat in solution: #loop through each category 
        proportion = data[cat] - (max(solution[cat]) - min(solution[cat]))
        N = sum(solution[cat] > 0 ) #number of halls that have some students of i-th category
        cost += proportion * N
    return cost

In [37]:
def hill_climbing_HA(num_itr = 200):
    start_time = time.time()
    solution = generate_random_solution_HA(best_solution_CA, HA_data)
    cost = evaluate_solution_FA(solution, best_solution_CA_dict)
    
    best_solution = solution
    cost_best_solution = cost
    print("Iteration 0 ------ cost:- {}".format(cost))
    
    for itr in range(1, num_itr+ 1):
        solution = generate_random_solution_HA(best_solution_CA, HA_data)
        cost = evaluate_solution_FA(solution, data)
        
        if cost > cost_best_solution:
            print("Iteration {} ------ Found a better solution with cost:- {}".format(itr, cost))
            best_solution = solution
            cost_best_solution = cost
            
    end_time = time.time()
    
    print("\n Time taken - {}seconds".format(end_time-start_time))
    return best_solution

In [38]:
show = pd.DataFrame(hill_climbing_HA(200),
                    columns = ['Akindeko', 'Abiola', 'Adeniyi', 'New Male Hostel', 'Bisi Balogun'],
                    index = ['Fy','Fr','Ht','Sp','St','Ot'])

show

Iteration 0 ------ cost:- 6920
Iteration 1 ------ Found a better solution with cost:- 33122
Iteration 2 ------ Found a better solution with cost:- 33370
Iteration 3 ------ Found a better solution with cost:- 33385
Iteration 5 ------ Found a better solution with cost:- 33390

 Time taken - 99.20473980903625seconds


Unnamed: 0,Akindeko,Abiola,Adeniyi,New Male Hostel,Bisi Balogun
Fy,87,51,30,122,13
Fr,126,78,36,149,25
Ht,20,9,5,23,3
Sp,13,7,8,16,5
St,77,42,30,100,11
Ot,302,169,86,382,47
