In [1]:
import os
import time
import random
import numpy as np
import pandas as pd
from nltk.tokenize import WhitespaceTokenizer
from ortools.algorithms import pywrapknapsack_solver

In [2]:
white_space_tokenizer = WhitespaceTokenizer()

solver = pywrapknapsack_solver.KnapsackSolver(
        pywrapknapsack_solver.KnapsackSolver.
        KNAPSACK_MULTIDIMENSION_BRANCH_AND_BOUND_SOLVER, 'KnapsackExample')

# Sets a time limit of time_limit seconds.
time_limit = 120.0
solver.set_time_limit(time_limit)

In [3]:
def ReadContent(path_to_file):
    with open(path_to_file, "r") as f:
            content = f.readlines()
            f.close()
    return content

In [4]:
# def ConvertContent2Testcase(lines_in_file):
#     number_of_item = int(lines_in_file[1])
#     capacities = np.array([ int(lines_in_file[2]) ], dtype = np.int64)
#     values = np.empty((number_of_item), dtype = np.int64)
#     weights = np.empty((1, number_of_item), dtype = np.int64)
    
#     last_line = len(lines_in_file)
#     count_of_item = 0
#     for index_line in range(4, last_line):
#         value, weight = white_space_tokenizer.tokenize(lines_in_file[index_line])
#         values[count_of_item] = value
#         weights[0][count_of_item] = weight
#         count_of_item += 1
    
#     if count_of_item != number_of_item:
#         raise Exception("Some thing went wrong when reading test case, the number of items is not as expected !!!")
        
#     return number_of_item, capacities, values, weights

In [5]:
def ConvertContent2Testcase(lines_in_file):
    number_of_item = int(lines_in_file[1])
    capacities = [int(lines_in_file[2])]
    values = list()
    weights = [[]]
    
    last_line = len(lines_in_file)
    count_of_item = 0
    for index_line in range(4, last_line):
        value, weight = white_space_tokenizer.tokenize(lines_in_file[index_line])
        values.append(int(value))
        weights[0].append(int(weight))
        count_of_item += 1
    
    if count_of_item != number_of_item:
        raise Exception("Some thing went wrong when reading test case, the number of items is not as expected !!!")
        
    return number_of_item, capacities, values, weights

In [6]:
def GetDirPaths(dir_path):
    dir_paths = list()
    
    for name in os.listdir(dir_path):
        full_path = os.path.join(dir_path, name)
        
        if os.path.isdir(full_path):
            dir_paths.append(full_path)
            
    return dir_paths

In [7]:
def GetFilePaths(dir_path):
    file_paths = list()

    for name in os.listdir(dir_path):
        full_path = os.path.join(dir_path, name)
        
        if os.path.isfile(full_path):
            file_paths.append(full_path)
            
    return file_paths

In [8]:
def GetBasenameOfDir(dir_path):
    return os.path.basename(dir_path)

In [9]:
def GetBasenameWithoutExt(file_path):
    return os.path.splitext(os.path.basename(file_path))[0]

In [10]:
def ReadAllTestcases(test_plan_paths):
    test_cases = dict()
    
    for test_plan_path in GetDirPaths(test_plan_paths):     # 13 test plan
        test_plan_name = GetBasenameOfDir(test_plan_path)
        test_cases[test_plan_name] = dict()
        
        # Shuffle 8 test size để chọn ra ngẫu nhiên 5 test size
        test_size_paths = GetDirPaths(test_plan_path)       # 8 test size
        random.shuffle(test_size_paths)     

        for test_size_path in test_size_paths[0:5]:                # 5 test size
                
            test_size_name = GetBasenameOfDir(test_size_path)
            test_cases[test_plan_name][test_size_name] = dict()
            
            # Shuffle 2 test path để chọn ra ngẫu nhiên 1 test path
            test_paths = GetDirPaths(test_size_path)       # 2 test path
            random.shuffle(test_paths)     
            test_path = test_paths[0]
            
            test_path_name = GetBasenameOfDir(test_path)
            test_cases[test_plan_name][test_size_name][test_path_name] = dict()
            
            # Shuffle 100 test file để chọn ra ngẫu nhiên 1 test file
            test_file_paths = GetFilePaths(test_path)     # 100 test file
            random.shuffle(test_file_paths)
            test_file_path = test_file_paths[0]
            
            test_file_name = GetBasenameWithoutExt(test_file_path)
            test_cases[test_plan_name][test_size_name][test_path_name][test_file_name] = dict()
            
            # Đọc file test
            lines_in_file = ReadContent(test_file_path)
            number_of_item, capacities, values, weights = ConvertContent2Testcase(lines_in_file)
            
            # Thêm dữ liệu vào test_cases
            test_cases[test_plan_name][test_size_name][test_path_name][test_file_name]["number_of_item"] = number_of_item
            test_cases[test_plan_name][test_size_name][test_path_name][test_file_name]["capacities"] = capacities
            test_cases[test_plan_name][test_size_name][test_path_name][test_file_name]["values"] = values
            test_cases[test_plan_name][test_size_name][test_path_name][test_file_name]["weights"] = weights
                                  
    return test_cases

In [11]:
def SolveKnapSack(values, weights, capacities):  
    solver.Init(values, weights, capacities)

    start_time = time.process_time()  # start time
    computed_value = solver.Solve()
    end_time = time.process_time()    # end time

    return computed_value, (end_time - start_time)

In [14]:
test_plan_paths = "C:\\Users\\Hieu\\Desktop\\AI\\Bai_taps\\KnapSack\\kplib-master\\"
test_cases = ReadAllTestcases(test_plan_paths)
number_of_test_file = 1

output = {'STT':[], 'File_path':[], 'Computed_value':[], 'Time':[]}

for test_plan_name in test_cases.keys():     # 13 test plan
    test_plan = test_cases[test_plan_name]

    for test_size_name in test_plan.keys():     # 8 test size
        test_size = test_plan[test_size_name]

        for test_path_name in test_size.keys(): # 2 test path
            test_path = test_size[test_path_name]

            for test_file_name in test_path.keys():  # 100 test file
                test_file = test_path[test_file_name]

                number_of_item = test_file["number_of_item"]
                capacities = test_file["capacities"]
                values = test_file["values"]
                weights = test_file["weights"]

                file_info = os.path.join(os.path.join(test_plan_name, test_size_name), os.path.join(test_path_name, test_file_name))
                computed_value, computed_time = SolveKnapSack(values, weights, capacities)
                
                print("STT: ", number_of_test_file)
                print("File info: " ,file_info)
                print('Total value: ', computed_value)
                print('Time: ', computed_time, " seconds\n")
                
                output['STT'].append(number_of_test_file)
                output['File_path'].append(file_info)
                output['Computed_value'].append(computed_value)
                output['Time'].append(computed_time)  

                number_of_test_file += 1

#print("number_of_test_file: ", number_of_test_file)   

STT:  1
File info:  00Uncorrelated\n01000\R01000\s065
Total value:  402186
Time:  0.0  seconds

STT:  2
File info:  00Uncorrelated\n00050\R10000\s019
Total value:  181645
Time:  0.0  seconds

STT:  3
File info:  00Uncorrelated\n05000\R01000\s037
Total value:  2022527
Time:  0.0  seconds

STT:  4
File info:  00Uncorrelated\n10000\R10000\s032
Total value:  40169760
Time:  0.0  seconds

STT:  5
File info:  00Uncorrelated\n00200\R01000\s043
Total value:  81824
Time:  0.0  seconds

STT:  6
File info:  01WeaklyCorrelated\n00500\R01000\s094
Total value:  142384
Time:  0.0  seconds

STT:  7
File info:  01WeaklyCorrelated\n00050\R01000\s025
Total value:  12446
Time:  0.0  seconds

STT:  8
File info:  01WeaklyCorrelated\n01000\R10000\s023
Total value:  2726952
Time:  0.0  seconds

STT:  9
File info:  01WeaklyCorrelated\n00200\R01000\s078
Total value:  53669
Time:  0.0  seconds

STT:  10
File info:  01WeaklyCorrelated\n05000\R10000\s041
Total value:  13697339
Time:  0.015625  seconds

STT:  11
Fi

In [15]:
df_result = pd.DataFrame.from_dict(output)

In [16]:
df_result

Unnamed: 0,STT,File_path,Computed_value,Time
0,1,00Uncorrelated\n01000\R01000\s065,402186,0.000000
1,2,00Uncorrelated\n00050\R10000\s019,181645,0.000000
2,3,00Uncorrelated\n05000\R01000\s037,2022527,0.000000
3,4,00Uncorrelated\n10000\R10000\s032,40169760,0.000000
4,5,00Uncorrelated\n00200\R01000\s043,81824,0.000000
...,...,...,...,...
60,61,12Circle\n10000\R10000\s056,1658047833,81.734375
61,62,12Circle\n02000\R10000\s062,332772716,119.171875
62,63,12Circle\n01000\R10000\s028,165994205,109.828125
63,64,12Circle\n00200\R01000\s085,1028474,10.218750


In [17]:
df_result.to_excel("output.xlsx", index = False)

In [18]:
import pickle

test_cases_file = open('test_cases.pckl', 'wb')
pickle.dump(test_cases, test_cases_file)
test_cases_file.close()

In [21]:
f = open('store.pckl', 'rb')
obj = pickle.load(f)
f.close()

print(obj["00Uncorrelated"].keys())

dict_keys(['n01000', 'n00050', 'n05000', 'n10000', 'n00200'])
