In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to them apply without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from pulp import *
from copy import deepcopy
# Climb one directory at a time until the working-directory path no longer
# contains "notebooks"; the relative paths and `src.` imports used afterwards
# are resolved from wherever this loop stops.
while "notebooks" in os.getcwd():
    os.chdir(os.pardir)

from src.preprocessing.parser import Parser
from src.preprocessing.preprocessor import Preprocessor
from src.solvers.solution import Solution
from src.solvers.greedy import GreedySolver
from src.solvers.pulp_solver import PuLPSolver
from src.solvers.dp import DPSolver
from src.solvers.base_solver import DPMethods
from src.solvers.bb import BBSolver

from time import time
from tqdm import tqdm
from typing import Dict

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
def get_num_triples(dataset: Dict) -> int:
    """Return the total number of entries held across all values of ``dataset``.

    Args:
        dataset: Mapping whose values each support ``len()``.

    Returns:
        The sum of ``len(value)`` over every value in the mapping; ``0`` when
        the mapping is empty.
    """
    return sum(len(entries) for entries in dataset.values())

## Test 03

In [4]:
# Instance file for this test case, given relative to the working directory.
test_file = "data/testfiles/test3.txt"
parser = Parser(test_file)

In [5]:
# Read the instance file and report how long the parsing took (wall clock).
parse_start = time()
info = parser.read()
parse_elapsed = time() - parse_start

print(f"Total execution time {parse_elapsed} s")

28it [00:00, 28553.49it/s]

Total execution time 0.07176709175109863 s





In [6]:
# Pull the data and the four instance parameters out of the parsed result.
dataset: Dict[int, pd.DataFrame] = info["data"]
p, K, M, N = (info[name] for name in ("p", "K", "M", "N"))

In [7]:
# For every preprocessing stage, record how long it took (wall clock) and how
# many triples are left afterwards. The stages are chained: each one receives
# the output of the previous one.
preprocessing_results = {}

preprocessing_results['initial data'] = {
    "runtime": 0,
    "num_triples": get_num_triples(dataset)
}
preprocessor = Preprocessor(
    K,
    M,
    N,
    p,
    dataset
)

# Stage 1: remove trivial values.
t0 = time()
data_1 = preprocessor.remove_trivial_values()
t1 = time()

preprocessing_results['remove trivial values'] = {
    "runtime": (t1 - t0),
    "num_triples": get_num_triples(data_1)
}

# Stage 2: remove IP-dominated triples.
# Fix: this stage used to be given the raw `dataset`, which silently threw
# away the result of stage 1; it now continues from `data_1`.
# The clock is restarted so the bookkeeping above is not billed to this stage.
t1 = time()
data_2 = preprocessor.remove_ip_dominated(data_1)
t2 = time()
preprocessing_results['remove IP dominated'] = {
    "runtime": t2-t1,
    "num_triples": get_num_triples(data_2)
}

# Stage 3: remove LP-dominated triples from the output of stage 2.
t2 = time()
data_3 = preprocessor.remove_lp_dominated(data_2)
t3 = time()
preprocessing_results['remove LP dominated'] = {
    "runtime": t3-t2,
    "num_triples": get_num_triples(data_3)
}


100%|██████████| 4/4 [00:00<00:00, 1146.77it/s]
100%|██████████| 4/4 [00:00<00:00, 263.85it/s]
100%|██████████| 4/4 [00:00<00:00, 1316.07it/s]


In [8]:
# One column per preprocessing stage; rows are the recorded runtime and the
# number of triples remaining after that stage.
preprocessing_summary = pd.DataFrame(preprocessing_results)
preprocessing_summary

Unnamed: 0,initial data,remove trivial values,remove IP dominated,remove LP dominated
runtime,0,0.025482,0.021489,0.027675
num_triples,24,24.0,13.0,9.0


## LP solutions: PuLP and greedy algorithm

In [10]:
# Collects one entry per LP solver, each holding its 'runtime' and 'data_rate'.
lp_results = {}

In [11]:
# Solve on the fully preprocessed data with the PuLP-based solver and time
# only the solve() call, not the solver construction.
pulp_solver = PuLPSolver(K, M, N, p, data_3)

pulp_start = time()
pulp_solver.solve()
pulp_runtime = time() - pulp_start

lp_results['pulp'] = {
    'runtime': pulp_runtime,
    'data_rate': pulp_solver.solution.objective.value(),
}

In [12]:
# Solve the same preprocessed data with the greedy solver and time only the
# solve() call, so the figure is comparable with the PuLP entry.
solver = GreedySolver(K, M, N, p, data_3)

greedy_start = time()
solver.solve(data_3)
greedy_runtime = time() - greedy_start

lp_results['greedy'] = {
    'runtime': greedy_runtime,
    'data_rate': solver.solution.get_data_rate(),
}


In [13]:
# Side-by-side view of the LP solvers: one column per solver, rows are the
# recorded runtime and data rate.
lp_summary = pd.DataFrame(lp_results)
lp_summary

Unnamed: 0,pulp,greedy
runtime,0.005713,0.016958
data_rate,372.153846,372.153846


## Solutions to the ILP

In [14]:
# Collects one entry per ILP solver, each holding its 'runtime' and 'data_rate'.
ilp_results ={}

In [15]:
# A single DP solver instance is used for both DP variants below.
dp_solver = DPSolver(K, M, N, p, data_3)

# Variant 1: DPMethods.MAXIMIZE_R.
dp_max_start = time()
dp_solver.solve(None, method=DPMethods.MAXIMIZE_R)
dp_max_runtime = time() - dp_max_start
ilp_results['DP_maximize_r'] = {
    'runtime': dp_max_runtime,
    "data_rate": dp_solver.solution.get_data_rate(),
}

# Re-solve with PuLP and truncate its objective value to an integer; that
# value is handed to the MINIMIZE_P run as `U`.
# NOTE(review): presumably U acts as an upper bound on the data rate - confirm
# against DPSolver.solve.
pulp_solver.solve()
U = int(pulp_solver.solution.objective.value())

# Variant 2: DPMethods.MINIMIZE_P, timed separately from the PuLP solve above.
dp_min_start = time()
dp_solver.solve(None, method=DPMethods.MINIMIZE_P, U=U)
dp_min_runtime = time() - dp_min_start

ilp_results['DP_minimize_p'] = {
    'runtime': dp_min_runtime,
    "data_rate": dp_solver.solution.get_data_rate(),
}

### Branch and Bound

In [19]:
# Branch-and-bound solver built on the same preprocessed data as the others.
bb_solver = BBSolver(K, M, N, p, data_3)

In [20]:
# Time the branch-and-bound run and record the best rate stored on the solver.
bb_start = time()
bb_solver.solve()
bb_runtime = time() - bb_start

ilp_results['Branch&Bound'] = {
    'runtime': bb_runtime,
    "data_rate": bb_solver.best_rate,
}

In [21]:
# Side-by-side view of the ILP solvers: one column per solver, rows are the
# recorded runtime and data rate.
ilp_summary = pd.DataFrame(ilp_results)
ilp_summary

Unnamed: 0,DP_maximize_r,DP_minimize_p,Branch&Bound
runtime,0.002012,0.002494,0.048711
data_rate,350.0,350.0,350.0
