In [11]:
import statistics
import math

In [12]:
from benchmarkrewriter.benchmark_parser import WorkerBenchmarkParser, BenchmarkParser
import os
includeWorkers = True
if includeWorkers:
    path = r'C:\Users\huda\Downloads\benchmarks_with_workers\benchmarks_with_workers'
    parser = WorkerBenchmarkParser()
else:
    path = r'C:\Users\huda\Documents\GitHub\scheduling_model_jrc\code\upgrades\benchmarks\all'
    parser = BenchmarkParser()

In [13]:
def calculate_flexibility(machines_for_all_operations, n_machines): 
    counts = []

    for m in machines_for_all_operations:
        counts.append(len(m))
    
    return statistics.mean(counts) / n_machines

def calculate_duration_variety(durations):
    counts = []

    for d in durations:
        counts.append(len(d))

    return len(set(counts)) / len(counts)

def calculate_average_operations(job_sequence):
    n_jobs = 1
    counts = [1]
    for i in range(1, len(job_sequence)):
        if job_sequence[i] != job_sequence[i-1]:
            n_jobs+=1
            counts.append(0)
        counts[-1] += 1
    return (sum(counts))/n_jobs

def calculate_duration_features_workers(durations):
    all_durations = []
    for duration in durations:
        for d in duration:
            all_durations.extend(d)
    all_durations = [d for d in all_durations if d != 0]
    min_duration = min(all_durations)
    max_duration = max(all_durations)
    span = max_duration - min_duration
    stdev = statistics.stdev(all_durations)
    return min_duration, max_duration, statistics.mean(all_durations), span, stdev

def calculate_duration_features(durations):
    all_durations = []
    for duration in durations:
        all_durations.extend(duration)
    all_durations = [d for d in all_durations if d != 0]
    min_duration = min(all_durations)
    max_duration = max(all_durations)
    span = max_duration - min_duration
    #stdev = statistics.stdev(all_durations)
    mean = sum(all_durations)/len(all_durations)
    var = sum(pow(x-mean, 2) for x in all_durations) / len(all_durations)
    stdev = math.sqrt(var)
    return min_duration, max_duration, statistics.mean(all_durations), span, stdev

In [14]:
instances = os.listdir(path)
instance_data = []
feature_vectors = []
consider_duration_features = True
for instance in instances:
    encoding = parser.parse_benchmark(path + "\\" + instance)
    feature_vector = []
    min_d, max_d, d_mean, d_span, d_stdev = calculate_duration_features(encoding.durations()) if not includeWorkers else calculate_duration_features_workers(encoding.durations())
    #TODO: worker data, or just do it with base FJSSP since all benchmarks are transformed in the same way
    extracted_data = {
        "name": instance[:-4],
        "n_jobs": encoding.n_jobs(), 
        "n_machines": encoding.n_machines(),
        "n_operations": encoding.n_operations(),
        "durations": encoding.durations(),
        "job_sequence": encoding.job_sequence(),
        "machines_for_all_operations": encoding.get_all_machines_for_all_operations() if includeWorkers else encoding.get_machines_for_all_operations(),
        "flexibility": calculate_flexibility(encoding.get_all_machines_for_all_operations() if includeWorkers else encoding.get_machines_for_all_operations(), encoding.n_machines()),
        "duration_variety": calculate_duration_variety(encoding.durations()),
        "average_operations": calculate_average_operations(encoding.job_sequence()),
        "min_duration": min_d,
        "max_duration": max_d,
        "duration_span": d_span,
        "duration_std": d_stdev,
        "duration_mean": d_mean
    }
    feature_vector = [encoding.n_jobs(), encoding.n_machines(), encoding.n_operations(), extracted_data["flexibility"], extracted_data["duration_variety"], extracted_data["average_operations"]]
    if consider_duration_features:
        feature_vector.extend([min_d, max_d, d_span, d_stdev])
    feature_vectors.append(feature_vector)
    instance_data.append(extracted_data)

In [15]:
optimal_benchmarks = ['BrandimarteMk1', 'HurinkSdata10', 'HurinkSdata11', 'HurinkSdata12', 'HurinkSdata13', 'HurinkSdata14', 'HurinkSdata15', 'HurinkSdata16', 'HurinkSdata17', 'HurinkSdata18', 'HurinkSdata1', 'HurinkSdata34', 'HurinkSdata49', 'HurinkSdata4', 'HurinkSdata52', 'HurinkSdata8', 'HurinkSdata9', 'HurinkEdata1', 'HurinkEdata52', 'HurinkEdata53', 'HurinkEdata5', 'HurinkEdata7', 'HurinkEdata8', 'HurinkRdata19', 'HurinkRdata1', 'HurinkRdata45', 'HurinkRdata55', 'HurinkVdata19', 'HurinkVdata1', 'HurinkVdata20', 'HurinkVdata21', 'HurinkVdata22', 'HurinkVdata23', 'HurinkVdata2', 'HurinkVdata39', 'HurinkVdata40', 'HurinkVdata41', 'HurinkVdata42', 'HurinkVdata43', 'HurinkVdata45', 'HurinkVdata54', 'HurinkVdata55', 'HurinkVdata57', 'HurinkVdata59', 'HurinkVdata60', 'HurinkVdata61', 'HurinkVdata62', 'HurinkVdata64', 'HurinkVdata65', 'HurinkVdata66', 'Kacem1', 'Kacem2', 'Kacem3', 'Fattahi10', 'Fattahi11', 'Fattahi12', 'Fattahi13', 'Fattahi14', 'Fattahi15', 'Fattahi16', 'Fattahi1', 'Fattahi2', 'Fattahi3', 'Fattahi4', 'Fattahi5', 'Fattahi6', 'Fattahi7', 'Fattahi8', 'Fattahi9']

In [16]:
'abc' in 'abdc'

False

In [17]:
count = 0
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        count += 1
print(count)

0


In [18]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [19]:
print(max(optimal_data['duration_span']))
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

ValueError: max() arg is an empty sequence

In [20]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks and instance['duration_span'] > 100:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [21]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

StatisticsError: mean requires at least one data point

In [22]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['duration_span'] > 100:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [23]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

flexibility: Average: 0.2568950917117053 | Stdv: 0.17262359130141436
duration_variety: Average: 0.013631010733596117 | Stdv: 0.02110722537059106
operations: Average: 138.35087719298247 | Stdv: 83.10637336799945
machines: Average: 9.416666666666666 | Stdv: 3.217118766399817
duration_mean: Average: 116 | Stdv: 148.14857407346182
duration_std: Average: 64.26207023760195 | Stdv: 87.41568834088842
duration_span: Average: 251 | Stdv: 340.06911062312025


In [24]:
optimal_benchmarks = ['BrandimarteMk1', 'HurinkSdata10', 'HurinkSdata11', 'HurinkSdata12', 'HurinkSdata13', 'HurinkSdata14', 'HurinkSdata15', 'HurinkSdata16', 'HurinkSdata17', 'HurinkSdata18', 'HurinkSdata1', 'HurinkSdata34', 'HurinkSdata49', 'HurinkSdata4', 'HurinkSdata52', 'HurinkSdata8', 'HurinkSdata9', 'HurinkEdata1', 'HurinkEdata52', 'HurinkEdata53', 'HurinkEdata5', 'HurinkEdata7', 'HurinkEdata8', 'HurinkRdata19', 'HurinkRdata1', 'HurinkRdata45', 'HurinkRdata55', 'HurinkVdata19', 'HurinkVdata1', 'HurinkVdata20', 'HurinkVdata21', 'HurinkVdata22', 'HurinkVdata23', 'HurinkVdata2', 'HurinkVdata39', 'HurinkVdata40', 'HurinkVdata41', 'HurinkVdata42', 'HurinkVdata43', 'HurinkVdata45', 'HurinkVdata54', 'HurinkVdata55', 'HurinkVdata57', 'HurinkVdata59', 'HurinkVdata60', 'HurinkVdata61', 'HurinkVdata62', 'HurinkVdata64', 'HurinkVdata65', 'HurinkVdata66', 'Kacem2', 'Fattahi16']

In [25]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [26]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

StatisticsError: mean requires at least one data point

In [30]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
i = 25
c = 0
d = dict()
while c < (1000/i):
    for instance in instance_data:
        if instance['duration_span'] >= i*c and instance['duration_span'] <= (c+1)*i:
            optimal_data['flexibility'].append(instance['flexibility'])
            optimal_data['duration_variety'].append(instance['duration_variety'])
            optimal_data['operations'].append(instance['n_operations'])
            optimal_data['machines'].append(instance['n_machines'])
            optimal_data['duration_mean'].append(instance['duration_mean'])
            optimal_data['duration_std'].append(instance['duration_std'])
            optimal_data['duration_span'].append(instance['duration_span'])
    c+=1
    d[str(c)] = []
    plot_data = {
        'flexibility': [[],[]],
        'duration_variety': [[],[]],
        'operations': [[],[]],
        'machines': [[],[]],
        'duration_mean': [[],[]],
        'duration_std': [[],[]],
        'duration_span': [[],[]],
        'count': len(optimal_data['flexibility'])
    }
    for metric in optimal_data:
        plot_data[metric][0] = statistics.mean(optimal_data[metric])
        plot_data[metric][1] = statistics.stdev(optimal_data[metric])
    d[str(c)].append(plot_data)




In [31]:
instance = None
for i in instance_data:
    if i['name'] == '0_BehnkeGeiger_58_workers':
        ds = []
        for j in i['durations']:
            for k in j:
                ds.extend(k)
ds_set = set(k)
ds_set_self = []
count = 0
for value in ds:
    if value > 0:
        count += 1
        if value not in ds_set_self:
            ds_set_self.append(value)

In [32]:
len(ds_set_self)/count

5.666916766593299e-05

In [33]:
for i in instance_data:
    if i['name'] == '0_BehnkeGeiger_58_workers':
        print(*i['durations'])

[[11  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0 12  0]
 ...
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 ...
 [25 24 24 ... 24 25  0]
 [ 0 24  0 ...  0 25  0]
 [ 0  0  0 ...  0  0 16]] [[21 21  0 ...  0  0  0]
 [27 31 31 ... 27 28 30]
 [ 0  0  0 ...  0 23  0]
 ...
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 

In [34]:
ds

[np.int64(11),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(11),
 np.int64(11),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(11),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(12),
 np.int64(0),
 np.int64(12),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(11),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(12),
 np.int64(10),
 np.int64(0),
 np.int64(0),
 np.int64(12),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(10),
 np.int64(10),
 np.int64(11),
 np.int64(0),
 np.int64(0),
 n

In [35]:
ds_set

{np.int64(0),
 np.int64(19),
 np.int64(20),
 np.int64(21),
 np.int64(22),
 np.int64(23)}

In [36]:
instance_data[0]

{'name': '0_BehnkeGeiger_10_workers',
 'n_jobs': 20,
 'n_machines': 20,
 'n_operations': 100,
 'durations': array([[[23, 26, 24, ...,  0,  0,  0],
         [ 0, 30,  0, ...,  0,  0, 27],
         [28, 26, 29, ...,  0, 28, 28],
         ...,
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0]],
 
        [[ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         ...,
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0]],
 
        [[ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         ...,
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0]],
 
        ...,
 
        [[ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ...,  0,  0,  0],
         [ 0,  0,  0, ..., 