In [None]:
import pandas as pd
import numpy as np
import toml
from os.path import join
import glob
import pcoss_scheduler_pkg.problem_input as cpi
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "darkgrid" style globally before any figure is drawn.
sns.set(style="darkgrid")

In [None]:
# Largest job / machine counts for which problem instances are generated.
jobs_max_size = 64
machines_max_size = 64
# Directory the generated CSV and TOML files are written to.
# NOTE(review): the active value is an absolute, machine-specific Windows path;
# the commented line is a relative alternative — confirm which one should be kept.
# dir_path = r'../../data/auto/'
dir_path = r'C:\git\mgr\data\auto'
# URL stored as 'base_url' in every generated problem definition; the commented
# lines are alternative localhost endpoints.
# base_url = r'https://localhost:5001/Operations'
# base_url = r'https://localhost:44320/Operations'
base_url = r'https://rapid-sphinx-291111.ey.r.appspot.com/Operations'

In [None]:
def sizes(size):
    """Return the powers of two 2, 4, 8, ... up to the largest one not above ``size``."""
    exponent_stop = int(np.log2(size) + 1)
    return [int(2 ** exponent) for exponent in range(1, exponent_stop)]


job_sizes = sizes(jobs_max_size)
machine_sizes = sizes(machines_max_size)

# Master list of conflicting machine pairs. The table entry for a given machine
# count is always a prefix of this list: the first ``count // 4`` pairs.
_conflict_pairs = [
    (1, 2), (5, 7), (9, 10), (13, 14),
    (15, 16), (17, 18), (20, 25), (27, 30),
    (33, 35), (34, 36), (39, 44), (45, 46),
    (48, 55), (57, 58), (60, 61), (62, 63),
]
conflicting_machines = {
    machine_count: _conflict_pairs[:machine_count // 4]
    for machine_count in (1, 2, 4, 8, 16, 32, 64)
}

# Generate one random problem instance per (jobs, machines) size combination:
# a data CSV, a processing-times CSV and a TOML definition referencing both.
# The generator is seeded so that re-running the cell reproduces the same files.
RANDOM_SEED = 0
rng = np.random.default_rng(RANDOM_SEED)

for jobs_cnt in job_sizes:
    for machines_cnt in machine_sizes:
        problem_name = f'{jobs_cnt}j{machines_cnt}m0'

        data_file_name = f'{problem_name}_data'
        pt_file_name = f'{problem_name}_pt'

        data_csv_file_path = f'{join(dir_path, data_file_name)}.csv'
        pt_csv_file_path = f'{join(dir_path, pt_file_name)}.csv'

        # jobs x machines matrix of uniform values in [0, 1), two decimals.
        np.savetxt(
            data_csv_file_path,
            rng.random((jobs_cnt, machines_cnt)),
            fmt='%.2f',
            delimiter=',')

        # jobs x machines matrix of uniform values in [0, 10000), two decimals.
        np.savetxt(
            pt_csv_file_path,
            rng.random((jobs_cnt, machines_cnt)) * 10_000,
            fmt='%.2f',
            delimiter=',')

        toml_dict = {
            'title': problem_name,
            'info': f'{jobs_cnt} jobs {machines_cnt} machines example 0',
            'files':
            {
                'data': data_csv_file_path,
                'processing_times': pt_csv_file_path
            },
            'problem_data':
            {
                'index_cols': [],
                'grouping_cols': [],
                'conflicting_machines': conflicting_machines[machines_cnt],
                'base_url': base_url
            },
            'algorithm_config':
            {
                'algorithm_name': 'insertion_beam',
                'objective': 'cmax',
                'beam_width': 5
            },
            'display_config':
            {
                'print_responses': True,
                'print_method_times': False,
                'show_conflict_graph': True,
                'show_result_schedule_graph': True,
                'show_gantt_plot': True
            }
        }

        # Every ',]' in the serialized text is rewritten to ' ]' before writing
        # (presumably to drop the trailing comma the encoder leaves inside
        # arrays — TODO confirm). TOML files are UTF-8, so the encoding is
        # stated explicitly instead of relying on the platform default.
        with open(join(dir_path, f'{problem_name}.toml'), 'w', encoding='utf-8') as f:
            f.write(toml.dumps(toml_dict).replace(',]', ' ]'))

In [None]:
# List the problem definitions found in the configured output directory
# (``dir_path``), sorted so the listing is stable between runs.
sorted(glob.glob(join(dir_path, '*.toml')))

In [None]:
# Load the columns labelled '0', '1' and '2' from the timing results and give
# them descriptive names.
timing_column_names = {'0': 'file', '1': 'prep_time', '2': 'max_end_time'}
op_times_df = pd.read_csv('testing_times.csv', usecols=list(timing_column_names))
op_times_df = op_times_df.rename(columns=timing_column_names)
op_times_df

In [None]:
def calc_primitive_time(row, conflicts):
    """Return the largest of ``max(row)`` and every conflicting-pair sum.

    Args:
        row: Indexable collection of values; it is indexed with the keys
            found in ``conflicts`` and must be non-empty.
        conflicts: Iterable of ``(c1, c2)`` key pairs; for each pair the sum
            ``row[c1] + row[c2]`` becomes a candidate for the result.

    Returns:
        The maximum over all pair sums and the single largest value of ``row``.
    """
    pair_sums = [row[first] + row[second] for first, second in conflicts]
    return max(pair_sums + [max(row)])

# Parse the job and machine counts out of each file name ('<prefix><J>j<M>m...').
# The builtin ``int`` is used as the target dtype: the ``np.int`` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
op_times_df['jobs'] = (
    op_times_df['file'].str.extract(r'^[^\d]+(\d+)j.*$', expand=False).astype(int)
)
op_times_df['machines'] = (
    op_times_df['file'].str.extract(r'^[^\d]+\d+j(\d+)m.*$', expand=False).astype(int)
)
op_times_df['cells'] = op_times_df.jobs * op_times_df.machines

# For every problem file, total two reference times over all rows of its
# processing-times table:
#   primitive_time - per row, the larger of the row maximum and the biggest
#                    conflicting-pair sum (see calc_primitive_time)
#   seq_op_time    - per row, the plain sum of all values
# The totals are collected in lists and assigned once as float columns, rather
# than written cell by cell into integer-initialised columns.
primitive_times = []
seq_op_times = []
for pf in op_times_df['file']:
    pi = cpi.ProblemInput.from_toml(pf)
    primitive_times.append(
        sum(pi.processing_times.apply(
            lambda r: calc_primitive_time(r, pi.conflicting_machines_list), axis=1))
    )
    seq_op_times.append(sum(pi.processing_times.sum(axis=1)))

op_times_df['primitive_time'] = pd.Series(primitive_times, index=op_times_df.index, dtype=float)
op_times_df['seq_op_time'] = pd.Series(seq_op_times, index=op_times_df.index, dtype=float)

# Ratio of the measured max_end_time to each reference time.
op_times_df['time_ratio'] = op_times_df.max_end_time / op_times_df.primitive_time
op_times_df['time_ratio_2'] = op_times_df.max_end_time / op_times_df.seq_op_time

# Both ratios plotted against the problem size (jobs * machines).
g = sns.relplot(x="cells", y="time_ratio", data=op_times_df, kind="line",)
g = sns.relplot(x="cells", y="time_ratio_2", data=op_times_df, kind="line",)


In [None]:
# prep_time against the machine count, restricted to the rows with 32 jobs
# (change the filter value, e.g. to 128, to inspect another job count).
jobs_32_df = op_times_df[op_times_df.jobs == 32]
g = sns.relplot(x="machines", y="prep_time", data=jobs_32_df, kind="line")

In [None]:
# prep_time against the job count, restricted to the rows with 16 machines.
machines_16_plot_df = op_times_df[op_times_df.machines == 16]
g = sns.relplot(x="jobs", y="prep_time", data=machines_16_plot_df, kind="line")

In [None]:
from scipy.optimize import curve_fit
from random import random

def f(x, a, b, c):
    """Power-law model with an offset: ``b * x**a + c``.

    Args:
        x: Input value(s) the model is evaluated at.
        a: Exponent applied to ``x``.
        b: Factor multiplying the power term.
        c: Constant offset added to the result.

    Returns:
        The model value(s) ``b * x**a + c``.
    """
    power_term = x ** a
    return b * power_term + c

# Fit the model f to prep_time as a function of the job count, using only the
# rows with 16 machines. curve_fit returns the fitted parameters (p1) and
# their estimated covariance (p2).
machines_16_df = op_times_df[op_times_df.machines == 16]
p1, p2 = curve_fit(f, xdata=machines_16_df.jobs, ydata=machines_16_df.prep_time)

# Fitted (a, b, c).
p1