In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pickle
import itertools as it

from collections import Counter

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import pandas as pd

class data_object(object):
    """Minimal container that stores only a ``name`` attribute.

    NOTE(review): presumably the entries of the pickled ``random_states`` are
    instances of this class (carrying extra attributes such as ``rr_ratio``),
    and this definition must exist so ``pickle.load`` can resolve them --
    confirm against the script that produced the pickle.
    """
    def __init__(self, name):
        self.name = name

In [2]:
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in matplotlib 3.6
# and the old names were later removed. Use whichever name this installation
# provides; fall back to the original name so a missing style still fails loudly.
_poster_style = next(
    (name for name in ('seaborn-v0_8-poster', 'seaborn-poster') if name in plt.style.available),
    'seaborn-poster',
)
plt.style.use(_poster_style)

In [3]:
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# pickle files that come from a trusted source.
with open('random_states_3.pickle','rb') as f:
    random_states = pickle.load(f)
# Rebuild the loaded mapping as a plain dict with the same keys and values.
random_states = {i:random_states[i] for i in random_states}

# First, let's take a look at solution statistics

In [4]:
# Marker line that the master log contains for a run once the given stage has
# succeeded, keyed by the name of the status array that records it.
_stage_markers = {
    'finished_initialization': 'Success: > Initialization\n',
    'finished_DDF_product': 'Success: > Added DDF formulation - Product\n',
    'finished_revenue': 'Success: > One-step Optimization - Revenue\n',
    'finished_DDF_reflux': 'Success: > Added DDF formulation - Reflux\n',
    'finished_profit_3': 'Success: > One-step Optimization - Profit 3\n',
    'finished_profit_3_1': 'Success: > One-step Optimization - Profit 3-1\n',
    'finished_profit_3_2': 'Success: > One-step Optimization - Profit 3-2\n',
}

total_runs = []
_stage_status = {stage: [] for stage in _stage_markers}

with open('./log/master/master_log.txt','r') as f:
    # Lines starting with a dashed rule separate the runs; every other group of
    # consecutive lines is the record of one run.
    for is_separator, chunk in it.groupby(f, lambda line: line.startswith('------------')):
        if is_separator:
            continue
        run_data = list(chunk)

        # The case number is read from the third line of the record: the text
        # between 'Preset_Case_' and the last underscore on that line.
        header = run_data[2]
        prefix = 'Preset_Case_'
        run_case = int(header[header.find(prefix)+len(prefix):header.rfind('_')])
        total_runs.append(run_case)

        # A stage counts as finished when its exact marker line is present.
        for stage, marker in _stage_markers.items():
            _stage_status[stage].append(marker in run_data)

finished_initialization = np.array(_stage_status['finished_initialization'])
finished_DDF_product = np.array(_stage_status['finished_DDF_product'])
finished_revenue = np.array(_stage_status['finished_revenue'])
finished_DDF_reflux = np.array(_stage_status['finished_DDF_reflux'])
finished_profit_3 = np.array(_stage_status['finished_profit_3'])
finished_profit_3_1 = np.array(_stage_status['finished_profit_3_1'])
finished_profit_3_2 = np.array(_stage_status['finished_profit_3_2'])

# Per-run flags: at least one / every one of the four optimizations finished.
_optimization_flags = [finished_revenue, finished_profit_3, finished_profit_3_1, finished_profit_3_2]
finished_optimization_any = np.any(_optimization_flags, axis=0)
finished_optimization_all = np.all(_optimization_flags, axis=0)

In [5]:
# For every stage, the case numbers (in log order) of the runs that did not finish it.
initialization_failure_case = [case for case, done in zip(total_runs, finished_initialization) if not done]
DDF_product_failure_case = [case for case, done in zip(total_runs, finished_DDF_product) if not done]
revenue_failure_case = [case for case, done in zip(total_runs, finished_revenue) if not done]
DDF_reflux_failure_case = [case for case, done in zip(total_runs, finished_DDF_reflux) if not done]
profit_3_failure_case = [case for case, done in zip(total_runs, finished_profit_3) if not done]
profit_3_1_failure_case = [case for case, done in zip(total_runs, finished_profit_3_1) if not done]
profit_3_2_failure_case = [case for case, done in zip(total_runs, finished_profit_3_2) if not done]

## Total number of attempts

In [6]:
len(total_runs)

995

In [7]:
'invalid cases: ', set(i for i in random_states) - set(total_runs)

('invalid cases: ', {591, 809, 893, 967, 996})

## Successfully initialized

In [8]:
sum(finished_initialization), sum(finished_initialization) / len(total_runs)

(899, 0.9035175879396985)

## Successfully adopted product DDF formulation

In [9]:
sum(finished_DDF_product), sum(finished_DDF_product) / sum(finished_initialization)

(880, 0.978865406006674)

## Successfully reached revenue optimum solution

In [10]:
sum(finished_revenue), sum(finished_revenue) / sum(finished_DDF_product)

(564, 0.6409090909090909)

## Successfully adopted reflux DDF formulation

In [11]:
sum(finished_DDF_reflux), sum(finished_DDF_reflux) / sum(finished_revenue)

(564, 1.0)

## Successfully reached profit optimum solution: profit-3

In [12]:
sum(finished_profit_3), sum(finished_profit_3) / sum(finished_DDF_reflux)

(562, 0.9964539007092199)

### Increasing tray cost weight : profit-3-1

In [13]:
sum(finished_profit_3_1), sum(finished_profit_3_1) / sum(finished_DDF_reflux)

(550, 0.975177304964539)

### Increasing feed cost weight: profit-3-2

In [14]:
sum(finished_profit_3_2), sum(finished_profit_3_2) / sum(finished_DDF_reflux)

(552, 0.9787234042553191)

# Different starting points don't seem to drastically affect success rate

In [None]:
def divide_data(x):
    """Return ``x`` as an array together with two masked subsets of it.

    The result is a 3-tuple: the full array, the entries selected by the
    module-level boolean mask ``finished_initialization``, and the entries
    selected by ``finished_optimization_any``.
    """
    values = np.array(x)
    initialized = values[finished_initialization]
    optimized = values[finished_optimization_any]
    return values, initialized, optimized

In [None]:
def _side_draw(case, rank):
    """Return the (tray, draw) pair of the rank-th side draw of `case`, ordered by tray.

    Sorting the items keeps every draw value paired with its own tray key,
    independent of the insertion order of ``side_draw_flag``.
    """
    return sorted(random_states[case].side_draw_flag.items())[rank]

# Build every array in the order of `total_runs`: the finished_* masks applied in
# divide_data are recorded in that order, so row k must describe total_runs[k].
reflux_ratio_all = np.array([random_states[i].rr_ratio for i in total_runs])
intermediate_location_all = np.array([_side_draw(i, 0)[0] for i in total_runs])
intermediate_draw_all = np.array([_side_draw(i, 0)[1] for i in total_runs])
gasoline_location_all = np.array([_side_draw(i, 1)[0] for i in total_runs])
gasoline_draw_all = np.array([_side_draw(i, 1)[1] for i in total_runs])
diesel_location_all = np.array([_side_draw(i, 2)[0] for i in total_runs])
diesel_draw_all = np.array([_side_draw(i, 2)[1] for i in total_runs])

fig, axs = plt.subplots(7,1,figsize=(16,14))

# panel index -> (data, title, histogram range, number of bins)
tmp_dic = {0:(reflux_ratio_all,'Reflux',(0.05,0.15),10),
          1:(intermediate_location_all,'Heavy naphtha Tray',(1,2),2),
          2:(intermediate_draw_all,'Heavy naphtha Draw',(0.01,0.03),10),
          3:(gasoline_location_all,'Gasoline Tray',(3,9),7),
          4:(gasoline_draw_all,'Gasoline Draw',(0.1,0.3),10),
          5:(diesel_location_all,'Diesel Tray',(10,18),9),
          6:(diesel_draw_all,'Diesel Draw',(0.2,0.5),10)}

for j in range(7):
    data, title, value_range, n_bins = tmp_dic[j]
    # y[0], y[1], y[2]: bin counts of all runs, initialized runs, optimized runs
    y, bins, patches = axs[j].hist(divide_data(data),bins=n_bins,histtype='bar',range=value_range,alpha=0.7)
    ymax = max(y[0])
    # leave headroom above the bars for the ratio curves drawn on the twin axis
    axs[j].set_ylim(bottom=0,top=2*ymax)

    bincenters = 0.5*(bins[1:]+bins[:-1])
    # per-bin success ratios; bins with an empty numerator or denominator are not drawn
    ratio_init = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[1],y[0])]
    ratio_opt = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[2],y[1])]

    ax_ = plt.twinx(axs[j])
    ax_.plot(bincenters,ratio_init,'C1:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    ax_.plot(bincenters,ratio_opt,'C2:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    ax_.set_ylim(0,1.2)

    axs[j].set_title(title)

fig.legend(['Set-Point','Initialization','Optimized'])
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# Stage keys are read from case 1 and reused for every case below.
# NOTE(review): assumes key 1 exists in random_states and that all cases share
# the same temperature_flag keys -- confirm.
reactive_stages = random_states[1].temperature_flag.keys()

In [None]:
# One array of temperature set-points per reactive stage. Rows follow the order
# of `total_runs` because the finished_* masks applied in divide_data are
# recorded in that order.
temperature_all = {stage: np.array([random_states[i].temperature_flag[stage] for i in total_runs])
                   for stage in reactive_stages}

fig, axs = plt.subplots(len(reactive_stages),2,figsize=(16,14))

for row, stage in enumerate(reactive_stages):
    # left panel: counts of all / initialized / optimized runs per bin
    y, bins, patches = axs[row][0].hist(divide_data(temperature_all[stage]),bins=10,histtype='bar',range=(220,280),alpha=0.7)

    bincenters = 0.5*(bins[1:]+bins[:-1])
    # per-bin success ratios; bins with an empty numerator or denominator are not drawn
    ratio_init = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[1],y[0])]
    ratio_opt = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[2],y[1])]

    # right panel: the two ratio curves over the same x-range as the histogram
    axs[row][1].plot(bincenters,ratio_init,'C1:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].plot(bincenters,ratio_opt,'C2:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].set_ylim(0,1.2)
    axs[row][1].set_xlim(220,280)

axs[0][0].set_title('Temperature')
axs[0][0].legend(['Set-Point','Initialization','Optimization'])

plt.tight_layout()
plt.show()
plt.close()

# Strong 1-sided start seems to increase success rate

In [None]:
# One array of catalyst set-points per reactive stage. Rows follow the order of
# `total_runs` because the finished_* masks applied in divide_data are recorded
# in that order.
catalyst_all = {stage: np.array([random_states[i].catalyst_flag[stage] for i in total_runs])
                for stage in reactive_stages}

fig, axs = plt.subplots(len(reactive_stages),2,figsize=(16,14))

for row, stage in enumerate(reactive_stages):
    # left panel: counts of all / initialized / optimized runs per bin
    y, bins, patches = axs[row][0].hist(divide_data(catalyst_all[stage]),bins=10,histtype='bar',range=(0,8000),alpha=0.7)

    bincenters = 0.5*(bins[1:]+bins[:-1])
    # per-bin success ratios; bins with an empty numerator or denominator are not drawn
    ratio_init = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[1],y[0])]
    ratio_opt = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[2],y[1])]

    # right panel: the two ratio curves over the same x-range as the histogram
    axs[row][1].plot(bincenters,ratio_init,'C1:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].plot(bincenters,ratio_opt,'C2:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].set_ylim(0,1.2)
    axs[row][1].set_xlim(0,8000)

axs[0][0].set_title('catalyst')
axs[0][0].legend(['Set-Point','Initialization','Optimization'])

plt.tight_layout()
plt.show()
plt.close()

In [None]:
# One array of feed set-points per reactive stage. Rows follow the order of
# `total_runs` because the finished_* masks applied in divide_data are recorded
# in that order.
feed_all = {stage: np.array([random_states[i].feed_flag[stage] for i in total_runs])
            for stage in reactive_stages}

fig, axs = plt.subplots(len(reactive_stages),2,figsize=(16,14))

for row, stage in enumerate(reactive_stages):
    # left panel: counts of all / initialized / optimized runs per bin
    y, bins, patches = axs[row][0].hist(divide_data(feed_all[stage]),bins=10,histtype='bar',range=(0,3),alpha=0.7)

    bincenters = 0.5*(bins[1:]+bins[:-1])
    # per-bin success ratios; bins with an empty numerator or denominator are not drawn
    ratio_init = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[1],y[0])]
    ratio_opt = [float('nan') if whole==0 or part==0 else part/whole for part,whole in zip(y[2],y[1])]

    # right panel: the two ratio curves over the same x-range as the histogram
    axs[row][1].plot(bincenters,ratio_init,'C1:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].plot(bincenters,ratio_opt,'C2:o',markeredgecolor='w',markersize=12,markeredgewidth = 1)
    axs[row][1].set_ylim(0,1.2)
    axs[row][1].set_xlim(0,3)

axs[0][0].set_title('feed')
axs[0][0].legend(['Set-Point','Initialization','Optimization'])

plt.tight_layout()
plt.show()
plt.close()

# Reduce dimensionality for visualization using PCA; the inputs are uniformly distributed, so the leading components are expected to capture little of the variance

In [None]:
# Feature matrix: one row per run, one column per set-point. The per-stage
# blocks (temperature, catalyst, feed) all use the stage keys of temperature_all.
_stage_keys = list(temperature_all)
_feature_columns = [
    reflux_ratio_all,
    intermediate_location_all,
    intermediate_draw_all,
    gasoline_location_all,
    gasoline_draw_all,
    diesel_location_all,
    diesel_draw_all,
]
_feature_columns += [temperature_all[stage] for stage in _stage_keys]
_feature_columns += [catalyst_all[stage] for stage in _stage_keys]
_feature_columns += [feed_all[stage] for stage in _stage_keys]
x_data = np.stack(_feature_columns, axis=1)

## 46 features

In [None]:
x_data.shape

In [None]:
# Standardize every column of x_data, then fit a 10-component PCA on the
# standardized data and keep the projected scores.
center_scaled = StandardScaler().fit_transform(x_data)
pca = PCA(n_components=10)
scores = pca.fit_transform(center_scaled)

### Eigenvalues

In [None]:
pca.explained_variance_

### Captured Variance (explained variance ratio)

In [None]:
pca.explained_variance_ratio_

### Loadings - PC1 - Mostly independent, except the temperatures, whose loadings share the same sign: when one temperature increases, the others increase as well. This response is modelled by sorting the temperatures from low to high

In [None]:
fig, axs = plt.subplots(figsize=(16,9))

# Column ranges of the feature matrix that belong to each variable family.
grouping = {'Reflux':slice(0,1),'Product':slice(1,7),'Temperature':slice(7,20),\
            'Catalyst':slice(20,33),'Feed':slice(33,None)}

pc1_loadings = pca.components_[0]
x_loc = np.arange(1,len(pc1_loadings)+1)

# One bar call per family so that each family gets its own colour.
for columns in grouping.values():
    axs.bar(x_loc[columns],pc1_loadings[columns])

# Tick positions are hard-coded to sit at the centre of each family.
axs.set_xticks([1,4.5,14,27,40])
axs.set_xticklabels(list(grouping))

axs.set_xlabel('Variable')
axs.set_ylabel('Loading')
plt.show()
plt.close()

### Scores - Success rate is not biased towards any particular combination; the initialized solutions form a uniformly distributed set of starting points for multi-start optimization

In [None]:
fig, axs = plt.subplots(figsize=(16,9))

targets = ['Random Start', 'Initialization Complete', 'Optimized Any', 'Optimized All']
colors = ['C1', 'C2', 'C3','C4']
# 'Random Start' selects every run. slice(None) keeps the selection
# one-dimensional; indexing with the scalar True would instead prepend a
# length-1 axis to the selected scores.
masks = {'Random Start':slice(None),\
         'Initialization Complete':finished_initialization,\
         'Optimized Any':finished_optimization_any,\
         'Optimized All':finished_optimization_all}

# Draw the groups in order so that later (more restrictive) groups are painted
# on top of earlier ones.
for target, color in zip(targets,colors):
    mask = masks[target]
    axs.scatter(scores[:,0][mask],scores[:,1][mask],color=color,label=target)

axs.legend()
axs.set_xlabel('Principal Component 1')
axs.set_ylabel('Principal Component 2')

plt.show()
plt.close()

# Second, let's take a look at cause of failure and performance

In [None]:
# initialization_failure_case
# DDF_product_failure_case
# revenue_failure_case
# DDF_reflux_failure_case
# profit_3_failure_case
# profit_3_1_failure_case
# profit_3_2_failure_case

In [None]:
failure_cause_list = []

for case in DDF_product_failure_case:
    log_path = './log/text/' + 'mul_onestep_Preset_Case_{}_pf.dat'.format(case)
    with open(log_path) as f:
        log_content = f.readlines()

    # Walk the log backwards and take the last line that either starts with
    # '>' or mentions 'Working on'; its text (without the '> ' prefix) is the
    # recorded cause. Cases without such a line contribute nothing.
    break_cause = next(
        (line.replace('> ','') for line in reversed(log_content)
         if line.startswith('>') or 'Working on' in line),
        None,
    )
    if break_cause is not None:
        failure_cause_list.append(break_cause)

### Combine failure causes

In [None]:
def _between(text, start, end):
    """Return the part of `text` after the first `start` and before the last `end`."""
    return text[text.find(start) + len(start) : text.rfind(end)]


def _shorten_failure(line):
    """Map one raw failure line to a short label; the first matching rule wins."""
    if 'temperature' in line:
        return _between(line, 'Working on adjusting ', 'temperature') + 'Temp'
    if 'changing Q' in line:
        return _between(line, 'Working on ', ',') + ' duty'
    if 'catalyst and feed' in line:
        return 'alpha: ' + _between(line, 'alpha = ', ':') + ' feed/catalyst'
    if 'Reflux' in line:
        return 'PR-ratio: ' + _between(line, 'PR ratio = ', '\n') + ' reflux'
    if 'Connect' in line:
        return 'stages 20 connect'
    if 'DDF' in line:
        return 'add 3 DDF'
    # anything unrecognised is kept verbatim, minus the newline
    return line.replace('\n','')


failure_short = [_shorten_failure(line) for line in failure_cause_list]

In [None]:
def _failure_group(label):
    """Return the last whitespace-separated token of `label` ('' for a blank label).

    The same function is used for sorting and for grouping, so the sorted list
    can never split one group into several runs.
    """
    tokens = label.split()
    return tokens[-1] if tokens else ''


def _failure_sort_key(pair):
    """Sort key for a (label, count) pair: group first, then the label's number.

    Labels whose second token is a number are ordered by that number; labels
    without a numeric second token sort after them, alphabetically, instead of
    raising.
    """
    label = pair[0]
    tokens = label.split()
    try:
        rank = (0, float(tokens[1]), '')
    except (IndexError, ValueError):
        rank = (1, 0.0, label)
    return (_failure_group(label), rank)


# list of (label, count) pairs, sorted by group and then by the label's number
failure_counts = sorted(Counter(failure_short).items(), key=_failure_sort_key)

# group name -> list of (label, count) pairs belonging to that group
failure_dic = {}
for key, group in it.groupby(failure_counts, lambda pair: _failure_group(pair[0])):
    failure_dic[key] = list(group)

In [None]:
# failure_counts

In [None]:
failure_dic

In [None]:
sum(i[1] for i in failure_counts)

## Initialization failures

In [None]:
fig, axs = plt.subplots(figsize=(16,9))

# Labels that get a fixed tick name; every other label is cut at its last space.
_fixed_names = {'add 3 DDF': 'DDF', 'stages 20 connect': 'connect'}

loc = 0
names = []
for key, entries in failure_dic.items():
    frequencies = [count for _, count in entries]
    names.extend(_fixed_names.get(label, label[:label.rfind(' ')]) for label, _ in entries)

    # bars of one group sit next to each other and share a colour / legend entry
    x_coordinates = np.arange(loc, loc + len(entries))
    axs.bar(x_coordinates, frequencies, align='center', label=key)
    loc += len(entries)

axs.set_xticks(np.arange(loc))
axs.set_xticklabels(names,rotation=45,ha='right')

axs.set_ylabel('Occurrences')
axs.legend()
axs.set_title('Causes of Failure')
plt.show()

## Optimization Failure

In [33]:
def _last_exit_line(case):
    """Return the last line starting with 'EXIT:' in the text log of `case`, or None."""
    file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(case)
    with open('./log/text/'+file_name) as f:
        log_content = f.readlines()
    for line in reversed(log_content):
        if line.startswith('EXIT:'):
            return line
    return None


def _collect_exit_causes(failed_cases, upstream_failures, relabel, track_restoration):
    """Collect one failure cause per case in `failed_cases` not in `upstream_failures`.

    The cause is the text of the last 'EXIT:' line of the case log. When that
    line reports 'Optimal Solution Found' the cause is recorded as `relabel`
    instead, and the case is added to `restoration_fail_case` if
    `track_restoration` is set. Cases whose log has no 'EXIT:' line are skipped.

    NOTE(review): the last 'EXIT:' line of a log may belong to a later stage
    than the one that failed -- confirm that this is the intended line.
    """
    causes = []
    for case in failed_cases:
        if case in upstream_failures:
            continue
        exit_line = _last_exit_line(case)
        if exit_line is None:
            continue
        if 'Optimal Solution Found' in exit_line:
            causes.append(relabel)
            if track_restoration:
                restoration_fail_case.append(case)
        else:
            causes.append(exit_line.replace('EXIT: ','').replace('\n',''))
    return causes


restoration_fail_case =[]

revenue_fail_list = _collect_exit_causes(
    revenue_failure_case, DDF_product_failure_case, 'Restoration Failed', True)
profit_3_fail_list = _collect_exit_causes(
    profit_3_failure_case, DDF_reflux_failure_case, 'Maximum Number of Iterations Exceeded.', False)
profit_3_1_fail_list = _collect_exit_causes(
    profit_3_1_failure_case, DDF_reflux_failure_case, 'Maximum Number of Iterations Exceeded.', False)
profit_3_2_fail_list = _collect_exit_causes(
    profit_3_2_failure_case, DDF_reflux_failure_case, 'Restoration Failed', True)

In [34]:
Counter(revenue_fail_list), Counter(profit_3_fail_list), Counter(profit_3_1_fail_list), Counter(profit_3_2_fail_list) 

(Counter({'Maximum Number of Iterations Exceeded.': 304,
          'Restoration Failed': 11,
          'Converged to a point of local infeasibility. Problem may be infeasible.': 1}),
 Counter({'Maximum Number of Iterations Exceeded.': 2}),
 Counter({'Maximum Number of Iterations Exceeded.': 14}),
 Counter({'Restoration Failed': 1,
          'Maximum Number of Iterations Exceeded.': 11}))

In [None]:
# # j_list = [603, 712]
# j_list = [34, 47, 63, 133, 222, 251, 334, 383, 643, 832, 858, 915, 944, 988]
# for case in j_list:
#     file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(case)
#     with open('./log/text/'+file_name) as f:
#         log_content = f.readlines()

#         for j, line in enumerate(reversed(log_content)):
#             if '> One-step Optimization - Profit 3-1' in line:
#                 print('case-{}: True'.format(case))
#                 break
#         print(log_content[-j+46])

# Lastly, optimization results

In [None]:
# Collect, for every run that finished the revenue optimization, the objective
# value and two excerpts of its text log (result after / before optimization).
revenue_obj_list = []
revenue_case_list = []
revenue_opt_results = []
revenue_opt_origin_results = []

for j_,j in enumerate(total_runs):
    if finished_revenue[j_]:
        file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(j)
        # keep the case number: `j` is reused as a line index by the loops below
        case = j
        with open('./log/text/'+file_name) as f:
            log_content = f.readlines()
            
            # First pass (from the end of the log): remember the reversed index of
            # the stage header, minus 50. Every match overwrites the previous one,
            # so the match closest to the start of the file wins.
            # NOTE(review): if no line matches, starter_position keeps the value
            # from the previous case (or is undefined for the first case).
            for j, line in enumerate(reversed(log_content)):
                if '> One-step Optimization - Revenue' in line:
                    starter_position = j - 50            
            
            
            # Second pass (from the end of the log): ignore reversed indices up to
            # starter_position, then stop at the first line starting with 'obj'.
            for j, line in enumerate(reversed(log_content)):

                if j <= starter_position:
                    continue
                
                if line.startswith('obj'):
                    # the objective value is the text after 'obj ' up to the newline
                    start = 'obj '
                    end = '\n'
                    string = line[line.find(start)+len(start):line.rfind(end)]
                    string = string.strip()
                    revenue_obj_list.append(float(string))
                    revenue_case_list.append(case)
                    break
            
            # `j` is now the reversed index at which the second pass stopped; the
            # excerpts are 1 + 13 + 5 lines at fixed offsets from it.
            # NOTE(review): if no 'obj' line was found, an excerpt is still appended
            # here while the objective list is not, so the lists fall out of step.
            # NOTE(review): presumably a summary line, 13 stage rows and 5 product
            # rows -- confirm against the log format.
            revenue_opt_results.append([log_content[-j+4]]+log_content[-j+14:-j+27]+log_content[-j+32:-j+37])
            revenue_opt_origin_results.append([log_content[-j-47]]+log_content[-j-37:-j-24]+log_content[-j-19:-j-14])
            
# split every excerpt line into whitespace-separated tokens
revenue_opt_results = [[data.split() for data in case] for case in revenue_opt_results]
revenue_opt_origin_results = [[data.split() for data in case] for case in revenue_opt_origin_results]

In [None]:
# Collect, for every run that finished the profit-3 optimization, the objective
# value and two excerpts of its text log (result after / before optimization).
profit_3_obj_list = []
profit_3_case_list = []
profit_3_opt_results = []
profit_3_opt_origin_results = []

for j_,j in enumerate(total_runs):
    if finished_profit_3[j_]:
        file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(j)
        # keep the case number: `j` is reused as a line index by the loops below
        case = j
        with open('./log/text/'+file_name) as f:
            log_content = f.readlines()
            
            # First pass (from the end of the log): remember the reversed index of
            # the stage header, minus 50. Every match overwrites the previous one,
            # so the match closest to the start of the file wins.
            # NOTE(review): this substring also matches the 'Profit 3-1' and
            # 'Profit 3-2' headers; the result is the plain 'Profit 3' header only
            # if that header comes first in the file -- confirm.
            # NOTE(review): if no line matches, starter_position keeps the value
            # from the previous case (or is undefined for the first case).
            for j, line in enumerate(reversed(log_content)):
                if '> One-step Optimization - Profit 3' in line:
                    starter_position = j - 50            
            
            
            # Second pass (from the end of the log): ignore reversed indices up to
            # starter_position, then stop at the first line starting with 'obj'.
            for j, line in enumerate(reversed(log_content)):

                if j <= starter_position:
                    continue
                
                if line.startswith('obj'):
                    # the objective value is the text after 'obj ' up to the newline
                    start = 'obj '
                    end = '\n'
                    string = line[line.find(start)+len(start):line.rfind(end)]
                    string = string.strip()
                    profit_3_obj_list.append(float(string))
                    profit_3_case_list.append(case)
                    break
            # `j` is now the reversed index at which the second pass stopped; the
            # optimized excerpt is 1 + 13 + 7 lines and the original excerpt is
            # 1 + 13 + 5 lines, both at fixed offsets from it.
            # NOTE(review): if no 'obj' line was found, an excerpt is still appended
            # here while the objective list is not, so the lists fall out of step.
            profit_3_opt_results.append([log_content[-j+4]]+log_content[-j+14:-j+27]+log_content[-j+32:-j+39])
            profit_3_opt_origin_results.append([log_content[-j-47]]+log_content[-j-37:-j-24]+log_content[-j-19:-j-14])
            
# split every excerpt line into whitespace-separated tokens
profit_3_opt_results = [[data.split() for data in case] for case in profit_3_opt_results]
profit_3_opt_origin_results = [[data.split() for data in case] for case in profit_3_opt_origin_results]

In [None]:
# Collect, for every run that finished the profit-3-1 optimization, the
# objective value and two excerpts of its text log (result after / before
# optimization).
profit_3_1_obj_list = []
profit_3_1_case_list = []
profit_3_1_opt_results = []
profit_3_1_opt_origin_results = []

for j_,j in enumerate(total_runs):
    if finished_profit_3_1[j_]:
        file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(j)
        # keep the case number: `j` is reused as a line index by the loops below
        case = j
        with open('./log/text/'+file_name) as f:
            log_content = f.readlines()
            
            # First pass (from the end of the log): remember the reversed index of
            # the stage header, minus 50. Every match overwrites the previous one,
            # so the match closest to the start of the file wins.
            # NOTE(review): if no line matches, starter_position keeps the value
            # from the previous case (or is undefined for the first case).
            for j, line in enumerate(reversed(log_content)):
                if '> One-step Optimization - Profit 3-1' in line:
                    starter_position = j - 50            
            
            
            # Second pass (from the end of the log): ignore reversed indices up to
            # starter_position, then stop at the first line starting with 'obj'.
            for j, line in enumerate(reversed(log_content)):

                if j <= starter_position:
                    continue
                
                if line.startswith('obj'):
                    # the objective value is the text after 'obj ' up to the newline
                    start = 'obj '
                    end = '\n'
                    string = line[line.find(start)+len(start):line.rfind(end)]
                    string = string.strip()
                    profit_3_1_obj_list.append(float(string))
                    profit_3_1_case_list.append(case)
                    break
            # `j` is now the reversed index at which the second pass stopped; the
            # optimized excerpt is 1 + 13 + 7 lines and the original excerpt is
            # 1 + 13 + 5 lines. The original excerpt is read 53 lines further back
            # than in the profit-3 cell.
            # NOTE(review): if no 'obj' line was found, an excerpt is still appended
            # here while the objective list is not, so the lists fall out of step.
            profit_3_1_opt_results.append([log_content[-j+4]]+log_content[-j+14:-j+27]+log_content[-j+32:-j+39])
            profit_3_1_opt_origin_results.append([log_content[-j-100]]+log_content[-j-90:-j-77]+log_content[-j-72:-j-67])

            
# split every excerpt line into whitespace-separated tokens
profit_3_1_opt_results = [[data.split() for data in case] for case in profit_3_1_opt_results]
profit_3_1_opt_origin_results = [[data.split() for data in case] for case in profit_3_1_opt_origin_results]

In [None]:
# Collect, for every run that finished the profit-3-2 optimization, the
# objective value and two excerpts of its text log (result after / before
# optimization).
profit_3_2_obj_list = []
profit_3_2_case_list = []
profit_3_2_opt_results = []
profit_3_2_opt_origin_results = []

for j_,j in enumerate(total_runs):
    if finished_profit_3_2[j_]:
        file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(j)
        # keep the case number: `j` is reused as a line index by the loops below
        case = j
        with open('./log/text/'+file_name) as f:
            log_content = f.readlines()
            
            # First pass (from the end of the log): remember the reversed index of
            # the stage header, minus 50. Every match overwrites the previous one,
            # so the match closest to the start of the file wins.
            # NOTE(review): if no line matches, starter_position keeps the value
            # from the previous case (or is undefined for the first case).
            for j, line in enumerate(reversed(log_content)):
                if '> One-step Optimization - Profit 3-2' in line:
                    starter_position = j - 50            
            
            
            # Second pass (from the end of the log): ignore reversed indices up to
            # starter_position, then stop at the first line starting with 'obj'.
            for j, line in enumerate(reversed(log_content)):

                if j <= starter_position:
                    continue
                
                if line.startswith('obj'):
                    # the objective value is the text after 'obj ' up to the newline
                    start = 'obj '
                    end = '\n'
                    string = line[line.find(start)+len(start):line.rfind(end)]
                    string = string.strip()
                    profit_3_2_obj_list.append(float(string))
                    profit_3_2_case_list.append(case)
                    break
            # `j` is now the reversed index at which the second pass stopped; both
            # excerpts are 1 + 13 + 5 lines. The original excerpt is read 106 lines
            # further back than in the profit-3 cell.
            # NOTE(review): if no 'obj' line was found, an excerpt is still appended
            # here while the objective list is not, so the lists fall out of step.
            profit_3_2_opt_results.append([log_content[-j+4]]+log_content[-j+14:-j+27]+log_content[-j+32:-j+37])
            profit_3_2_opt_origin_results.append([log_content[-j-153]]+log_content[-j-143:-j-130]+log_content[-j-125:-j-120])

# split every excerpt line into whitespace-separated tokens
profit_3_2_opt_results = [[data.split() for data in case] for case in profit_3_2_opt_results]
profit_3_2_opt_origin_results = [[data.split() for data in case] for case in profit_3_2_opt_origin_results]

In [None]:
revenue_case_list[np.argmax(revenue_obj_list)], max(revenue_obj_list)

In [None]:
profit_3_case_list[np.argmax(profit_3_obj_list)], max(profit_3_obj_list)

In [None]:
profit_3_1_case_list[np.argmax(profit_3_1_obj_list)], max(profit_3_1_obj_list)

In [None]:
profit_3_2_case_list[np.argmax(profit_3_2_obj_list)], max(profit_3_2_obj_list)

In [None]:
# One 50-bin histogram per objective, each drawn on its own figure.
for obj_values, title in (
    (revenue_obj_list, 'Obj - revenue'),
    (profit_3_obj_list, 'Obj - profit-3'),
    (profit_3_1_obj_list, 'Obj - profit-3-1'),
    (profit_3_2_obj_list, 'Obj - profit-3-2'),
):
    fig, axs = plt.subplots(figsize=(16,9))
    y, bins, patches = axs.hist(obj_values,bins=50,histtype='bar',alpha=0.7)
    axs.set_title(title)

plt.show()

---

# Optimization result: obj-revenue

### Remove duplicates and sort

In [None]:
def _revenue_field(row, col):
    """Token `col` of excerpt line `row`, as a float, for every optimized revenue case."""
    return [float(case[row][col]) for case in revenue_opt_results]


def _revenue_stage_fields(col):
    """Token `col` of excerpt lines 1..13 for every case; one output row per excerpt line."""
    return np.array([[float(obv[col]) for obv in case[1:14]] for case in revenue_opt_results]).T


# One row per case. Columns: ratio of two tokens of the first excerpt line,
# three (location, draw) pairs, three blocks of 13 per-stage values, and the
# objective value last. Excerpt lines -5 and -2 are not used.
revenue_opt_results_matrix = np.stack((
    [float(case[0][-3])/float(case[0][-2]) for case in revenue_opt_results],
    _revenue_field(-1, -1), # intermediate location
    _revenue_field(-1, -8), # intermediate draw
    _revenue_field(-4, -1), # gasoline location
    _revenue_field(-4, -8), # gasoline draw
    _revenue_field(-3, -1), # diesel location
    _revenue_field(-3, -8), # diesel draw
    *_revenue_stage_fields(1),
    *_revenue_stage_fields(6),
    *_revenue_stage_fields(5),
    list(revenue_obj_list)
),axis=1)

In [None]:
def _origin_field(row, col):
    """Token `col` of excerpt line `row`, as a float, for every pre-optimization case."""
    return [float(case[row][col]) for case in revenue_opt_origin_results]


def _origin_stage_fields(col):
    """Token `col` of excerpt lines 1..13 for every case; one output row per excerpt line."""
    return np.array([[float(obv[col]) for obv in case[1:14]] for case in revenue_opt_origin_results]).T


# Same column layout as the optimized-results matrix, but read from the
# pre-optimization excerpts; the last column is the optimized objective value.
# Excerpt lines -5 and -2 are not used.
revenue_opt_origin_matrix = np.stack((
    [float(case[0][-3])/float(case[0][-2]) for case in revenue_opt_origin_results],
    _origin_field(-1, -1), # intermediate location
    _origin_field(-1, -8), # intermediate draw
    _origin_field(-4, -1), # gasoline location
    _origin_field(-4, -8), # gasoline draw
    _origin_field(-3, -1), # diesel location
    _origin_field(-3, -8), # diesel draw
    *_origin_stage_fields(1),
    *_origin_stage_fields(6),
    *_origin_stage_fields(5),
    list(revenue_obj_list)
),axis=1)

In [None]:
# Collapse identical rows of the result matrix; also keep, for each unique row,
# the index of its first occurrence and the number of times it occurs.
revenue_opt_results_unique_unsorted, revenue_unique_index, revenue_unique_counts = \
np.unique(revenue_opt_results_matrix,axis=0,return_index=True,return_counts=True)

In [None]:
# Case number of the first run that produced each unique solution.
_finished_revenue_cases = np.array(total_runs)[finished_revenue]
revenue_unique_global_index = [_finished_revenue_cases[j] for j in revenue_unique_index]

# Unique solutions ordered by their last column (the objective value), ascending.
_unique_rows = list(revenue_opt_results_unique_unsorted)
_unique_rows.sort(key=lambda row: row[-1])
revenue_opt_results_unique = np.array(_unique_rows)

In [None]:
pd.options.display.precision = 4
pd.DataFrame(sorted([i for i in zip(revenue_opt_results_unique_unsorted[:,-1],revenue_unique_counts,revenue_unique_global_index)],\
                    key=lambda x: x[0]),columns=['Objective Value','Occurances','Example Index'])

### Raw Data

In [None]:
pd.options.display.expand_frame_repr = False
pd.options.display.max_colwidth = 80000
pd.options.display.max_columns = None
pd.options.display.max_rows = 80000

In [None]:
pd.options.display.precision = 0
pd.DataFrame(revenue_opt_results_unique[::4,7:20].T,index=['Tem{}'.format(j) for j in range(8,21)])

In [None]:
pd.options.display.precision = 0
pd.DataFrame(revenue_opt_results_unique[::4,20:33].T,index=['Catalyst {}'.format(j) for j in range(8,21)])

In [None]:
pd.options.display.precision = 2
pd.DataFrame(revenue_opt_results_unique[::4,33:-1].T,index=['Feed {}'.format(j) for j in range(8,21)]).round(2)

# Analysis

In [None]:
# Standardize every 4th unique solution (all columns except the last, the
# objective value) and fit a 10-component PCA on it.
# NOTE: this rebinds `center_scaled`, `pca` and `scores` from the earlier
# set-point analysis.
center_scaled = StandardScaler().fit_transform(revenue_opt_results_unique[::4,:-1])
pca = PCA(n_components=10)
scores = pca.fit_transform(center_scaled)

### Eigenvalues

In [None]:
pca.explained_variance_

### Captured Variance (explained variance ratio)

In [None]:
pca.explained_variance_ratio_

In [None]:
sum(pca.explained_variance_ratio_[:5])

### Correlation between PC1 and optimized objective value?

In [None]:
fig, axs = plt.subplots(2,1,figsize=(16,9))

# --- Scores: PC1 score of every solution, with its objective value on a twin axis.
# Column 0 of `scores` is PC1, matching the axis label and the loading panel below.
pc1_scores = scores[:,0]
x_loc = np.arange(1,len(pc1_scores)+1)
axs[0].bar(x_loc,pc1_scores,color = 'C1')

axs_ = axs[0].twinx()
axs_.plot(x_loc,revenue_opt_results_unique[::4,-1],'C2:o',markeredgecolor='w',markeredgewidth = 1,markersize=12)

axs[0].set_xlabel('Local Minimums')
axs[0].set_ylabel('Score - PC1')
axs_.set_ylabel('Revenue')

# --- Loadings: PC1 loading of every variable, coloured by variable family.
x_loc = np.arange(1,len(pca.components_[0])+1)
grouping = {'Reflux':slice(0,1),'Product':slice(1,7),'Temperature':slice(7,20),\
            'Catalyst':slice(20,33),'Feed':slice(33,None)}

for key in grouping:
    axs[1].bar(x_loc[grouping[key]],pca.components_[0][grouping[key]])
# tick positions are hard-coded to sit at the centre of each family
axs[1].set_xticks([1,4.5,14,27,40])
axs[1].set_xticklabels(list(grouping.keys()))

axs[1].set_ylabel('Loading - PC1')

plt.tight_layout()
plt.show()
plt.close()

### Predictions using pre-optimization data

In [None]:
# Scale the pre-optimization data with the mean/std of the data the PCA was
# fitted on (every 4th unique optimized solution, objective column excluded).
# Fitting a separate scaler on the pre-optimization data would centre and
# scale it differently, so its projection would not be comparable to `scores`.
revenue_pca_scaler = StandardScaler().fit(revenue_opt_results_unique[::4,:-1])
revenue_opt_origin_scaled = revenue_pca_scaler.transform(revenue_opt_origin_matrix[:,:-1])

In [None]:
# Project the scaled pre-optimization points onto the principal components
# of the PCA fitted above (no refit).
predicted_scores = pca.transform(revenue_opt_origin_scaled)

In [None]:
fig, axs = plt.subplots(figsize=(16,9))

targets = ['Bad','Middle','Good','Super']
colors = ['C1', 'C2','C3','C4']
# Each entry is [objective-value range, slice into `scores`, extra number].
# Only the slice (index 1) is used for plotting; the range and the trailing
# number are not read in this cell.
masks = {'Bad':[[0,21.54],slice(0,10),80],\
         'Middle':[[21.55,21.80],slice(10,20),10],\
         'Good':[[21.81,22.07],slice(20,60),10],\
         'Super':[[22.08,22.33],slice(60,87),80]}

# NOTE(review): `predicted_scores` computed above is not plotted here; this
# figure shows only the fitted `scores`. Confirm whether an overlay was intended.
for target, color in zip(targets,colors):
    mask = masks[target]
    # PC1 (column 0) vs PC2 (column 1) for the rows belonging to this class.
    axs.scatter(scores[:,0][mask[1]],scores[:,1][mask[1]],color=color,label=target)

axs.legend()
axs.set_xlabel('Principal Component 1')
axs.set_ylabel('Principal Component 2')

plt.show()
plt.close()

### Separating between "Middle" and "Good" will involve 3rd and 4th component?

In [None]:
fig, axs = plt.subplots(figsize=(16,9))

targets = ['Middle','Good']
colors = ['C2','C3']
# Each entry is [objective-value range, slice into `scores`, extra number];
# only the slice (index 1) is used for plotting.
# NOTE(review): in the preceding PC1/PC2 figure slice(20,60) is labelled
# 'Good' and slice(60,87) 'Super', but here the same slices are labelled
# 'Middle' and 'Good'. Confirm which labelling is intended.
masks = {'Middle':[[21.55,21.80],slice(20,60),20],\
         'Good':[[21.81,22.07],slice(60,87),20]}

for target, color in zip(targets,colors):
    mask = masks[target]
    # PC3 (column 2) vs PC4 (column 3) for the rows belonging to this class.
    axs.scatter(scores[:,2][mask[1]],scores[:,3][mask[1]],color=color,label=target)

axs.legend()
axs.set_xlabel('Principal Component 3')
axs.set_ylabel('Principal Component 4')

plt.show()
plt.close()

---

# Optimization results: profit-3

### Remove duplicates and sort

In [None]:
# Bring the display limits back down to 80 for the summary table below.
pd.set_option('display.max_colwidth', 80)
pd.set_option('display.max_rows', 80)

In [None]:
# Feature matrix of the profit-3 results: one row per entry of
# profit_3_opt_results, one column per quantity below (np.stack with axis=1
# turns each per-case sequence into a column).
# Column layout, as used by the slices later in this section:
#   0      ratio of two fields of row 0 (labelled 'Reflux' in the loading plot)
#   1-6    location/draw pairs for intermediate, gasoline, diesel
#   7-19   temperature, 20-32 catalyst, 33-45 feed (rows 1..13 of each case)
#   46     reflux tray, 47 total feed, 48 objective value
# Product, reflux-tray and total-feed rows are addressed from the END of each case.
profit_3_opt_results_matrix = np.stack((
    [float(case[0][-3])/float(case[0][-2]) for case in profit_3_opt_results],
    [float(case[-3][-1]) for case in profit_3_opt_results], # intermediate location
    [float(case[-3][-8]) for case in profit_3_opt_results], # intermediate draw
    [float(case[-6][-1]) for case in profit_3_opt_results], # gasoline location
    [float(case[-6][-8]) for case in profit_3_opt_results], # gasoline draw
    [float(case[-5][-1]) for case in profit_3_opt_results], # diesel location
    [float(case[-5][-8]) for case in profit_3_opt_results], # diesel draw
#     [float(case[-7][-8]) for case in profit_3_opt_results], # naphtha draw
#     [float(case[-4][-8]) for case in profit_3_opt_results], # heavy draw
    *np.array([[float(obv[1]) for obv in case[1:14]] for case in profit_3_opt_results]).T, # Temperature
    *np.array([[float(obv[6]) for obv in case[1:14]] for case in profit_3_opt_results]).T, # Catalyst
    *np.array([[float(obv[5]) for obv in case[1:14]] for case in profit_3_opt_results]).T, # Feed
    [float(case[-2][-1]) for case in profit_3_opt_results], # Reflux tray
    [float(case[-1][-1]) for case in profit_3_opt_results], # Total feed
    [obj for obj in profit_3_obj_list]
),axis=1)

In [None]:
# Feature matrix of the profit-3 pre-optimization data: one row per entry of
# profit_3_opt_origin_results, one column per quantity below.
# The product rows sit two rows closer to the end of each case than in
# profit_3_opt_results (-1/-4/-3 here vs -3/-6/-5 there), and there are no
# reflux-tray / total-feed columns, so this matrix has two fewer columns.
# The last column reuses profit_3_obj_list, i.e. the same objective values as
# the results matrix.
# NOTE(review): this assumes both result lists are in the same case order -- confirm.
profit_3_opt_origin_matrix = np.stack((
    [float(case[0][-3])/float(case[0][-2]) for case in profit_3_opt_origin_results],
    [float(case[-1][-1]) for case in profit_3_opt_origin_results], # intermediate location
    [float(case[-1][-8]) for case in profit_3_opt_origin_results], # intermediate draw
    [float(case[-4][-1]) for case in profit_3_opt_origin_results], # gasoline location
    [float(case[-4][-8]) for case in profit_3_opt_origin_results], # gasoline draw
    [float(case[-3][-1]) for case in profit_3_opt_origin_results], # diesel location
    [float(case[-3][-8]) for case in profit_3_opt_origin_results], # diesel draw
#     [float(case[-5][-8]) for case in profit_3_opt_origin_results], # naphtha draw
#     [float(case[-2][-8]) for case in profit_3_opt_origin_results], # heavy draw
    *np.array([[float(obv[1]) for obv in case[1:14]] for case in profit_3_opt_origin_results]).T, # Temperature
    *np.array([[float(obv[6]) for obv in case[1:14]] for case in profit_3_opt_origin_results]).T, # Catalyst
    *np.array([[float(obv[5]) for obv in case[1:14]] for case in profit_3_opt_origin_results]).T, # Feed
    [obj for obj in profit_3_obj_list]
),axis=1)

In [None]:
# Collapse identical rows of the result matrix. Also returned: the index of
# the first occurrence of each unique row and how many times it occurs.
# NOTE(review): rows are compared by exact float equality, so solutions that
# differ only by numerical noise are counted as distinct.
profit_3_opt_results_unique_unsorted, profit_3_unique_index, profit_3_unique_counts = np.unique(profit_3_opt_results_matrix,axis=0,return_index=True,return_counts=True)

In [None]:
# Run numbers of the runs that finished the profit-3 step, in log order.
# Built once here instead of once per unique index inside the comprehension.
profit_3_finished_run_ids = np.array(total_runs)[finished_profit_3]
# Translate each first-occurrence index from np.unique into a run number.
profit_3_unique_global_index = [profit_3_finished_run_ids[j] for j in profit_3_unique_index]
# Unique solutions ordered by their last column (the objective value), ascending.
profit_3_opt_results_unique = np.array(sorted(profit_3_opt_results_unique_unsorted,key=lambda x: x[-1]))

In [None]:
pd.set_option('display.precision', 4)
# One row per unique solution: its objective value, its count, and one run
# number that produced it. Rows are ordered by objective value.
profit_3_summary_rows = sorted(
    zip(profit_3_opt_results_unique_unsorted[:,-1], profit_3_unique_counts, profit_3_unique_global_index),
    key=lambda row: row[0],
)
pd.DataFrame(profit_3_summary_rows, columns=['Objective Value','Occurances','Example Index'])

### Raw Data

In [None]:
# Lift pandas' display limits so the wide raw-data tables below print in full.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 80000)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 80000)

In [None]:
pd.set_option('display.precision', 0)
# Temperature columns (7..19) of every 4th unique profit-3 solution,
# transposed so each solution is a column.
profit_3_temperature_labels = ['Tem{}'.format(j) for j in range(8,21)]
pd.DataFrame(profit_3_opt_results_unique[::4,7:20].T, index=profit_3_temperature_labels)

In [None]:
pd.set_option('display.precision', 0)
# Catalyst columns (20..32) of every 4th unique profit-3 solution,
# transposed so each solution is a column.
profit_3_catalyst_labels = ['Catalyst {}'.format(j) for j in range(8,21)]
pd.DataFrame(profit_3_opt_results_unique[::4,20:33].T, index=profit_3_catalyst_labels)

In [None]:
pd.set_option('display.precision', 2)
# Feed columns of every 4th unique profit-3 solution: from column 33 up to,
# but not including, the last three (reflux tray, total feed, objective).
profit_3_feed_labels = ['Feed {}'.format(j) for j in range(8,21)]
profit_3_feed_table = pd.DataFrame(profit_3_opt_results_unique[::4,33:-3].T, index=profit_3_feed_labels)
profit_3_feed_table.round(2)

# Analysis

In [None]:
# Standardize every 4th unique profit-3 solution (objective column excluded)
# and fit a 10-component PCA on it.
# This rebinds `center_scaled`, `pca` and `scores`, replacing the revenue
# analysis objects of the same names defined earlier in the notebook.
center_scaled = StandardScaler().fit_transform(profit_3_opt_results_unique[::4,:-1])
pca = PCA(n_components=10)
scores = pca.fit_transform(center_scaled)

### Eigenvalues

In [None]:
# Variance explained by each of the 10 retained components.
pca.explained_variance_

### Captured Variance (explained variance ratio)

In [None]:
# Fraction of the total variance explained by each retained component.
pca.explained_variance_ratio_

In [None]:
# Cumulative fraction of variance explained by the first five components.
sum(pca.explained_variance_ratio_[:5])

### Correlation between PC1 and optimized objective value?

In [None]:
fig, axs = plt.subplots(2,1,figsize=(16,9))

# ---- Upper panel: PC1 score per solution (bars) with its objective on a twin axis ----
x_loc = np.arange(1, scores.shape[0]+1)
axs[0].bar(x_loc, scores[:,0], color='C1')

axs_ = axs[0].twinx()
axs_.plot(
    x_loc,
    profit_3_opt_results_unique[::4,-1],
    'C2:o',
    markeredgecolor='w',
    markeredgewidth=1,
    markersize=12,
)

axs[0].set_xlabel('Local Minimums')
axs[0].set_ylabel('Score - PC1')
axs_.set_ylabel('Profit-3')

# ---- Lower panel: PC1 loading of every feature, one bar colour per group ----
x_loc = np.arange(1, len(pca.components_[0])+1)
# Column ranges of the feature matrix that make up each variable group.
grouping = {
    'Reflux': slice(0,1),
    'Product': slice(1,7),
    'Temperature': slice(7,20),
    'Catalyst': slice(20,33),
    'Feed': slice(33,46),
    'R-Tray': slice(46,47),
    'T-Feed': slice(47,48),
}

for key, span in grouping.items():
    axs[1].bar(x_loc[span], pca.components_[0][span])
# One tick at the centre of each group, labelled with the group name.
axs[1].set_xticks([1,4.5,14,27,40,47,48])
axs[1].set_xticklabels(list(grouping))

axs[1].set_ylabel('Loading - PC1')

plt.tight_layout()
plt.show()
plt.close()

---

# Average Temp Comparison: Success vs Failure

In [None]:
# Keep only the revenue failures that are not also listed as DDF-product
# failures.
revenue_failure_case_ = [
    failed_case
    for failed_case in revenue_failure_case
    if not (failed_case in DDF_product_failure_case)
]

In [None]:
# Run numbers whose revenue step succeeded: total_runs and finished_revenue
# are parallel lists, so pair them up and keep the runs flagged True.
revenue_success_case_ = [
    run_number
    for run_number, revenue_ok in zip(total_runs, finished_revenue)
    if revenue_ok
]

In [None]:
# For every run that failed the revenue step, read its text log and collect
# the 13 lines that sit 16..28 lines below the LAST
# '> Added DDF formulation - Product' marker. Field 1 of each of those lines
# is kept (presumably the temperature column, as in the result matrices
# earlier in the notebook -- confirm against a log file).
initial_temp_revenue_failure_rows = []

for case_id in revenue_failure_case_:
    file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(case_id)
    with open('./log/text/'+file_name) as f:
        log_content = f.readlines()

    # Scan backwards for the last marker line and keep its ABSOLUTE index.
    # The original sliced with negative offsets (-j+15:-j+28), which silently
    # switch to indexing from the start of the file when the marker is within
    # 28 lines of the end, and reused `j` for both loops.
    marker_idx = None
    for idx in range(len(log_content)-1, -1, -1):
        if '> Added DDF formulation - Product' in log_content[idx]:
            marker_idx = idx
            break
    if marker_idx is None:
        # Without the marker there is nothing meaningful to slice.
        raise ValueError("marker '> Added DDF formulation - Product' not found in ./log/text/" + file_name)

    initial_temp_revenue_failure_rows.append(log_content[marker_idx+16:marker_idx+29])

# One row per case, one column per captured line: field 1 parsed as float.
initial_temp_revenue_failure = np.array(
    [[float(data.split()[1]) for data in case] for case in initial_temp_revenue_failure_rows]
)

In [None]:
# For every run that finished the revenue step, read its text log and collect
# the 13 lines that sit 16..28 lines below the LAST
# '> Added DDF formulation - Product' marker. Field 1 of each of those lines
# is kept (presumably the temperature column, as in the result matrices
# earlier in the notebook -- confirm against a log file).
initial_temp_revenue_success_rows = []

for case_id in revenue_success_case_:
    file_name = 'mul_onestep_Preset_Case_{}_pf.dat'.format(case_id)
    with open('./log/text/'+file_name) as f:
        log_content = f.readlines()

    # Scan backwards for the last marker line and keep its ABSOLUTE index.
    # The original sliced with negative offsets (-j+15:-j+28), which silently
    # switch to indexing from the start of the file when the marker is within
    # 28 lines of the end, and reused `j` for both loops.
    marker_idx = None
    for idx in range(len(log_content)-1, -1, -1):
        if '> Added DDF formulation - Product' in log_content[idx]:
            marker_idx = idx
            break
    if marker_idx is None:
        # Without the marker there is nothing meaningful to slice.
        raise ValueError("marker '> Added DDF formulation - Product' not found in ./log/text/" + file_name)

    initial_temp_revenue_success_rows.append(log_content[marker_idx+16:marker_idx+29])

# One row per case, one column per captured line: field 1 parsed as float.
initial_temp_revenue_success = np.array(
    [[float(data.split()[1]) for data in case] for case in initial_temp_revenue_success_rows]
)

In [None]:
# Column-wise mean over the failed cases: one average per captured log line.
ave_temp_revenue_failure = initial_temp_revenue_failure.mean(axis=0)

In [None]:
# Column-wise mean over the successful cases: one average per captured log line.
ave_temp_revenue_success = initial_temp_revenue_success.mean(axis=0)

In [None]:
# Display the per-position average for runs that failed the revenue step.
ave_temp_revenue_failure

In [None]:
# Display the per-position average for runs that finished the revenue step.
ave_temp_revenue_success

---

# Restoration Fail

In [20]:
# Display the run numbers in restoration_fail_case (built earlier in the notebook).
restoration_fail_case

[74, 124, 168, 233, 263, 419, 453, 459, 552, 778, 782, 1]

In [23]:
# For each restoration-failure case, read the solver output file and, when the
# last 'Number of Iterations....' line lies within the final 50 lines, capture
# the two lines located 3 and 4 lines below it.
# Presumably these are the solver's objective and dual-infeasibility summary
# lines -- confirm against an actual .output file.
restoration_phase = []
restoration_phase_case = []

for case in restoration_fail_case:
    file_name = 'Preset_Case_{}_pf.output'.format(case)
    with open('./tmp/'+file_name) as f:
        log_content = f.readlines()

    # `offset` = distance of the last marker line from the end of the file.
    for offset, line in enumerate(reversed(log_content)):
        if line.startswith('Number of Iterations....'):
            break
    else:
        # No marker (or empty file): skip the case. The original could append
        # an unrelated slice here when the file was shorter than 50 lines.
        continue

    if offset < 50:
        # Use the absolute index; the original negative-offset slice
        # (-j+2:-j+4) wraps to the start of the file for small offsets.
        marker_idx = len(log_content) - 1 - offset
        restoration_phase.append(log_content[marker_idx+3:marker_idx+5])
        restoration_phase_case.append(case)

# Tokenize the captured lines, then take field 1 of the first line and
# field 2 of the second line of every case as floats (shape: n_cases x 1).
restoration_phase = [[data.split() for data in case] for case in restoration_phase]
restoration_phase_obj = np.array([[float(obv[1]) for obv in case[0:1]] for case in restoration_phase])
restoration_phase_inf = np.array([[float(obv[2]) for obv in case[1:2]] for case in restoration_phase])

In [32]:
# One line per captured case: case number, then the single value stored in
# the matching row of restoration_phase_obj and restoration_phase_inf.
for case, obj_row, inf_row in zip(restoration_phase_case, restoration_phase_obj, restoration_phase_inf):
    print('case:\t{}\t\t{:2.2f}\t\t{:.2e}'.format(case, obj_row[0], inf_row[0]))

case:	124		-17.27		9.90e+12
case:	168		-24.26		1.72e+16
case:	419		-0.01		1.33e+17
case:	453		-10.69		1.00e+02
case:	459		4.51		8.97e+16
case:	552		3.58		1.01e+02
case:	778		-16.54		5.33e+07
case:	782		8.03		2.49e+16
case:	1		-4.51		9.01e+07
