# Cost Discovery
Understand how the cost calculation works and which features it uses

## Root folder and read env variables

In [1]:
import os
# Make the parent of the current working directory the new working directory, so that
# relative paths used to read inputs and save outputs resolve from there.
# (assumes the kernel starts one folder below the project root — TODO confirm)
# os.path.dirname is used instead of splitting on '\\' so the cell also works on
# systems whose path separator is not a backslash.
# NOTE: every execution of this cell moves one more level up; run it once per kernel.
actual_path = os.path.abspath(os.getcwd())
root_path = os.path.dirname(actual_path)
os.chdir(root_path)
print('root path: ', root_path)

root path:  D:\github-mi-repo\Optimization-Industrial-Process


In [2]:
import os
from dotenv import load_dotenv, find_dotenv # package used in jupyter notebook to read the variables in file .env

""" get env variable from .env """
load_dotenv(find_dotenv())

""" Read env variables and save it as python variable """
PROJECT_GCP = os.environ.get("PROJECT_GCP", "")

## RUN

In [3]:
import pandas as pd
import numpy as np
from google.cloud import bigquery
import gcsfs
import pickle

In [4]:
### development

# Use the project name read from the PROJECT_GCP env variable as the active gcloud project.
PROJECT_ID = PROJECT_GCP
# IPython shell escape: $PROJECT_ID is replaced with the value of the Python variable above.
! gcloud config set project $PROJECT_ID

Updated property [core/project].


### 1. Read data

In [5]:
# Load the pickled dataset; the path is relative to the current working directory.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
path_data = 'artifacts/data/data.pkl'
data = pd.read_pickle(path_data)
# Preview the first rows
data.head()

Unnamed: 0_level_0,230AIT446.PNT,240AIC022.MEAS,240AIC126.MEAS,240AIC224.MEAS,240AIC286.MEAS,240AIC324.MEAS,240AIC433.MEAS,240AIT063A.PNT,240AIT063B.PNT,240AIT225A.PNT,...,S240ALDP022,S240ALDP031,S240ALDP032,S276PER002,S2MAQUINAT07,S76ALE017,SSTRIPPING015,calc_prod_d0,calc_prod_d1,calc_prod_p
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-01 00:05:00,11.55504,2.983948,11.346645,4.413519,4.352375,10.441675,4.292521,5.86932,62.37495,1.837519,...,91.49,1.8,11.4,11.77,1.5712,173.6,964.0,3240.8635,3313.6215,3259.3745
2021-01-01 00:10:00,11.55232,3.015669,11.353215,4.413179,4.347186,10.43217,4.289684,5.86932,62.37495,1.81402,...,91.49,1.8,11.4,11.77,1.5712,173.6,964.0,3260.7475,3301.692,3208.6785
2021-01-01 00:15:00,11.549955,3.018903,11.355525,4.408321,4.355828,10.410115,4.284427,5.86932,62.37495,1.81402,...,91.49,1.8,11.4,11.77,1.5712,173.6,964.0,3265.5765,3284.133,3210.779
2021-01-01 00:20:00,11.547145,3.001164,11.326725,4.408659,4.361292,10.379145,4.285478,5.83575,62.37495,1.81402,...,91.49,1.7,11.3,11.77,1.5712,173.6,964.0,3253.775,3271.926,3221.7745
2021-01-01 00:25:00,11.54316,3.017393,11.336345,4.408596,4.356374,10.387205,4.304148,5.802179,62.37495,1.81402,...,91.49,1.6,11.2,11.77,1.5712,173.6,964.0,3236.979,3267.305,3227.6935


### 2. Read Chemical Prices

In [6]:
# The chemical prices are used in the optimization part.
# The Excel file is read into a dataframe with one column per chemical.
path_price_chemicals = 'config/config_optimization/optimization_engine/price-chemicals.xlsx'
price_chemicals = pd.read_excel(path_price_chemicals)
# Display the price table
price_chemicals

Unnamed: 0,acido,peroxido,soda,oxigeno,dioxido
0,0.275,0.698,0.655,0.092,1.4


### 3. Costs D0EOP

In [7]:
def costs_bleaching_sf2_d0eop(df_blanq, df_precios):
    '''
    Compute the costs of the D0EOP model.

    Parameters:
        -df_blanq: dataframe with the consumptions. Must contain the columns
         '240FY050.RO02', '240FY107A.RO01', '240FY11PB.RO01' and '240FY118B.RO01'.
        -df_precios: dataframe with the prices. Must contain the columns
         'dioxido', 'soda', 'peroxido' and 'oxigeno'. Only the prices used by
         this model are read, so other price columns are optional.
         (presumably a single-row table, so one price applies to every row
         of df_blanq — verify against caller)

    Returns:
        The 'costo_Adt_D0EOP' column of df_blanq.

    Side effects:
        Adds (or overwrites) the columns 'costo_Adt_D0', 'costo_Adt_EOP' and
        'costo_Adt_D0EOP' in df_blanq itself.
    '''
    # prices (numpy arrays, broadcast against the consumption columns)
    precio_dioxido = df_precios['dioxido'].values
    precio_soda = df_precios['soda'].values
    precio_peroxido = df_precios['peroxido'].values
    precio_oxigeno = df_precios['oxigeno'].values

    # features
    especifico_dioxido_d0 = '240FY050.RO02' # vc
    especifico_soda_eop = '240FY107A.RO01' # vc
    especifico_peroxido_eop = '240FY11PB.RO01' # vc
    especifico_oxigeno_eop = '240FY118B.RO01' # vc

    # calculate costs: consumption * price, summed per stage
    df_blanq['costo_Adt_D0'] = df_blanq[especifico_dioxido_d0] * precio_dioxido
    df_blanq['costo_Adt_EOP'] = (
        df_blanq[especifico_soda_eop] * precio_soda
        + df_blanq[especifico_peroxido_eop] * precio_peroxido
        + df_blanq[especifico_oxigeno_eop] * precio_oxigeno
    )

    # total cost of both stages
    df_blanq['costo_Adt_D0EOP'] = df_blanq['costo_Adt_D0'] + df_blanq['costo_Adt_EOP']
    return df_blanq['costo_Adt_D0EOP']

In [8]:
# evaluate if the code to calculate costs using VC and prices works
costs_bleaching_sf2_d0eop(df_blanq = data, 
                          df_precios = price_chemicals
                         )

datetime
2021-01-01 00:05:00    18.732232
2021-01-01 00:10:00    18.585658
2021-01-01 00:15:00    18.529378
2021-01-01 00:20:00    18.591321
2021-01-01 00:25:00    18.595393
                         ...    
2022-12-31 23:45:00    22.143516
2022-12-31 23:50:00    22.175825
2022-12-31 23:55:00    22.288313
2023-01-01 00:00:00    22.214813
2023-01-01 00:05:00    22.121555
Name: costo_Adt_D0EOP, Length: 143200, dtype: float64

### 4. Costs D1

In [9]:
def costs_bleaching_sf2_d1(df_blanq, df_precios):
    '''
    Compute the costs of the D1 model.

    Parameters:
        -df_blanq: dataframe with the consumptions. Must contain the columns
         '240FY210A.RO01' and '240FY218.RO02'.
        -df_precios: dataframe with the prices. Must contain the columns
         'acido' and 'dioxido'. Only the prices used by this model are read,
         so other price columns are optional.
         (presumably a single-row table, so one price applies to every row
         of df_blanq — verify against caller)

    Returns:
        The 'costo_Adt_D1' column of df_blanq.

    Side effects:
        Adds (or overwrites) the column 'costo_Adt_D1' in df_blanq itself.
    '''
    # prices (numpy arrays, broadcast against the consumption columns)
    precio_acido = df_precios['acido'].values
    precio_dioxido = df_precios['dioxido'].values

    # features
    especifico_acido_d1 = '240FY210A.RO01'
    especifico_dioxido_d1 = '240FY218.RO02'

    # calculate costs: consumption * price, summed over the chemicals
    df_blanq['costo_Adt_D1'] = (
        df_blanq[especifico_acido_d1] * precio_acido
        + df_blanq[especifico_dioxido_d1] * precio_dioxido
    )
    return df_blanq['costo_Adt_D1']

In [10]:
# evaluate if the code to calculate costs using VC and prices works
costs_bleaching_sf2_d1(df_blanq = data, 
                       df_precios = price_chemicals
                         )

datetime
2021-01-01 00:05:00    3.878702
2021-01-01 00:10:00    3.901367
2021-01-01 00:15:00    3.910487
2021-01-01 00:20:00    3.937230
2021-01-01 00:25:00    3.918317
                         ...   
2022-12-31 23:45:00    6.480512
2022-12-31 23:50:00    6.442193
2022-12-31 23:55:00    6.446304
2023-01-01 00:00:00    6.509864
2023-01-01 00:05:00    6.512968
Name: costo_Adt_D1, Length: 143200, dtype: float64

### 5. Costs P

In [11]:
def costs_bleaching_sf2_p(df_blanq, df_precios):
    '''
    Compute the costs of the P model.

    Parameters:
        -df_blanq: dataframe with the consumptions. Must contain the columns
         '240FY430.RO01', '240FY312.RO01' and '240FY397.RO01'.
        -df_precios: dataframe with the prices. Must contain the columns
         'acido', 'soda' and 'peroxido'. Only the prices used by this model
         are read, so other price columns are optional.
         (presumably a single-row table, so one price applies to every row
         of df_blanq — verify against caller)

    Returns:
        The 'costo_Adt_P' column of df_blanq.

    Side effects:
        Adds (or overwrites) the column 'costo_Adt_P' in df_blanq itself.
    '''
    # prices (numpy arrays, broadcast against the consumption columns)
    precio_acido = df_precios['acido'].values
    precio_soda = df_precios['soda'].values
    precio_peroxido = df_precios['peroxido'].values

    # features
    especifico_acido_p = '240FY430.RO01'
    especifico_soda_p = '240FY312.RO01'
    especifico_peroxido_p = '240FY397.RO01'

    # calculate costs: consumption * price, summed over the chemicals
    df_blanq['costo_Adt_P'] = (
        df_blanq[especifico_acido_p] * precio_acido
        + df_blanq[especifico_soda_p] * precio_soda
        + df_blanq[especifico_peroxido_p] * precio_peroxido
    )
    return df_blanq['costo_Adt_P']

In [12]:
# evaluate if the code to calculate costs using VC and prices works
costs_bleaching_sf2_p(df_blanq = data, 
                      df_precios = price_chemicals
                     )

datetime
2021-01-01 00:05:00    2.480980
2021-01-01 00:10:00    2.480704
2021-01-01 00:15:00    2.469222
2021-01-01 00:20:00    2.481941
2021-01-01 00:25:00    2.480421
                         ...   
2022-12-31 23:45:00    5.285881
2022-12-31 23:50:00    5.274175
2022-12-31 23:55:00    5.250339
2023-01-01 00:00:00    5.248585
2023-01-01 00:05:00    5.264989
Name: costo_Adt_P, Length: 143200, dtype: float64