In [3]:
import pandas as pd
import numpy as np
import win32com.client
import time

In [47]:
# Workbook to parse. Raw string: the Windows path contains backslashes that
# must not be treated as (invalid) escape sequences by the Python parser.
file = r"C:/Users\lukec\OneDrive - University of Cambridge\Projects\PhD\Data\IHS\Ethylene\ETHYLENE FROM WIDE RANGE NAPHTHA, MAXIMUM ETHYLENE, FRONT END DEMETHANIZER - Germany.xlsx"
footer = 11      # trailing rows to ignore (passed to read_excel as skipfooter)
headerLines = 5  # row offset of the table header row inside the loaded sheet
colSep = 6       # column position separating the materials table from the cost table

In [52]:
def refresh_excel_file(file:str, pause=False):
    """Refresh an Excel workbook so formula cells hold computed numbers.

    The workbook is opened in a dedicated Excel instance, refreshed, saved
    and closed again.

    Parameters
    ----------
    file : str
        Path of the workbook to open.
    pause : float or False, optional
        Seconds to wait between ``RefreshAll`` and ``Save``. Falsy values
        (the default) skip the wait.

    Returns
    -------
    str
        ``file`` unchanged, so the call can be nested inside a reader such
        as ``pd.read_excel(refresh_excel_file(path))``.
    """
    xlapp = win32com.client.DispatchEx("Excel.Application")
    try:
        wb = xlapp.Workbooks.Open(file)
        try:
            wb.RefreshAll()
            if pause:
                time.sleep(pause)
            wb.Save()
        finally:
            # On success everything is already saved; on failure discard the
            # changes instead of letting Excel wait on a "save?" prompt.
            wb.Close(False)
    finally:
        # Always shut the Excel instance down, otherwise a failed refresh
        # leaves an orphaned Excel process behind.
        xlapp.Quit()
    return file

In [167]:
def extract_properties(df):
    """Parse the sheet's header cells into a ``{property: value}`` dict.

    The first cell of row ``2`` is read as a comma-separated list of
    ``"Key: value"`` pairs; the first cell of row ``1`` is stored under the
    key ``"Name"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet whose row labels ``1`` and ``2`` hold the name and the
        property string in their first column.

    Returns
    -------
    dict
        Property names mapped to their string values, in order of
        appearance, with ``"Name"`` added last.

    Raises
    ------
    ValueError
        If the property string starts with a fragment that has no
        ``"Key: "`` prefix.
    """
    # Row lookup stays label-based; the column lookup is explicitly positional
    # (``row[0]`` on a label-indexed Series is deprecated in pandas).
    name_cell = df.loc[1].iloc[0]
    header_cell = df.loc[2].iloc[0]

    properties = {}
    last_key = None
    for fragment in header_cell.split(","):
        fragment = fragment.strip()
        if not fragment:
            continue
        # Split on the first ": " only, so values may themselves contain ": ".
        key, separator, value = fragment.partition(": ")
        if separator:
            properties[key] = value
            last_key = key
        elif last_key is not None:
            # No key in this fragment: it is the tail of a value that itself
            # contained a comma (e.g. "United States, Gulf Coast").
            properties[last_key] += ", " + fragment
        else:
            raise ValueError(f"Property string does not start with 'Key: value': {fragment!r}")

    properties["Name"] = name_cell
    return properties

def add_property_columns(df, properties):
    """Add one constant column per property to ``df`` (in place) and return it.

    The property values are also printed as a list before being assigned.
    """
    names = list(properties.keys())
    values = list(properties.values())
    print(values)
    # Each value is a scalar, so it fills every row of its column.
    df[names] = values
    return df

def excel_multilayer_table_extract(df, dataType='Attribute', header=0, colStart=0, colEnd=-1, properties=False):
    """Flatten a sectioned ("multilayer") sheet table into a tidy DataFrame.

    The table is expected to consist of a header row followed by blocks of
    consecutive rows. The first row of each block holds the block title in
    the first column; that title is copied into a ``Type`` column for every
    row of the block. Rows with no value in the first column, and rows with
    no value in any data column (such as the title rows), are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Loaded sheet; must have an integer row index. It is not modified.
    dataType : str
        Name given to the first extracted column.
    header : int
        Row position of the table's header row inside ``df``.
    colStart, colEnd : int
        Column position slice ``[colStart:colEnd]`` of the table inside
        ``df`` (the default ``colEnd=-1`` excludes the last column).
    properties : dict or False
        If truthy, passed to ``add_property_columns`` to append constant
        columns to the result.

    Returns
    -------
    pandas.DataFrame
        The extracted rows with a fresh 0..n-1 index.
    """
    # Explicit copy: everything below edits ``data`` only, never a view of the
    # caller's sheet (avoids SettingWithCopyWarning).
    data = df.iloc[header:, colStart:colEnd].copy()

    # First row is the header; unnamed header cells become 'Unit'. NaN must be
    # detected by value (pd.isna), not identity -- not every NaN is np.nan.
    header_labels = data.iloc[0, 1:].tolist()
    data.columns = [dataType] + ['Unit' if pd.isna(label) else label for label in header_labels]
    data = data.dropna(subset=[data.columns[0]])

    # A jump in the remaining row labels marks the start of a new block.
    row_labels = data.index.to_numpy()
    block_starts = np.concatenate(([0], np.flatnonzero(np.diff(row_labels) != 1) + 1))

    # For every row, find the latest block start at or before it and take that
    # row's first-column value as the block title.
    positions = np.arange(len(data))
    owning_start = block_starts[np.searchsorted(block_starts, positions, side='right') - 1]
    data['Type'] = data.iloc[:, 0].to_numpy()[owning_start]

    # Drop rows that are empty in every column between the first and 'Type'.
    data = data.dropna(subset=list(data.columns[1:-1]), how='all').reset_index(drop=True)
    if properties:
        data = add_property_columns(data, properties)
    return data

In [171]:
# Import sheet: refresh the workbook through Excel, then load it without its
# trailing footer rows.
sheet = pd.read_excel(refresh_excel_file(file), skipfooter=footer)
properties = extract_properties(sheet)

# Materials table: columns before colSep.
materials = excel_multilayer_table_extract(
    sheet,
    dataType='Material',
    header=headerLines,
    colEnd=colSep,
    properties=properties,
)

# Cost table: columns after colSep (the default colEnd drops the last column).
cost = excel_multilayer_table_extract(
    sheet,
    dataType='Cost',
    header=headerLines,
    colStart=colSep+1,
    properties=properties,
)

['Germany', 'ETHYLENE', '1087', '135.83 ¢/KG', '2M-1622', '1.06 (1US $ = 0.85)', '2021 Q3', '2014', 'ETHYLENE FROM WIDE RANGE NAPHTHA, MAXIMUM ETHYLENE, FRONT END DEMETHANIZER']


In [127]:
from pandas_xlsx_tables import df_to_xlsx_table
# Export the extracted cost table through pandas_xlsx_tables. "Costs" is
# presumably the Excel table name (and output file stem) -- TODO confirm
# against the library documentation.
df_to_xlsx_table(cost, "Costs")

In [None]:
#!pip install pivottablejs
# from pivottablejs import pivot_ui
# pivot_ui(cost,outfile_path='pivottablejs.html')

# Extract from multiline file

In [None]:
import pyarrow

In [37]:
def del_empty_cols(df):
    """Return ``df`` without its all-NaN columns and with a fresh 0..n-1 row index."""
    non_empty = df.dropna(how='all', axis='columns')
    return non_empty.reset_index(drop=True)

class ProcessData(object):
    """Load the horizontal process workbook and split it by row type.

    Attributes
    ----------
    filePath : str
        Workbook that was read.
    data : pandas.DataFrame
        Sheet contents without all-empty columns and all-empty rows.
    products : pandas.DataFrame
        Rows whose ``Type`` is ``'Product'``, without all-empty columns.
    materials : pandas.DataFrame
        All other rows, without all-empty columns.
    """

    # Raw string: the backslashes are path separators, not escape sequences.
    DEFAULT_FILE_PATH = r"C:/Users\lshc3\Documents\IHS_data\ethylene_demo\ethylene_all_horizontal_english.xlsx"

    def __init__(self, filePath=None):
        """Read the workbook at ``filePath`` (default: ``DEFAULT_FILE_PATH``)."""
        self.filePath = self.DEFAULT_FILE_PATH if filePath is None else filePath
        # The column header is on sheet row 6; the last sheet row is skipped.
        raw = pd.read_excel(self.filePath, header=6, skipfooter=1)
        self.data = del_empty_cols(raw).dropna(axis=0, how='all')
        is_product = self.data['Type'] == 'Product'
        self.products = del_empty_cols(self.data.loc[is_product])
        self.materials = del_empty_cols(self.data.loc[~is_product])

In [61]:
# Build the dataset here so the notebook runs on a fresh kernel: no earlier
# cell defines `data`.
data = ProcessData()
# Forward slash: portable, and avoids the invalid "\m" escape sequence.
data.materials.to_feather('./materials.feather')

## Interacting

In [44]:
import ipywidgets as ipw

In [None]:
# One multi-select widget per product column, offering that column's unique values.
selector_columns = ['Name', 'Process', 'Geography', 'Research Year']
selectors = [
    ipw.SelectMultiple(
        options=data.products[column].unique(),
        rows=10,
        description=column,
        disabled=False,
    )
    for column in selector_columns
]

In [47]:
# Single multi-select widget over the unique values of one product column.
i = 'Name'
sel = ipw.SelectMultiple(
    options=data.products[i].unique(),
    rows=10,
    description=i,
    disabled=False,
)

In [56]:
# Forward slash: matches the './products.feather' path used when the file is
# read back, is portable, and avoids the invalid "\p" escape sequence.
data.products.to_feather('./products.feather')

In [51]:
# Display the materials table (the rows whose Type is not 'Product').
data.materials

Unnamed: 0,Code,Geography,Data Version,Research Year,Process,Type,Name,Unit Cost,Cost Unit,Unit Consumption,Consumption Unit,Variable Cost
0,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,Raw Material,CATALYST,,,,,1.930000
1,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,Raw Material,CAUSTIC SODA (50%),24.500000,LB,0.006260,LB,0.153370
2,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,Raw Material,"METHANOL, CRUDE",109.000000,GAL,0.894110,GAL,97.457990
3,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,By-Product,C4-C5 MIXTURE,30.330000,LB,-0.436700,LB,-13.245111
4,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,By-Product,ETHANE-RICH GAS,11.390000,LB,-0.031340,LB,-0.356963
...,...,...,...,...,...,...,...,...,...,...,...,...
2917,4M-1603,"China, Shanghai",2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,By-Product,PROPANE-RICH GAS,62.236497,KG,-0.027350,TONNE,-1.702168
2918,4M-1603,"China, Shanghai",2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,By-Product,"PROPYLENE, CHEM GRADE",104.168419,KG,-1.111190,TONNE,-115.750905
2919,4M-1603,"China, Shanghai",2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,Utilities,COOLING WATER,4.596596,M3,262.880116,M3,1.208354
2920,4M-1603,"China, Shanghai",2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,Utilities,ELECTRICITY,10.970000,KWH,108.026508,KWH,1.185051


In [59]:
# Read ./products.feather from the working directory into a DataFrame.
prods = pd.read_feather('./products.feather')

In [60]:
# Display the products table loaded from the feather file.
prods

Unnamed: 0,Code,Geography,Data Version,Research Year,Process,Unit,Base Capacity (MM unit/year),Investment (MM US$),Type,Name,...,Fixed Costs,"Overhead + Tax, Ins.",Plant Cash Cost,Depreciation,Plant Gate Costs,"G&A, Sales, Res.",Production Cost,ROI (15%),Product Value,Product Price
0,1E-981,"United States, Gulf Coast",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,LB,882.000000,884.905431,Product,ETHYLENE,...,2.673230,3.005282,15.714177,10.032941,25.747119,1.261748,27.008867,15.049412,42.058279,45.550000
1,2E-981,Germany,2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,LB,882.000000,937.999757,Product,ETHYLENE,...,2.974026,3.289281,41.048387,10.634918,51.683304,2.091825,53.775130,15.952377,69.727506,61.610000
2,3E-981,Japan,2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,LB,882.000000,778.716780,Product,ETHYLENE,...,2.331937,2.413579,48.259253,8.828988,57.088241,2.175208,59.263449,13.243483,72.506932,45.520000
3,4E-981,"China, Shanghai",2021 Q3,2002.0,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,LB,882.000000,584.037585,Product,ETHYLENE,...,1.697407,1.566805,36.587246,6.621741,43.208987,1.643555,44.852542,9.932612,54.785154,44.240000
4,1E-1595,"United States, Gulf Coast",2021 Q3,2014.0,ETHYLENE FROM 100% ETHANE,LB,3306.930000,2603.121527,Product,ETHYLENE,...,2.395145,2.581849,14.965808,7.871716,22.837524,1.071498,23.909022,11.807575,35.716597,45.550000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,4M-1602,"China, Shanghai",2021 Q3,2014.0,METHANOL TO OLEFINS BY THE DMTO-II PROCESS,KG,298.010187,394.934144,Product,ETHYLENE,...,3.570702,3.305171,104.954207,13.252371,118.206578,1.394799,119.601377,19.878556,139.479933,97.532505
240,1M-1603,"United States, Gulf Coast",2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,KG,288.938340,604.837579,Product,ETHYLENE,...,6.383301,6.666587,41.930773,20.933102,62.863875,0.952157,63.816032,31.399653,95.215685,100.420560
241,2M-1603,Germany,2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,KG,288.938340,641.127834,Product,ETHYLENE,...,7.194883,7.383074,104.769784,22.189088,126.958872,1.618611,128.577483,33.283632,161.861116,135.826800
242,3M-1603,Japan,2021 Q3,2014.0,METHANOL TO OLEFINS BY UOP ADVANCED MTO PROCESS,KG,288.938340,532.257070,Product,ETHYLENE,...,5.554709,5.286335,129.071071,18.421130,147.492201,1.768928,149.261130,27.631695,176.892824,100.354422
