In [20]:
import pandas as pd
import numpy as np
# Root folder of the input data sets. The loader classes append relative file
# names to it by plain string concatenation, so it must end with "/".
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
# Alternative location kept for reference:
#data_path = "C:/Users/lukec/OneDrive - University of Cambridge/PhD/Data/"
data_path = "C:/Users/lshc3/Documents/"
class LCA(object):
    """CO2e values per product and location, read from a GWP100a CSV export.

    Attributes:
        data_path: Folder prefix the CSV file names are appended to (plain
            string concatenation, so it must end with a path separator).
        data: DataFrame with the columns ``name``, ``location`` and ``CO2e``,
            sorted by ``name``. Rows whose ``generalComment`` contains
            ``'market'`` are excluded.
    """

    def __init__(self, data_path):
        """Read the CSV and keep the rows whose comment does not mention 'market'.

        Args:
            data_path: Folder prefix ending with a path separator.
        """
        # Remember the folder so location() reads from the same place rather
        # than from whatever global `data_path` happens to exist.
        self.data_path = data_path
        # Forward slashes, like the other loaders: the previous "\B", "\G" and
        # "\E" sequences were invalid string escapes.
        filepath = data_path + "EcoInvent/Basic_chemicals_201/GWP100a_IPCC2013.csv"
        raw = pd.read_csv(filepath)
        # na=False: a missing comment cannot mention 'market', so the row is
        # kept; without it the mask contains NaN and cannot be negated.
        is_market = raw['generalComment'].str.contains('market', na=False)
        self.data = raw.loc[~is_market, ['name', 'location', 'CO2e']].sort_values('name')

    def location(self, location='Global'):
        """Select, for every product, the rows of the best available location.

        The location name is translated to a code through
        ``EcoInvent_locations.csv`` (columns ``Name`` and ``Code``). For each
        product the codes are tried in the order region code, ``'RoW'``,
        ``'GLO'``; the rows of the first code that has any are kept, and a
        product without rows for any of the three codes is left out.

        Args:
            location: Value to look up in the ``Name`` column of the
                locations file.

        Returns:
            DataFrame with the columns ``name``, ``location`` and ``CO2e``:
            one row per kept (name, location) pair, ``CO2e`` averaged over
            duplicate rows, ordered by ``name``.

        Raises:
            IndexError: If ``location`` is not present in the locations file
                (same exception type the positional lookup used to raise).
        """
        loc_codes = pd.read_csv(self.data_path + "EcoInvent/EcoInvent_locations.csv")
        # Boolean mask instead of np.where positions used as labels, so the
        # lookup does not depend on the frame having a default integer index.
        matches = loc_codes.loc[loc_codes['Name'] == location, 'Code']
        if matches.empty:
            raise IndexError("location %r not found in EcoInvent_locations.csv" % (location,))
        region_code = matches.iloc[0]

        fallback_codes = [region_code, 'RoW', 'GLO']
        rows = []
        # One pass over per-product groups instead of re-filtering the whole
        # frame for every product and every candidate code.
        for _, product_rows in self.data.groupby('name'):
            for code in fallback_codes:
                hits = product_rows.index[product_rows['location'] == code]
                if len(hits):
                    rows.extend(hits)
                    break
        return self.data.loc[rows].groupby(['name', 'location']).mean().reset_index()

class IHSData(object):
    """The two CSV tables stored under ``IHS/US/``.

    Attributes:
        products: Contents of ``IHS/US/products.csv``.
        materials: Contents of ``IHS/US/materials.csv``.

    In both tables the first CSV column is read as the index and then
    discarded, leaving a fresh 0..n-1 index.
    """

    def __init__(self, data_path):
        """Read both tables.

        Args:
            data_path: Folder prefix the relative file names are appended to
                (plain string concatenation).
        """
        def load(relative_name):
            # First column becomes the index, which is then dropped again.
            table = pd.read_csv(data_path + relative_name, index_col=0)
            return table.reset_index(drop=True)

        self.products = load("IHS/US/products.csv")
        self.materials = load("IHS/US/materials.csv")

class ICISData(object):
    """Section tables parsed from the ICIS Excel workbook.

    Every sheet from the third one onward is read. Inside a sheet, a row whose
    ``PRODUCT`` cell equals one of ``self.properties`` is treated as a section
    header; the rows between the i-th header and the next one (or the end of
    the sheet) form the i-th section. Sections are assigned by position, so
    the sheet is assumed to list them in the order of ``self.properties``.

    Attributes:
        countryCol: Name of the column that must be non-null for a row to be kept.
        properties: Header labels that mark the start of a section.
        plants, prod, imps, exps, cons: The 1st to 5th section of every sheet,
            concatenated over all sheets, with an added ``Region`` column
            holding the sheet name and a fresh 0..n-1 index.
    """

    def __init__(self, path="C:/ICIS_data/US_allchemicals.xlsx"):
        """Read the workbook and split every data sheet into its five sections.

        Args:
            path: Workbook location. The default is the previously hard-coded
                file, written with a forward slash because "\I" is an invalid
                string escape.

        Raises:
            IndexError: If a sheet has fewer section header rows than there
                are entries in ``self.properties``.
        """
        self.countryCol = 'COUNTRY/TERRITORY'
        self.properties = ['Capacity','Statistic Production', 'Import','Export','Consumption']

        # One bucket of per-sheet pieces per section; concatenated once at the
        # end instead of growing five frames inside the loop.
        pieces = [[] for _ in self.properties]

        # The context manager closes the workbook handle when parsing is done.
        with pd.ExcelFile(path) as in_file:
            for sheet in in_file.sheet_names[2:]:
                region = pd.read_excel(in_file, sheet_name=sheet)
                region['Region'] = sheet
                breaks = region.index[region['PRODUCT'].isin(self.properties)]
                if len(breaks) < len(pieces):
                    raise IndexError(
                        "sheet %r has %d section header rows, expected at least %d"
                        % (sheet, len(breaks), len(pieces)))
                for i, bucket in enumerate(pieces):
                    start = breaks[i] + 1
                    # The last section runs to the end of the sheet. The old
                    # sentinel of -1 silently dropped the sheet's final row.
                    stop = breaks[i + 1] if i + 1 < len(breaks) else len(region)
                    bucket.append(region.iloc[start:stop].dropna(subset=[self.countryCol]))

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        self.plants, self.prod, self.imps, self.exps, self.cons = [
            pd.concat(bucket, ignore_index=True) if bucket else pd.DataFrame()
            for bucket in pieces
        ]

# CO2e rows resolved for the 'United States' location name; products without a
# row for that region's code fall back to 'RoW' and then 'GLO'.
lca = LCA(data_path).location('United States')
# Table read from IHS/US/materials.csv (the products table is not used here).
materials = IHSData(data_path).materials
# Parsed ICIS workbook; its section tables are attributes of `icis`
# (plants, prod, imps, exps, cons).
icis = ICISData()

In [32]:
# Join the LCA rows onto the ICIS plant table on lower-cased product names.
# how="right" keeps every plant row, so the LCA columns are NaN where no
# product name matches.
merge = lca.merge(
    icis.plants,
    how="right",
    left_on=lca['name'].str.lower(),
    right_on=icis.plants['PRODUCT'].str.lower(),
)

In [33]:
# Display the joined table. Alternative view kept for reference:
# merge['PRODUCT'].unique() lists the distinct values of the PRODUCT column.
merge#merge['PRODUCT'].unique()

Unnamed: 0,name,location,CO2e,PRODUCT,COUNTRY/TERRITORY,STATE,COMPANY,SITE,#,ROUTE,...,None.58,2037,None.59,2038,None.60,2039,None.61,2040,None.62,Region
0,acetaldehyde,RoW,1.91996,ACETALDEHYDE,UNITED STATES,TEXAS,CELANESE,BAY CITY/TX,1.0,ETHYLENE,...,,-,,-,,-,,-,,CountryTerritory Summary
1,acetaldehyde,RoW,1.91996,ACETALDEHYDE,UNITED STATES,TEXAS,CELANESE,CLEAR LAKE/TX,1.0,ETHYLENE,...,,-,,-,,-,,-,,CountryTerritory Summary
2,acetaldehyde,RoW,1.91996,ACETALDEHYDE,UNITED STATES,TEXAS,EASTMAN CHEMICAL,LONGVIEW/TX,1.0,ETHYLENE,...,,115,,115,,115,,115,,CountryTerritory Summary
3,acetaldehyde,RoW,1.91996,ACETALDEHYDE,UNITED STATES,TENNESSEE,TENNESSEE EASTMAN,KINGSPORT/TN,1.0,ETHANOL,...,,-,,-,,-,,-,,CountryTerritory Summary
4,acetaldehyde,RoW,1.91996,ACETALDEHYDE,UNITED STATES,W.VIRGINIA,UNION CARBIDE,INSTITUTE/WV,1.0,ETHANOL,...,,-,,-,,-,,-,,CountryTerritory Summary
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4412,,,,VISBREAKING,UNITED STATES,CALIFORNIA,UNOCAL CORP.,LOS ANGELES/CA,1.0,VISBREAKING,...,,-,,-,,-,,-,,CountryTerritory Summary
4413,,,,VISBREAKING,UNITED STATES,TEXAS,VALERO REFINING,CORPUS CHRISTI/TX,2.0,VISBREAKING,...,,-,,-,,-,,-,,CountryTerritory Summary
4414,,,,VISBREAKING,UNITED STATES,LOUISIANA,VALERO REFINING,GOOD HOPE/LA,1.0,VISBREAKING,...,,-,,-,,-,,-,,CountryTerritory Summary
4415,,,,VISBREAKING,UNITED STATES,TEXAS,VALERO REFINING,HOUSTON/TX,1.0,VISBREAKING,...,,-,,-,,-,,-,,CountryTerritory Summary


In [30]:
# Join the IHS materials table onto the LCA rows on lower-cased product names.
# how="left" keeps every LCA row, so `Product` is NaN where nothing matches.
mergeIHS = lca.merge(
    materials,
    how="left",
    left_on=lca['name'].str.lower(),
    right_on=materials['Product'].str.lower(),
)
# Distinct `Product` values after the join (NaN stands for unmatched LCA rows).
mergeIHS['Product'].unique()

array([nan, '2,6-DI-TERT-BUTYLPHENOL', 'ACETALDEHYDE', 'ACETIC ANHYDRIDE',
       'ACETONITRILE', 'ACETYLENE', 'ACROLEIN', 'ACRYLONITRILE',
       'ADIPIC ACID', 'ALLYL CHLORIDE', 'ANILINE', 'ANTHRAQUINONE',
       'BENZOIC ACID', 'BUTADIENE', 'ISOPROPANOL'], dtype=object)

In [27]:
# Display the table read from IHS/US/materials.csv.
materials

Unnamed: 0,Code,Data Version,Source,Target,Research Year,Geography,Plant capacity,Capacity unit,Product,Value,...,Product fixed costs,"Product Overhead + Tax, Ins.",Product Plant Cash Cost,Product Depreciation,Product Plant Gate Costs,"Product G&A, Sales, Res.",Production Cost,Product ROI (15%),Product Value,Product Price
0,1M-981,2021 Q3,CATALYST,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,2002.0,"United States, Gulf Coast",400.068470,MM KG/yr,ETHYLENE,,...,5.893464,6.625514,34.643831,22.118850,56.762680,2.781679,59.544360,33.178274,92.722634,100.420560
1,1M-981,2021 Q3,CAUSTIC SODA (50%),ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,2002.0,"United States, Gulf Coast",400.068470,MM KG/yr,ETHYLENE,0.006260,...,5.893464,6.625514,34.643831,22.118850,56.762680,2.781679,59.544360,33.178274,92.722634,100.420560
2,1M-981,2021 Q3,"METHANOL, CRUDE",ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,2002.0,"United States, Gulf Coast",400.068470,MM KG/yr,ETHYLENE,5.941383,...,5.893464,6.625514,34.643831,22.118850,56.762680,2.781679,59.544360,33.178274,92.722634,100.420560
3,1M-981,2021 Q3,C4-C5 MIXTURE,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,2002.0,"United States, Gulf Coast",400.068470,MM KG/yr,ETHYLENE,-0.436700,...,5.893464,6.625514,34.643831,22.118850,56.762680,2.781679,59.544360,33.178274,92.722634,100.420560
4,1M-981,2021 Q3,ETHANE-RICH GAS,ETHYLENE BY THE UOP/HYDRO METHANOL TO OLEFINS ...,2002.0,"United States, Gulf Coast",400.068470,MM KG/yr,ETHYLENE,-0.031340,...,5.893464,6.625514,34.643831,22.118850,56.762680,2.781679,59.544360,33.178274,92.722634,100.420560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2903,1M-1457,2021 Q3,HYDROGEN.,"BIO-BASED 1,4-BUTANEDIOL VIA SUCCINIC ACID ROU...",2011.0,"United States, Gulf Coast",29.937096,MM KG/yr,"1,4-BUTANEDIOL",0.109000,...,27.305847,28.141565,673.899861,62.139265,736.039126,43.644633,779.683759,93.208898,872.892658,172.776275
2904,1M-1457,2021 Q3,COOLING WATER,"BIO-BASED 1,4-BUTANEDIOL VIA SUCCINIC ACID ROU...",2011.0,"United States, Gulf Coast",29.937096,MM KG/yr,"1,4-BUTANEDIOL",69.099916,...,27.305847,28.141565,673.899861,62.139265,736.039126,43.644633,779.683759,93.208898,872.892658,172.776275
2905,1M-1457,2021 Q3,ELECTRICITY,"BIO-BASED 1,4-BUTANEDIOL VIA SUCCINIC ACID ROU...",2011.0,"United States, Gulf Coast",29.937096,MM KG/yr,"1,4-BUTANEDIOL",529.109429,...,27.305847,28.141565,673.899861,62.139265,736.039126,43.644633,779.683759,93.208898,872.892658,172.776275
2906,1M-1457,2021 Q3,PROCESS WATER,"BIO-BASED 1,4-BUTANEDIOL VIA SUCCINIC ACID ROU...",2011.0,"United States, Gulf Coast",29.937096,MM KG/yr,"1,4-BUTANEDIOL",27.039098,...,27.305847,28.141565,673.899861,62.139265,736.039126,43.644633,779.683759,93.208898,872.892658,172.776275


In [31]:
# Display the location-resolved LCA table (columns name, location, CO2e).
lca

Unnamed: 0,name,location,CO2e
0,"1,1-difluoroethane, HFC-152a",US,5.916414
1,1-butanol,RoW,3.045196
2,1-pentanol,RoW,5.078633
3,1-propanol,RoW,4.532686
4,"2,4-di-tert-butylphenol",GLO,4.543894
...,...,...,...
511,xylene,RoW,1.703801
512,zinc monosulfate,RoW,0.702319
513,zinc oxide,RoW,0.815520
514,zinc sulfide,RoW,2.135332
