In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import itertools
import pandas as pd
from more_itertools import locate
from torch_scatter import scatter
import torch
from torch_geometric.data import Data
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import to_networkx
from torch_geometric.transforms import RandomNodeSplit

In [2]:
# Import IHS data
class IHSData(object):
    """Container for the IHS product and material tables.

    Each table is read from a CSV file whose first column is used as the index
    on load; that index is then dropped and replaced by a fresh 0..n-1 index.

    Parameters
    ----------
    product_file : str or path-like, optional
        CSV file holding the product table. Defaults to
        ``IHSData.DEFAULT_PRODUCT_FILE``.
    material_file : str or path-like, optional
        CSV file holding the material table. Defaults to
        ``IHSData.DEFAULT_MATERIAL_FILE``.

    Attributes
    ----------
    products : pandas.DataFrame
        Contents of ``product_file``.
    materials : pandas.DataFrame
        Contents of ``material_file``.
    """

    # Raw strings: the backslashes are literal Windows path separators. In an
    # ordinary string literal they form invalid escape sequences (``\l``,
    # ``\O``, ``\P`` ...), which recent Python versions warn about.
    DEFAULT_PRODUCT_FILE = r"C:/Users\lukec\OneDrive - University of Cambridge\PhD\Data\IHS/US\products.csv"
    DEFAULT_MATERIAL_FILE = r"C:/Users\lukec\OneDrive - University of Cambridge\PhD\Data\IHS/US\materials.csv"

    def __init__(self, product_file=None, material_file=None):
        # Fall back to the original hard-coded locations so that a bare
        # ``IHSData()`` keeps working exactly as before.
        if product_file is None:
            product_file = self.DEFAULT_PRODUCT_FILE
        if material_file is None:
            material_file = self.DEFAULT_MATERIAL_FILE
        self.products = self._read_table(product_file)
        self.materials = self._read_table(material_file)
        # --- Disabled graph-construction code, kept for reference ---
        # self.edgeColumns = ['Process','Geography']
        # self.weights = [10,1]
        # self.adjMatrix, self.edgeWeights = pd_to_adj_matrix(self.products,self.edgeColumns,self.weights)
        # self.nodeTypes = self.products['Investment (MM US$)'].values
        # self.valueColumns = ['Variable Cost','Fixed Costs','Overhead + Tax, Ins.','Depreciation','G&A, Sales, Res.']
        #self.products.iloc[:,np.where(['float' in i or 'int' in i for i in self.products.dtypes.astype(str)])[0]].values

    @staticmethod
    def _read_table(csv_file):
        """Read one CSV table, discarding the stored index column."""
        return pd.read_csv(csv_file, index_col=0).reset_index(drop=True)

    # def to_pyg_graph(self):
    #     edge_index = torch.tensor(self.adjMatrix, dtype=torch.long)
    #     x = torch.tensor(self.products[self.valueColumns].values, dtype=torch.float)
    #     y = torch.tensor(np.digitize(self.nodeTypes,bins=np.linspace(min(self.nodeTypes),max(self.nodeTypes),4))-1, dtype=torch.long)
    #     return Data(x=x, edge_index=edge_index, y=y)

In [9]:
# Load the IHS product and material tables from the CSV files named in IHSData.__init__.
ihs = IHSData()

In [22]:
# Read in impact factors
def read_xml_attributes(filepath:str,branches:list,attributes:list,df=False):
    """Read attributes from one XML file and append them to a DataFrame as rows.

    Parameters
    ----------
    filepath : str
        Path to the XML file to parse.
    branches : list
        One path, or a list of paths, through the XML tree. Each path is a
        list of tag fragments; a child element matches a fragment when the
        fragment is a substring of its tag (presumably so that
        namespace-qualified tags still match - confirm).
    attributes : list
        For each path, the attribute names to read from every element matched
        by that path's final fragment. A falsy name (``None`` or ``""``)
        reads the element's text instead of an attribute.
    df : pandas.DataFrame or False, optional
        Frame to append to. With the default ``False`` a new empty frame is
        created whose columns are the flattened attribute names.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by one new row per element matched by the final
        fragment of the LAST path. Each row holds all values collected so far
        in this call, i.e. including those read for earlier paths.

    Raises
    ------
    KeyError
        If a matched element lacks one of the requested attributes.

    Notes
    -----
    * ``branches`` and ``attributes`` are normalised with ``utl.to_listlist``,
      a project helper that is not defined in this notebook (presumably it
      wraps a flat list into a list of lists - confirm).
    * The position in the tree is NOT reset between paths: a later path is
      resolved starting from where the previous path stopped descending, and
      any fragment that matches nothing at the current level is skipped.
    * Requirements: pandas as pd, xml.etree.ElementTree.
    """
    # Local import: ElementTree is not part of the notebook's import cell.
    import xml.etree.ElementTree as ET

    branches, attributes = utl.to_listlist(branches), utl.to_listlist(attributes)
    if df is False:
        df = pd.DataFrame([], columns=[name for group in attributes for name in group])

    tree_loc = ET.parse(filepath).getroot()
    attr_values = dict()
    new_rows = []
    last_path = len(branches) - 1
    for pathnum, path in enumerate(branches):
        last_depth = len(path) - 1
        for depth, branch in enumerate(path):
            matches = [child for child in tree_loc if branch in child.tag]
            # Compare positions rather than object identity: a fragment that
            # is repeated inside a path must not be mistaken for the leaf.
            if depth == last_depth:
                for child in matches:
                    attr_values.update(
                        (name, child.attrib[name] if name else child.text)
                        for name in attributes[pathnum]
                    )
                    if pathnum == last_path:
                        # Snapshot: attr_values keeps being updated afterwards.
                        new_rows.append(dict(attr_values))
            elif matches:
                # Descend into the first matching child only.
                tree_loc = matches[0]

    if new_rows:
        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    return df

def import_xml_batch(directory:str,path:list,attributes:list,co2e=False,impacts=False):
    """Read the same attributes from every file in a directory into one DataFrame.

    Parameters
    ----------
    directory : str
        Directory whose entries are each passed to ``read_xml_attributes``.
        Every entry is parsed, so the directory should contain only XML files.
    path : list
        Location(s) of the attributes within each XML file; forwarded as the
        ``branches`` argument of ``read_xml_attributes``.
    attributes : list
        Attribute names to read; these also become the column names.
    co2e : str or False, optional
        If not ``False``, name of an extra column filled with the result of
        ``calculate_co2e(file, impacts)`` for each file.
    impacts : optional
        Forwarded unchanged to ``calculate_co2e``; only used when ``co2e`` is
        set.

    Returns
    -------
    df : pandas.DataFrame
        The requested attributes, with rows in the order of ``file_list``.
    file_list : list of str
        The directory entries that were processed, sorted by name.

    Notes
    -----
    ``utl``, ``tqdm`` and ``calculate_co2e`` are not defined or imported in
    the cells visible here - NOTE(review): confirm they are in scope before
    this cell runs.
    Requirements: os, pandas as pd, xml.etree.ElementTree.
    """
    # Local import: `os` is not part of the notebook's import cell.
    import os

    # os.listdir returns entries in arbitrary order; sort for reproducible rows.
    file_list = sorted(os.listdir(directory))
    attributes = utl.to_listlist(attributes)
    df = pd.DataFrame([],columns=[name for group in attributes for name in group])
    if co2e is not False: df[co2e] = None
    for file in tqdm(file_list):
        filepath = os.path.join(directory, file)
        rows_before = len(df)
        df = read_xml_attributes(filepath,path,attributes,df)
        # Only write the value when this file actually added a row; otherwise
        # the previous file's value would be overwritten (or, on an empty
        # frame, an IndexError raised).
        if co2e is not False and len(df) > rows_before:
            # Single .loc assignment instead of chained `df[col].iloc[-1] = ...`,
            # which may write to a temporary copy and leave `df` unchanged.
            df.loc[df.index[-1], co2e] = calculate_co2e(filepath, impacts)
    return df, file_list

# Collect, from every file in lcaDataPath, the 'name' and 'generalComment' attributes of the
# reference branch and the 'location' attribute of the geo branch, plus a 'CO2e' column.
# The returned file list is discarded.
# NOTE(review): lcaDataPath and impacts are not defined in any cell shown here - confirm they
# are assigned earlier, otherwise this cell fails on Restart & Run All.
processes, _ = import_xml_batch(lcaDataPath,[['dataset','meta','process','reference'],['dataset','meta','process','geo']],[['name','generalComment'],['location']],'CO2e',impacts)

array(['CATALYST', 'CAUSTIC SODA (50%)', 'METHANOL, CRUDE',
       'CATALYST AND CHEMICALS', 'ETHANE', 'ETHANE PROPANE FEED',
       'GAS OIL, ATMOSPHERIC', 'CONDENSATE', 'CRUDE OIL (TAPIS)',
       'CRUDE OIL', 'PROPANE', 'ETHANOL', 'ETHANOL.', 'NAPHTHA, LIGHT',
       'N-BUTANE', 'NAPHTHA', 'NATURAL GAS',
       'CATALYST, METHANOL SYNTHESIS', 'CATALYST, REFORMING',
       'MOLECULAR SIEVES', 'MONOETHANOLAMINE', 'OXYGEN (HIGH USAGE)',
       'ADSORBENT', 'ALUMINUM CHLORIDE', 'CUPROUS CHLORIDE',
       'DESICCANT AND INHIBITORS', 'REFINERY GAS', 'TOLUENE',
       'GAS OIL, VACUUM', 'HYDROGEN (95 VOL%)', 'CATALYST, HYDROGENATION',
       'N-METHYL-2-PYRROLIDONE', 'CATALYST, METHANATION', 'HYDROGEN',
       'OXYGEN (95 MOLE %)', 'SHIFT CATALYST', 'CATALYST.', 'METHANOL',
       'MISC CHEMICALS', 'CAUSTIC SODA.(50%)', 'INITIATOR',
       'METHYL ACRYLATE', 'METHYL METHACRYLATE', 'AZOBISISOBUTYRONITRILE',
       'RELEASE AGENT, COVER FILM', 'HYDROGEN FLUORIDE',
       'METHYL CHLOROFORM',

In [None]:
class EmissionData(object):
    """Holds the location of the EcoInvent 'Basic_chemicals_201' data folder.

    The class currently defines no behaviour beyond this path.
    """

    # Raw string: the backslashes are literal Windows path separators. In an
    # ordinary string literal they form invalid escape sequences (``\l``,
    # ``\O``, ``\E`` ...), which recent Python versions warn about.
    folderPath = r"C:/Users\lukec\OneDrive - University of Cambridge\PhD\Data\EcoInvent\Basic_chemicals_201"



In [19]:
# Boolean mask marking the material rows whose 'Source type' is 'Raw Material'.
# NOTE(review): the enclosing brackets wrap the mask in a one-element list, so the
# cell displays a list containing the Series rather than the Series itself.
[ihs.materials['Source type'] == 'Raw Material']

[0        True
 1        True
 2        True
 3       False
 4       False
         ...  
 2903     True
 2904    False
 2905    False
 2906    False
 2907    False
 Name: Source type, Length: 2908, dtype: bool]

In [25]:
# Distinct values of the 'Name' column of the IHS products table.
ihs.products.Name.unique()

array(['ETHYLENE', 'POLYMETHYLMETHACRYLATE', 'HCFC-141B',
       '1,12-DODECANEDIAMINE', '1,12-DODECANEDIOIC ACID', 'DINCH',
       '1,3-BUTADIENE', 'BUTADIENE', '1,3-PROPANEDIOL',
       '1,4-BUTANEDIOL AND TETRAHYDROFURAN', '1,4-BUTANEDIOL',
       '1,4-CYCLOHEXANEDIMETHANOL', '1-PHENYLETHYLAMINE®',
       '2246-ANTIOXIDANT', '2,4-DIMETHYL BENZALDEHYDE',
       '2,6-DI-TERT-BUTYLPHENOL', '2,6-DIETHYLANILINE',
       '2,6-DIMETHYLPHENOL', '2-ETHYLHEXANOL', '2-ETHYLHEXYL ACRYLATE',
       '2-ETHYLHEXYL DIPHENYL PHOSPHATE',
       '2-HYDROXY-4-METHYLTHIOBUTANOIC ACID', '2-H-4-O-BENZOPHENONE',
       '2-MERCAPTOBENZOTHIAZOLE', '2-PICOLINE', '3-HYDROXYPROPIONIC ACID',
       "4,4'-DICHLORODIPHENYL SULFONE", "4,4'-DIHYDROXYDIPHENYL",
       "4,4'-METHYLENEDIANILINE", '4-CHLORO-4-HYDROXY-BENZOPHENONE',
       '4-METHYLPENTENE-1', '4-MP-1/1-DECENE COPOLYMER',
       '6-HYDROXY-2-NAPHTHOIC ACID', 'ABS RESIN', 'ACESULFAME-K',
       'ACETAL HOMOPOLYMER', 'ACETALDEHYDE', 'ACETAMINOPHEN',
      