**Gas Plant Nodes and Relationships**

A plant commonly consists of several units, and it is not uncommon for the units of one plant to have different owners. Each node is therefore a plant unit rather than a whole plant.

Each unique plant unit in gas_plants is a node. 

Each gas plant unit has at least one edge, and each row in the gas_df dataframe is one edge. The edge is between the gas plant unit and the plant owner. The edge weight can be found in the Owner Ownership % column.

**Gas Owner Entity Nodes and Relationships**

Entity nodes and possible attributes can be found in gas_entities_df.

Entity relationships can be found in gas_entity_relationships_df.

Together, both of these dataframes represent all entities in all ownership trees for each gas plant.

In [194]:
import pandas as pd
import numpy as np

In [195]:
def _effective_owner_shares(row, exposure, company_owner_df):
    """Return ``{entity id (as str): effective share}`` for one exposure row.

    The mapping starts with the row's immediate owner and is extended upwards
    through ``company_owner_df`` (columns ``company_id``, ``owner_id``,
    ``share``). An entity that can be reached along several paths is only
    credited for the first path found.
    """
    if pd.isnull(row['share']):
        # Owners without a reported share split evenly whatever the owners of
        # the same unit have left unclaimed.
        same_unit = exposure[exposure['Unit ID'] == row['Unit ID']]
        number_of_null_owners = int(same_unit['share'].isnull().sum())
        total_shares_claimed = same_unit['share'].sum()
        immediate_share = (1 - total_shares_claimed) / number_of_null_owners
    else:
        immediate_share = row['share']

    shares = {f"{row['company_id']}": float(immediate_share)}

    # `visited` doubles as the work queue: it grows while it is being iterated.
    visited = [row['company_id']]
    for owner in visited:
        parents = company_owner_df[company_owner_df['company_id'] == owner]
        owner_share = shares[f"{owner}"]

        for _, parent in parents.iterrows():
            parent_id = parent['owner_id']
            if parent_id in visited:
                continue

            if pd.isnull(parent['share']):
                # Unknown stake: assume the listed parents own equal parts.
                fraction = 1 / parents.shape[0]
            else:
                # Parent stakes are stored as percentages.
                fraction = float(parent['share']) / 100

            shares[f"{parent_id}"] = fraction * owner_share
            visited.append(parent_id)

    return shares


def db_energy_asset_exposure(exposure,asset_type):
    """Attribute asset capacity to immediate owners and all entities above them.

    Every row of ``exposure`` is one immediate-owner stake in one asset. The
    ownership tree read from ``Data/company_owners_July_2024.csv`` is walked
    upwards from that owner, and the row's capacity multiplied by each
    entity's effective share is accumulated per entity and per status.

    Parameters
    ----------
    exposure : pandas.DataFrame
        Must contain ``company_id``, ``share``, the capacity column belonging
        to ``asset_type`` and ``Status`` (``gas`` when ``asset_type`` is
        ``"emissions"``). ``Unit ID`` is only read for rows with a null share.
        NOTE(review): ``share`` is multiplied into the capacity as-is and the
        null-share fallback computes ``1 - claimed``, so it appears to be
        expected as a fraction (0-1) - confirm against the callers.
    asset_type : str
        One of ``"coal plant"``, ``"bioenergy"``, ``"gas plant"``, ``"steel"``,
        ``"blast furnace"``, ``"emissions"`` or ``"coal mine"``.

    Returns
    -------
    pandas.DataFrame
        One row per entity with the columns ``ID``, one column per status
        value found in ``exposure``, and ``sum`` (total over all statuses).

    Raises
    ------
    ValueError
        If ``asset_type`` is not one of the supported values.
    """
    capacity_columns = {
        "coal plant": "Capacity (MW)",
        "bioenergy": "Capacity (MW)",
        "gas plant": "Capacity (MW)",
        "steel": "Nominal crude steel capacity (ttpa)",
        "blast furnace": "Nominal iron capacity (ttpa)",
        "emissions": "emissions_quantity",
        "coal mine": "Capacity (Mtpa)",
    }
    try:
        capacity = capacity_columns[asset_type]
    except KeyError:
        raise ValueError(f"Unknown asset_type: {asset_type!r}") from None

    # Emissions are broken down by gas, every other asset type by status.
    status_column = 'gas' if asset_type == 'emissions' else 'Status'

    company_owner_df = pd.read_csv("Data/company_owners_July_2024.csv")
    statuses = exposure[status_column].unique().tolist()

    exposure_dic = {}
    for _, row in exposure.iterrows():
        owners = _effective_owner_shares(row, exposure, company_owner_df)
        status = f"{row[status_column]}"

        for company, share in owners.items():
            # A missing entity id is stringified to 'nan'; nothing to attribute.
            if company == 'nan':
                continue

            if company not in exposure_dic:
                exposure_dic[company] = {f"{known_status}": 0 for known_status in statuses}

            # Scale by this entity's own share (not by the share of whichever
            # owner happened to be visited last in the tree walk).
            exposure_dic[company][status] += float(row[capacity]) * share

    #Create a dataframe out of the dictionary
    entities_exposure = pd.DataFrame.from_dict(exposure_dic, orient='index').reset_index().rename(columns={'index':'ID'})
    status_columns = [column for column in entities_exposure.columns if column != "ID"]
    entities_exposure['sum'] = entities_exposure[status_columns].sum(axis=1)

    return entities_exposure

In [196]:
# Energy Projects
gas_plants = pd.read_csv("Data/GOGPT-download-2024-07-16T001922.csv")
coal_plants = pd.read_csv("Data/coal-export-2024-07-15T234731.csv")
bio_plants = pd.read_csv("Data/bioenergy-export-2024-07-16T003535.csv")

# Extractive and Heavy Industry Projects
steel_plants = pd.read_excel("Data/Global-Steel-Plant-Tracker-April-2024-Standard-Copy-V1.xlsx",sheet_name="Steel Plants")

# Keep only plants whose status is in the lists below (operating or in the
# pipeline). `.copy()` makes each filtered frame independent of the frame it
# was cut from, so the column assignments made later on these frames do not
# run into pandas' chained-assignment warnings.
bio_plants = bio_plants[bio_plants["Operating status"].isin(["operating","construction","pre-construction","announced"])].copy()
gas_plants = gas_plants[gas_plants["Status"].isin(["operating","announced","construction","pre-construction"])].copy()
coal_plants = coal_plants[coal_plants["Status"].isin(["operating","construction","permitted","pre-permit","announced"])].copy()
steel_plants = steel_plants[steel_plants["Capacity operating status"].isin(["operating","announced","construction","operating pre-retirement"])].copy()


# Emissions
combustion_emissions = pd.read_csv("Data/electricity-generation_emissions_sources.csv")
#steel_emissions = pd.read_csv("Data/steel_emissions_sources.csv")

# Ownership connections
plant_owners = pd.read_csv("Data/plant_owner_July_2024.csv")
entity_owners = pd.read_csv("Data/company_owners_July_2024.csv")

#Entity information
entities = pd.read_csv("Data/entities-20240716000321.csv")
# "Full Name" = name + legal entity type; it is the key the plant owner names are matched on
entities["Full Name"] = entities["Name"].fillna('').astype(str) + " " + entities["Legal Entity Type"].fillna('').astype(str)
# Trim surrounding whitespace from every string cell (also removes the trailing
# space left in "Full Name" when there is no legal entity type)
entities = entities.map(lambda x: x.strip() if isinstance(x, str) else x)

In [197]:
def find_relevent_entities_in_tree(immediate_owner,entity_owners):
    """Return the ownership edges linking ``immediate_owner`` to the entities above it.

    Starting from ``immediate_owner``, the ``company_id`` -> ``owner_id`` links
    of ``entity_owners`` are followed upwards until no new entity turns up.
    The result is the subset of ``entity_owners`` rows whose ``company_id``
    and ``owner_id`` were both reached during that walk.
    """
    # `reached` is both the result and the work queue: entities appended while
    # iterating are visited later in the same loop.
    reached = [immediate_owner]

    for entity in reached:
        parent_ids = entity_owners.loc[entity_owners['company_id'] == entity, 'owner_id']

        for parent_id in parent_ids:
            if parent_id not in reached:
                reached.append(parent_id)

    # Keep only the edges that lie completely inside the collected tree
    inside_tree = entity_owners["company_id"].isin(reached) & entity_owners["owner_id"].isin(reached)

    return entity_owners[inside_tree]

# Oil and Gas Plants

## Gas Plant emissions

In [198]:
# This study only considers CO2, so rows for every other gas are dropped
combustion_emissions = combustion_emissions.loc[combustion_emissions["gas"].eq("co2")]

In [199]:
#pd.merge(gas_plants,combustion_emissions.rename(columns={"source_name":"Plant Name"})[["Plant Name",""]])

## Gas plant nodes and relationships

In [200]:
# One cell can list several immediate owners separated by ";": turn the cell
# into a list and give every owner a row of its own
gas_plants = gas_plants.assign(**{"Owner(s)": gas_plants["Owner(s)"].str.split(";")}).explode("Owner(s)")

In [201]:
# Quick test to make sure that it will only split into two columns on the ] delimiter
gas_plants["Owner(s)"].str.split("[",expand=True)

Unnamed: 0,0,1
0,City of Holland,100%]
1,City of Holland,100%]
2,Cairo Electricity Production Co,100%]
3,Cairo Electricity Production Co,100%]
4,Cairo Electricity Production Co,100%]
...,...,...
13468,General Electricity Company of Libya,100%]
13469,General Electricity Company of Libya,100%]
13470,General Electricity Company of Libya,100%]
13471,General Electricity Company of Libya,100%]


In [202]:
# Each owner reads "Name [share%]". Splitting once on the last "[" guarantees at
# most two columns even when an owner name itself contains a bracket.
gas_plants[["Owner(s)","Owner Ownership %"]] = gas_plants["Owner(s)"].str.rsplit("[",n=1,expand=True)
# "%]" is a literal suffix, not a regular expression
gas_plants["Owner Ownership %"] = gas_plants["Owner Ownership %"].str.replace("%]","",regex=False)

In [203]:
# Trim surrounding whitespace from every string cell; other values pass through unchanged
gas_plants = gas_plants.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)

In [204]:
# Attach the entity ID to every plant/owner row by matching the owner name on the entity "Full Name"
gas_df = gas_plants.merge(entities[["Full Name","ID"]].rename(columns={"Full Name":"Owner(s)"}),on="Owner(s)",how="left")

In [205]:
# Rows whose owner is the placeholder "not found" carry no identified owner and are dropped
gas_df = gas_df.loc[gas_df["Owner(s)"].ne("not found")]

In [206]:
# Look if there are owners that didn't match -> an empty series means all owners matched
gas_df[gas_df["ID"].isnull()]["Owner(s)"].value_counts()

Series([], Name: count, dtype: int64)

## Finding all entity nodes and relationships for the Oil and Gas Subgraph

In [207]:
#Finding all unique immediate owners in gas network
gas_immediate_owners = gas_df["ID"].unique().tolist()

# Collect the ownership connections relevant to the gas subgraph: one frame per
# immediate owner, concatenated once at the end. Empty frames are left out so
# that pandas never has to concatenate empty entries.
gas_relationship_frames = [find_relevent_entities_in_tree(owner,entity_owners=entity_owners) for owner in gas_immediate_owners]
gas_relationship_frames = [frame for frame in gas_relationship_frames if not frame.empty]

if gas_relationship_frames:
    gas_entity_relationships_df = pd.concat(gas_relationship_frames,axis=0)
else:
    # No connection found at all: keep an empty frame with the expected columns
    gas_entity_relationships_df = pd.DataFrame(columns=entity_owners.columns.tolist())


In [208]:
# Trees of different immediate owners overlap, so the same edge can occur more than once; keep the first occurrence
gas_entity_relationships_df = gas_entity_relationships_df.drop_duplicates(keep="first")

In [209]:
# Every entity that appears on either end of an ownership edge, without duplicates
gas_entities = list(set(gas_entity_relationships_df["company_id"]) | set(gas_entity_relationships_df["owner_id"]))

# Entity attributes for exactly those entities, re-indexed from 0
gas_entities_df = entities[entities["ID"].isin(gas_entities)].reset_index(drop=True)


In [210]:
gas_df["Owner Ownership %"] = gas_df["Owner Ownership %"].astype(float)

# db_energy_asset_exposure multiplies the capacity by `share` directly and fills
# a missing share with (1 - claimed shares), i.e. it works with fractions. The
# percentages are therefore scaled to 0-1 on the frame handed to it; gas_df
# itself keeps the percentage values.
gas_exposure = gas_df.rename(columns={"Owner Ownership %":"share","GEM unit ID":"Unit ID","ID":"company_id"})
gas_exposure["share"] = gas_exposure["share"] / 100
gas_entity_capacities = db_energy_asset_exposure(gas_exposure,"gas plant")

gas_entities_df = pd.merge(gas_entities_df,gas_entity_capacities.rename(columns={"sum":"gas capacity"})[["ID","gas capacity"]],on="ID",how="left")

# Coal Plants

## Coal plant nodes and relationships

In [211]:
# One cell can list several immediate owners separated by ";": turn the cell
# into a list and give every owner a row of its own
coal_plants = coal_plants.assign(Owner=coal_plants["Owner"].str.split(";")).explode("Owner")

In [212]:
# Quick test to make sure that it will only split into two columns on the ] delimiter
coal_plants["Owner"].str.split("[",expand=True)

Unnamed: 0,0,1
0,Abakanskaya CHPP JSC,100%]
1,Abakanskaya CHPP JSC,100%]
2,Abakanskaya CHPP JSC,100%]
3,Abakanskaya CHPP JSC,100%]
12,EDP España SA,100%]
...,...,...
13994,Construction Investment Zunhua Thermal Power C...,100%]
13997,Huaneng Zuoquan Coal&Power Co Ltd,100%]
13998,Huaneng Zuoquan Coal&Power Co Ltd,100%]
13999,Tameh Holding SP zoo,100%]


In [213]:
# Each owner reads "Name [share%]". Splitting once on the last "[" guarantees at
# most two columns even when an owner name itself contains a bracket.
coal_plants[["Owner","Owner Ownership %"]] = coal_plants["Owner"].str.rsplit("[",n=1,expand=True)
# "%]" is a literal suffix, not a regular expression
coal_plants["Owner Ownership %"] = coal_plants["Owner Ownership %"].str.replace("%]","",regex=False)
# Trim surrounding whitespace from every string cell before matching on names
coal_plants = coal_plants.map(lambda x: x.strip() if isinstance(x, str) else x)
# Attach the entity ID by matching the owner name on the entity "Full Name"
coal_df = pd.merge(coal_plants,entities.rename(columns={"Full Name":"Owner"})[["Owner","ID"]],on="Owner",how="left")

In [214]:
# Rows whose owner is the placeholder "to be determined" carry no identified owner and are dropped
coal_df = coal_df.loc[coal_df["Owner"].ne("to be determined")]

In [215]:
# Look if there are owners that didn't match -> an empty series means all owners matched
coal_df[coal_df["ID"].isnull()]["Owner"].value_counts()

Series([], Name: count, dtype: int64)

## Finding all entity nodes and relationships for the Coal Plant Subgraph

In [216]:
#Finding all unique immediate owners in coal plant network
coal_immediate_owners = coal_df["ID"].unique().tolist()

# Collect the ownership connections relevant to the coal plant subgraph: one
# frame per immediate owner, concatenated once at the end. Empty frames are left
# out so that pandas never has to concatenate empty entries.
coal_relationship_frames = [find_relevent_entities_in_tree(owner,entity_owners=entity_owners) for owner in coal_immediate_owners]
coal_relationship_frames = [frame for frame in coal_relationship_frames if not frame.empty]

if coal_relationship_frames:
    coal_entity_relationships_df = pd.concat(coal_relationship_frames,axis=0)
else:
    # No connection found at all: keep an empty frame with the expected columns
    coal_entity_relationships_df = pd.DataFrame(columns=entity_owners.columns.tolist())


#Remove duplicate rows
coal_entity_relationships_df = coal_entity_relationships_df.drop_duplicates()

coal_entities = coal_entity_relationships_df["company_id"].to_list() + coal_entity_relationships_df["owner_id"].to_list()

#Remove duplicates
coal_entities = list(set(coal_entities))

coal_entities_df = entities[entities["ID"].isin(coal_entities)].reset_index().drop(columns={"index"})


In [217]:
coal_df["Owner Ownership %"] = coal_df["Owner Ownership %"].astype(float)

# db_energy_asset_exposure multiplies the capacity by `share` directly and fills
# a missing share with (1 - claimed shares), i.e. it works with fractions. The
# percentages are therefore scaled to 0-1 on the frame handed to it; coal_df
# itself keeps the percentage values.
coal_exposure = coal_df.rename(columns={"Owner Ownership %":"share","GEM unit/phase ID":"Unit ID","ID":"company_id"})
coal_exposure["share"] = coal_exposure["share"] / 100
coal_entity_capacities = db_energy_asset_exposure(coal_exposure,"coal plant")

coal_entities_df = pd.merge(coal_entities_df,coal_entity_capacities.rename(columns={"sum":"coal plant capacity"})[["ID","coal plant capacity"]],on="ID",how="left")

# Bioenergy

## Bioenergy plant nodes and relationships

In [218]:
# One cell can list several immediate owners separated by ";": turn the cell
# into a list and give every owner a row of its own
bio_plants = bio_plants.assign(Owner=bio_plants["Owner"].str.split(";")).explode("Owner")

In [219]:
# Quick test to make sure that it will only split into two columns on the ] delimiter
bio_plants["Owner"].str.split("[",expand=True)

Unnamed: 0,0,1
0,BH Energy Gap,100%]
1,Metsä Fibre Oy,100%]
2,Renergy Group Partners LLC,
3,E.ON Värme Sverige AB,100%]
4,FRI-EL Group,100%]
...,...,...
2057,Everbright Environmental Energy (Zhejiang) Hol...,70%]
2057,Hangzhou Environment Group Co Ltd,20%]
2057,Hangzhou Yuhang Urban Construction Group Co Ltd,10%]
2058,Zibo Hongda Thermoelectricity Co Ltd,100%]


In [220]:
# Each owner reads "Name [share%]" (the share part can be missing). Splitting
# once on the last "[" guarantees at most two columns even when an owner name
# itself contains a bracket.
bio_plants[["Owner","Owner Ownership %"]] = bio_plants["Owner"].str.rsplit("[",n=1,expand=True)
# "%]" is a literal suffix, not a regular expression
bio_plants["Owner Ownership %"] = bio_plants["Owner Ownership %"].str.replace("%]","",regex=False)
# Trim surrounding whitespace from every string cell before matching on names
bio_plants = bio_plants.map(lambda x: x.strip() if isinstance(x, str) else x)
# Attach the entity ID by matching the owner name on the entity "Full Name"
bio_df = pd.merge(bio_plants,entities.rename(columns={"Full Name":"Owner"})[["Owner","ID"]],on="Owner",how="left")

In [221]:
# Rows whose owner is the placeholder "other" carry no identified owner and are dropped
bio_df = bio_df.loc[bio_df["Owner"].ne("other")]

In [222]:
# Look if there are owners that didn't match -> an empty series means all owners matched
bio_df[bio_df["ID"].isnull()]["Owner"].value_counts()

Series([], Name: count, dtype: int64)

## Find all entity nodes and relationships for the Bioenergy Subgraph

In [223]:
#Finding all unique immediate owners in bioenergy network
bio_immediate_owners = bio_df["ID"].unique().tolist()

# Collect the ownership connections relevant to the bioenergy subgraph: one
# frame per immediate owner, concatenated once at the end. Empty frames are left
# out so that pandas never has to concatenate empty entries.
bio_relationship_frames = [find_relevent_entities_in_tree(owner,entity_owners=entity_owners) for owner in bio_immediate_owners]
bio_relationship_frames = [frame for frame in bio_relationship_frames if not frame.empty]

if bio_relationship_frames:
    bio_entity_relationships_df = pd.concat(bio_relationship_frames,axis=0)
else:
    # No connection found at all: keep an empty frame with the expected columns
    bio_entity_relationships_df = pd.DataFrame(columns=entity_owners.columns.tolist())


#Remove duplicate rows
bio_entity_relationships_df = bio_entity_relationships_df.drop_duplicates()

bio_entities = bio_entity_relationships_df["company_id"].to_list() + bio_entity_relationships_df["owner_id"].to_list()

#Remove duplicates
bio_entities = list(set(bio_entities))

bio_entities_df = entities[entities["ID"].isin(bio_entities)].reset_index().drop(columns={"index"})


In [224]:
bio_df

Unnamed: 0,Date last researched,Country,Project name,Unit name,Project name in local language / script,Other name(s),Capacity (MW),Fuel,Operating status,Is conversion?,...,State/Province,Subregion,Region,GEM location ID,GEM phase ID,Other IDs (location),Other IDs (unit/phase),Wiki URL,Owner Ownership %,ID
0,6/11/24,United Kingdom,3Rs Walsall power station,1,,Walsall EFW,44.0,bioenergy: refuse (municipal and industrial wa...,pre-construction,,...,England,Northern Europe,Europe,L100000201233,G100000201467,REPD: 12292,,https://gem.wiki/3Rs_Walsall_power_station,100,E100000132532
1,10/3/22,Finland,Äänekoski Bioproduct Mill power station,1,,Metsa Aanekoski (Sellu),260.0,"bioenergy: wood & other biomass (solids), bioe...",operating,,...,,Northern Europe,Europe,L100000200209,G100000200235,ENTSO-E: 44W-T-YT-000017B,,https://gem.wiki/Äänekoski_Bioproduct_Mill_pow...,100,E100001000052
2,6/26/24,Egypt,Abu Rawash power station,1,,ReNergy Abu Rawash,30.0,bioenergy: refuse (municipal and industrial wa...,pre-construction,,...,Giza Governorate,Northern Africa,Africa,L100001017968,G100001019958,,,https://www.gem.wiki/Abu_Rawash,,E100001016778
3,5/2/23,Sweden,Åbyverket power station,G3,,,100.0,"bioenergy: wood & other biomass (solids), foss...",operating,,...,,Northern Europe,Europe,L100000201212,G100000201440,ENTSO-E: 46WPU0000000063L,,https://gem.wiki/Åbyverket_power_station,100,E100000132631
4,10/14/22,Italy,Acerra power station,1,,,68.0,bioenergy: biodiesel,operating,,...,Campania,Southern Europe,Europe,L100000200210,G100000200236,,,https://gem.wiki/Acerra_power_station,100,E100000132666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,5/5/23,China,Zhejiang Yuhang Jiufeng power station,2,杭州九峰垃圾焚烧发电项目,,35.0,bioenergy: refuse (municipal and industrial wa...,operating,,...,Zhejiang,Eastern Asia,Asia,L100000200172,G100000200193,,,https://gem.wiki/Zhejiang_Yuhang_Jiufeng_power...,70,E100000130456
2496,5/5/23,China,Zhejiang Yuhang Jiufeng power station,2,杭州九峰垃圾焚烧发电项目,,35.0,bioenergy: refuse (municipal and industrial wa...,operating,,...,Zhejiang,Eastern Asia,Asia,L100000200172,G100000200193,,,https://gem.wiki/Zhejiang_Yuhang_Jiufeng_power...,20,E100000129695
2497,5/5/23,China,Zhejiang Yuhang Jiufeng power station,2,杭州九峰垃圾焚烧发电项目,,35.0,bioenergy: refuse (municipal and industrial wa...,operating,,...,Zhejiang,Eastern Asia,Asia,L100000200172,G100000200193,,,https://gem.wiki/Zhejiang_Yuhang_Jiufeng_power...,10,E100000129696
2498,7/2/24,China,Zibo Hongda power station,"Unit 3, timepoint 2",淄博宏达热电,,50.0,bioenergy: agricultural waste (solids),operating,,...,Shandong,Eastern Asia,Asia,L100000101441,G100001018667,,,https://www.gem.wiki/Zibo_Hongda_power_station,100,E100000122865


In [225]:
bio_df["Owner Ownership %"] = bio_df["Owner Ownership %"].astype(float)

# db_energy_asset_exposure multiplies the capacity by `share` directly and fills
# a missing share with (1 - claimed shares), i.e. it works with fractions. The
# percentages are therefore scaled to 0-1 on the frame handed to it; bio_df
# itself keeps the percentage values.
bio_exposure = bio_df.rename(columns={"Owner Ownership %":"share","GEM phase ID":"Unit ID","ID":"company_id","Operating status":"Status"})
bio_exposure["share"] = bio_exposure["share"] / 100
bio_entity_capacities = db_energy_asset_exposure(bio_exposure,"bioenergy")

bio_entities_df = pd.merge(bio_entities_df,bio_entity_capacities.rename(columns={"sum":"bioenergy plant capacity"})[["ID","bioenergy plant capacity"]],on="ID",how="left")

# Coal Mines

##  Coal Mine nodes and relationships

In [226]:
coal_mines = pd.read_excel("Data/Global-Coal-Mine-Tracker-April-2024.xlsx",sheet_name="Global Coal Mine Tracker (Non-C")
coal_mine_crosswalk = pd.read_excel("Data/Global Coal Mine Tracker owner comparison 2024-03-15_0909.xlsx",sheet_name="Crosswalk")


#Remove non-coal mine entities from crosswalk
coal_mine_crosswalk = coal_mine_crosswalk[coal_mine_crosswalk['source'].notnull()].reset_index()

#Remove Chinese coal mines 
coal_mines = coal_mines[coal_mines["Country"] != "China"].copy()
# A missing AKA contributes an empty string rather than the text "nan", so no
# suffix has to be stripped from the combined name afterwards (stripping the
# characters "n"/"a" would also eat the end of real names such as "...Savanna")
coal_mines["Full Mine Name"] = coal_mines["Mine Name"].astype(str) + " " + coal_mines["Mine Name AKAs"].fillna("").astype(str)

#Merge and data cleaning
coal_mines = pd.merge(coal_mines[["Owners","Capacity (Mtpa)","GEM Mine ID","Status","Owner Share","Full Mine Name","Status Detail","Project Type","Project Phase","Country"]],coal_mine_crosswalk.rename(columns={"name + LET":"Owners"}),on="Owners",how="left")

# Number of rows whose owner is one of the two "no owner" placeholders removed below
num_missing_coal_mine_owners = int(coal_mines["Owners"].isin(["unknown","-"]).sum())

#Remove coal mines without owners
coal_mines = coal_mines[~coal_mines["Owners"].isin(["unknown","-"])].copy()

# Replace the non-numeric capacity placeholders. The result is assigned back to
# the column: an inplace replace on a column selection is chained assignment and
# is not guaranteed to reach the frame.
coal_mines['Capacity (Mtpa)'] = coal_mines['Capacity (Mtpa)'].replace({'*': '0', '-': '0', '0.15-0.18': '0.165'})
coal_mines = coal_mines.rename(columns={"Owners":"Owner","GEM Mine ID":"Unit ID","Owner Share":"share","id":"company_id"})

# Trim surrounding whitespace from every string cell (also removes the trailing
# space of "Full Mine Name" when there is no AKA)
coal_mines = coal_mines.map(lambda x: x.strip() if isinstance(x, str) else x)

#Keep only operating and proposed coal mines
coal_mines = coal_mines[coal_mines["Status"].isin(["Operating","Proposed"])]


## Find all entity nodes and relationships for the Coal Mine Subgraph

In [227]:
#Finding all unique immediate owners in coal mine network
coal_mine_immediate_owners = coal_mines["company_id"].unique().tolist()

# Collect the ownership connections relevant to the coal mine subgraph: one
# frame per immediate owner, concatenated once at the end. Empty frames are left
# out so that pandas never has to concatenate empty entries.
coal_mine_relationship_frames = [find_relevent_entities_in_tree(owner,entity_owners=entity_owners) for owner in coal_mine_immediate_owners]
coal_mine_relationship_frames = [frame for frame in coal_mine_relationship_frames if not frame.empty]

if coal_mine_relationship_frames:
    coal_mine_entity_relationships_df = pd.concat(coal_mine_relationship_frames,axis=0)
else:
    # No connection found at all: keep an empty frame with the expected columns
    coal_mine_entity_relationships_df = pd.DataFrame(columns=entity_owners.columns.tolist())


#Remove duplicate rows
coal_mine_entity_relationships_df = coal_mine_entity_relationships_df.drop_duplicates()

coal_mine_entities = coal_mine_entity_relationships_df["company_id"].to_list() + coal_mine_entity_relationships_df["owner_id"].to_list()

#Remove duplicates
coal_mine_entities = list(set(coal_mine_entities))

coal_mine_entities_df = entities[entities["ID"].isin(coal_mine_entities)].reset_index().drop(columns={"index"})


In [228]:
# The exposure calculation does arithmetic on `share`, so make sure it is numeric
# NOTE(review): the unit of "Owner Share" (fraction or percent) is not visible here - confirm
coal_mines["share"] = pd.Series(coal_mines["share"]).astype(float)
coal_mine_entity_capacities = db_energy_asset_exposure(coal_mines,"coal mine")

# Add each entity's total coal mine capacity to its attributes
coal_mine_entities_df = coal_mine_entities_df.merge(coal_mine_entity_capacities.rename(columns={"sum":"coal mine capacity"})[["ID","coal mine capacity"]],on="ID",how="left")

# Steel Plants

In [229]:
#steel_emissions = steel_emissions[steel_emissions["gas"] == "co2"]
#steel_emissions = steel_emissions.groupby(["source_id","source_name"])["emissions_quantity"].sum()
#steel_emissions = steel_emissions.to_frame().reset_index()

In [230]:
#steel_w_emissions = pd.merge(steel_plants,steel_emissions.rename(columns={"source_name":"Plant name (English)"})[["Plant name (English)","emissions_quantity","source_id"]],on="Plant name (English)",how="left")

In [231]:
#steel_w_emissions.to_excel("crosswalk_between_steel_and_emissions.xlsx",index=False)

In [232]:
#steel_w_emissions.info()

In [233]:
#steel_w_emissions[steel_w_emissions["Parent GEM ID"].str.contains("E100000001319")][["source_id","Plant name (English)","Country","Owner","Owner GEM ID","Parent","Parent GEM ID","emissions_quantity"]]

## Steel Plant Nodes and Relationships

In [234]:
#Remove steel plants without owners
steel_plants = steel_plants[steel_plants["Owner"] != "unknown"].copy()
# ">0" is a non-numeric placeholder and is counted as 0. The result is assigned
# back to the column: an inplace replace on a column selection is chained
# assignment and is not guaranteed to reach the frame.
steel_plants['Nominal crude steel capacity (ttpa)'] = steel_plants['Nominal crude steel capacity (ttpa)'].replace('>0', '0')

## Find all entity nodes and relationships for the Steel Plant Subgraph

In [235]:
#Finding all unique immediate owners in steel plant network
steel_plant_immediate_owners = steel_plants["Owner GEM ID"].unique().tolist()

# Collect the ownership connections relevant to the steel plant subgraph: one
# frame per immediate owner, concatenated once at the end. Empty frames are left
# out so that pandas never has to concatenate empty entries.
steel_plant_relationship_frames = [find_relevent_entities_in_tree(owner,entity_owners=entity_owners) for owner in steel_plant_immediate_owners]
steel_plant_relationship_frames = [frame for frame in steel_plant_relationship_frames if not frame.empty]

if steel_plant_relationship_frames:
    steel_plant_entity_relationships_df = pd.concat(steel_plant_relationship_frames,axis=0)
else:
    # No connection found at all: keep an empty frame with the expected columns
    steel_plant_entity_relationships_df = pd.DataFrame(columns=entity_owners.columns.tolist())


#Remove duplicate rows
steel_plant_entity_relationships_df = steel_plant_entity_relationships_df.drop_duplicates()

steel_plant_entities = steel_plant_entity_relationships_df["company_id"].to_list() + steel_plant_entity_relationships_df["owner_id"].to_list()

#Remove duplicates
steel_plant_entities = list(set(steel_plant_entities))

steel_plant_entities_df = entities[entities["ID"].isin(steel_plant_entities)].reset_index().drop(columns={"index"})

# Every steel plant row is given a share of 100 for its listed owner
steel_plants["share"] = "100"


In [236]:
steel_plants

Unnamed: 0,Plant ID,Plant name (English),Plant name (other language),Other plant names (English),Other plant names (other language),Owner,Owner (other language),Owner GEM ID,Owner PermID,SOE Status,...,ISO 14001,ISO 50001,ResponsibleSteel Certification,Main production process,Main production equipment,Detailed production equipment,Power source,Iron ore source,Met coal source,share
0,P100000120882,Aba Iron and Steel Payas plant,ABA DEMİR ÇELİK (Turkish),"EEY Iron and Steel, Nursan Steek Payas Plant (...",,ABA Çelik Demir AŞ,,E100000131190,unknown,,...,,,,electric,EAF,1 EAF (110-tonne),,,,100
1,P100000120753,Abba Steel Ohangwena steel plant,,Groot Suisse Oshana plant,,Abba Steel Ltd.,,E100001012072,unknown,,...,,,,electric,EAF,EAF (# unknown),,,,100
2,P100000120802,Abinsk Electric Steel Works,Абинский ЭлектроМеталлургический завод (Russian),AEMZ; AESW; AEMK; ASW,,Abinski Elektrometallurgicheski Zavod OOO,,E100000130999,5039667129,,...,2022,,,electric,EAF,1 EAF (130-tonne),unknown,,unknown,100
3,P100000120020,Abul Khair Steel Sitakunda plant,,AKS Sitakund; AKS Long Steel; AKS Melting,,Abul Khair Steel Ltd,,E100000131068,5074007077,,...,,,,electric,EAF,1 EAF,unknown,,,100
4,P100000120620,Acciaierie d'Italia Taranto steel plant,,ILVA Taranto steel plant (predecessor); ILVA S...,,Acciaierie d'italia SpA,,E100001010116,5067495106,Partial,...,2022,2022,,integrated (BF),"BF, BOF",coking plant; sinter plant; 4 BOF,,,,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1843,P100000120403,Zunyi Changling Special Steel Co Ltd,遵义长岭特殊钢有限公司 110电炉,,,"Zunyi Changling Special Steel Co., Ltd.",遵义长岭特殊钢有限公司,E100000127332,unknown,,...,,,,electric,EAF,1 EAF (1x110-tonne),Waste Heat Power Generation,,,100
1844,P100000120403,Zunyi Changling Special Steel Co Ltd,遵义长岭特殊钢有限公司 110电炉,,,"Zunyi Changling Special Steel Co., Ltd.",遵义长岭特殊钢有限公司,E100000127332,unknown,,...,,,,electric,EAF,1 EAF (1x110-tonne),unknown,,,100
1845,P100000120359,Zunyi Fuxin Special Steel Equipment Manufactur...,遵义福鑫特殊钢装备制造有限公司 75*2电炉,,,Zunyi Fuxin Special Steel Equipment Manufactur...,遵义福鑫特殊钢装备制造有限公司,E100000126901,unknown,,...,Yes,unknown,,electric,EAF,2 EAF (2x75-tonne),State Grid,,,100
1846,P100000120359,Zunyi Fuxin Special Steel Equipment Manufactur...,遵义福鑫特殊钢装备制造有限公司 100电炉,,,Zunyi Fuxin Special Steel Equipment Manufactur...,遵义福鑫特殊钢装备制造有限公司,E100000126901,unknown,,...,Yes,unknown,,electric,EAF,"2 EAF (1x100-tonne, 1x50-tonne)",State Grid,,,100


In [237]:
steel_plants["share"] = steel_plants["share"].astype(float)

# db_energy_asset_exposure multiplies the capacity by `share` directly and fills
# a missing share with (1 - claimed shares), i.e. it works with fractions. The
# percentage is therefore scaled to 0-1 on the frame handed to it; steel_plants
# itself keeps the percentage value.
steel_exposure = steel_plants.rename(columns={"Capacity operating status":"Status","Owner GEM ID":"company_id"})
steel_exposure["share"] = steel_exposure["share"] / 100
steel_entity_capacities = db_energy_asset_exposure(steel_exposure,"steel")

steel_plant_entities_df = pd.merge(steel_plant_entities_df,steel_entity_capacities.rename(columns={"sum":"steel plant capacity"})[["ID","steel plant capacity"]],on="ID",how="left")

# Loading into CSV for use in Neo4J upload

## Energy Project Nodes

In [238]:
# Node name = plant name + unit name; the type label and a "Country" column
# bring the frame in line with the other energy project frames
gas_plants = gas_plants.assign(
    Energy_Project_Node_Name=gas_plants["Plant name"] + " " + gas_plants["Unit name"],
    Energy_Project_Type="Gas or Oil Plant",
).rename(columns={"Country/Area":"Country"})

In [239]:
# Node name = plant name + unit name, plus the type label of the node
coal_plants = coal_plants.assign(
    Energy_Project_Node_Name=coal_plants["Plant name"] + " " + coal_plants["Unit name"],
    Energy_Project_Type="Coal Plant",
)

In [240]:
# Add the type label and rename the status and name columns to the shared node column names
bio_plants = bio_plants.assign(Energy_Project_Type="Bioenergy Plant").rename(columns={"Operating status":"Status","Project name":"Energy_Project_Node_Name"})

In [241]:
# Add the type label and use the full mine name as the node name
coal_mines = coal_mines.assign(Energy_Project_Type="Coal Mine").rename(columns={"Full Mine Name":"Energy_Project_Node_Name"})

In [242]:
# Add the type label and rename the name and status columns to the shared node column names
steel_plants = steel_plants.assign(Energy_Project_Type="Steel Plant").rename(columns={"Plant name (English)":"Energy_Project_Node_Name","Capacity operating status":"Status"})

In [243]:
# Stack the node attributes of all project types; each type keeps its own capacity column
energy_project_nodes = pd.concat([
    gas_plants[["Energy_Project_Node_Name","Energy_Project_Type","Capacity (MW)","Status","Country"]],
    coal_plants[["Energy_Project_Node_Name","Energy_Project_Type","Capacity (MW)","Status","Country"]],
    bio_plants[["Energy_Project_Node_Name","Energy_Project_Type","Capacity (MW)","Status","Country"]],
    coal_mines[["Energy_Project_Node_Name","Energy_Project_Type","Capacity (Mtpa)","Status","Country"]],
    steel_plants[["Energy_Project_Node_Name","Energy_Project_Type","Nominal crude steel capacity (ttpa)","Status","Country"]],
],axis=0)

# The plant frames hold one row per owner, so a co-owned unit is repeated with
# identical attributes; keep a single row per node
energy_project_nodes = energy_project_nodes.drop_duplicates()
energy_project_nodes.to_csv("Data/energy_project_nodes.csv",index=False)

## Entity Nodes

In [244]:
# Stack the entity attributes of every subgraph; each frame brings its own capacity column
entity_nodes = pd.concat([
    gas_entities_df[["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country","gas capacity"]],
    coal_entities_df[["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country","coal plant capacity"]],
    bio_entities_df[["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country","bioenergy plant capacity"]],
    coal_mine_entities_df[["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country","coal mine capacity"]],
    steel_plant_entities_df[["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country","steel plant capacity"]],
],axis=0)

#entity_nodes = entity_nodes.drop_duplicates(subset="ID", keep="first")

# Placeholder names stand for entities that aren't actually linked, so their rows are removed
entity_nodes = entity_nodes[~entity_nodes.Name.isin([
    "small shareholder(s)",
    "unknown",
    "other",
    "natural person(s)",
    "Co-investment by natural persons",
])]

In [245]:
# A missing capacity means the entity has no exposure of that type: count it as 0
entity_nodes = entity_nodes.fillna({
    'gas capacity': 0,
    'coal plant capacity': 0,
    'bioenergy plant capacity': 0,
    'coal mine capacity': 0,
    'steel plant capacity': 0,
})

In [246]:
# Collapse the per-subgraph rows into one row per entity, summing each capacity
# column. dropna=False keeps entities that have a missing value in one of the
# key columns (e.g. no "Legal Entity Type"); by default groupby drops such rows.
entity_nodes = entity_nodes.groupby(["ID","Name","Entity Type","Legal Entity Type","PubliclyListed","Country"],dropna=False)[["gas capacity","coal plant capacity","bioenergy plant capacity","coal mine capacity","steel plant capacity"]].sum().reset_index()

In [247]:
# Write the entity nodes without the dataframe index
entity_nodes.to_csv(path_or_buf="Data/entity_nodes.csv",index=False)

## Relationships

In [248]:
# Every edge frame gets the columns "from", "to" and "share".
# Entity -> owner edges: company_id becomes "from", owner_id becomes "to".
# Project -> owner edges: "from" is the project node name, "to" the owner's entity ID.

gas_entity_relationships_df = gas_entity_relationships_df.rename(columns={"company_id":"from","owner_id":"to"})
gas_df = gas_df.assign(**{"from": gas_df["Plant name"] + " " + gas_df["Unit name"]}).rename(columns={"ID":"to","Owner Ownership %":"share"})

coal_entity_relationships_df = coal_entity_relationships_df.rename(columns={"company_id":"from","owner_id":"to"})
coal_df = coal_df.assign(**{"from": coal_df["Plant name"] + " " + coal_df["Unit name"]}).rename(columns={"ID":"to","Owner Ownership %":"share"})

bio_entity_relationships_df = bio_entity_relationships_df.rename(columns={"company_id":"from","owner_id":"to"})
bio_df = bio_df.assign(**{"from": bio_df["Project name"]}).rename(columns={"ID":"to","Owner Ownership %":"share"})

coal_mine_entity_relationships_df = coal_mine_entity_relationships_df.rename(columns={"company_id":"from","owner_id":"to"})
coal_mines = coal_mines.assign(**{"from": coal_mines["Energy_Project_Node_Name"]}).rename(columns={"company_id":"to"})

steel_plant_entity_relationships_df = steel_plant_entity_relationships_df.rename(columns={"company_id":"from","owner_id":"to"})
steel_plants = steel_plants.assign(**{"from": steel_plants["Energy_Project_Node_Name"]}).rename(columns={"Owner GEM ID":"to"})

In [249]:
# Stack the three edge columns of every entity-level and project-level frame and renumber the rows from 0
relationships = pd.concat(
    [
        edges[["from","to","share"]]
        for edges in (
            gas_entity_relationships_df, gas_df,
            coal_entity_relationships_df, coal_df,
            bio_entity_relationships_df, bio_df,
            coal_mine_entity_relationships_df, coal_mines,
            steel_plant_entity_relationships_df, steel_plants,
        )
    ],
    axis=0,
).reset_index(drop=True)

In [250]:
# The same edge can come from more than one subgraph; keep its first occurrence
relationships = relationships.drop_duplicates(keep="first")

In [251]:
# Write the edges without the dataframe index
relationships.to_csv(path_or_buf="Data/relationships.csv",index=False)