In [118]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

# Production and emission data for the oil and gas on the Norwegian Continental Shelf


This notebook serves to gather all relevant production and emission data for the oil and gas industry on the Norwegian Continental Shelf. The data is gathered from the Norwegian Petroleum Directorate (NPD) and the Norwegian Environment Agency (NEA). The data is then processed and stored in a structured format for further analysis.

The data is gathered from the following sources:

- [NPD](https://factpages.npd.no/factpages/Default.aspx?culture=en)
- [IEA](https://www.iea.org/)

General information about the Norwegian oil industry can be found on [Norsk Petroleum](https://www.norskpetroleum.no/en/).


## Table of Contents:

1. [Importing data](#Importing-and-preparing-data)
   1. [Production](#Production)
   2. [Operators](#Operators)
   3. [Licenses](#Licenses)
   4. [Investments](#Investments)
   5. [Future Investments](#Future-Investments)
   6. [Emissions](#Emissions)
2. [Data analysis](#Data-analysis)
   1. [Production](#Production)
   2. [Emissions](#Emissions)
   3. [Operators](#Operators)
   4. [Licenses](#Licenses)
3. [Data manipulation](#Data-manipulation)
4. [Data visualization](#Data-visualization)


## Data Importing and Preparation


### Production


Attribute information can be found at the [SODIR Factpages](https://factpages.sodir.no/en/field/Attributes) website.


In [119]:
def fetch_dataframe(url, sep=';', filetype='csv'):
	# Difi hotell uses comma separated values
	if filetype == 'csv':
		df = pd.read_csv(url, sep=sep)
	elif filetype == 'excel':
		df = pd.read_excel(url)
	return df


In [120]:
# ------ Legacy Data Sources ------
#production_url = 'https://hotell.difi.no/download/npd/field/production-monthly-by-field'

# ------ New Data Sources ------
production_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/field_production_monthly&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
production_monthly_df = fetch_dataframe(production_url, sep=',')

In [121]:
production_monthly_df

Unnamed: 0,prfInformationCarrier,prfYear,prfMonth,prfPrdOilNetMillSm3,prfPrdGasNetBillSm3,prfPrdNGLNetMillSm3,prfPrdCondensateNetMillSm3,prfPrdOeNetMillSm3,prfPrdProducedWaterInFieldMillSm3,prfNpdidInformationCarrier
0,16/1-12 Troldhaugen,2021,9,0.0,0.00173,0.0,0.00000,0.00173,0.00719,17196400
1,16/1-12 Troldhaugen,2021,10,0.0,0.00250,0.0,0.00000,0.00250,0.00912,17196400
2,16/1-12 Troldhaugen,2021,11,0.0,0.00199,0.0,0.00000,0.00199,0.01186,17196400
3,16/1-12 Troldhaugen,2021,12,0.0,0.00104,0.0,0.00000,0.00104,0.00418,17196400
4,16/1-12 Troldhaugen,2022,1,0.0,0.00062,0.0,0.00000,0.00062,0.00926,17196400
...,...,...,...,...,...,...,...,...,...,...
25422,AASTA HANSTEEN,2023,10,0.0,0.73437,0.0,0.01728,0.75165,0.00196,23395946
25423,AASTA HANSTEEN,2023,11,0.0,0.69432,0.0,0.01636,0.71068,0.00210,23395946
25424,AASTA HANSTEEN,2023,12,0.0,0.77522,0.0,0.01681,0.79203,0.00203,23395946
25425,AASTA HANSTEEN,2024,1,0.0,0.72317,0.0,0.01580,0.73897,0.00194,23395946


### Operators


In [122]:
operators_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/field_operator_hst&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
operators_df = fetch_dataframe(operators_url)

In [123]:

display(operators_df)


Unnamed: 0,"fldName,cmpLongName,fldOperatorFrom,fldOperatorTo,fldNpdidField,cmpNpdidCompany,fldOperatorDateUpdated,datesyncNPD"
0,"ALBUSKJELL,Phillips Petroleum Company Norway,2..."
1,"ALBUSKJELL,ConocoPhillips Norge,23.10.2002,31...."
2,"ALBUSKJELL,Phillips Petroleum Norsk AS,01.04.2..."
3,"ALBUSKJELL,ConocoPhillips Skandinavia AS,06.05..."
4,"ALVE,Statoil ASA (old),16.03.2007,30.09.2007,4..."
...,...
553,"ÅSGARD,StatoilHydro Petroleum AS,01.01.2009,31..."
554,"ÅSGARD,Statoil Petroleum AS,01.11.2009,15.05.2..."
555,"ÅSGARD,Equinor Energy AS,16.05.2018,,43765,320..."
556,"AASTA HANSTEEN,Statoil Petroleum AS,07.06.2013..."


### Licenses


In [124]:
licensees_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/field_licensee_hst&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
licensees_df = fetch_dataframe(licensees_url)

In [125]:
display(licensees_df)

Unnamed: 0,"fldName,fldOwnerName,fldOwnerKind,fldOwnerFrom,fldOwnerTo,fldLicenseeFrom,fldLicenseeTo,cmpLongName,fldCompanyShare,fldSdfiShare,fldNpdidField,cmpNpdidCompany,fldLicenseeDateUpdated,DatesyncNPD"
0,"ALBUSKJELL,018,PRODUCTION LICENSE,25.04.1975,,..."
1,"ALBUSKJELL,018,PRODUCTION LICENSE,25.04.1975,,..."
2,"ALBUSKJELL,018,PRODUCTION LICENSE,25.04.1975,,..."
3,"ALBUSKJELL,018,PRODUCTION LICENSE,25.04.1975,,..."
4,"ALBUSKJELL,018,PRODUCTION LICENSE,25.04.1975,,..."
...,...
9429,"AASTA HANSTEEN,218,PRODUCTION LICENSE,07.06.20..."
9430,"AASTA HANSTEEN,218,PRODUCTION LICENSE,07.06.20..."
9431,"AASTA HANSTEEN,218,PRODUCTION LICENSE,07.06.20..."
9432,"AASTA HANSTEEN,218,PRODUCTION LICENSE,07.06.20..."


### Investments


In [126]:
investments_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/field_investment_yearly&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
investments_df = fetch_dataframe(investments_url)

In [127]:
display(investments_df)

Unnamed: 0,"prfInformationCarrier,prfYear,prfInvestmentsMillNOK,prfNpdidInformationCarrier,dateSyncNPD"
0,"ALBUSKJELL,1974,116,43437,21.05.2024"
1,"ALBUSKJELL,1975,319,43437,21.05.2024"
2,"ALBUSKJELL,1976,879,43437,21.05.2024"
3,"ALBUSKJELL,1977,398,43437,21.05.2024"
4,"ALBUSKJELL,1978,235,43437,21.05.2024"
...,...
3152,"AASTA HANSTEEN,2021,119,23395946,21.05.2024"
3153,"AASTA HANSTEEN,2022,298,23395946,21.05.2024"
3154,"AASTA HANSTEEN,2023,0,23395946,21.05.2024"
3155,"AASTA HANSTEEN,2024,0,23395946,21.05.2024"


### Future investments


In [128]:
future_investments_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/field_investment_expected&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
future_investments_df = fetch_dataframe(future_investments_url, sep=',')

In [129]:
display(future_investments_df)

Unnamed: 0,fldName,fldInvestmentExpected,fldInvExpFixYear,fldNpdidField
0,ALVE,1167,2023,4444332
1,ALVE NORD,6602,2023,42002483
2,ALVHEIM,14290,2023,2845712
3,BALDER,31306,2023,43562
4,BAUGE,74,2023,29446221
...,...,...,...,...
101,YME,972,2023,43807
102,ÆRFUGL NORD,29,2023,38542241
103,ØRN,6848,2023,42002484
104,ÅSGARD,14137,2023,43765


### Facilities (rigs)


#### 1. Fixed facilities (rigs)


In [130]:
fixed_facilities_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/facility_fixed&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
fixed_facilities_df = fetch_dataframe(fixed_facilities_url)

In [131]:
display(fixed_facilities_df)
display(fixed_facilities_df.fclKind.value_counts())

Unnamed: 0,"fclName,fclPhase,fclSurface,fclCurrentOperatorName,fclKind,fclBelongsToName,fclBelongsToKind,fclBelongsToS,fclStartupDate,fclGeodeticDatum,fclNsDeg,fclNsMin,fclNsSec,fclNsCode,fclEwDeg,fclEwMin,fclEwSec,fclEwCode,fclWaterDepth,fclFunctions,fclDesignLifetime,fclNationName,fclFactPageUrl,fclFactMapUrl,fclNpdidFacility,fclDateUpdated,datesyncNPD"
0,"1/2-1 IM Blane,IN SERVICE,N,Repsol Norge AS,SI..."
1,"1/2-1 PE Blane,IN SERVICE,N,Repsol Norge AS,SI..."
2,"1/2-1 PW Blane,IN SERVICE,N,Repsol Norge AS,SI..."
3,"10/1-CDP1,PARTLY REMOVED,Y,,CONCRETE STRUCTURE..."
4,"15/12-C Rev,SHUT DOWN,N,Repsol Norge AS,SINGLE..."
...,...
865,"AASTA HANSTEEN D,IN SERVICE,N,Equinor Energy A..."
866,"AASTA HANSTEEN E,IN SERVICE,N,Equinor Energy A..."
867,"AASTA HANSTEEN PLEM,IN SERVICE,N,Gassco AS,SUB..."
868,"AASTA HANSTEEN SPAR,IN SERVICE,Y,Equinor Energ..."


AttributeError: 'DataFrame' object has no attribute 'fclKind'

#### 2. Movable facilities (rigs)


In [None]:
movable_facilities_url = 'https://factpages.sodir.no/public?/Factpages/external/tableview/facility_moveable&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&IpAddress=not_used&CultureCode=en&rs:Format=CSV&Top100=false'
movable_facilities_df = fetch_dataframe(movable_facilities_url)

In [None]:
display(movable_facilities_df)

### Emissions


#### CO2 emissions


In [277]:
base_url = "https://github.com/percw/Norwegian_oil_gas_decarbonization/raw/main/data/raw_data/emission_and_production/"

emissions_co2_url = base_url + "Emissions_CO2.xlsx"
emissions_co2_df = fetch_dataframe(emissions_co2_url, filetype='excel')


In [278]:
# Setting row 0 as column names
emissions_co2_df.columns = emissions_co2_df.iloc[1]
emissions_co2_df = emissions_co2_df[2:]


In [279]:
display(emissions_co2_df)

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2022,180.655154,,,,,1000 tonn,993246298
3,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2023,188.26798,,,,,1000 tonn,993246298
4,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1997,761.336994,,,,,1000 tonn,893246592
5,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1998,418.303952,,,,,1000 tonn,893246592
6,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1999,347.05988,,,,,1000 tonn,893246592
...,...,...,...,...,...,...,...,...,...,...,...
1314,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2018,72.503572,,,,,1000 tonn,912731456
1315,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2019,177.647453,,,,,1000 tonn,912731456
1316,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2020,192.207951,,,,,1000 tonn,912731456
1317,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2021,191.436769,,,,,1000 tonn,912731456


#### Methane emissions


In [280]:
emissions_ch4_url = base_url + "Emissions_methane.xlsx"
emissions_ch4_df = fetch_dataframe(emissions_ch4_url, filetype='excel')

In [281]:
# Setting row 0 as column names
emissions_ch4_df.columns = emissions_ch4_df.iloc[1]
emissions_ch4_df = emissions_ch4_df[2:]

display(emissions_ch4_df)

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2022,36.071693,,,,,tonn,993246298
3,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2023,66.983955,,,,,tonn,993246298
4,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1997,383.058529,,,,,tonn,893246592
5,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1998,826.358898,,,,,tonn,893246592
6,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1999,1042.015537,,,,,tonn,893246592
...,...,...,...,...,...,...,...,...,...,...,...
1314,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2018,67.148045,,,,,tonn,912731456
1315,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2019,93.355221,,,,,tonn,912731456
1316,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2020,94.063341,,,,,tonn,912731456
1317,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2021,103.162276,,,,,tonn,912731456


#### NOX emissions


In [282]:
emissions_nox_url = base_url + "Emissions_NOX.xlsx"
emissions_nox_df = fetch_dataframe(emissions_nox_url, filetype='excel')

In [283]:
# Setting row 0 as column names
emissions_nox_df.columns = emissions_nox_df.iloc[1]
emissions_nox_df = emissions_nox_df[2:]

display(emissions_nox_df)

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2022,404.47072,,,,,tonn,993246298
3,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2023,306.38291,,,,,tonn,993246298
4,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1997,2944.992072,,,,,tonn,893246592
5,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1998,1792.114235,,,,,tonn,893246592
6,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1999,1412.344364,,,,,tonn,893246592
...,...,...,...,...,...,...,...,...,...,...,...
1314,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2018,988.44101,,,,,tonn,912731456
1315,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2019,202.908995,,,,,tonn,912731456
1316,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2020,147.469687,,,,,tonn,912731456
1317,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2021,138.530349,,,,,tonn,912731456


#### Oil spill emissions


In [284]:
emissions_oil_spill = base_url + "Emissions_oil.xlsx"
emissions_oil_spill_df = fetch_dataframe(emissions_oil_spill, filetype='excel')

In [285]:
# Setting row 0 as column names
emissions_oil_spill_df.columns = emissions_oil_spill_df.iloc[1]
emissions_oil_spill_df = emissions_oil_spill_df[2:]

In [286]:
emissions_oil_spill_df

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2022,,,16.327993,,,tonn,993246298
3,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2023,,,24.543974,,,tonn,993246298
4,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1997,,,1.967516,,,tonn,893246592
5,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1998,,,3.134315,,,tonn,893246592
6,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1999,,,11.709334,,,tonn,893246592
...,...,...,...,...,...,...,...,...,...,...,...
1156,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2018,,,0.183095,,,tonn,912731456
1157,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2019,,,0.771865,,,tonn,912731456
1158,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2020,,,1.465783,,,tonn,912731456
1159,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2021,,,0.397254,,,tonn,912731456


#### Emission to water


In [287]:
emissions_water = base_url + "Emissions_water.xlsx"
emissions_water_df = fetch_dataframe(emissions_water, filetype='excel')


In [288]:
# Setting row 0 as column names

emissions_water_df.columns = emissions_water_df.iloc[1]
emissions_water_df = emissions_water_df[2:]

In [289]:
emissions_water_df

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Statfjord (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2005,,,78650815.6,,320517,m³,993246794
3,Statfjord (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2006,,,67105468,,,m³,993246794
4,Statfjord (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2007,,,76170997,,,m³,993246794
5,Statfjord (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2008,,,65541633,,,m³,993246794
6,Statfjord (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2009,,,43587479,,,m³,993246794
...,...,...,...,...,...,...,...,...,...,...,...
1131,Volve (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2008,,,407519.95147,,2417769.99149,m³,993246875
1132,Volve (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2009,,,261309.21675,,4392197.6152,m³,993246875
1133,Volve (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2010,,,76908.31861,,4569394.4024,m³,993246875
1134,Volve (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2011,,,78509.7732,,3442446.4979,m³,993246875


## Data Cleaning


In [290]:
# Renaming columns from Norwegian to English

def clean_emissions_df(df, emissions_type, unit, water_or_air='luft'):
	if water_or_air == 'vann':
		df = df.rename(columns={'År': 'year', 'Anleggsnavn': 'field', f'Årlig utslipp til {water_or_air}': f'yearly_{emissions_type}_emissions_{unit}', 'Org.nr.': 'org_number', 'Årlig utslipp til undergrunn': f'yearly_subsea_{emissions_type}_emissions'})
		df = df[['field', 'year', f'yearly_{emissions_type}_emissions_{unit}', 'org_number', f'yearly_subsea_{emissions_type}_emissions']]
	
	else:
		df = df.rename(columns={'År': 'year', 'Anleggsnavn': 'field', f'Årlig utslipp til {water_or_air}': f'yearly_{emissions_type}_emissions_{unit}', 'Org.nr.': 'org_number'})
		df = df[['field', 'year', f'yearly_{emissions_type}_emissions_{unit}', 'org_number']]
	
	df[f'yearly_{emissions_type}_emissions_{unit}'] = pd.to_numeric(df[f'yearly_{emissions_type}_emissions_{unit}'], errors='coerce')
	df['year'] = pd.to_numeric(df['year'], errors='coerce')
	df['operator'] = df['field'].str.extract(r'\((.*?)\)')
	df['field'] = df['field'].str.replace(r"\(.*\)","")
	return df


In [291]:
emissions_co2_df = clean_emissions_to_air_df(emissions_co2_df, 'co2', '1000_tonnes')

  df['field'] = df['field'].str.replace(r"\(.*\)","")


In [292]:
emissions_co2_df

1,field,year,yearly_co2_emissions_1000_tonnes,org_number,operator
2,Grane,2022,180.655154,993246298,Equinor energy as
3,Grane,2023,188.267980,993246298,Equinor energy as
4,Norne,1997,761.336994,893246592,Equinor energy as
5,Norne,1998,418.303952,893246592,Equinor energy as
6,Norne,1999,347.059880,893246592,Equinor energy as
...,...,...,...,...,...
1314,Aasta hansteen,2018,72.503572,912731456,Equinor energy as
1315,Aasta hansteen,2019,177.647453,912731456,Equinor energy as
1316,Aasta hansteen,2020,192.207951,912731456,Equinor energy as
1317,Aasta hansteen,2021,191.436769,912731456,Equinor energy as


In [293]:
emissions_ch4_df

1,Anleggsnavn,Fylke,Kommune,År,Årlig utslipp til luft,Grunnlagsverdi for luft,Årlig utslipp til vann,Grunnlagsverdi for vann,Årlig utslipp til undergrunn,Enhet,Org.nr.
2,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2022,36.071693,,,,,tonn,993246298
3,Grane (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2023,66.983955,,,,,tonn,993246298
4,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1997,383.058529,,,,,tonn,893246592
5,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1998,826.358898,,,,,tonn,893246592
6,Norne (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,1999,1042.015537,,,,,tonn,893246592
...,...,...,...,...,...,...,...,...,...,...,...
1314,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2018,67.148045,,,,,tonn,912731456
1315,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2019,93.355221,,,,,tonn,912731456
1316,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2020,94.063341,,,,,tonn,912731456
1317,Aasta hansteen (Equinor energy as),Kontinentalsokkelen,Kontinentalsokkelen,2021,103.162276,,,,,tonn,912731456


In [294]:
# Methane emissions

emissions_ch4_df = clean_emissions_to_air_df(emissions_ch4_df, 'ch4', 'tons')

  df['field'] = df['field'].str.replace(r"\(.*\)","")


In [295]:
emissions_ch4_df

1,field,year,yearly_ch4_emissions_tons,org_number,operator
2,Grane,2022,36.071693,993246298,Equinor energy as
3,Grane,2023,66.983955,993246298,Equinor energy as
4,Norne,1997,383.058529,893246592,Equinor energy as
5,Norne,1998,826.358898,893246592,Equinor energy as
6,Norne,1999,1042.015537,893246592,Equinor energy as
...,...,...,...,...,...
1314,Aasta hansteen,2018,67.148045,912731456,Equinor energy as
1315,Aasta hansteen,2019,93.355221,912731456,Equinor energy as
1316,Aasta hansteen,2020,94.063341,912731456,Equinor energy as
1317,Aasta hansteen,2021,103.162276,912731456,Equinor energy as


In [296]:
# NOX emissions

emissions_nox_df = clean_emissions_df(emissions_nox_df, 'nox', 'tons')
emissions_nox_df

  df['field'] = df['field'].str.replace(r"\(.*\)","")


1,field,year,yearly_nox_emissions_tons,org_number,operator
2,Grane,2022,404.470720,993246298,Equinor energy as
3,Grane,2023,306.382910,993246298,Equinor energy as
4,Norne,1997,2944.992072,893246592,Equinor energy as
5,Norne,1998,1792.114235,893246592,Equinor energy as
6,Norne,1999,1412.344364,893246592,Equinor energy as
...,...,...,...,...,...
1314,Aasta hansteen,2018,988.441010,912731456,Equinor energy as
1315,Aasta hansteen,2019,202.908995,912731456,Equinor energy as
1316,Aasta hansteen,2020,147.469687,912731456,Equinor energy as
1317,Aasta hansteen,2021,138.530349,912731456,Equinor energy as


In [297]:
# Emissions to water

emissions_water_df = clean_emissions_df(emissions_water_df, 'water', 'm3', 'vann')
emissions_water_df

  df['field'] = df['field'].str.replace(r"\(.*\)","")


1,field,year,yearly_water_emissions_m3,org_number,yearly_subsea_water_emissions,operator
2,Statfjord,2005,7.865082e+07,993246794,320517,Equinor energy as
3,Statfjord,2006,6.710547e+07,993246794,,Equinor energy as
4,Statfjord,2007,7.617100e+07,993246794,,Equinor energy as
5,Statfjord,2008,6.554163e+07,993246794,,Equinor energy as
6,Statfjord,2009,4.358748e+07,993246794,,Equinor energy as
...,...,...,...,...,...,...
1131,Volve,2008,4.075200e+05,993246875,2417769.99149,Equinor energy as
1132,Volve,2009,2.613092e+05,993246875,4392197.6152,Equinor energy as
1133,Volve,2010,7.690832e+04,993246875,4569394.4024,Equinor energy as
1134,Volve,2011,7.850977e+04,993246875,3442446.4979,Equinor energy as


In [298]:
emissions_oil_spill_df = clean_emissions_df(emissions_oil_spill_df, 'oil_spill', 'tons', 'vann')
emissions_oil_spill_df

  df['field'] = df['field'].str.replace(r"\(.*\)","")


1,field,year,yearly_oil_spill_emissions_tons,org_number,yearly_subsea_oil_spill_emissions,operator
2,Grane,2022,16.327993,993246298,,Equinor energy as
3,Grane,2023,24.543974,993246298,,Equinor energy as
4,Norne,1997,1.967516,893246592,,Equinor energy as
5,Norne,1998,3.134315,893246592,,Equinor energy as
6,Norne,1999,11.709334,893246592,,Equinor energy as
...,...,...,...,...,...,...
1156,Aasta hansteen,2018,0.183095,912731456,,Equinor energy as
1157,Aasta hansteen,2019,0.771865,912731456,,Equinor energy as
1158,Aasta hansteen,2020,1.465783,912731456,,Equinor energy as
1159,Aasta hansteen,2021,0.397254,912731456,,Equinor energy as


In [321]:
# Removing yearly_subsea_oil_spill_emissions column since it has only NaN values

emissions_oil_spill_df = emissions_oil_spill_df.drop(columns='yearly_subsea_oil_spill_emissions')


In [322]:
# Converting field and operator to lower case strings

emissions_dfs = [emissions_co2_df, emissions_ch4_df, emissions_nox_df, emissions_water_df, emissions_oil_spill_df]

for df in emissions_dfs:
	df['field'] = df['field'].str.lower()
	df['operator'] = df['operator'].str.lower()
	# orgnumber to int64
	df['org_number'] = pd.to_numeric(df['org_number'], errors='coerce')



## Data Merging


In [323]:
# Checking the shape of all emission dataframes

for df in emissions_dfs:
	print(df.shape)

(1317, 5)
(1317, 5)
(1317, 5)
(1134, 6)
(1159, 5)


In [324]:
# Checking the first and last years of all emission dataframes

for df in emissions_dfs:
	print(df.year.min(), df.year.max())

1997 2023
1997 2023
1997 2023
1997 2023
1997 2023


In [325]:
# Creating function to print all the missing years between 1997 and 2023 for all dfs

def print_missing_years(df):
	missing_years = set(range(1997, 2023)).difference(df.year)
	print(missing_years)


for df in emissions_dfs:
	print_missing_years(df)

set()
set()
set()
set()
set()


In [326]:
# Checking for NaN values for all emissions dataframes
for df in emissions_dfs:
	print(df.isnull().sum())

# Checking for NA values for all emissions dataframes
for df in emissions_dfs:
	print(df.isna().sum())


1
field                               0
year                                0
yearly_co2_emissions_1000_tonnes    0
org_number                          0
operator                            0
dtype: int64
1
field                        0
year                         0
yearly_ch4_emissions_tons    0
org_number                   0
operator                     0
dtype: int64
1
field                        0
year                         0
yearly_nox_emissions_tons    0
org_number                   0
operator                     0
dtype: int64
1
field                              0
year                               0
yearly_water_emissions_m3         38
org_number                         0
yearly_subsea_water_emissions    614
operator                           0
dtype: int64
1
field                              0
year                               0
yearly_oil_spill_emissions_tons    0
org_number                         0
operator                           0
dtype: int64
1
field           

In [327]:
def check_unique_org_number(df):
    """
    Checks if each combination of operator and field has a unique org_number.

    Parameters:
    df (pd.DataFrame): DataFrame containing the columns 'field', 'operator', and 'org_number'.

    Returns:
    bool: True if each combination of operator and field has a unique org_number, False otherwise.
    """
    # Check for missing values in 'operator' and 'org_number'
    if df[['operator', 'org_number']].isnull().any().any():
        print("Error: There are missing values in 'operator' or 'org_number'.")
        return False

    # Check for duplicate combinations of 'operator' and 'field' with different 'org_number'
    duplicates = df.groupby(['operator', 'field'])['org_number'].nunique().reset_index()
    if any(duplicates['org_number'] > 1):
        print("Error: There are duplicate combinations of 'operator' and 'field' with different 'org_number'.")
        return False

    return True


In [328]:

for df in emissions_co2_df, emissions_ch4_df, emissions_nox_df, emissions_water_df, emissions_oil_spill_df:
    if check_unique_org_number(df):
        print("Each combination of operator and field has a unique org_number.")
    else:
        print("There are inconsistencies in the org_number assignment.")


Each combination of operator and field has a unique org_number.
Each combination of operator and field has a unique org_number.
Each combination of operator and field has a unique org_number.
Each combination of operator and field has a unique org_number.
Each combination of operator and field has a unique org_number.


In [329]:
display(emissions_ch4_df)

1,field,year,yearly_ch4_emissions_tons,org_number,operator
2,grane,2022,36.071693,993246298,equinor energy as
3,grane,2023,66.983955,993246298,equinor energy as
4,norne,1997,383.058529,893246592,equinor energy as
5,norne,1998,826.358898,893246592,equinor energy as
6,norne,1999,1042.015537,893246592,equinor energy as
...,...,...,...,...,...
1314,aasta hansteen,2018,67.148045,912731456,equinor energy as
1315,aasta hansteen,2019,93.355221,912731456,equinor energy as
1316,aasta hansteen,2020,94.063341,912731456,equinor energy as
1317,aasta hansteen,2021,103.162276,912731456,equinor energy as


In [330]:
def check_operator_consistency(dfs, on_columns):
    """
    Checks if the same combination of field, year, and org_number corresponds to the same operator across all DataFrames.

    Parameters:
    dfs (list of pd.DataFrame): List of DataFrames to check.
    on_columns (list of str): List of column names to check for consistency.

    Returns:
    bool: True if the consistency check passes, False otherwise.
    """

    # Combine all unique combinations of on_columns and operator
    combined = pd.concat([df[on_columns + ['operator']].dropna().drop_duplicates() for df in dfs])
    
    # Group by on_columns and check for unique operator values
    consistency_check = combined.groupby(on_columns)['operator'].nunique().reset_index()
    
    # If any group has more than one unique operator, consistency check fails
    if any(consistency_check['operator'] > 1):
        print("Inconsistency found in operator assignments:")
        print(consistency_check[consistency_check['operator'] > 1])
        return False
    return True

In [331]:
dfs_to_check_operator = [emissions_co2_df, emissions_nox_df, emissions_water_df, emissions_oil_spill_df]
on_columns = ['year', 'field', 'org_number']

check_operator_consistency(dfs_to_check_operator, on_columns)

True

In [332]:
from functools import reduce

def merge_emission_data(dfs, on_columns, operator_df):
    """
    Merges a list of DataFrames on specified columns, keeping the operator from a specific DataFrame.

    Parameters:
    dfs (list of pd.DataFrame): List of DataFrames to merge.
    on_columns (list of str): List of column names to merge on.
    operator_df (pd.DataFrame): DataFrame to take the operator column from.

    Returns:
    pd.DataFrame: The merged DataFrame with the operator column from operator_df.
    """
    # Remove the operator column from all DataFrames except the operator_df
    dfs_no_operator = [df.drop(columns=['operator'], errors='ignore') for df in dfs if not df.equals(operator_df)]

    # Merge the DataFrames sequentially
    merged_df = reduce(lambda left, right: pd.merge(left, right, on=on_columns, how='outer'), dfs_no_operator)
    
    # Add the operator column from the specified DataFrame
    merged_df = pd.merge(merged_df, operator_df[on_columns + ['operator']], on=on_columns, how='left')
    
    return merged_df

In [333]:
# Check operator consistency across all DataFrames
if check_operator_consistency(emissions_dfs, on_columns):
    # Merged DataFrame, keeping the operator from emissions_co2_df
    emissions_df = merge_emission_data(emissions_dfs, on_columns, emissions_co2_df)
    print(emissions_df)
else:
    print("Consistency check failed. Operators are not consistent across DataFrames.")

1         field  year  yearly_ch4_emissions_tons  org_number  \
0        grane   2022                  36.071693   993246298   
1        grane   2023                  66.983955   993246298   
2        norne   1997                 383.058529   893246592   
3        norne   1998                 826.358898   893246592   
4        norne   1999                1042.015537   893246592   
...         ...   ...                        ...         ...   
1337       urd   2009                        NaN   893246592   
1338    tambar   2004                        NaN   993302392   
1339   snøhvit   2010                        NaN   912732207   
1340  brynhild   2021                        NaN   912729761   
1341    svalin   2013                        NaN   993246298   

1     yearly_nox_emissions_tons  yearly_water_emissions_m3  \
0                    404.470720                 1896859.00   
1                    306.382910                 2161262.82   
2                   2944.992072              

In [334]:
display(emissions_df)

1,field,year,yearly_ch4_emissions_tons,org_number,yearly_nox_emissions_tons,yearly_water_emissions_m3,yearly_subsea_water_emissions,yearly_oil_spill_emissions_tons,operator
0,grane,2022,36.071693,993246298,404.470720,1896859.00,2235043.7,16.327993,equinor energy as
1,grane,2023,66.983955,993246298,306.382910,2161262.82,2770666.43,24.543974,equinor energy as
2,norne,1997,383.058529,893246592,2944.992072,343.00,,1.967516,equinor energy as
3,norne,1998,826.358898,893246592,1792.114235,68208.00,,3.134315,equinor energy as
4,norne,1999,1042.015537,893246592,1412.344364,280113.00,,11.709334,equinor energy as
...,...,...,...,...,...,...,...,...,...
1337,urd,2009,,893246592,,,,0.046750,
1338,tambar,2004,,993302392,,,,0.008500,
1339,snøhvit,2010,,912732207,,,,0.000170,
1340,brynhild,2021,,912729761,,,,0.025500,


In [335]:
# Counting all NaN values in the merged emissions dataframe

emissions_df.isna().sum()

1
field                                0
year                                 0
yearly_ch4_emissions_tons           25
org_number                           0
yearly_nox_emissions_tons           25
yearly_water_emissions_m3          246
yearly_subsea_water_emissions      822
yearly_oil_spill_emissions_tons    183
operator                            25
dtype: int64

In [336]:
# Deleting the rows that has NaN values for the columns operator, yearly_nox_emissions_tons, and yearly_ch4_emissions_tons

emissions_df = emissions_df.dropna(subset=['operator', 'yearly_nox_emissions_tons', 'yearly_ch4_emissions_tons'])

In [337]:
emissions_df.isna().sum()

1
field                                0
year                                 0
yearly_ch4_emissions_tons            0
org_number                           0
yearly_nox_emissions_tons            0
yearly_water_emissions_m3          238
yearly_subsea_water_emissions      798
yearly_oil_spill_emissions_tons    181
operator                             0
dtype: int64

In [345]:
emissions_df

1,field,year,yearly_ch4_emissions_tons,org_number,yearly_nox_emissions_tons,yearly_water_emissions_m3,yearly_subsea_water_emissions,yearly_oil_spill_emissions_tons,operator
0,grane,2022,36.071693,993246298,404.470720,1.896859e+06,2235043.7,16.327993,equinor energy as
1,grane,2023,66.983955,993246298,306.382910,2.161263e+06,2770666.43,24.543974,equinor energy as
2,norne,1997,383.058529,893246592,2944.992072,3.430000e+02,,1.967516,equinor energy as
3,norne,1998,826.358898,893246592,1792.114235,6.820800e+04,,3.134315,equinor energy as
4,norne,1999,1042.015537,893246592,1412.344364,2.801130e+05,,11.709334,equinor energy as
...,...,...,...,...,...,...,...,...,...
1312,aasta hansteen,2018,67.148045,912731456,988.441010,8.498236e+03,,0.183095,equinor energy as
1313,aasta hansteen,2019,93.355221,912731456,202.908995,1.959150e+04,,0.771865,equinor energy as
1314,aasta hansteen,2020,94.063341,912731456,147.469687,3.099277e+04,,1.465783,equinor energy as
1315,aasta hansteen,2021,103.162276,912731456,138.530349,2.496023e+04,,0.397254,equinor energy as
