In [652]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os

pd.set_option("display.max_columns", None)

# Oil and Gas Production and Emissions Data on the Norwegian Continental Shelf

## Part 2: Data Building

---

This notebook is part of a series of notebooks. The series consist of other following parts:

#### Part 1: [Data Collection](https://github.com/percw/Norwegian_oil_gas_decarbonization/blob/main/notebooks/01_data_building/01_production_and_emission_data_building.ipynb)

#### Part 3: [Data Processing](https://github.com/percw/Norwegian_oil_gas_decarbonization/blob/main/notebooks/03_data_processing/03_production_and_emission_data_processing.ipynb)

#### Part 4: [Data Modeling](https://github.com/percw/Norwegian_oil_gas_decarbonization/blob/main/notebooks/04_data_modeling/04_data_modelling.ipynb)

---


This notebook serves to clean all relevant production and emission data for the oil and gas industry on the Norwegian Continental Shelf.
The data was downloaded from it's original source early may 2024. For consistency reason, i'm working with a downloaded version saved on my GitHub under the folder `data/output/emissions_and_production`. If you want to download the data yourself, you can find it and the downloading process in the `01_data_building` folder and notebook.

The aim is to clean and merge the data in one single dataframe, which will be saved in the `data/output/emissions_and_production/cleaned/` folder, with the name `fields_prod_emissions_1997_2023.csv`.


## Table of Contents:

1. [Fetching](Fetching)
   1. [Production and field](#Production-and-field)
   2. [Emissions](#Emissions)
2. [Cleaning data](#Cleaning)

   1. [Emissions](#Emissions)
   2. [Production and field data](#Production-and-field-data)
   3. [Field status](#Field-status)
   4. [Processing fields](#Processing-fields)
   5. [Wellbores](#Wellbores)
   6. [Investments](#Investments)
   7. [Licensees](#Licensees)

3. [Data mergin](#merging)
4. [Output](#output)


## Fetching


### Production and field

The data is stored in the `/emissions_and_production/` folder. The data is stored in the following files:

- `production_monthly.csv`
- `operators.csv`
- `movable_facilities.csv`
- `licensees.csv`
- `investments.csv`
- `future_investments.csv`
- `fixed_facilities.csv`
- `wellbores.csv`
- `field_description.csv`
- `field_status.csv`
- `field_overview.csv`
- `field_reserves.csv`


In [653]:
def fetch_dataframe(url, sep=",", filetype="csv"):
    if filetype == "csv":
        df = pd.read_csv(url, sep=sep)
    elif filetype == "excel":
        df = pd.read_excel(url)
    return df


base_output_url = "https://github.com/percw/Norwegian_oil_gas_decarbonization/raw/main/data/output/emissions_and_production/"
datafile_names = [
    "production_monthly",
    "operators",
    "movable_facilities",
    "licensees",
    "investments",
    "future_investments",
    "fixed_facilities",
    "wellbores",
    "field_description",
    "field_status",
    "field_overview",
    "field_reserves",
]

# Creating a dictionary to store the dataframes
dataframes = {}

for name in datafile_names:
    url = base_output_url + name + ".csv"
    dataframes[name] = fetch_dataframe(url)


# Setting the name of the df's	as the keys in the dictionary with _df appended
for name, df in dataframes.items():
    globals()[name + "_df"] = df

### Emissions

Emissions data is stored in the `/emissions_and_production/` folder. The data is stored in the following files:

- `emission_co2.csv`
- `emissions_methane.csv`
- `emissions_nox.csv`
- `emissions_oil.csv`
- `emissions_water.csv`


In [654]:
emission_file_names = [
    "emissions_co2",
    "emissions_methane",
    "emissions_nox",
    "emissions_oil",
    "emissions_water",
]

# Fetching the emissions dataframes from GitHub
emission_dataframes = {}

for name in emission_file_names:
    url = base_output_url + name + ".csv"
    emission_dataframes[name] = fetch_dataframe(url)

    # Setting the name of the df's	as the keys in the dictionary with _df appended
for name, df in emission_dataframes.items():
    globals()[name + "_df"] = df

## Cleaning


### Emissions


In [655]:
def clean_emissions_df(df, emissions_type, unit, water_or_air="luft"):
    if water_or_air == "vann":
        df = df.rename(
            columns={
                "År": "year",
                "Anleggsnavn": "field",
                f"Årlig utslipp til {water_or_air}": f"yearly_{emissions_type}_emissions_{unit}",
                "Org.nr.": "org_number",
                "Årlig utslipp til undergrunn": f"yearly_subsea_{emissions_type}_emissions",
            }
        )
        df = df[
            [
                "field",
                "year",
                f"yearly_{emissions_type}_emissions_{unit}",
                "org_number",
                f"yearly_subsea_{emissions_type}_emissions",
            ]
        ]

    else:
        df = df.rename(
            columns={
                "År": "year",
                "Anleggsnavn": "field",
                f"Årlig utslipp til {water_or_air}": f"yearly_{emissions_type}_emissions_{unit}",
                "Org.nr.": "org_number",
            }
        )
        df = df[
            ["field", "year", f"yearly_{emissions_type}_emissions_{unit}", "org_number"]
        ]

    df[f"yearly_{emissions_type}_emissions_{unit}"] = pd.to_numeric(
        df[f"yearly_{emissions_type}_emissions_{unit}"], errors="coerce"
    )
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df["operator"] = df["field"].str.extract(r"\((.*?)\)")
    df["field"] = df["field"].str.replace(r"\(.*\)", "")
    return df

In [656]:
# Checking  all unique oerators in the emissions dataframes

for name, df in emission_dataframes.items():
    print(name, df["operator"].nunique())
    print("\n")

emissions_co2 13


emissions_methane 13


emissions_nox 13


emissions_oil 12


emissions_water 12




In [657]:
# Printing the unique operators in the emissions dataframes

for name, df in emission_dataframes.items():
    print(name, df["operator"].unique())
    print("\n")

emissions_co2 ['equinor energy as' 'aker bp asa' 'sval energi as' 'vår energi asa'
 'totalenergies ep norge as' 'repsol norge as' 'wintershall dea norge as'
 'dno norge as' 'gassco as' 'okea asa' 'a/s norske shell'
 'conocophillips skandinavia as' 'vår energi norge as']


emissions_methane ['equinor energy as' 'aker bp asa' 'sval energi as' 'vår energi asa'
 'totalenergies ep norge as' 'repsol norge as' 'wintershall dea norge as'
 'dno norge as' 'gassco as' 'okea asa' 'a/s norske shell'
 'conocophillips skandinavia as' 'vår energi norge as']


emissions_nox ['equinor energy as' 'aker bp asa' 'sval energi as' 'vår energi asa'
 'totalenergies ep norge as' 'repsol norge as' 'wintershall dea norge as'
 'dno norge as' 'gassco as' 'okea asa' 'a/s norske shell'
 'conocophillips skandinavia as' 'vår energi norge as']


emissions_oil ['equinor energy as' 'aker bp asa' 'sval energi as' 'vår energi asa'
 'repsol norge as' 'wintershall dea norge as' 'dno norge as' 'okea asa'
 'a/s norske shell' 'c

In [658]:
# Checking for missing values in all emissions dfs

for name, df in emission_dataframes.items():
    print(name, df.isnull().sum())
    print("\n")

emissions_co2 field                               0
year                                0
yearly_co2_emissions_1000_tonnes    0
org_number                          0
operator                            0
dtype: int64


emissions_methane field                        0
year                         0
yearly_ch4_emissions_tons    0
org_number                   0
operator                     0
dtype: int64


emissions_nox field                        0
year                         0
yearly_nox_emissions_tons    0
org_number                   0
operator                     0
dtype: int64


emissions_oil field                              0
year                               0
yearly_oil_spill_emissions_tons    0
org_number                         0
operator                           0
dtype: int64


emissions_water field                              0
year                               0
yearly_water_emissions_m3         38
org_number                         0
yearly_subsea_water_emissions  

In [659]:
# Dropping yearly_subsea_water_emissions column from emissions_water_df

emissions_water_df = emissions_water_df.drop(columns="yearly_subsea_water_emissions")

In [660]:
# Checking for nunique fields in the emissions dataframes

for name, df in emission_dataframes.items():
    print(name, df["field"].nunique())
    print("\n")

emissions_co2 89


emissions_methane 89


emissions_nox 89


emissions_oil 86


emissions_water 84




In [661]:
emissions_co2_df.describe()

Unnamed: 0,year,yearly_co2_emissions_1000_tonnes,org_number
count,1317.0,1317.0,1317.0
mean,2011.767654,213.257702,954290100.0
std,7.316805,280.550893,49788270.0
min,1997.0,0.0,812723500.0
25%,2006.0,13.201286,912729800.0
50%,2012.0,123.824173,993246300.0
75%,2018.0,270.683473,993246800.0
max,2023.0,1860.344868,997004800.0


In [662]:
# Merging all the emissions dataframes into one

emissions_df = emissions_co2_df.merge(
    emissions_methane_df, on=["field", "year", "org_number", "operator"], how="left"
)
emissions_df = emissions_df.merge(
    emissions_nox_df, on=["field", "year", "org_number", "operator"], how="left"
)
emissions_df = emissions_df.merge(
    emissions_oil_df, on=["field", "year", "org_number", "operator"], how="left"
)
emissions_df = emissions_df.merge(
    emissions_water_df, on=["field", "year", "org_number", "operator"], how="left"
)

emissions_df

Unnamed: 0,field,year,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3
0,grane,2022,180.655154,993246298,equinor energy as,36.071693,404.470720,16.327993,1.896859e+06
1,grane,2023,188.267980,993246298,equinor energy as,66.983955,306.382910,24.543974,2.161263e+06
2,norne,1997,761.336994,893246592,equinor energy as,383.058529,2944.992072,1.967516,3.430000e+02
3,norne,1998,418.303952,893246592,equinor energy as,826.358898,1792.114235,3.134315,6.820800e+04
4,norne,1999,347.059880,893246592,equinor energy as,1042.015537,1412.344364,11.709334,2.801130e+05
...,...,...,...,...,...,...,...,...,...
1312,aasta hansteen,2018,72.503572,912731456,equinor energy as,67.148045,988.441010,0.183095,8.498236e+03
1313,aasta hansteen,2019,177.647453,912731456,equinor energy as,93.355221,202.908995,0.771865,1.959150e+04
1314,aasta hansteen,2020,192.207951,912731456,equinor energy as,94.063341,147.469687,1.465783,3.099277e+04
1315,aasta hansteen,2021,191.436769,912731456,equinor energy as,103.162276,138.530349,0.397254,2.496023e+04


In [663]:
# removing tailing whitespace from the field column in the emissions_df

emissions_df["field"] = emissions_df["field"].str.strip()

In [664]:
emissions_df[emissions_df["field"] == "vale"]

Unnamed: 0,field,year,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3
123,vale,2001,2.9104,912732673,sval energi as,0.0,63.665,,
124,vale,2005,6.53072,912732673,sval energi as,0.0,142.86,0.031081,327.7
125,vale,2023,0.829612,912732673,sval energi as,0.0,0.014634,0.00139,366.0


In [665]:
len(emissions_df.field.unique())

89

### Production and field data


In [666]:
# Checking datatypes for all dfs

for name, df in dataframes.items():
    print(name, df.dtypes)
    print("\n")

# Checking for missing values in all dfs
for name, df in dataframes.items():
    print(name, df.isnull().sum())
    print("\n")

production_monthly prfInformationCarrier                 object
prfYear                                int64
prfMonth                               int64
prfPrdOilNetMillSm3                  float64
prfPrdGasNetBillSm3                  float64
prfPrdNGLNetMillSm3                  float64
prfPrdCondensateNetMillSm3           float64
prfPrdOeNetMillSm3                   float64
prfPrdProducedWaterInFieldMillSm3    float64
prfNpdidInformationCarrier             int64
dtype: object


operators fldName                   object
cmpLongName               object
fldOperatorFrom           object
fldOperatorTo             object
fldNpdidField              int64
cmpNpdidCompany            int64
fldOperatorDateUpdated    object
datesyncNPD               object
dtype: object


movable_facilities fclName                        object
fclCurrentRespCompanyName      object
fclKind                        object
fclFunctions                   object
fclStatus                      object
fclNationName   

In [667]:
# Renaming columns from Norwegian to English


def clean_production_df(df, names_dict):
    df = df.rename(columns=names_dict)
    return df


production_name_change = {
    "prfInformationCarrier": "field",
    "prfYear": "year",
    "prfMonth": "month",
    "prfPrdNGLNetMillSm3": "net_ngl_prod_monthly_sm3",
    "prfPrdOilNetMillSm3": "net_oil_prod_monthly_sm3",
    "prfPrdGasNetBillSm3": "net_gas_prod_monthly_sm3",
    "prfPrdCondensateNetMillSm3": "net_condensate_prod_monthly_sm3",
    "prfPrdOeNetMillSm3": "net_oil_eq_prod_monthly_sm3",
    "prfPrdProducedWaterInFieldMillSm3": "produced_water_in_field",
    "prfNpdidInformationCarrier": "field_id",
}

In [668]:
production_monthly_df = clean_production_df(
    production_monthly_df, production_name_change
)
display(production_monthly_df)

Unnamed: 0,field,year,month,net_oil_prod_monthly_sm3,net_gas_prod_monthly_sm3,net_ngl_prod_monthly_sm3,net_condensate_prod_monthly_sm3,net_oil_eq_prod_monthly_sm3,produced_water_in_field,field_id
0,16/1-12 Troldhaugen,2021,9,0.0,0.00173,0.0,0.00000,0.00173,0.00719,17196400
1,16/1-12 Troldhaugen,2021,10,0.0,0.00250,0.0,0.00000,0.00250,0.00912,17196400
2,16/1-12 Troldhaugen,2021,11,0.0,0.00199,0.0,0.00000,0.00199,0.01186,17196400
3,16/1-12 Troldhaugen,2021,12,0.0,0.00104,0.0,0.00000,0.00104,0.00418,17196400
4,16/1-12 Troldhaugen,2022,1,0.0,0.00062,0.0,0.00000,0.00062,0.00926,17196400
...,...,...,...,...,...,...,...,...,...,...
25518,AASTA HANSTEEN,2023,11,0.0,0.69432,0.0,0.01636,0.71068,0.00210,23395946
25519,AASTA HANSTEEN,2023,12,0.0,0.77522,0.0,0.01681,0.79203,0.00203,23395946
25520,AASTA HANSTEEN,2024,1,0.0,0.72317,0.0,0.01580,0.73897,0.00194,23395946
25521,AASTA HANSTEEN,2024,2,0.0,0.63856,0.0,0.01421,0.65278,0.00198,23395946


In [669]:
# Filtering out all data reported before 1990

production_monthly_full_df = production_monthly_df.copy()
production_monthly_df = production_monthly_df[production_monthly_df["year"] >= 1990]
production_monthly_df

Unnamed: 0,field,year,month,net_oil_prod_monthly_sm3,net_gas_prod_monthly_sm3,net_ngl_prod_monthly_sm3,net_condensate_prod_monthly_sm3,net_oil_eq_prod_monthly_sm3,produced_water_in_field,field_id
0,16/1-12 Troldhaugen,2021,9,0.0,0.00173,0.0,0.00000,0.00173,0.00719,17196400
1,16/1-12 Troldhaugen,2021,10,0.0,0.00250,0.0,0.00000,0.00250,0.00912,17196400
2,16/1-12 Troldhaugen,2021,11,0.0,0.00199,0.0,0.00000,0.00199,0.01186,17196400
3,16/1-12 Troldhaugen,2021,12,0.0,0.00104,0.0,0.00000,0.00104,0.00418,17196400
4,16/1-12 Troldhaugen,2022,1,0.0,0.00062,0.0,0.00000,0.00062,0.00926,17196400
...,...,...,...,...,...,...,...,...,...,...
25518,AASTA HANSTEEN,2023,11,0.0,0.69432,0.0,0.01636,0.71068,0.00210,23395946
25519,AASTA HANSTEEN,2023,12,0.0,0.77522,0.0,0.01681,0.79203,0.00203,23395946
25520,AASTA HANSTEEN,2024,1,0.0,0.72317,0.0,0.01580,0.73897,0.00194,23395946
25521,AASTA HANSTEEN,2024,2,0.0,0.63856,0.0,0.01421,0.65278,0.00198,23395946


In [670]:
# Make all field names lower case
production_monthly_df["field"] = production_monthly_df["field"].str.lower()

In [671]:
# Checking all field names containing æ,ø,å


def check_for_special_characters(df, column):
    special_char = ["æ", "ø", "å", "Æ", "Ø", "Å"]
    for char in special_char:
        print(df[df[column].str.contains(char, na=False)][column].unique())


check_for_special_characters(production_monthly_df, "field")

['ærfugl nord']
['bøyla' 'frøy' 'gjøa' 'gullfaks sør' 'kvitebjørn' 'nordøst frigg'
 'oseberg sør' 'oseberg øst' 'ringhorne øst' 'sleipner øst' 'snøhvit'
 'statfjord øst' 'visund sør' 'øst frigg']
['åsgard']
[]
[]
[]


In [672]:
# Checking if field_id and field match across the dataframe

print(
    "Number of fields in the field_id column:",
    production_monthly_df["field_id"].nunique(),
)
print("Number of fields in the field column:", production_monthly_df["field"].nunique())

Number of fields in the field_id column: 126
Number of fields in the field column: 126


In [673]:
# Checking that field_id and field match across the dataframe
field_id_field_match = (
    production_monthly_df.groupby(["field_id", "field"])
    .size()
    .reset_index(name="count")
)
field_id_field_match = field_id_field_match[field_id_field_match["count"] > 1]
print(field_id_field_match)

     field_id             field  count
0       43437        albuskjell    103
1       43444  tommeliten gamma    103
2       43451              varg    212
3       43457     sleipner vest    332
4       43464            gungne    336
..        ...               ...    ...
121  34833011           solveig     31
122  34833026              duva     34
123  38542241       ærfugl nord     30
124  38702206       breidablikk      8
125  40867462      tommeliten a      6

[125 rows x 3 columns]


In [674]:
# Creating a dict with field and field_id

field_field_id_dict = (
    production_monthly_df[["field", "field_id"]]
    .drop_duplicates()
    .set_index("field")
    .to_dict()["field_id"]
)

In [675]:
# Calculating yearly production for net_oil_prod_monthly_sm3, net_gas_prod_monthly_sm3,
# net_ngl_prod_monthly_sm3, net_condensate_prod_monthly_sm3, net_oil_eq_prod_monthly_sm3,
# produced_water_in_field, and adding it to a new df called production_yearly_df
production_yearly_df = (
    production_monthly_df.groupby(["field", "year"])
    .agg(
        {
            "net_oil_prod_monthly_sm3": "sum",
            "net_gas_prod_monthly_sm3": "sum",
            "net_ngl_prod_monthly_sm3": "sum",
            "net_condensate_prod_monthly_sm3": "sum",
            "net_oil_eq_prod_monthly_sm3": "sum",
            "produced_water_in_field": "sum",
        }
    )
    .reset_index()
)

# Renaming the columns in the production_yearly_df to reflect that they are yearly values
production_yearly_df = production_yearly_df.rename(
    columns={
        "net_oil_prod_monthly_sm3": "net_oil_prod_yearly_mill_sm3",
        "net_gas_prod_monthly_sm3": "net_gas_prod_yearly_bill_sm3",
        "net_ngl_prod_monthly_sm3": "net_ngl_prod_yearly_mill_sm3",
        "net_condensate_prod_monthly_sm3": "net_condensate_prod_yearly_mill_sm3",
        "net_oil_eq_prod_monthly_sm3": "net_oil_eq_prod_yearly_mill_sm3",
        "produced_water_in_field": "produced_water_yearly_mill_sm3",
    }
)

# Adding the field_id to the production_yearly_df from the field_field_id_dict
production_yearly_df["field_id"] = production_yearly_df["field"].map(
    field_field_id_dict
)

display(production_yearly_df)

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id
0,16/1-12 troldhaugen,2021,0.00000,0.00726,0.00000,0.00000,0.00726,0.03235,17196400
1,16/1-12 troldhaugen,2022,0.00788,0.01382,0.00000,0.00000,0.02170,0.16206,17196400
2,16/1-12 troldhaugen,2023,0.04920,0.01080,0.00000,0.00000,0.06000,0.10626,17196400
3,16/1-12 troldhaugen,2024,0.01025,0.00161,0.00000,0.00000,0.01186,0.02302,17196400
4,33/9-6 delta,2009,0.01022,0.00000,0.00026,0.00000,0.01048,0.01061,44576
...,...,...,...,...,...,...,...,...,...
2109,øst frigg,1994,0.00000,0.98259,0.00000,0.00696,0.98951,0.00000,43576
2110,øst frigg,1995,0.00000,0.31923,0.00000,0.00065,0.31988,0.00000,43576
2111,øst frigg,1996,0.00000,0.61029,0.00000,0.00035,0.61066,0.00000,43576
2112,øst frigg,1997,0.00000,0.09648,0.00000,0.00009,0.09658,0.00000,43576


In [676]:
# Function to compute monthly volatility and add it to the yearly DataFrame
def add_volatility_to_yearly(monthly_df, yearly_df, group_columns, value_columns):
    """
    Computes the monthly volatility for specified columns and adds them to the yearly DataFrame.

    Parameters:
    monthly_df (pd.DataFrame): DataFrame containing monthly production data.
    yearly_df (pd.DataFrame): DataFrame containing yearly aggregated data.
    group_columns (list of str): Columns to group by for calculating volatility.
    value_columns (list of str): Columns for which to compute volatility.

    Returns:
    pd.DataFrame: The yearly DataFrame with added volatility columns.
    """
    for col in value_columns:
        # Compute the standard deviation (volatility) for each group
        volatility = (
            monthly_df.groupby(group_columns)[col]
            .std()
            .reset_index(name=f"{col}_volatility")
        )
        # Replace NaN with 0
        volatility[f"{col}_volatility"].fillna(0, inplace=True)
        # Merge the volatility into the yearly DataFrame
        yearly_df = yearly_df.merge(volatility, on=group_columns, how="left")
    return yearly_df


# List of production columns
production_columns = [
    "net_oil_prod_monthly_sm3",
    "net_gas_prod_monthly_sm3",
    "net_ngl_prod_monthly_sm3",
    "net_condensate_prod_monthly_sm3",
    "net_oil_eq_prod_monthly_sm3",
    "produced_water_in_field",
]

# Compute and add volatility columns to the yearly DataFrame
production_yearly_df = add_volatility_to_yearly(
    production_monthly_df, production_yearly_df, ["field", "year"], production_columns
)

display(production_yearly_df.head())

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility
0,16/1-12 troldhaugen,2021,0.0,0.00726,0.0,0.0,0.00726,0.03235,17196400,0.0,0.000608,0.0,0.0,0.000608,0.003234
1,16/1-12 troldhaugen,2022,0.00788,0.01382,0.0,0.0,0.0217,0.16206,17196400,0.002275,0.000685,0.0,0.0,0.002493,0.006957
2,16/1-12 troldhaugen,2023,0.0492,0.0108,0.0,0.0,0.06,0.10626,17196400,0.002678,0.000706,0.0,0.0,0.003262,0.006208
3,16/1-12 troldhaugen,2024,0.01025,0.00161,0.0,0.0,0.01186,0.02302,17196400,0.000184,0.000495,0.0,0.0,0.000317,0.002425
4,33/9-6 delta,2009,0.01022,0.0,0.00026,0.0,0.01048,0.01061,44576,0.001568,0.0,8.5e-05,0.0,0.001652,0.00207


In [677]:
production_yearly_df.describe()

Unnamed: 0,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility
count,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0
mean,2011.062914,2.035864,1.304744,0.211464,0.058986,3.611051,1.879764,4155921.0,0.028209,0.027171,0.00428,0.001353,0.056797,0.02756
std,9.338526,4.174485,4.003833,0.487917,0.312616,6.642575,4.463345,8309069.0,0.050086,0.086715,0.008879,0.00759,0.105426,0.055898
min,1990.0,-0.00336,-0.03635,-0.00358,-0.00025,-0.04016,0.0,43437.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2004.0,0.070805,0.00024,0.0,0.0,0.28198,0.000215,43618.0,0.002,2.8e-05,0.0,0.0,0.006645,1.9e-05
50%,2013.0,0.50503,0.1291,0.01939,0.0,1.1464,0.11289,43771.0,0.010088,0.003493,0.000571,0.0,0.02237,0.003705
75%,2019.0,2.14108,0.775835,0.178077,0.0,3.987533,1.57187,3420717.0,0.032953,0.016034,0.0041,0.0,0.061689,0.028934
max,2024.0,41.28919,39.78421,5.07035,4.32901,48.50212,38.9668,40867460.0,0.832296,1.037163,0.089582,0.119442,1.10893,0.442994


In [678]:
yearly_production_columns = [
    "net_oil_prod_yearly_mill_sm3",
    "net_gas_prod_yearly_bill_sm3",
    "net_ngl_prod_yearly_mill_sm3",
    "net_condensate_prod_yearly_mill_sm3",
    "net_oil_eq_prod_yearly_mill_sm3",
    "produced_water_yearly_mill_sm3",
]

# Print all with negative values
for col in yearly_production_columns:
    print(
        f"Negative values in {col}: {production_yearly_df[production_yearly_df[col] < 0].shape[0]}"
    )
    # The negative number:
    print(production_yearly_df[production_yearly_df[col] < 0][col])

Negative values in net_oil_prod_yearly_mill_sm3: 6
159    -0.00067
1248   -0.00330
1714   -0.00066
1720   -0.00005
1936   -0.00051
2101   -0.00336
Name: net_oil_prod_yearly_mill_sm3, dtype: float64
Negative values in net_gas_prod_yearly_bill_sm3: 1
1411   -0.03635
Name: net_gas_prod_yearly_bill_sm3, dtype: float64
Negative values in net_ngl_prod_yearly_mill_sm3: 3
5      -0.00023
995    -0.00105
1411   -0.00358
Name: net_ngl_prod_yearly_mill_sm3, dtype: float64
Negative values in net_condensate_prod_yearly_mill_sm3: 1
1411   -0.00025
Name: net_condensate_prod_yearly_mill_sm3, dtype: float64
Negative values in net_oil_eq_prod_yearly_mill_sm3: 3
159    -0.00067
1411   -0.04016
1936   -0.00051
Name: net_oil_eq_prod_yearly_mill_sm3, dtype: float64
Negative values in produced_water_yearly_mill_sm3: 0
Series([], Name: produced_water_yearly_mill_sm3, dtype: float64)


In [679]:
# Setting all the negative values to 0
for col in yearly_production_columns:
    production_yearly_df[col] = production_yearly_df[col].clip(lower=0)

In [680]:
production_yearly_df.describe()

Unnamed: 0,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility
count,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0
mean,2011.062914,2.035868,1.304761,0.211466,0.058986,3.61107,1.879764,4155921.0,0.028209,0.027171,0.00428,0.001353,0.056797,0.02756
std,9.338526,4.174483,4.003827,0.487916,0.312616,6.642564,4.463345,8309069.0,0.050086,0.086715,0.008879,0.00759,0.105426,0.055898
min,1990.0,0.0,0.0,0.0,0.0,0.0,0.0,43437.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2004.0,0.070805,0.00024,0.0,0.0,0.28198,0.000215,43618.0,0.002,2.8e-05,0.0,0.0,0.006645,1.9e-05
50%,2013.0,0.50503,0.1291,0.01939,0.0,1.1464,0.11289,43771.0,0.010088,0.003493,0.000571,0.0,0.02237,0.003705
75%,2019.0,2.14108,0.775835,0.178077,0.0,3.987533,1.57187,3420717.0,0.032953,0.016034,0.0041,0.0,0.061689,0.028934
max,2024.0,41.28919,39.78421,5.07035,4.32901,48.50212,38.9668,40867460.0,0.832296,1.037163,0.089582,0.119442,1.10893,0.442994


In [681]:
# Printing out all fields with numbers in the name


def check_for_numbers(df, column):
    print(df[df[column].str.contains("\d", na=False)][column].unique())


discoveries = check_for_numbers(production_yearly_df, "field")

# Removing the fields with numbers in the name / these are discoveries not fields
production_yearly_df = production_yearly_df[
    ~production_yearly_df["field"].str.contains("\d", na=False)
]

['16/1-12 troldhaugen' '33/9-6 delta' '7220/11-1 (alta)']


### Field status

Adding the field status to the production data.


In [682]:
field_status_df.head()

Unnamed: 0,fldName,fldStatusFromDate,fldStatusToDate,fldStatus,fldNpdidField,fldStatusDateUpdated,datesyncNPD
0,ALBUSKJELL,25.04.1975,25.05.1979,Approved for production,43437,28.02.2023,29.05.2024
1,ALBUSKJELL,26.05.1979,25.08.1998,Producing,43437,28.02.2023,29.05.2024
2,ALBUSKJELL,26.08.1998,,Shut down,43437,28.02.2023,29.05.2024
3,ALVE,16.03.2007,18.03.2009,Approved for production,4444332,28.02.2023,29.05.2024
4,ALVE,19.03.2009,,Producing,4444332,28.02.2023,29.05.2024


In [683]:
# Convert date columns to datetime
field_status_df["fldStatusFromDate"] = pd.to_datetime(
    field_status_df["fldStatusFromDate"], format="%d.%m.%Y"
)
field_status_df["fldStatusToDate"] = pd.to_datetime(
    field_status_df["fldStatusToDate"], format="%d.%m.%Y"
)
production_yearly_df["year"] = pd.to_datetime(production_yearly_df["year"], format="%Y")

# Ensure proper field name matching
field_status_df["fldName"] = field_status_df["fldName"].str.lower()


# Function to get status based on date range
def get_status(row, status_df):
    field = row["field"]
    year = row["year"]
    status_rows = status_df[
        (status_df["fldName"] == field)
        & (status_df["fldStatusFromDate"] <= year)
        & (
            (status_df["fldStatusToDate"].isna())
            | (status_df["fldStatusToDate"] >= year)
        )
    ]
    if not status_rows.empty:
        return status_rows.iloc[0]["fldStatus"]
    return None


# Apply function to get status for each row
production_yearly_df["status"] = production_yearly_df.apply(
    get_status, axis=1, status_df=field_status_df
)

# Convert the year to only show the year
production_yearly_df["year"] = production_yearly_df["year"].dt.year

# Display the updated DataFrame
production_yearly_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status
11,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production
12,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing
13,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing
14,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing
15,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2109,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing
2110,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing
2111,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing
2112,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing


In [684]:
production_yearly_df[production_yearly_df["status"].isna()]

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status
73,balder,1991,0.12856,0.0,0.0,0.0,0.12856,0.0,43562,0.005541,0.0,0.0,0.0,0.005541,0.0,
160,byrding,2017,0.13127,0.0,0.0,0.0,0.13127,0.0021,28975067,0.012493,0.0,0.0,0.0,0.012493,0.00028,
440,gimle,2005,0.30007,0.0617,0.02144,0.0,0.38322,0.00906,4005142,0.021749,0.004569,0.001599,0.0,0.027823,0.001472,
441,gimle,2006,0.2557,0.01184,0.00422,0.0,0.27173,0.00115,4005142,0.017296,0.001677,0.000604,0.0,0.017279,0.000123,
505,grane,1996,0.0792,0.0,0.0,0.0,0.0792,0.00256,1035937,0.028425,0.0,0.0,0.0,0.028425,0.000739,
956,mime,1990,0.02584,0.00577,0.00193,0.0,0.03353,0.0,43792,0.005529,0.00125,0.000402,0.0,0.007177,0.0,
957,mime,1991,0.16236,0.03671,0.01134,0.0,0.2104,0.0,43792,0.002936,0.000695,0.000259,0.0,0.003883,0.0,
958,mime,1992,0.12234,0.0276,0.00756,0.0,0.1575,0.0,43792,0.001178,0.000239,8.8e-05,0.0,0.001486,0.0,
1244,sindre,2017,0.02029,0.0,0.0,0.0,0.02029,0.00048,29401178,0.002936,0.0,0.0,0.0,0.002936,8.9e-05,


In [685]:
# Remove balder 1991
production_yearly_df = production_yearly_df[
    ~(
        (production_yearly_df["field"] == "balder")
        & (production_yearly_df["year"] == 1991)
    )
]

# Set 'byrding' field 2017 to 'Producing'
production_yearly_df.loc[
    (production_yearly_df["field"] == "byrding")
    & (production_yearly_df["year"] == 2017),
    "status",
] = "Producing"

# Set gimle 2005 to Approved for production
production_yearly_df.loc[
    (production_yearly_df["field"] == "gimle") & (production_yearly_df["year"] == 2005),
    "status",
] = "Approved for production"

# Set gimle 2006 to Producing
production_yearly_df.loc[
    (production_yearly_df["field"] == "gimle") & (production_yearly_df["year"] == 2006),
    "status",
] = "Producing"

# Remove grane 1996
production_yearly_df = production_yearly_df[
    ~(
        (production_yearly_df["field"] == "grane")
        & (production_yearly_df["year"] == 1996)
    )
]

# Remove mime 1990 and 1991
production_yearly_df = production_yearly_df[
    ~(
        (production_yearly_df["field"] == "mime")
        & (production_yearly_df["year"].isin([1990, 1991]))
    )
]

# Set mime 1992 to Approved for production
production_yearly_df.loc[
    (production_yearly_df["field"] == "mime") & (production_yearly_df["year"] == 1992),
    "status",
] = "Approved for production"

# Set sindre 2017 to producing
production_yearly_df.loc[
    (production_yearly_df["field"] == "sindre")
    & (production_yearly_df["year"] == 2017),
    "status",
] = "Producing"

In [686]:
production_yearly_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status
11,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production
12,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing
13,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing
14,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing
15,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2109,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing
2110,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing
2111,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing
2112,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing


In [687]:
field_overview_df

Unnamed: 0,fldName,cmpLongName,fldCurrentActivitySatus,wlbName,wlbCompletionDate,fldMainArea,fldOwnerKind,fldOwnerName,fldMainSupplyBase,fldHcType,fldNpdidOwner,fldNpdidField,wlbNpdidWellbore,cmpNpdidCompany,fldFactPageUrl,fldFactMapUrl,fldDateUpdated,fldDateUpdatedMax,DatesyncNPD
0,ALBUSKJELL,ConocoPhillips Skandinavia AS,Shut down,1/6-1,26.11.1972,North sea,PRODUCTION LICENSE,018,,GAS/CONDENSATE,20900.0,43437,239,2410696.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024
1,ALVE,Equinor Energy AS,Producing,6507/3-1,26.10.1990,Norwegian sea,PRODUCTION LICENSE,159 B,Sandnessjøen,GAS/CONDENSATE,2819945.0,4444332,1533,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024
2,ALVE NORD,Aker BP ASA,Approved for production,6607/12-2 S,25.10.2011,Norwegian sea,PRODUCTION LICENSE,127 C,,OIL/GAS,29427330.0,42002483,6642,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024
3,ALVHEIM,Aker BP ASA,Producing,24/6-2,08.07.1998,North sea,PRODUCTION LICENSE,203,,OIL/GAS,22436.0,2845712,3397,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,02.01.2024,29.05.2024
4,ATLA,TotalEnergies EP Norge AS,Shut down,25/5-7,23.10.2010,North sea,PRODUCTION LICENSE,102 C,,GAS/CONDENSATE,5467035.0,21106284,6423,35000016.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,ÆRFUGL NORD,Aker BP ASA,Producing,6507/3-9 S,15.07.2012,Norwegian sea,PRODUCTION LICENSE,212 E,Sandnessjøen,GAS/CONDENSATE,4966902.0,38542241,6951,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,20.04.2024,29.05.2024
136,ØRN,Aker BP ASA,Approved for production,6507/2-5 S,14.09.2019,Norwegian sea,PRODUCTION LICENSE,942,,GAS,30676176.0,42002484,8775,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024
137,ØST FRIGG,Aker BP ASA,Shut down,25/2-1,21.09.1973,North sea,PRODUCTION LICENSE,873,,GAS,28973585.0,43576,353,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,01.03.2024,29.05.2024
138,ÅSGARD,Equinor Energy AS,Producing,6507/11-1,10.12.1981,Norwegian sea,BUSINESS ARRANGEMENT AREA,ÅSGARD UNIT,Kristiansund,GAS/CONDENSATE,40564.0,43765,68,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,01.03.2024,29.05.2024


In [688]:
# Setting the current status of the field in its own column using the field_overview_df

field_overview_df.fldName = field_overview_df.fldName.str.lower()
field_overview_df["current_status"] = field_overview_df["fldCurrentActivitySatus"]

# Merging the current status of the field to the production_yearly_df
production_yearly_df = production_yearly_df.merge(
    field_overview_df[["fldName", "current_status"]],
    left_on="field",
    right_on="fldName",
    how="left",
)


# removing fldName column
production_yearly_df = production_yearly_df.drop(columns="fldName")

display(production_yearly_df.current_status.isna().sum())
display(production_yearly_df)

0

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down


In [689]:
field_overview_df[field_overview_df["current_status"] == "Approved for production"]

# This is a list of future fields that are not yet producing

Unnamed: 0,fldName,cmpLongName,fldCurrentActivitySatus,wlbName,wlbCompletionDate,fldMainArea,fldOwnerKind,fldOwnerName,fldMainSupplyBase,fldHcType,fldNpdidOwner,fldNpdidField,wlbNpdidWellbore,cmpNpdidCompany,fldFactPageUrl,fldFactMapUrl,fldDateUpdated,fldDateUpdatedMax,DatesyncNPD,current_status
2,alve nord,Aker BP ASA,Approved for production,6607/12-2 S,25.10.2011,Norwegian sea,PRODUCTION LICENSE,127 C,,OIL/GAS,29427330.0,42002483,6642,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Approved for production
7,berling,OMV (Norge) AS,Approved for production,6506/11-10,17.04.2018,Norwegian sea,PRODUCTION LICENSE,644 B,Kristiansund,GAS/CONDENSATE,27434494.0,42002473,8317,4460594.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,02.01.2024,29.05.2024,Approved for production
20,eirin,Equinor Energy AS,Approved for production,15/5-2,16.12.1978,North sea,PRODUCTION LICENSE,048 E,Dusavik,GAS,18450020.0,42002492,316,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,18.01.2024,01.02.2024,29.05.2024,Approved for production
26,fenris,Aker BP ASA,Approved for production,2/4-21,24.05.2012,North sea,PRODUCTION LICENSE,146,Tananger,GAS/CONDENSATE,21980.0,42002478,6736,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,14.05.2024,14.05.2024,29.05.2024,Approved for production
32,fulla,Aker BP ASA,Approved for production,30/11-7,03.02.2009,North sea,PRODUCTION LICENSE,873,,OIL/CONDENSATE,28973585.0,42002479,5919,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Approved for production
45,halten øst,Equinor Energy AS,Approved for production,6507/11-6,08.07.2001,Norwegian sea,BUSINESS ARRANGEMENT AREA,HALTEN ØST UNIT,Kristiansund,GAS/CONDENSATE,38215610.0,42148955,4321,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,02.01.2024,29.05.2024,Approved for production
50,hugin,Aker BP ASA,Approved for production,25/2-10 S,19.03.1986,North sea,PRODUCTION LICENSE,442,,OIL/GAS,4237510.0,42002474,855,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,24.01.2024,29.05.2024,Approved for production
53,idun nord,Aker BP ASA,Approved for production,6507/3-7,22.07.2009,Norwegian sea,PRODUCTION LICENSE,159 D,,GAS,4939421.0,42002477,6123,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,20.04.2024,29.05.2024,Approved for production
54,irpa,Equinor Energy AS,Approved for production,6705/10-1,19.03.2009,Norwegian sea,PRODUCTION LICENSE,327 B,,GAS,4720054.0,42002482,6044,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,13.03.2024,02.01.2024,29.05.2024,Approved for production
58,johan castberg,Equinor Energy AS,Approved for production,7220/8-1,02.05.2011,Barents sea,PRODUCTION LICENSE,532,Hammerfest,OIL/GAS,5463659.0,32017325,6484,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,01.03.2024,29.05.2024,Approved for production


In [690]:
production_yearly_df.current_status.isna().sum()

0

In [691]:
# Renaming fldNpdidOwner to field_owner and fldNpdidField to field_id in the field_overview_df

field_overview_df = field_overview_df.rename(
    columns={"fldNpdidOwner": "field_owner", "fldNpdidField": "field_id"}
)

# Populating the production_yearly_df with the field_id and field_owner from the field_overview_df without merging

production_yearly_df["field_id"] = production_yearly_df["field"].map(
    field_field_id_dict
)
production_yearly_df["field_owner"] = production_yearly_df["field"].map(
    field_overview_df.set_index("fldName")["field_owner"]
)

In [692]:
production_yearly_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0


### Processing fields

Some fields does not process their own production. Even though they have reported production, their reported emissions share might be lower than usual. This is due to the fact that other fields process the oil and gas from these fields. We need to adjust the production data to reflect this.


In [693]:
# Extracting rows where 'fldDescriptionHeading' is 'Transport'
field_description_transport = field_description_df[
    field_description_df["fldDescriptionHeading"] == "Transport"
]

# Displaying the number of unique 'fldName' entries
print(
    "Number of unique fields in field_description_transport:",
    field_description_transport["fldName"].nunique(),
)


# Function to perform lookup and add new column based on matching field names within fldDescriptionText
def find_field_names_in_description(
    df, lookup_df, lookup_column, description_column, new_column
):
    """
    Searches for field names within description text and adds the field name to a new column if found.

    Parameters:
    df (pd.DataFrame): DataFrame to add the lookup values to.
    lookup_df (pd.DataFrame): DataFrame containing the lookup values.
    lookup_column (str): Column containing the values to look for.
    description_column (str): Column containing the text to search within.
    new_column (str): Name of the new column to add.

    Returns:
    pd.DataFrame: The original DataFrame with added lookup values.
    """
    # Convert the lookup column to a list of unique values in uppercase
    lookup_values = lookup_df[lookup_column].str.upper().unique().tolist()

    # Initialize the new column with None
    df[new_column] = None

    # Iterate over each row and search for lookup values in the description text
    for index, row in df.iterrows():
        found_field_names = []
        if pd.notnull(row[description_column]):
            description_text = row[description_column].upper().split()
            for value in lookup_values:
                if value in description_text:
                    found_field_names.append(value)
        if found_field_names:
            df.at[index, new_column] = ", ".join(found_field_names)

    return df


# Adding field name information to 'field_description_transport'
field_description_transport = find_field_names_in_description(
    field_description_transport,
    field_description_transport,
    "fldName",
    "fldDescriptionText",
    "processing_field",
)


len(field_description_transport.processing_field.unique())
display(
    field_description_transport[
        field_description_transport["processing_field"].isnull()
    ].head()
)

Number of unique fields in field_description_transport: 140


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[new_column] = None


Unnamed: 0,fldName,fldDescriptionHeading,fldDescriptionText,fldNpdidField,fldDescriptionDateUpdated,DatesyncNPD,processing_field
2,ALBUSKJELL,Transport,Brønnstrømmen ble transportert i rørledning ti...,43437,28.02.2023,28.05.2024,
59,BRYNHILD,Transport,Brønnstrømmen gikk i rørledning til Haewene Br...,21123063,28.02.2023,28.05.2024,
74,COD,Transport,Brønnstrømmen ble sendt i rørledning til Ekofi...,43785,28.02.2023,28.05.2024,
92,EDDA,Transport,Brønnstrømmen ble sendt i rørledning til Ekofi...,43541,28.02.2023,28.05.2024,
100,EIRIN,Transport,Brønnstrømmen skal transporteres i rørledning ...,42002492,19.01.2024,28.05.2024,


In [694]:
# In field_description_transport, rename fldName to field and fldDescriptionText to transport
field_description_transport = field_description_transport.rename(
    columns={"fldName": "field", "fldDescriptionText": "transport"}
)

# Making all field names lower case
field_description_transport["field"] = field_description_transport["field"].str.lower()
field_description_transport["processing_field"] = field_description_transport[
    "processing_field"
].str.lower()

# Displaying these columns: 'field', 'transport', 'processing_field'
display(field_description_transport[["field", "transport", "processing_field"]].head())

Unnamed: 0,field,transport,processing_field
2,albuskjell,Brønnstrømmen ble transportert i rørledning ti...,
6,alve,Oljen losses fra Norneskipet og gassen transpo...,åsgard
13,alve nord,Gassen skal eksporteres via Åsgard Transport S...,"skarv, åsgard"
16,alvheim,Oljen blir stabilisert og lagret på Alvheimski...,alvheim
20,atla,Brønnstrømmen ble transportert via undervannsi...,heimdal


In [695]:
# ------ Manual check done in Excel ------
# See the file 'field_processing_list_manual_check.xlsx' in the emissions_and_production/cleaned/ folder
# field_description_transport[['field', 'transport', 'processing_field', 'field_in_emissions']].to_excel('field_processing_list_manual_check.xlsx', index=False)
#

In [696]:
# Import the manually checked file

field_processing_list_cleaned_df = pd.read_excel(
    "../../data/output/emissions_and_production/cleaned/field_processing_list_manual_check.xlsx"
)
field_processing_list_cleaned_df.head()

Unnamed: 0,field,transport,processing_field,field_in_emissions
0,albuskjell,Brønnstrømmen ble transportert i rørledning ti...,albuskjell,False
1,alve,Oljen losses fra Norneskipet og gassen transpo...,norne,True
2,alve nord,Gassen skal eksporteres via Åsgard Transport S...,skarv,False
3,alvheim,Oljen blir stabilisert og lagret på Alvheimski...,alvheim,True
4,atla,Brønnstrømmen ble transportert via undervannsi...,heimdal,True


In [697]:
production_yearly_df.head()

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,1e-05,23395946,0.0,0.012227,0.0,0.000139,0.012361,4e-06,Approved for production,Producing,22556.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.04938,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0


In [698]:
field_processing_list_cleaned_df.head()

Unnamed: 0,field,transport,processing_field,field_in_emissions
0,albuskjell,Brønnstrømmen ble transportert i rørledning ti...,albuskjell,False
1,alve,Oljen losses fra Norneskipet og gassen transpo...,norne,True
2,alve nord,Gassen skal eksporteres via Åsgard Transport S...,skarv,False
3,alvheim,Oljen blir stabilisert og lagret på Alvheimski...,alvheim,True
4,atla,Brønnstrømmen ble transportert via undervannsi...,heimdal,True


In [699]:
# Perform the merge based on the 'field' column
merged_df = pd.merge(
    production_yearly_df,
    field_processing_list_cleaned_df[
        ["field", "processing_field", "field_in_emissions"]
    ],
    on="field",
    how="left",
)

# Display the resulting DataFrame
merged_df.head()

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,1e-05,23395946,0.0,0.012227,0.0,0.000139,0.012361,4e-06,Approved for production,Producing,22556.0,aasta hansteen,True
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.04938,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True


In [700]:
merged_df["field_in_emissions"].value_counts()

fields_yearly_df = merged_df.copy()

#### Checking the facilities data

Attribute info about facilites are [here](https://factpages.sodir.no/nb-no/facility/Attributes).


In [701]:
fixed_facilities_df.head()

Unnamed: 0,fclName,fclPhase,fclSurface,fclCurrentOperatorName,fclKind,fclBelongsToName,fclBelongsToKind,fclBelongsToS,fclStartupDate,fclGeodeticDatum,fclNsDeg,fclNsMin,fclNsSec,fclNsCode,fclEwDeg,fclEwMin,fclEwSec,fclEwCode,fclWaterDepth,fclFunctions,fclDesignLifetime,fclNationName,fclFactPageUrl,fclFactMapUrl,fclNpdidFacility,fclDateUpdated,datesyncNPD
0,1/2-1 IM Blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,BLANE,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,30.57,N,2.0,27.0,15.12,E,74,WATER INJECTION,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373104,02.01.2024,22.05.2024
1,1/2-1 PE Blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,BLANE,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,29.87,N,2.0,27.0,15.87,E,74,OIL PRODUCER,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373068,02.01.2024,22.05.2024
2,1/2-1 PW Blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,BLANE,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,29.86,N,2.0,27.0,14.39,E,74,OIL PRODUCER,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373030,02.01.2024,22.05.2024
3,10/1-CDP1,PARTLY REMOVED,Y,,CONCRETE STRUCTURE,,INGEN REGISTERING,,01.09.1979,ED50,59.0,52.0,31.04,N,2.0,3.0,42.44,E,100,DRILLING,,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,271861,02.01.2024,22.05.2024
4,15/12-C Rev,SHUT DOWN,N,Repsol Norge AS,SINGLE WELL TEMPLATE,REV,FIELD,4467554.0,26.01.2009,ED50,58.0,1.0,40.68,N,1.0,55.0,28.34,E,113,GAS PRODUCER,10.0,Norway,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,378147,02.01.2024,22.05.2024


In [702]:
fixed_facilities_df["fclPhase"].unique()

array(['IN SERVICE', 'PARTLY REMOVED', 'SHUT DOWN', 'REMOVED', 'FUTURE',
       'FABRICATION', 'INSTALLATION', 'LAID UP', 'ABANDONED IN PLACE',
       'DISPOSAL COMPLETED'], dtype=object)

In [703]:
facilites_operators = fixed_facilities_df["fclCurrentOperatorName"].unique()
facilites_operators

array(['Repsol Norge AS', nan, 'Equinor Energy AS', 'Aker BP ASA',
       'Vår Energi ASA', 'TotalEnergies EP Norge AS',
       'ConocoPhillips Skandinavia AS', 'OKEA ASA', 'Gassco AS',
       'Repsol Sinopec North Sea Limited', 'Vår Energi Norge AS',
       'Wintershall Dea Norge AS', 'Sval Energi AS', 'A/S Norske Shell',
       'ConocoPhillips (U.K.) Limited.', 'DNO Norge AS'], dtype=object)

In [704]:
facilites_name_change = {
    "fclName": "facility_name",
    "fclPhase": "facility_phase",
    "fclSurface": "facility_surface",
    "fclCurrentOperatorName": "facility_operator",
    "fclKind": "facility_kind",
    "fclBelongsToName": "facility_belongs_to_name",
    "fclBelongsToKind": "facility_belongs_to_kind",
    "fclBelongsToS": "facility_belongs_to_s",
    "fclFunctions": "facility_functions",
    "fclStartupDate": "facility_startup_date",
    "fclGeodeticDatum": "facility_geodetic_datum",
    "fclWaterDepth": "facility_water_depth",
    "fclDesignLifetime": "facility_design_lifetime",
    "fclNationName": "facility_nation_name",
    "fclNpdidFacility": "facility_id",
}


# Renaming columns in the fixed_facilities_df
fixed_facilities_df = fixed_facilities_df.rename(columns=facilites_name_change)

# Making facility_name lower case
fixed_facilities_df["facility_name"] = fixed_facilities_df["facility_name"].str.lower()

# Making facility_belongs_to_name lower case
fixed_facilities_df["facility_belongs_to_name"] = fixed_facilities_df[
    "facility_belongs_to_name"
].str.lower()

fixed_facilities_df.head()

Unnamed: 0,facility_name,facility_phase,facility_surface,facility_operator,facility_kind,facility_belongs_to_name,facility_belongs_to_kind,facility_belongs_to_s,facility_startup_date,facility_geodetic_datum,fclNsDeg,fclNsMin,fclNsSec,fclNsCode,fclEwDeg,fclEwMin,fclEwSec,fclEwCode,facility_water_depth,facility_functions,facility_design_lifetime,facility_nation_name,fclFactPageUrl,fclFactMapUrl,facility_id,fclDateUpdated,datesyncNPD
0,1/2-1 im blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,blane,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,30.57,N,2.0,27.0,15.12,E,74,WATER INJECTION,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373104,02.01.2024,22.05.2024
1,1/2-1 pe blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,blane,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,29.87,N,2.0,27.0,15.87,E,74,OIL PRODUCER,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373068,02.01.2024,22.05.2024
2,1/2-1 pw blane,IN SERVICE,N,Repsol Norge AS,SINGLE WELL TEMPLATE,blane,FIELD,3437650.0,12.09.2007,ED50,56.0,54.0,29.86,N,2.0,27.0,14.39,E,74,OIL PRODUCER,15.0,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,373030,02.01.2024,22.05.2024
3,10/1-cdp1,PARTLY REMOVED,Y,,CONCRETE STRUCTURE,,INGEN REGISTERING,,01.09.1979,ED50,59.0,52.0,31.04,N,2.0,3.0,42.44,E,100,DRILLING,,United Kingdom,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,271861,02.01.2024,22.05.2024
4,15/12-c rev,SHUT DOWN,N,Repsol Norge AS,SINGLE WELL TEMPLATE,rev,FIELD,4467554.0,26.01.2009,ED50,58.0,1.0,40.68,N,1.0,55.0,28.34,E,113,GAS PRODUCER,10.0,Norway,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fa...,378147,02.01.2024,22.05.2024


In [705]:
fixed_facilities_df["facility_belongs_to_name"].unique()

array(['blane', nan, 'rev', 'gaupe', 'glitne', 'sleipner vest', 'solveig',
       'alvheim', 'bøyla', 'volund', 'balder', 'hanz', 'skogul', 'vilje',
       'atla', 'tyrving', 'jette', 'martin linge', 'oseberg', 'tune',
       'troll', 'gullfaks sør', 'gullfaks', 'snorre', 'vigdis', 'tordis',
       'visund', 'fram', 'norpipe oljeledning', 'halten øst', 'njord',
       'draugen', 'yttergryta', 'ærfugl nord', 'skarv', 'heidrun',
       'albuskjell', 'alve', 'snøhvit', 'norpipe gassledning', 'bauge',
       'enoch', 'brage', 'statpipe', 'breidablikk', 'brynhild', 'skirne',
       'cod', 'skuld', 'åsgard transport', 'gassled', 'duva', 'dvalin',
       'langeled', 'edda', 'edvard grieg', 'ekofisk', 'valhall',
       'eldfisk', 'embla', 'fenja', 'fenris', 'knarr', 'tampen link',
       'fram h-nord', 'franpipe', 'frigg', 'martin linge gassrør', 'frøy',
       'gina krog', 'gina krog gassimport', 'gjøa', 'goliat', 'grane',
       'grane gassrør', 'edvard grieg oljerør', 'gudrun', 'gyda',
    

In [706]:
# Checking all facilites : facility_nation_name = ~Norway

fixed_facilities_df[fixed_facilities_df["facility_nation_name"] != "Norway"]

# Remove all facilities not in Norway
fixed_facilities_df = fixed_facilities_df[
    fixed_facilities_df["facility_nation_name"] == "Norway"
]

In [707]:
# Finding instances where the operator is not the same for the same facility
facility_operator_mismatch = (
    fixed_facilities_df.groupby(["facility_name", "facility_operator"])
    .size()
    .reset_index(name="count")
)
facility_operator_mismatch = facility_operator_mismatch[
    facility_operator_mismatch["count"] > 1
]
print(facility_operator_mismatch)

Empty DataFrame
Columns: [facility_name, facility_operator, count]
Index: []


In [708]:
facilities_df = fixed_facilities_df.copy()

# Only keeping the facilites where the facility_belongs_to_name is in the production_yearly_df
facilities_df = facilities_df[
    facilities_df["facility_belongs_to_name"].isin(production_yearly_df["field"])
]
facilities_df

# Convert the facility_startup_date to datetime and only displaying the year
facilities_df["facility_startup_date"] = pd.to_datetime(
    facilities_df["facility_startup_date"], format="%d.%m.%Y"
)
facilities_df["facility_startup_date"] = facilities_df[
    "facility_startup_date"
].dt.year.astype("Int64")

# Remove facilites where startup date is NaN
facilities_df = facilities_df[facilities_df["facility_startup_date"].notna()]

# Removing all facilities with startup date before 1990
facilities_df = facilities_df[facilities_df["facility_startup_date"] >= 1990]

In [709]:
# Calculate number of IN SERVICE facilities per year and field

facilities_in_service = facilities_df[facilities_df["facility_phase"] == "IN SERVICE"]
facilities_in_service_count = (
    facilities_in_service.groupby(["facility_belongs_to_name", "facility_startup_date"])
    .size()
    .reset_index(name="count")
)
facilities_in_service_count = facilities_in_service_count.rename(
    columns={"facility_belongs_to_name": "field", "facility_startup_date": "year"}
)

facilities_in_service_count

# For all the fields that have several years with facilities in service, we want to add cumulatively add the number of facilities in service
facilities_in_service_count["cumulative_facilities_in_service"] = (
    facilities_in_service_count.groupby("field")["count"].cumsum()
)

# Remove count column and rename cumulative_facilities_in_service to count
facilities_in_service_count = facilities_in_service_count.drop(columns="count")
facilities_in_service_count = facilities_in_service_count.rename(
    columns={"cumulative_facilities_in_service": "count"}
)
facilities_in_service_count

Unnamed: 0,field,year,count
0,aasta hansteen,2018,6
1,alve,2009,1
2,alvheim,2008,10
3,alvheim,2011,13
4,alvheim,2012,14
...,...,...,...
201,åsgard,2000,20
202,åsgard,2005,22
203,åsgard,2012,23
204,åsgard,2015,26


In [710]:
# Calculate number of SHUT DOWN facilities per year and field

facilities_shut_down = facilities_df[facilities_df["facility_phase"] == "SHUT DOWN"]

facilities_shut_down_count = (
    facilities_shut_down.groupby(["facility_belongs_to_name", "facility_startup_date"])
    .size()
    .reset_index(name="count")
)
facilities_shut_down_count = facilities_shut_down_count.rename(
    columns={"facility_belongs_to_name": "field", "facility_startup_date": "year"}
)

# Using apply, we can calculate the cumulative number of shut down facilities
facilities_shut_down_count["cumulative_facilities_shut_down"] = (
    facilities_shut_down_count.groupby("field")["count"].cumsum()
)

# Remove count column and rename cumulative_facilities_shut_down to count
facilities_shut_down_count = facilities_shut_down_count.drop(columns="count")
facilities_shut_down_count = facilities_shut_down_count.rename(
    columns={"cumulative_facilities_shut_down": "count"}
)
facilities_shut_down_count

Unnamed: 0,field,year,count
0,alvheim,2015,1
1,atla,2012,1
2,balder,1999,2
3,balder,2001,3
4,balder,2013,4
5,draugen,1993,3
6,gaupe,2012,2
7,hod,1990,1
8,knarr,2015,2
9,oseberg,1991,1


In [711]:
# One hot encode the facility_kind column without the facility_kind_ prefix only keeping the one hot encoding columns
facility_kind_in_service_df = pd.get_dummies(
    facilities_in_service, columns=["facility_kind"], prefix="", prefix_sep=""
)

# creating a list of the encoded columns
encoded_columns = facilities_in_service["facility_kind"].unique().tolist()

# facility_kind_in_service_df[['facility_belongs_to_name']+['facility_startup_date']+encoded_columns]

# Group by field and year and sum the one hot encoded columns
facility_kind_in_service_df = (
    facility_kind_in_service_df.groupby(
        ["facility_belongs_to_name", "facility_startup_date"]
    )[encoded_columns]
    .sum()
    .reset_index()
)

facility_kind_in_service_df.columns = ["field", "year"] + [
    f"facility_kind_{col.lower()}" for col in facility_kind_in_service_df.columns[2:]
]

facility_kind_in_service_df

Unnamed: 0,field,year,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar
0,aasta hansteen,2018,2,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,alve,2009,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,alvheim,2008,4,2,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,alvheim,2011,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,alvheim,2012,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,åsgard,2000,9,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0
202,åsgard,2005,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
203,åsgard,2012,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
204,åsgard,2015,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [712]:
# One hot encode the facility_kind column without the facility_kind_ prefix only keeping the one hot encoding columns
facility_kind_shut_down_df = pd.get_dummies(
    facilities_shut_down, columns=["facility_kind"], prefix="", prefix_sep=""
)

# creating a list of the encoded columns
encoded_columns = facilities_shut_down["facility_kind"].unique().tolist()

# Group by field and year and sum the one hot encoded columns
facility_kind_shut_down_df = (
    facility_kind_shut_down_df.groupby(
        ["facility_belongs_to_name", "facility_startup_date"]
    )[encoded_columns]
    .sum()
    .reset_index()
)
facility_kind_shut_down_df

Unnamed: 0,facility_belongs_to_name,facility_startup_date,SINGLE WELL TEMPLATE,JACKET 4 LEGS,MULTI WELL TEMPLATE
0,alvheim,2015,1,0,0
1,atla,2012,1,0,0
2,balder,1999,2,0,0
3,balder,2001,1,0,0
4,balder,2013,1,0,0
5,draugen,1993,3,0,0
6,gaupe,2012,2,0,0
7,hod,1990,0,1,0
8,knarr,2015,0,0,2
9,oseberg,1991,1,0,0


In [713]:
# One hot encoding the facility_surface column

facility_surface_in_service_df = pd.get_dummies(
    facilities_in_service, columns=["facility_surface"], prefix="", prefix_sep=""
)
encoded_columns = facilities_in_service["facility_surface"].unique().tolist()

facility_surface_in_service_df = (
    facility_surface_in_service_df.groupby(
        ["facility_belongs_to_name", "facility_startup_date"]
    )[encoded_columns]
    .sum()
    .reset_index()
)

# Cumulative sum of the one hot encoded columns
facility_surface_in_service_df[encoded_columns] = (
    facility_surface_in_service_df.groupby(
        "facility_belongs_to_name"
    )[encoded_columns].cumsum()
)
facility_surface_in_service_df

Unnamed: 0,facility_belongs_to_name,facility_startup_date,N,Y
0,aasta hansteen,2018,5,1
1,alve,2009,1,0
2,alvheim,2008,9,1
3,alvheim,2011,12,1
4,alvheim,2012,13,1
...,...,...,...,...
201,åsgard,2000,17,3
202,åsgard,2005,19,3
203,åsgard,2012,20,3
204,åsgard,2015,23,3


In [714]:
facility_surface_shut_down_df = pd.get_dummies(
    facilities_shut_down, columns=["facility_surface"], prefix="", prefix_sep=""
)
encoded_columns = facilities_shut_down["facility_surface"].unique().tolist()

facility_surface_shut_down_df = (
    facility_surface_shut_down_df.groupby(
        ["facility_belongs_to_name", "facility_startup_date"]
    )[encoded_columns]
    .sum()
    .reset_index()
)

# Cumulative sum of the one hot encoded columns
facility_surface_shut_down_df[encoded_columns] = facility_surface_shut_down_df.groupby(
    "facility_belongs_to_name"
)[encoded_columns].cumsum()
facility_surface_shut_down_df

Unnamed: 0,facility_belongs_to_name,facility_startup_date,N,Y
0,alvheim,2015,1,0
1,atla,2012,1,0
2,balder,1999,2,0
3,balder,2001,3,0
4,balder,2013,4,0
5,draugen,1993,3,0
6,gaupe,2012,2,0
7,hod,1990,0,1
8,knarr,2015,2,0
9,oseberg,1991,1,0


In [715]:
# Calculate the mean and std water_depth for each facility_belongs_to_name and year combo

facility_water_depth_per_field = (
    facilities_in_service.groupby(["facility_belongs_to_name"])["facility_water_depth"]
    .agg(["mean", "std"])
    .reset_index()
)

# If std is NaN, set it to 0
facility_water_depth_per_field["std"].fillna(0, inplace=True)

facility_water_depth_per_field

Unnamed: 0,facility_belongs_to_name,mean,std
0,aasta hansteen,1282.666667,32.407818
1,alve,390.000000,0.000000
2,alvheim,123.080000,1.956187
3,balder,127.037037,0.436902
4,bauge,282.000000,0.000000
...,...,...,...
78,visund sør,292.000000,0.000000
79,volund,123.666667,0.516398
80,yme,86.400000,7.797435
81,åsgard,289.307692,18.878070


In [716]:
# Calculate the mean and std facility_design_lifetime for each facility_belongs_to_name and year combo

facility_design_lifetime_per_field = (
    facilities_in_service.groupby(["facility_belongs_to_name"])[
        "facility_design_lifetime"
    ]
    .agg(["mean", "std"])
    .reset_index()
)

# If std is NaN, set it to 0
facility_design_lifetime_per_field["std"].fillna(0, inplace=True)

facility_design_lifetime_per_field

Unnamed: 0,facility_belongs_to_name,mean,std
0,aasta hansteen,25.833333,2.041241
1,alve,20.000000,0.000000
2,alvheim,21.428571,2.314550
3,balder,21.750000,2.446802
4,bauge,25.000000,0.000000
...,...,...,...
78,visund sør,25.000000,0.000000
79,volund,18.333333,2.886751
80,yme,20.000000,7.071068
81,åsgard,21.730769,5.990377


In [717]:
# Adding the facility_design_lifetime_per_field to the fields_yearly_df

fields_yearly_with_facilites_df = fields_yearly_df.merge(
    facility_design_lifetime_per_field,
    left_on=["field"],
    right_on=["facility_belongs_to_name"],
    how="left",
)

# Renaming the mean and std to lifetime_mean and lifetime_std
fields_yearly_with_facilites_df = fields_yearly_with_facilites_df.rename(
    columns={"mean": "facilities_lifetime_mean", "std": "facilities_lifetime_std"}
)

fields_yearly_with_facilites_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facility_belongs_to_name,facilities_lifetime_mean,facilities_lifetime_std
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,aasta hansteen,25.833333,2.041241
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,aasta hansteen,25.833333,2.041241
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,aasta hansteen,25.833333,2.041241
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,aasta hansteen,25.833333,2.041241
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,aasta hansteen,25.833333,2.041241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,


In [718]:
# Adding the facility_water_depth_per_field to the fields_yearly_with_facilites_df

fields_yearly_with_facilites_df = fields_yearly_with_facilites_df.merge(
    facility_water_depth_per_field,
    left_on=["field"],
    right_on=["facility_belongs_to_name"],
    how="left",
)

# Renaming the mean and std to water_depth_mean and water_depth_std
fields_yearly_with_facilites_df = fields_yearly_with_facilites_df.rename(
    columns={"mean": "facilities_water_depth_mean", "std": "facilities_water_depth_std"}
)

# Renaming facility_belongs_to_name_y to facility_belongs_to_name and removing facility_belongs_to_name_y
fields_yearly_with_facilites_df = fields_yearly_with_facilites_df.rename(
    columns={"facility_belongs_to_name_y": "facility_belongs_to_name"}
)
fields_yearly_with_facilites_df = fields_yearly_with_facilites_df.drop(
    columns="facility_belongs_to_name_x"
)

fields_yearly_with_facilites_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facility_belongs_to_name,facilities_water_depth_mean,facilities_water_depth_std
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,aasta hansteen,1282.666667,32.407818
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,aasta hansteen,1282.666667,32.407818
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,aasta hansteen,1282.666667,32.407818
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,aasta hansteen,1282.666667,32.407818
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,aasta hansteen,1282.666667,32.407818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,


In [719]:
facility_surface_shut_down_df

Unnamed: 0,facility_belongs_to_name,facility_startup_date,N,Y
0,alvheim,2015,1,0
1,atla,2012,1,0
2,balder,1999,2,0
3,balder,2001,3,0
4,balder,2013,4,0
5,draugen,1993,3,0
6,gaupe,2012,2,0
7,hod,1990,0,1
8,knarr,2015,2,0
9,oseberg,1991,1,0


In [720]:
fields_and_facilites_df = fields_yearly_with_facilites_df.copy()

# Adding facility_surface_shut_down_df, facility_surface_in_service_df, facility_kind_in_service_df, facility_kind_shut_down_df to fields_and_facilites_df
fields_and_facilites_df = fields_and_facilites_df.merge(
    facility_surface_shut_down_df,
    left_on=["field", "year"],
    right_on=["facility_belongs_to_name", "facility_startup_date"],
    how="left",
)
fields_and_facilites_df = fields_and_facilites_df.rename(
    columns={"Y": "surface_facilites_shut_down", "N": "subsea_facilites_shut_down"}
)

fields_and_facilites_df = fields_and_facilites_df.merge(
    facility_surface_in_service_df,
    left_on=["field", "year"],
    right_on=["facility_belongs_to_name", "facility_startup_date"],
    how="left",
)
fields_and_facilites_df = fields_and_facilites_df.rename(
    columns={"Y": "surface_facilites_in_service", "N": "subsea_facilites_in_service"}
)

# facility_kind_in_service_df
# Adding facility_kind_ to the names of the columns of facility_kind_in_service_df and making them lower case
fields_and_facilites_df = fields_and_facilites_df.merge(
    facility_kind_in_service_df,
    left_on=["field", "year"],
    right_on=["field", "year"],
    how="left",
)

# Cleaning by removing uneccessary columns: facility_startup_date_y, facility_belongs_to_name, facility_startup_date_x, facility_belongs_to_name_y, facility_belongs_to_name_x
fields_and_facilites_df = fields_and_facilites_df.drop(
    columns=[
        "facility_startup_date_y",
        "facility_belongs_to_name",
        "facility_startup_date_x",
        "facility_belongs_to_name_y",
        "facility_belongs_to_name_x",
    ]
)
fields_and_facilites_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,,,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,,,,,,,,,,,,,,,,,,,,,,,,,
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,,,,,,,,,,,,,,,,,,,,,,,,,
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,,,,,,,,,,,,,,,,,,,,,,,,,
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [721]:
# Facility kinds
facility_kind_columns = [
    col for col in fields_and_facilites_df.columns if "facility_kind_" in col
]
cols_to_make_zero = [
    "subsea_facilites_shut_down",
    "surface_facilites_shut_down",
    "subsea_facilites_in_service",
    "surface_facilites_in_service",
]

# Filling NaNs
fields_and_facilites_df[facility_kind_columns] = fields_and_facilites_df[
    facility_kind_columns
].fillna(0)
fields_and_facilites_df[cols_to_make_zero] = fields_and_facilites_df[
    cols_to_make_zero
].fillna(0)

In [722]:
# Cumatively sum all the facility kinds per field

for column in facility_kind_columns:
    fields_and_facilites_df[column] = fields_and_facilites_df.groupby("field")[
        column
    ].cumsum()

# Cumatively sum subsea_facilites_in_service and surface_facilites_in_service
fields_and_facilites_df["surface_facilites_in_service"] = (
    fields_and_facilites_df.groupby("field")["surface_facilites_in_service"].cumsum()
)
fields_and_facilites_df["subsea_facilites_in_service"] = (
    fields_and_facilites_df.groupby("field")["subsea_facilites_in_service"].cumsum()
)


fields_and_facilites_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Wellbores


In [723]:
# Checking the df
display(wellbores_df.head())

# Checking unique  wlbStatus, wlbPurpose, wlbSubSea, and the value counts for each

# wlbFinalVerticalDepth, wlbTotalDepth, wlbWaterDepth

display(wellbores_df["wlbStatus"].unique())
display(wellbores_df["wlbPurpose"].unique())
display(wellbores_df["wlbSubSea"].unique())

Unnamed: 0,wlbWellboreName,wlbWell,wlbDrillingOperator,wlbProductionLicence,wlbStatus,wlbPurpose,wlbPurposePlanned,wlbContent,wlbWellType,wlbSubSea,wlbEntryDate,wlbCompletionDate,wlbEntryPreDrillDate,wlbCompPreDrillDate,wlbField,wlbDrillPermit,wlbDiscovery,wlbDiscoveryWellbore,wlbKellyBushElevation,wlbFinalVerticalDepth,wlbTotalDepth,wlbWaterDepth,wlbKickOffPoint,wlbMainArea,wlbDrillingFacility,wlbFacilityTypeDrilling,wlbDrillingFacilityFixedOrMoveable,wlbProductionFacility,wlbLicensingActivity,wlbMultilateral,wlbContentPlanned,wlbEntryYear,wlbCompletionYear,wlbReclassFromWellbore,wlbPluggedAbandonDate,wlbPluggedDate,wlbLicenceTargetName,wlbPlotSymbol,wlbGeodeticDatum,wlbNsDeg,wlbNsMin,wlbNsSec,wlbNsCode,wlbEwDeg,wlbEwMin,wlbEwSec,wlbEwCode,wlbNsDecDeg,wlbEwDecDeg,wlbNsUtm,wlbEwUtm,wlbUtmZone,wlbNamePart1,wlbNamePart2,wlbNamePart3,wlbNamePart4,wlbNamePart5,wlbNamePart6,wlbFactPageUrl,wlbFactMapUrl,wlbDiskosWellboreType,wlbDiskosWellboreParent,wlbNpdidWellbore,dscNpdidDiscovery,fldNpdidField,wlbWdssQcDate,wlbReleasedDate,prlNpdidProductionLicence,prlNpdidProdLicenceTarget,fclNpdidFacilityDrilling,fclNpdidFacilityProducing,wlbNpdidWellboreReclass,wlbDateUpdated,wlbDateUpdatedMax,datesyncNPD
0,1/3-A-1 H,1/3-A-1,DONG E&P Norge AS,274,P&A,PRODUCTION,PRODUCTION,OIL,DEVELOPMENT,YES,22.07.2011,21.09.2011,,,OSELVAR,3365-P,1/3-6 Oselvar,NO,45.0,3163.0,5927.0,72.0,,NORTH SEA,MÆRSK GIANT,JACK-UP 3 LEGS,MOVEABLE,OSELVAR,NST2001,NO,OIL,2011,2011,,05.08.2021,24.07.2021,,50,ED50,56,55,55.06,N,2,40,16.66,E,56.931961,2.671294,6310001.5,479994.47,31,1,3,A,1,,,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=We...,initial,,6612,43832.0,5506919.0,,21.09.2013,2060266.0,,278245.0,410592.0,0,02.01.2024,06.10.2015,28.05.2024
1,1/3-A-2 H,1/3-A-2,DONG E&P Norge AS,274,P&A,PRODUCTION,PRODUCTION,OIL,DEVELOPMENT,YES,18.11.2011,19.01.2012,19.06.2011,04.07.2011,OSELVAR,3366-P,1/3-6 Oselvar,NO,45.0,3170.0,5882.0,72.0,,NORTH SEA,MÆRSK GIANT,JACK-UP 3 LEGS,MOVEABLE,OSELVAR,NST2001,NO,OIL,2011,2012,,05.08.2021,18.05.2021,,50,ED50,56,55,54.89,N,2,40,16.67,E,56.931914,2.671297,6309996.24,479994.61,31,1,3,A,2,,,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=We...,initial,,6613,43832.0,5506919.0,,19.01.2014,2060266.0,,278245.0,410592.0,0,02.01.2024,06.10.2015,28.05.2024
2,1/3-A-3 H,1/3-A-3,DONG E&P Norge AS,274,P&A,PRODUCTION,PRODUCTION,OIL,DEVELOPMENT,YES,04.03.2012,14.05.2012,05.07.2011,21.07.2011,OSELVAR,3367-P,1/3-6 Oselvar,NO,45.0,3171.0,6665.0,72.0,,NORTH SEA,MÆRSK GIANT,JACK-UP 3 LEGS,MOVEABLE,OSELVAR,NST2001,NO,OIL,2012,2012,,05.08.2021,11.07.2021,,50,ED50,56,55,55.07,N,2,40,17.32,E,56.931964,2.671478,6310001.76,480005.63,31,1,3,A,3,,,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=We...,initial,,6614,43832.0,5506919.0,,14.05.2014,2060266.0,,278245.0,410592.0,0,02.01.2024,06.10.2015,28.05.2024
3,1/3-K-1,1/3-K-1,BP Amoco Norge AS,65,PRODUCING,PRODUCTION,PRODUCTION,OIL,DEVELOPMENT,NO,08.05.1998,31.07.1998,,,TAMBAR,1942-P,1/3-3 Tambar,NO,50.0,4386.0,4543.0,68.0,,NORTH SEA,TRANSOCEAN NORDIC,JACK-UP 3 LEGS,MOVEABLE,TAMBAR,6,NO,OIL,1998,1998,1/3-9 S,,,,50,ED50,56,58,57.93,N,2,57,31.44,E,56.982758,2.958733,6315608.87,497491.85,31,1,3,K,1,,,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=We...,initial,,4320,43826.0,1028599.0,,31.07.2000,21316.0,,296191.0,280793.0,3362,22.03.2024,06.10.2015,28.05.2024
4,1/3-K-2,1/3-K-2,Aker BP ASA,65,PLUGGED,OBSERVATION,PRODUCTION,NOT APPLICABLE,DEVELOPMENT,NO,02.01.2018,19.01.2018,27.10.2017,03.11.2017,TAMBAR,4277-P,1/3-3 Tambar,NO,70.0,4379.0,4911.0,67.4,,NORTH SEA,MAERSK INTERCEPTOR,JACK-UP 3 LEGS,MOVEABLE,TAMBAR,6,NO,OIL,2018,2018,,,19.01.2018,,99,ED50,56,58,57.99,N,2,57,31.52,E,56.982775,2.958756,6315610.72,497493.21,31,1,3,K,2,,,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=We...,initial,,8286,43826.0,1028599.0,,19.01.2020,21316.0,,436222.0,280793.0,0,02.01.2024,,28.05.2024


array(['P&A', 'PRODUCING', 'PLUGGED', 'CLOSED', 'JUNKED', 'PREDRILLED',
       'INJECTING', 'WILL NEVER BE DRILLED', 'SUSPENDED', nan, 'DRILLING',
       'ONLINE/OPERATIONAL'], dtype=object)

array(['PRODUCTION', 'OBSERVATION', 'INJECTION', nan, 'NOT AVAILABLE',
       'INJECTION-CCS'], dtype=object)

array(['YES', ' NO'], dtype=object)

In [724]:
# Removing all rows where wlbPurpose = nan and 'NOT AVAILABLE'
wellbores_df = wellbores_df[wellbores_df["wlbStatus"].notna()]

# Removing all rows where
wellbores_df = wellbores_df[wellbores_df["wlbPurpose"].notna()]
wellbores_df = wellbores_df[wellbores_df["wlbPurpose"] != "NOT AVAILABLE"]

wellbores_df = wellbores_df[wellbores_df["wlbField"].notna()]

In [725]:
# Keeping only the following columns: wlbField, wlbStatus, wlbPurpose, wlbSubSea, wlbFinalVerticalDepth, wlbTotalDepth, wlbWaterDepth

wellbores_smaller_df = wellbores_df[
    [
        "wlbField",
        "wlbStatus",
        "wlbPurpose",
        "wlbSubSea",
        "wlbFinalVerticalDepth",
        "wlbWaterDepth",
        "wlbEntryYear",
        "wlbCompletionYear",
        "wlbPluggedAbandonDate",
        "wlbPluggedDate",
    ]
].copy()
wellbores_smaller_df

Unnamed: 0,wlbField,wlbStatus,wlbPurpose,wlbSubSea,wlbFinalVerticalDepth,wlbWaterDepth,wlbEntryYear,wlbCompletionYear,wlbPluggedAbandonDate,wlbPluggedDate
0,OSELVAR,P&A,PRODUCTION,YES,3163.0,72.0,2011,2011,05.08.2021,24.07.2021
1,OSELVAR,P&A,PRODUCTION,YES,3170.0,72.0,2011,2012,05.08.2021,18.05.2021
2,OSELVAR,P&A,PRODUCTION,YES,3171.0,72.0,2012,2012,05.08.2021,11.07.2021
3,TAMBAR,PRODUCING,PRODUCTION,NO,4386.0,68.0,1998,1998,,
4,TAMBAR,PLUGGED,OBSERVATION,NO,4379.0,67.4,2018,2018,,19.01.2018
...,...,...,...,...,...,...,...,...,...,...
5879,JOHAN CASTBERG,CLOSED,PRODUCTION,YES,1352.0,373.4,2021,2022,,
5880,JOHAN CASTBERG,CLOSED,PRODUCTION,YES,1354.0,373.4,2022,2022,,
5881,JOHAN CASTBERG,SUSPENDED,PRODUCTION,YES,,373.4,2021,0,,
5883,JOHAN CASTBERG,CLOSED,INJECTION,YES,1684.0,372.0,2020,2020,,


In [726]:
# Making field lower case
wellbores_smaller_df["wlbField"] = wellbores_smaller_df["wlbField"].str.lower()

# removing wlb from all column names
wellbores_smaller_df.columns = wellbores_smaller_df.columns.str.replace("wlb", "well_")

# making all column names lower case
wellbores_smaller_df.columns = wellbores_smaller_df.columns.str.lower()

In [727]:
# Filtering out well_entry_year before 1990 and after 2023
wellbores_smaller_df = wellbores_smaller_df[
    (wellbores_smaller_df["well_entryyear"] >= 1990)
    & (wellbores_smaller_df["well_entryyear"] <= 2023)
]

wellbores_smaller_df

Unnamed: 0,well_field,well_status,well_purpose,well_subsea,well_finalverticaldepth,well_waterdepth,well_entryyear,well_completionyear,well_pluggedabandondate,well_pluggeddate
0,oselvar,P&A,PRODUCTION,YES,3163.0,72.0,2011,2011,05.08.2021,24.07.2021
1,oselvar,P&A,PRODUCTION,YES,3170.0,72.0,2011,2012,05.08.2021,18.05.2021
2,oselvar,P&A,PRODUCTION,YES,3171.0,72.0,2012,2012,05.08.2021,11.07.2021
3,tambar,PRODUCING,PRODUCTION,NO,4386.0,68.0,1998,1998,,
4,tambar,PLUGGED,OBSERVATION,NO,4379.0,67.4,2018,2018,,19.01.2018
...,...,...,...,...,...,...,...,...,...,...
5879,johan castberg,CLOSED,PRODUCTION,YES,1352.0,373.4,2021,2022,,
5880,johan castberg,CLOSED,PRODUCTION,YES,1354.0,373.4,2022,2022,,
5881,johan castberg,SUSPENDED,PRODUCTION,YES,,373.4,2021,0,,
5883,johan castberg,CLOSED,INJECTION,YES,1684.0,372.0,2020,2020,,


In [728]:
# Removing trailing whitespace from the well_subsea column
wellbores_smaller_df["well_subsea"] = wellbores_smaller_df["well_subsea"].str.strip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wellbores_smaller_df['well_subsea'] = wellbores_smaller_df['well_subsea'].str.strip()


One-hot encoding the wellbore data


In [729]:
# Getting dummies for well_status, well_purpose, and well_subsea
wellbores_calc_df = wellbores_smaller_df.copy()

well_status_df = pd.get_dummies(wellbores_smaller_df, columns=["well_status"])
well_purpose_df = pd.get_dummies(wellbores_smaller_df, columns=["well_purpose"])
well_subsea_df = pd.get_dummies(wellbores_smaller_df, columns=["well_subsea"])

# Make all columns lower case
well_status_df.columns = well_status_df.columns.str.lower()
well_purpose_df.columns = well_purpose_df.columns.str.lower()
well_subsea_df.columns = well_subsea_df.columns.str.lower()

well_status_cols = [col for col in well_status_df.columns if "well_status" in col]
well_purpose_cols = [col for col in well_purpose_df.columns if "well_purpose" in col]
well_subsea_cols = [col for col in well_subsea_df.columns if "well_subsea" in col]

# Cumulative sum of the one hot encoded columns
well_status_df = (
    well_status_df.groupby(["well_field", "well_entryyear"])[well_status_cols]
    .sum()
    .reset_index()
)
well_purpose_df = (
    well_purpose_df.groupby(["well_field", "well_entryyear"])[well_purpose_cols]
    .sum()
    .reset_index()
)
well_subsea_df = (
    well_subsea_df.groupby(["well_field", "well_entryyear"])[well_subsea_cols]
    .sum()
    .reset_index()
)

# Cumalitve sum of all the three one hot encoded columns
for column in well_status_cols:
    well_status_df[column] = well_status_df.groupby("well_field")[column].cumsum()

for column in well_purpose_cols:
    well_purpose_df[column] = well_purpose_df.groupby("well_field")[column].cumsum()

for column in well_subsea_cols:
    well_subsea_df[column] = well_subsea_df.groupby("well_field")[column].cumsum()


display(well_status_df.head())
display(well_purpose_df.head())
display(well_subsea_df.head())

Unnamed: 0,well_field,well_entryyear,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended
0,aasta hansteen,2017,0,0,0,0,0,0,0,6,0
1,aasta hansteen,2018,0,0,0,0,0,0,0,8,0
2,alve,2009,0,0,0,0,0,0,0,1,0
3,alve,2010,0,0,0,0,0,0,1,2,0
4,alve,2016,0,0,0,0,0,0,1,3,0


Unnamed: 0,well_field,well_entryyear,well_purpose_injection,well_purpose_observation,well_purpose_production
0,aasta hansteen,2017,0,0,6
1,aasta hansteen,2018,0,0,8
2,alve,2009,0,0,1
3,alve,2010,0,1,2
4,alve,2016,0,1,3


Unnamed: 0,well_field,well_entryyear,well_subsea_no,well_subsea_yes
0,aasta hansteen,2017,0,6
1,aasta hansteen,2018,0,8
2,alve,2009,0,1
3,alve,2010,0,3
4,alve,2016,0,4


Merging the wellbore status, purpose and surface/subsea data with the production data


In [730]:
# Merging the well_status_df, well_purpose_df, well_subsea_df to the fields_and_facilites_df without adding the well_entryyear and well_field columns

fields_facilites_wells_df = fields_and_facilites_df.copy()

fields_facilites_wells_df = fields_facilites_wells_df.merge(
    well_status_df,
    left_on=["field", "year"],
    right_on=["well_field", "well_entryyear"],
    how="left",
)
fields_facilites_wells_df = fields_facilites_wells_df.drop(
    columns=["well_field", "well_entryyear"]
)

fields_facilites_wells_df = fields_facilites_wells_df.merge(
    well_purpose_df,
    left_on=["field", "year"],
    right_on=["well_field", "well_entryyear"],
    how="left",
)
fields_facilites_wells_df = fields_facilites_wells_df.drop(
    columns=["well_field", "well_entryyear"]
)

fields_facilites_wells_df = fields_facilites_wells_df.merge(
    well_subsea_df,
    left_on=["field", "year"],
    right_on=["well_field", "well_entryyear"],
    how="left",
)
fields_facilites_wells_df = fields_facilites_wells_df.drop(
    columns=["well_field", "well_entryyear"]
)

fields_facilites_wells_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,


In [731]:
# Replacing NaNs with 0 for the new columns

fields_facilites_wells_df[well_status_cols] = fields_facilites_wells_df[
    well_status_cols
].fillna(0)
fields_facilites_wells_df[well_purpose_cols] = fields_facilites_wells_df[
    well_purpose_cols
].fillna(0)
fields_facilites_wells_df[well_subsea_cols] = fields_facilites_wells_df[
    well_subsea_cols
].fillna(0)

# Cumulatively sum the columns

for column in well_status_cols:
    fields_facilites_wells_df[column] = fields_facilites_wells_df.groupby("field")[
        column
    ].cumsum()

for column in well_purpose_cols:
    fields_facilites_wells_df[column] = fields_facilites_wells_df.groupby("field")[
        column
    ].cumsum()

for column in well_subsea_cols:
    fields_facilites_wells_df[column] = fields_facilites_wells_df.groupby("field")[
        column
    ].cumsum()

fields_facilites_wells_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,0.00001,23395946,0.0,0.012227,0.0,0.000139,0.012361,0.000004,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.049380,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,øst frigg,1994,0.0,0.98259,0.0,0.00696,0.98951,0.00000,43576,0.0,0.048187,0.0,0.000388,0.048564,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2095,øst frigg,1995,0.0,0.31923,0.0,0.00065,0.31988,0.00000,43576,0.0,0.024758,0.0,0.000058,0.024783,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2096,øst frigg,1996,0.0,0.61029,0.0,0.00035,0.61066,0.00000,43576,0.0,0.030526,0.0,0.000021,0.030541,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2097,øst frigg,1997,0.0,0.09648,0.0,0.00009,0.09658,0.00000,43576,0.0,0.005968,0.0,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Calculating the mean and standard deviation of the wellbore


In [732]:
# Calculating the mean and std well_finalverticaldepth and well_waterdepth grouped by well_field and well_entryyear

well_finalverticaldepth_df = (
    wellbores_smaller_df.groupby(["well_field", "well_entryyear"])[
        "well_finalverticaldepth"
    ]
    .agg(["mean", "std"])
    .reset_index()
)
well_waterdepth_df = (
    wellbores_smaller_df.groupby(["well_field", "well_entryyear"])["well_waterdepth"]
    .agg(["mean", "std"])
    .reset_index()
)

# If std is NaN, set it to 0
well_finalverticaldepth_df["std"].fillna(0, inplace=True)
well_waterdepth_df["std"].fillna(0, inplace=True)

# Ranaming columns, adding well_final_vertical_depth_mean and well_final_vertical_depth_std to the well_finalverticaldepth_df
well_finalverticaldepth_df = well_finalverticaldepth_df.rename(
    columns={
        "mean": "well_final_vertical_depth_mean",
        "std": "well_final_vertical_depth_std",
    }
)

# Ranaming columns, adding well_water_depth_mean and well_water_depth_std to the well_waterdepth_df
well_waterdepth_df = well_waterdepth_df.rename(
    columns={"mean": "well_water_depth_mean", "std": "well_water_depth_std"}
)

display(well_finalverticaldepth_df)
display(well_waterdepth_df)

Unnamed: 0,well_field,well_entryyear,well_final_vertical_depth_mean,well_final_vertical_depth_std
0,aasta hansteen,2017,3051.666667,144.917448
1,aasta hansteen,2018,2601.500000,88.388348
2,alve,2009,3853.000000,0.000000
3,alve,2010,3795.500000,89.802561
4,alve,2016,3608.000000,0.000000
...,...,...,...,...
1025,åsgard,2020,4504.000000,284.256926
1026,åsgard,2021,3999.333333,238.405816
1027,åsgard,2022,4264.000000,0.000000
1028,åsgard,2023,,0.000000


Unnamed: 0,well_field,well_entryyear,well_water_depth_mean,well_water_depth_std
0,aasta hansteen,2017,1263.666667,23.754298
1,aasta hansteen,2018,1286.500000,34.648232
2,alve,2009,368.000000,0.000000
3,alve,2010,368.000000,0.000000
4,alve,2016,368.000000,0.000000
...,...,...,...,...
1025,åsgard,2020,280.500000,12.020815
1026,åsgard,2021,302.333333,13.279056
1027,åsgard,2022,289.000000,0.000000
1028,åsgard,2023,298.000000,22.516660


Merge the wellbore mean and standatd deviation data with the production data


In [733]:
# Merging into fields_facilites_wells_df

fields_facilites_wells_df = fields_facilites_wells_df.merge(
    well_finalverticaldepth_df,
    left_on=["field", "year"],
    right_on=["well_field", "well_entryyear"],
    how="left",
)
fields_facilites_wells_df = fields_facilites_wells_df.drop(
    columns=["well_field", "well_entryyear"]
)

fields_facilites_wells_df = fields_facilites_wells_df.merge(
    well_waterdepth_df,
    left_on=["field", "year"],
    right_on=["well_field", "well_entryyear"],
    how="left",
)
fields_facilites_wells_df = fields_facilites_wells_df.drop(
    columns=["well_field", "well_entryyear"]
)

fields_facilites_wells_calc_df = fields_facilites_wells_df.copy()

well_data_cols = [
    "well_final_vertical_depth_mean",
    "well_final_vertical_depth_std",
    "well_water_depth_mean",
    "well_water_depth_std",
]

# Forward filling the NaNs in the new columns
fields_facilites_wells_calc_df[well_data_cols] = fields_facilites_wells_calc_df[
    well_data_cols
].ffill()

latest_merged = fields_facilites_wells_calc_df.copy()

### Investments


In [734]:
investments_df = investments_df.rename(
    columns={
        "prfInformationCarrier": "field",
        "prfYear": "year",
        "prfInvestmentsMillNOK": "investments_mill_nok",
        "prfNpdidInformationCarrier": "field_id",
    }
)

# Removing investments with year before 1990 and after 2023
investments_df = investments_df[
    (investments_df["year"] >= 1990) & (investments_df["year"] <= 2023)
]

# converting field to lower case
investments_df["field"] = investments_df["field"].str.lower()
investments_df.head()

Unnamed: 0,field,year,investments_mill_nok,field_id,dateSyncNPD
16,albuskjell,1990,8,43437,22.05.2024
17,albuskjell,1991,4,43437,22.05.2024
18,albuskjell,1992,10,43437,22.05.2024
19,albuskjell,1993,0,43437,22.05.2024
20,albuskjell,1994,3,43437,22.05.2024


In [735]:
future_investments_df.head()

Unnamed: 0,fldName,fldInvestmentExpected,fldInvExpFixYear,fldNpdidField
0,ALVE,1167,2023,4444332
1,ALVE NORD,6602,2023,42002483
2,ALVHEIM,14290,2023,2845712
3,BALDER,31306,2023,43562
4,BAUGE,74,2023,29446221


In [736]:
future_investments_df = future_investments_df.rename(
    columns={
        "fldName": "field",
        "prfYear": "year",
        "fldInvestmentExpected": "future_investments_mill_nok",
        "prfNpdidInformationCarrier": "field_id",
    }
)

# Convert field to lower case
future_investments_df["field"] = future_investments_df["field"].str.lower()

# dropping fldInvExpFixYear and fldNpdidField
future_investments_df.drop(columns=["fldInvExpFixYear", "fldNpdidField"], inplace=True)

future_investments_df.head()

Unnamed: 0,field,future_investments_mill_nok
0,alve,1167
1,alve nord,6602
2,alvheim,14290
3,balder,31306
4,bauge,74


In [737]:
# Adding the future_investments_mill_nok to the investments_df
investments_merged_df = investments_df.merge(
    future_investments_df[["field", "future_investments_mill_nok"]],
    on=["field"],
    how="left",
)
investments_merged_df

Unnamed: 0,field,year,investments_mill_nok,field_id,dateSyncNPD,future_investments_mill_nok
0,albuskjell,1990,8,43437,22.05.2024,
1,albuskjell,1991,4,43437,22.05.2024,
2,albuskjell,1992,10,43437,22.05.2024,
3,albuskjell,1993,0,43437,22.05.2024,
4,albuskjell,1994,3,43437,22.05.2024,
...,...,...,...,...,...,...
2670,aasta hansteen,2019,763,23395946,22.05.2024,836.0
2671,aasta hansteen,2020,119,23395946,22.05.2024,836.0
2672,aasta hansteen,2021,119,23395946,22.05.2024,836.0
2673,aasta hansteen,2022,298,23395946,22.05.2024,836.0


In [738]:
# Setting all NaN in future_investments_mill_nok to 0

investments_merged_df["future_investments_mill_nok"] = investments_merged_df[
    "future_investments_mill_nok"
].fillna(0)

# Dropping dateSyncNPD and field_id column
investments_merged_df.drop(columns=["dateSyncNPD", "field_id"], inplace=True)

investments_merged_df

Unnamed: 0,field,year,investments_mill_nok,future_investments_mill_nok
0,albuskjell,1990,8,0.0
1,albuskjell,1991,4,0.0
2,albuskjell,1992,10,0.0
3,albuskjell,1993,0,0.0
4,albuskjell,1994,3,0.0
...,...,...,...,...
2670,aasta hansteen,2019,763,836.0
2671,aasta hansteen,2020,119,836.0
2672,aasta hansteen,2021,119,836.0
2673,aasta hansteen,2022,298,836.0


Merging investment data into latest merged_df


In [739]:
# Merging investments_merged into latest_merged

field_facility_well_investment_df = latest_merged.merge(
    investments_merged_df, on=["field", "year"], how="left"
)

# Removing all rows after 2023
field_facility_well_investment_df = field_facility_well_investment_df[
    field_facility_well_investment_df["year"] <= 2023
]
field_facility_well_investment_df[
    field_facility_well_investment_df["future_investments_mill_nok"].isna()
]

field_facility_well_investment_df.head()

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes,well_final_vertical_depth_mean,well_final_vertical_depth_std,well_water_depth_mean,well_water_depth_std,investments_mill_nok,future_investments_mill_nok
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,1e-05,23395946,0.0,0.012227,0.0,0.000139,0.012361,4e-06,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,4440.0,836.0
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,763.0,836.0
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,119.0,836.0
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,119.0,836.0
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.04938,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,298.0,836.0


### Licensees


In [740]:
licensees_df.head()

Unnamed: 0,fldName,fldOwnerName,fldOwnerKind,fldOwnerFrom,fldOwnerTo,fldLicenseeFrom,fldLicenseeTo,cmpLongName,fldCompanyShare,fldSdfiShare,fldNpdidField,cmpNpdidCompany,fldLicenseeDateUpdated,DatesyncNPD
0,ALBUSKJELL,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,TotalEnergies EP Norge AS,39.896,,43437,35000016,02.01.2024,22.05.2024
1,ALBUSKJELL,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,ConocoPhillips Skandinavia AS,35.112,,43437,2410696,02.01.2024,22.05.2024
2,ALBUSKJELL,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Vår Energi ASA,12.388,,43437,50000032,02.01.2024,22.05.2024
3,ALBUSKJELL,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Sval Energi AS,7.604,0.0,43437,36536246,02.01.2024,22.05.2024
4,ALBUSKJELL,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Petoro AS,5.0,,43437,1626839,02.01.2024,22.05.2024


In [741]:
# Renaming columns: fldName to field, cmpLongName to old_company_name, fldCompanyShare to company_share

licensees_df = licensees_df.rename(
    columns={
        "fldName": "field",
        "cmpLongName": "old_company_name",
        "fldCompanyShare": "company_share",
        "fldNpdidField": "field_id",
        "cmpNpdidCompany": "company_id",
    }
)

In [742]:
# Making field lower case
licensees_df["field"] = licensees_df["field"].str.lower()
display(licensees_df.shape)

(9434, 14)

In [743]:
# Only keeping the fields that are in field_facility_well_investment_df 1997-2023

field_facility_well_investment_df_1997_2023 = field_facility_well_investment_df[
    (field_facility_well_investment_df["year"] >= 1997)
    & (field_facility_well_investment_df["year"] <= 2023)
]
field_licensees_df = licensees_df[
    licensees_df["field"].isin(field_facility_well_investment_df_1997_2023["field"])
]
display(field_licensees_df.head())
display(field_licensees_df.shape)
display(field_licensees_df.old_company_name.nunique())

Unnamed: 0,field,fldOwnerName,fldOwnerKind,fldOwnerFrom,fldOwnerTo,fldLicenseeFrom,fldLicenseeTo,old_company_name,company_share,fldSdfiShare,field_id,company_id,fldLicenseeDateUpdated,DatesyncNPD
0,albuskjell,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,TotalEnergies EP Norge AS,39.896,,43437,35000016,02.01.2024,22.05.2024
1,albuskjell,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,ConocoPhillips Skandinavia AS,35.112,,43437,2410696,02.01.2024,22.05.2024
2,albuskjell,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Vår Energi ASA,12.388,,43437,50000032,02.01.2024,22.05.2024
3,albuskjell,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Sval Energi AS,7.604,0.0,43437,36536246,02.01.2024,22.05.2024
4,albuskjell,18,PRODUCTION LICENSE,25.04.1975,,30.09.2022,,Petoro AS,5.0,,43437,1626839,02.01.2024,22.05.2024


(9218, 14)

250

In [744]:
# Converting fldOwnerFrom and fldLicenseeFrom to datetime

field_licensees_df["fldOwnerFrom"] = pd.to_datetime(
    field_licensees_df["fldOwnerFrom"], format="%d.%m.%Y"
)
field_licensees_df["fldLicenseeFrom"] = pd.to_datetime(
    field_licensees_df["fldLicenseeFrom"], format="%d.%m.%Y"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  field_licensees_df['fldOwnerFrom'] = pd.to_datetime(field_licensees_df['fldOwnerFrom'], format='%d.%m.%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  field_licensees_df['fldLicenseeFrom'] = pd.to_datetime(field_licensees_df['fldLicenseeFrom'], format='%d.%m.%Y')


In [745]:
# field_licensees_df between 1997-2023

field_licensees_df_1997_2023 = field_licensees_df[
    (field_licensees_df["fldLicenseeFrom"].dt.year >= 1997)
    & (field_licensees_df["fldLicenseeFrom"].dt.year <= 2023)
]
display(field_licensees_df_1997_2023.old_company_name.nunique())
display(field_licensees_df_1997_2023.shape)

216

(7842, 14)

In [746]:
# Checking all unique old_company_name

display(field_licensees_df_1997_2023.old_company_name.nunique())
display(field_licensees_df_1997_2023.old_company_name.unique())

# Creating a a copy of old_company_name and calling it new_company_name
field_licensees_df_1997_2023["new_company_name"] = field_licensees_df_1997_2023[
    "old_company_name"
]

216

array(['TotalEnergies EP Norge AS', 'ConocoPhillips Skandinavia AS',
       'Vår Energi ASA', 'Sval Energi AS', 'Petoro AS',
       'Equinor Energy AS', 'Vår Energi AS', 'Total E&P Norge AS',
       'Eni Norge AS', 'Statoil Petroleum AS',
       'StatoilHydro Petroleum AS', 'StatoilHydro ASA',
       'Norsk Hydro Petroleum AS', 'Statoil ASA (old)',
       'Norsk Hydro Produksjon AS', 'Norsk Agip AS',
       'TotalFinaElf Exploration Norge AS', 'Phillips Petroleum Norsk AS',
       'ConocoPhillips Norge', 'Phillips Petroleum Company Norway',
       'Total Norge AS', 'Statens Direkte Økonomiske Engasjement SDØE',
       'Den norske stats oljeselskap a.s', 'Saga Petroleum ASA',
       'Fina Production Licenses AS', 'Elf Petroleum Norge AS',
       'DNO Norge AS', 'PGNiG Upstream Norway AS', 'INEOS E&P Norge AS',
       'DNO North Sea (Norge) AS', 'Faroe Petroleum Norge AS',
       'DONG E&P Norge AS', 'Aker BP ASA', 'ABP Norway AS',
       'Lundin Energy Norway AS', 'Lundin Norway AS',
  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  field_licensees_df_1997_2023['new_company_name'] = field_licensees_df_1997_2023['old_company_name']


In [747]:
# Define the replacement operator
def replace_operator(name, name_contains, replacement):
    if name_contains.lower() in name.lower():
        return replacement
    return name

In [748]:
# Define the replacements as a list of tuples (substring, replacement)
name_replacements = [
    # BP
    ("det norske oljeselskap", "Aker BP"),
    ("det norske exploration as", "Aker BP"),
    ("aker", "Aker BP"),
    ("bp norge", "Aker BP"),
    ("Marathon", "Aker BP"),
    ("Pertra ASA", "Aker BP"),
    ("Pertra AS", "Aker BP"),
    ("Hess", "Aker BP"),
    ("ABP Norway AS", "Aker BP"),
    ("bp", "Aker BP"),
    ("lundin", "Aker BP"),
    # Equinor
    ("statoil", "Equinor ASA"),
    ("equinor", "Equinor ASA"),
    ("den norske stats oljeselskap", "Equinor ASA"),
    ("Saga Petroleum ASA", "Equinor ASA"),
    # CapeOmega
    ("capeomega", "CapeOmega AS"),
    ("Norwegian Energy Company ASA", "CapeOmega AS"),
    # Concedo
    ("concedo", "Concedo ASA"),
    # ConocoPhillips
    ("conoco", "ConocoPhillips"),
    ("phillips", "ConocoPhillips"),
    ("Conoco Phillips", "ConocoPhillips"),
    # Kuwait Petroleum Company
    ("AEDC", "Kuwait Petroleum Company"),
    ("KUFPEC", "Kuwait Petroleum Company"),
    # DNO
    ("dno", "DNO ASA"),
    ("faroe petroleum", "DNO ASA"),
    # Harbour Energy
    ("chrysaor", "Harbour Energy"),
    # Lime Petroleum
    ("lime", "Lime Petroleum"),
    # Neptune
    ("VNG", "Neptune Energy Norge AS"),
    ("ENGIE E&P Norge AS", "Neptune Energy Norge AS"),
    ("GDF SUEZ E&P Norge AS", "Neptune Energy Norge AS"),
    ("neptune", "Neptune Energy Norge AS"),
    # Okea
    ("okea", "OKEA ASA"),
    # Pandion Energy
    ("one-dyas", "Pandion Energy"),
    ("tullow", "Pandion Energy"),
    # Petoro
    ("statens direkte økonomiske engasjement sdøe", "Petoro AS"),
    # PGniG
    ("PGNiG", "PGNiG"),
    ("Pelican", "PGNiG"),
    ("dong", "PGNiG"),
    ("ineos", "PGNiG"),
    # Shell
    ("enterprise oil norwegian as", "Shell"),
    ("BG Norge AS", "Shell"),
    ("Enterprise Oil Norge AS", "Shell"),
    # Repsol
    ("repsol", "Repsol"),
    ("talisman", "Repsol"),
    ("Oryx (UK) Energy Company", "Repsol"),
    ("paladin", "Repsol"),
    # Sval Energi
    ("sval", "Sval Energi AS"),
    ("capricorn", "Sval Energi AS"),
    ("bayern", "Sval Energi AS"),
    ("spirit", "Sval Energi AS"),
    ("centrica", "Sval Energi AS"),
    ("pa resources", "Sval Energi AS"),
    ("suncor", "Sval Energi AS"),
    ("petro canada", "Sval Energi AS"),
    ("petro-canada", "Sval Energi AS"),
    # TotalEnergies EP Norge
    ("Total", "TotalEnergies EP Norge"),
    ("Totalfinaelf", "TotalEnergies EP Norge"),
    ("Fina Production Licenses AS", "TotalEnergies EP Norge"),
    ("Fina", "TotalEnergies EP Norge"),
    ("Elf Rex Norge AS", "TotalEnergies EP Norge"),
    ("Elf", "TotalEnergies EP Norge"),
    ("Kerr Mc-Gee North Sea (UK) Ltd", "TotalEnergies EP Norge"),
    ("Maersk Oil UK Limited", "TotalEnergies EP Norge"),
    # Vår Energi
    ("vår energi", "Vår Energi AS"),
    ("eni", "Vår Energi AS"),
    ("exxon", "Vår Energi AS"),
    ("Norsk Agip AS", "Vår Energi AS"),
    # Wintershall Dea
    ("Wintershall Norge AS", "Wintershall Dea Norge AS"),
    ("RWE Dea Norge AS", "Wintershall Dea Norge AS"),
    ("Norske RWE-DEA AS", "Wintershall Dea Norge AS"),
    ("E.ON", "Wintershall Dea Norge AS"),
    ("Dea E&P Norge AS", "Wintershall Dea Norge AS"),
    ("DEA Norge AS", "Wintershall Dea Norge AS"),
    # Misc
    ("hydro", "Norsk Hydro ASA"),
    ("Chevron", "Chevron"),
    ("pgs", "PGS"),
    ("Revus", "Revus Energy AS"),
    ("Harbour", "Harbour Energy"),
]

In [749]:
# Apply the replacements
for name_contains, replacement in name_replacements:
    field_licensees_df_1997_2023["new_company_name"] = field_licensees_df_1997_2023[
        "new_company_name"
    ].apply(lambda name: replace_operator(name, name_contains, replacement))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  field_licensees_df_1997_2023['new_company_name'] = field_licensees_df_1997_2023['new_company_name'].apply(


In [750]:
# Check the unique values of new_company_name

display(field_licensees_df_1997_2023.new_company_name.nunique())
display(field_licensees_df_1997_2023.new_company_name.unique())

81

array(['TotalEnergies EP Norge', 'ConocoPhillips', 'Vår Energi AS',
       'Sval Energi AS', 'Petoro AS', 'Equinor ASA', 'Norsk Hydro ASA',
       'DNO ASA', 'PGNiG', 'Aker BP',
       'LOTOS Exploration and Production Norge AS',
       'Kistos Energy (Norway) AS', 'Mime Petroleum AS',
       'Point Resources AS', 'Wintershall Dea Norge AS',
       'Neptune Energy Norge AS', 'Repsol',
       'Dana Petroleum (BVUK) Limited',
       'JX Nippon Exploration and Production (UK) Limited',
       'Roc Oil (GB) Limited',
       'Nippon Oil Exploration and Production UKl Limited',
       'Bow Valley Petroleum (UK) Limited',
       'Moc Exploration (U.K.) Limited', 'OKEA ASA', 'Lime Petroleum',
       'Petrolia NOCO AS', 'M Vest Energy AS', 'Core Energy AS',
       'Pandion Energy', 'Spring Energy Norway AS',
       'Altinex Oil Norway AS', 'Endeavour Energy Norge AS',
       'Revus Energy AS', 'Altinex Oil AS', 'OER Oil AS',
       'Esso Exploration and Production Norway A/S',
       'Fortum Pe

In [751]:
field_licensees_df_1997_2023.head()

Unnamed: 0,field,fldOwnerName,fldOwnerKind,fldOwnerFrom,fldOwnerTo,fldLicenseeFrom,fldLicenseeTo,old_company_name,company_share,fldSdfiShare,field_id,company_id,fldLicenseeDateUpdated,DatesyncNPD,new_company_name
0,albuskjell,18,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,TotalEnergies EP Norge AS,39.896,,43437,35000016,02.01.2024,22.05.2024,TotalEnergies EP Norge
1,albuskjell,18,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,ConocoPhillips Skandinavia AS,35.112,,43437,2410696,02.01.2024,22.05.2024,ConocoPhillips
2,albuskjell,18,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Vår Energi ASA,12.388,,43437,50000032,02.01.2024,22.05.2024,Vår Energi AS
3,albuskjell,18,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Sval Energi AS,7.604,0.0,43437,36536246,02.01.2024,22.05.2024,Sval Energi AS
4,albuskjell,18,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Petoro AS,5.0,,43437,1626839,02.01.2024,22.05.2024,Petoro AS


In [752]:
# Group by year
field_licensees_grouped_old_names = (
    field_licensees_df_1997_2023.groupby(["field", "fldLicenseeFrom", "company_share"])[
        "old_company_name"
    ]
    .apply(list)
    .reset_index()
)
field_licensees_grouped_new_names = (
    field_licensees_df_1997_2023.groupby(["field", "fldLicenseeFrom", "company_share"])[
        "new_company_name"
    ]
    .apply(list)
    .reset_index()
)

display(field_licensees_grouped_old_names)
display(field_licensees_grouped_new_names)

Unnamed: 0,field,fldLicenseeFrom,company_share,old_company_name
0,aasta hansteen,2013-06-07,10.0,[ConocoPhillips Skandinavia AS]
1,aasta hansteen,2013-06-07,15.0,[OMV (Norge) AS]
2,aasta hansteen,2013-06-07,75.0,[Statoil Petroleum AS]
3,aasta hansteen,2014-12-01,10.0,[ConocoPhillips Skandinavia AS]
4,aasta hansteen,2014-12-01,15.0,[OMV (Norge) AS]
...,...,...,...,...
7457,øst frigg,2021-12-31,40.0,[Equinor Energy AS]
7458,øst frigg,2021-12-31,47.7,[Aker BP ASA]
7459,øst frigg,2023-05-02,12.3,[PGNiG Upstream Norway AS]
7460,øst frigg,2023-05-02,40.0,[Equinor Energy AS]


Unnamed: 0,field,fldLicenseeFrom,company_share,new_company_name
0,aasta hansteen,2013-06-07,10.0,[ConocoPhillips]
1,aasta hansteen,2013-06-07,15.0,[OMV (Norge) AS]
2,aasta hansteen,2013-06-07,75.0,[Equinor ASA]
3,aasta hansteen,2014-12-01,10.0,[ConocoPhillips]
4,aasta hansteen,2014-12-01,15.0,[OMV (Norge) AS]
...,...,...,...,...
7457,øst frigg,2021-12-31,40.0,[Equinor ASA]
7458,øst frigg,2021-12-31,47.7,[Aker BP]
7459,øst frigg,2023-05-02,12.3,[PGNiG]
7460,øst frigg,2023-05-02,40.0,[Equinor ASA]


In [753]:
# Get the latest licensee for each field
latest_licensee = field_licensees_grouped.groupby("field").last().reset_index()
latest_licensee

Unnamed: 0,field,fldLicenseeFrom,company_share,0
0,aasta hansteen,2019-11-13,51.000,"[old_company_name, new_company_name]"
1,albuskjell,2022-09-30,39.896,"[old_company_name, new_company_name]"
2,alve,2021-09-30,53.000,"[old_company_name, new_company_name]"
3,alvheim,2023-01-01,80.000,"[old_company_name, new_company_name]"
4,atla,2023-05-02,40.000,"[old_company_name, new_company_name]"
...,...,...,...,...
114,yme,2023-05-02,55.000,"[old_company_name, new_company_name]"
115,yttergryta,2022-01-28,45.750,"[old_company_name, new_company_name]"
116,åsgard,2023-01-01,35.010,"[old_company_name, new_company_name]"
117,ærfugl nord,2019-11-13,30.000,"[old_company_name, new_company_name]"


## Merging


Merging production and emissions data


In [754]:
display(emissions_df.head())
display(len(emissions_df.field.unique()))
display(emissions_df.describe())

Unnamed: 0,field,year,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3
0,grane,2022,180.655154,993246298,equinor energy as,36.071693,404.47072,16.327993,1896859.0
1,grane,2023,188.26798,993246298,equinor energy as,66.983955,306.38291,24.543974,2161262.82
2,norne,1997,761.336994,893246592,equinor energy as,383.058529,2944.992072,1.967516,343.0
3,norne,1998,418.303952,893246592,equinor energy as,826.358898,1792.114235,3.134315,68208.0
4,norne,1999,347.05988,893246592,equinor energy as,1042.015537,1412.344364,11.709334,280113.0


89

Unnamed: 0,year,yearly_co2_emissions_1000_tonnes,org_number,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3
count,1317.0,1317.0,1317.0,1317.0,1317.0,1136.0,1079.0
mean,2011.767654,213.257702,954290100.0,464.842584,887.828624,53.804176,4280611.0
std,7.316805,280.550893,49788270.0,1165.504078,1180.669094,172.118372,11334120.0
min,1997.0,0.0,812723500.0,0.0,0.0,2e-06,3.0
25%,2006.0,13.201286,912729800.0,0.2525,149.681292,0.072987,6574.24
50%,2012.0,123.824173,993246300.0,116.289996,437.424082,4.541435,331847.5
75%,2018.0,270.683473,993246800.0,362.858337,1125.734511,37.739572,2367200.0
max,2023.0,1860.344868,997004800.0,11427.196781,8145.884915,4137.197766,79072760.0


In [755]:
# Merging field_facility_well_investment_df with emissions_df

field_emissions_df = field_facility_well_investment_df.merge(
    emissions_df, on=["field", "year"], how="left"
)
field_emissions_df.head()

fields_prod_emissions_1997_2023_df = field_emissions_df[
    (field_emissions_df["year"] >= 1997) & (field_emissions_df["year"] <= 2023)
]

In [756]:
fields_prod_emissions_1997_2023_df.head()

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes,well_final_vertical_depth_mean,well_final_vertical_depth_std,well_water_depth_mean,well_water_depth_std,investments_mill_nok,future_investments_mill_nok,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3
0,aasta hansteen,2018,0.0,0.02734,0.0,0.00031,0.02764,1e-05,23395946,0.0,0.012227,0.0,0.000139,0.012361,4e-06,Approved for production,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,4440.0,836.0,72.503572,912731456.0,equinor energy as,67.148045,988.44101,0.183095,8498.23606
1,aasta hansteen,2019,0.0,6.75362,0.0,0.16691,6.92054,0.01109,23395946,0.0,0.218831,0.0,0.005535,0.224157,0.000811,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,763.0,836.0,177.647453,912731456.0,equinor energy as,93.355221,202.908995,0.771865,19591.5
2,aasta hansteen,2020,0.0,8.74941,0.0,0.19168,8.94108,0.02058,23395946,0.0,0.078717,0.0,0.001442,0.079973,0.000354,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,119.0,836.0,192.207951,912731456.0,equinor energy as,94.063341,147.469687,1.465783,30992.76825
3,aasta hansteen,2021,0.0,8.91959,0.0,0.17228,9.09186,0.01942,23395946,0.0,0.074974,0.0,0.002298,0.076771,0.000203,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,119.0,836.0,191.436769,912731456.0,equinor energy as,103.162276,138.530349,0.397254,24960.228401
4,aasta hansteen,2022,0.0,8.88961,0.0,0.18506,9.07466,0.02119,23395946,0.0,0.04938,0.0,0.001643,0.049901,0.000179,Producing,Producing,22556.0,aasta hansteen,True,25.833333,2.041241,1282.666667,32.407818,0.0,0.0,5.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,8.0,2601.5,88.388348,1286.5,34.648232,298.0,836.0,192.895469,912731456.0,equinor energy as,34.041866,206.022062,0.42155,25951.940863


In [757]:
field_overview_df

Unnamed: 0,fldName,cmpLongName,fldCurrentActivitySatus,wlbName,wlbCompletionDate,fldMainArea,fldOwnerKind,fldOwnerName,fldMainSupplyBase,fldHcType,field_owner,field_id,wlbNpdidWellbore,cmpNpdidCompany,fldFactPageUrl,fldFactMapUrl,fldDateUpdated,fldDateUpdatedMax,DatesyncNPD,current_status
0,albuskjell,ConocoPhillips Skandinavia AS,Shut down,1/6-1,26.11.1972,North sea,PRODUCTION LICENSE,018,,GAS/CONDENSATE,20900.0,43437,239,2410696.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Shut down
1,alve,Equinor Energy AS,Producing,6507/3-1,26.10.1990,Norwegian sea,PRODUCTION LICENSE,159 B,Sandnessjøen,GAS/CONDENSATE,2819945.0,4444332,1533,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Producing
2,alve nord,Aker BP ASA,Approved for production,6607/12-2 S,25.10.2011,Norwegian sea,PRODUCTION LICENSE,127 C,,OIL/GAS,29427330.0,42002483,6642,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Approved for production
3,alvheim,Aker BP ASA,Producing,24/6-2,08.07.1998,North sea,PRODUCTION LICENSE,203,,OIL/GAS,22436.0,2845712,3397,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,02.01.2024,29.05.2024,Producing
4,atla,TotalEnergies EP Norge AS,Shut down,25/5-7,23.10.2010,North sea,PRODUCTION LICENSE,102 C,,GAS/CONDENSATE,5467035.0,21106284,6423,35000016.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Shut down
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,ærfugl nord,Aker BP ASA,Producing,6507/3-9 S,15.07.2012,Norwegian sea,PRODUCTION LICENSE,212 E,Sandnessjøen,GAS/CONDENSATE,4966902.0,38542241,6951,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,20.04.2024,29.05.2024,Producing
136,ørn,Aker BP ASA,Approved for production,6507/2-5 S,14.09.2019,Norwegian sea,PRODUCTION LICENSE,942,,GAS,30676176.0,42002484,8775,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,02.01.2024,29.05.2024,Approved for production
137,øst frigg,Aker BP ASA,Shut down,25/2-1,21.09.1973,North sea,PRODUCTION LICENSE,873,,GAS,28973585.0,43576,353,28544099.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,02.01.2024,01.03.2024,29.05.2024,Shut down
138,åsgard,Equinor Energy AS,Producing,6507/11-1,10.12.1981,Norwegian sea,BUSINESS ARRANGEMENT AREA,ÅSGARD UNIT,Kristiansund,GAS/CONDENSATE,40564.0,43765,68,32011216.0,https://factpages.sodir.no/factpages/default.a...,https://factmaps.sodir.no/factmaps/3_0/?run=Fi...,03.04.2024,01.03.2024,29.05.2024,Producing


Merging operator data


In [758]:
latest_licensee

Unnamed: 0,field,fldLicenseeFrom,company_share,0
0,aasta hansteen,2019-11-13,51.000,"[old_company_name, new_company_name]"
1,albuskjell,2022-09-30,39.896,"[old_company_name, new_company_name]"
2,alve,2021-09-30,53.000,"[old_company_name, new_company_name]"
3,alvheim,2023-01-01,80.000,"[old_company_name, new_company_name]"
4,atla,2023-05-02,40.000,"[old_company_name, new_company_name]"
...,...,...,...,...
114,yme,2023-05-02,55.000,"[old_company_name, new_company_name]"
115,yttergryta,2022-01-28,45.750,"[old_company_name, new_company_name]"
116,åsgard,2023-01-01,35.010,"[old_company_name, new_company_name]"
117,ærfugl nord,2019-11-13,30.000,"[old_company_name, new_company_name]"


In [759]:
field_licensees_df_1997_2023

Unnamed: 0,field,fldOwnerName,fldOwnerKind,fldOwnerFrom,fldOwnerTo,fldLicenseeFrom,fldLicenseeTo,old_company_name,company_share,fldSdfiShare,field_id,company_id,fldLicenseeDateUpdated,DatesyncNPD,new_company_name
0,albuskjell,018,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,TotalEnergies EP Norge AS,39.896,,43437,35000016,02.01.2024,22.05.2024,TotalEnergies EP Norge
1,albuskjell,018,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,ConocoPhillips Skandinavia AS,35.112,,43437,2410696,02.01.2024,22.05.2024,ConocoPhillips
2,albuskjell,018,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Vår Energi ASA,12.388,,43437,50000032,02.01.2024,22.05.2024,Vår Energi AS
3,albuskjell,018,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Sval Energi AS,7.604,0.0,43437,36536246,02.01.2024,22.05.2024,Sval Energi AS
4,albuskjell,018,PRODUCTION LICENSE,1975-04-25,,2022-09-30,,Petoro AS,5.000,,43437,1626839,02.01.2024,22.05.2024,Petoro AS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9429,aasta hansteen,218,PRODUCTION LICENSE,2013-06-07,,2014-12-01,15.05.2018,OMV (Norge) AS,15.000,,23395946,4460594,02.01.2024,22.05.2024,OMV (Norge) AS
9430,aasta hansteen,218,PRODUCTION LICENSE,2013-06-07,,2014-12-01,15.05.2018,ConocoPhillips Skandinavia AS,10.000,,23395946,2410696,02.01.2024,22.05.2024,ConocoPhillips
9431,aasta hansteen,218,PRODUCTION LICENSE,2013-06-07,,2013-06-07,30.11.2014,Statoil Petroleum AS,75.000,,23395946,17237817,02.01.2024,22.05.2024,Equinor ASA
9432,aasta hansteen,218,PRODUCTION LICENSE,2013-06-07,,2013-06-07,30.11.2014,OMV (Norge) AS,15.000,,23395946,4460594,02.01.2024,22.05.2024,OMV (Norge) AS


In [760]:
from collections import defaultdict

# Convert to DataFrame
ownership_df = field_licensees_df_1997_2023.copy()
big_df = fields_prod_emissions_1997_2023_df.copy()

# Ensure date columns are in datetime format
ownership_df["fldLicenseeFrom"] = pd.to_datetime(ownership_df["fldLicenseeFrom"])
big_df["date"] = pd.to_datetime(big_df["year"].astype(str) + "-01-01")

# Create a dictionary to hold ownership information
ownership_dict = defaultdict(list)

for idx, row in ownership_df.iterrows():
    ownership_dict[(row["field"], row["fldLicenseeFrom"])].append(
        (row["old_company_name"], row["company_share"])
    )

# Create a new dataframe with the aggregated ownership information
aggregated_ownership = []

for (field, date), owners in ownership_dict.items():
    aggregated_ownership.append(
        {
            "field": field,
            "date": date,
            "ownership_original": {owner: share for owner, share in owners},
        }
    )

aggregated_ownership_df = pd.DataFrame(aggregated_ownership)

# Merge the aggregated ownership information with the main dataframe
merged_dict_df = pd.merge_asof(
    big_df.sort_values("date"),
    aggregated_ownership_df.sort_values("date"),
    by="field",
    left_on="date",
    right_on="date",
    direction="nearest",
)

# Drop the extra 'date' column used for merging
merged_dict_df.drop(columns=["date"], inplace=True)

# Display the merged dataframe
merged_dict_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes,well_final_vertical_depth_mean,well_final_vertical_depth_std,well_water_depth_mean,well_water_depth_std,investments_mill_nok,future_investments_mill_nok,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3,ownership_original
0,statfjord nord,1997,3.93531,0.17288,0.09230,0.00000,4.20051,0.00000,43679,0.053898,0.000405,0.000334,0.000000,0.054005,0.000000,Producing,Producing,21084.0,statfjord,True,27.500000,15.000000,241.750000,65.030121,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,6.0,9.0,0.0,4.0,0.0,13.0,0.0,17.0,2888.800000,0.000000,285.000000,0.000000,255.0,2946.0,,,,,,,,"{'Den norske stats oljeselskap a.s': 50.0, 'Mo..."
1,øst frigg,1997,0.00000,0.09648,0.00000,0.00009,0.09658,0.00000,43576,0.000000,0.005968,0.000000,0.000006,0.005970,0.000000,Producing,Shut down,28973585.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2893.000000,14.142136,368.000000,0.000000,0.0,0.0,,,,,,,,"{'TotalFinaElf Exploration Norge AS': 37.2254,..."
2,oseberg,1997,27.26827,0.00000,0.00000,0.00000,27.26827,0.00000,43625,0.212682,0.000000,0.000000,0.000000,0.212682,0.000000,Producing,Producing,3500071.0,oseberg,True,20.625000,6.232117,106.444444,3.045944,0.0,0.0,3.0,2.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,19.0,10.0,0.0,11.0,331.0,10.0,0.0,66.0,113.0,213.0,373.0,19.0,2676.114286,74.724437,108.857143,0.377964,1489.0,26912.0,808.695820,993246603.0,equinor energy as,933.013065,3606.140000,74.756825,2.591747e+06,"{'Den norske stats oljeselskap a.s': 64.78379,..."
3,troll,1997,14.76705,14.31718,0.00000,0.00000,29.08419,0.00000,46437,0.113838,0.497360,0.000000,0.000000,0.546631,0.000000,Producing,Producing,41105.0,troll,True,27.042857,5.797140,329.230769,9.896770,0.0,0.0,82.0,5.0,0.0,23.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,26.0,95.0,58.0,0.0,4.0,45.0,135.0,65.0,119.0,1585.742424,111.623225,315.024242,14.539494,3747.0,26235.0,239.330258,912732401.0,equinor energy as,1722.809231,1466.556400,187.452010,7.022956e+06,"{'Den norske stats oljeselskap a.s': 74.7431, ..."
4,gyda,1997,2.50518,0.58070,0.31003,0.00000,3.39588,0.00000,43492,0.018114,0.005547,0.002083,0.000000,0.023784,0.000000,Producing,Shut down,,gyda,True,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,132.0,0.0,0.0,61.0,0.0,75.0,136.0,0.0,4126.550000,5.020458,66.000000,0.000000,191.0,0.0,96.382270,993258253.0,repsol norge as,277.710030,380.180000,17.686231,8.166150e+05,"{'BP Petroleum Dev. of Norway AS': 46.625, 'De..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1798,goliat,2023,1.49160,0.00000,0.00000,0.00000,1.49160,1.69684,5774394,0.022000,0.000000,0.000000,0.000000,0.022000,0.023739,Producing,Producing,22644.0,goliat,True,21.111111,3.333333,326.500000,117.391321,0.0,0.0,8.0,2.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,0.0,35.0,0.0,0.0,0.0,17.0,90.0,0.0,66.0,11.0,111.0,0.0,188.0,1473.333333,0.000000,350.000000,0.000000,0.0,2904.0,19.036410,812726242.0,vår energi asa,42.932918,72.508489,0.004420,4.095000e+02,"{'Vår Energi ASA': 65.0, 'Equinor Energy AS': ..."
1799,skogul,2023,0.17127,0.00930,0.00000,0.00000,0.18056,0.47065,31164600,0.004611,0.000288,0.000000,0.000000,0.004895,0.013090,Producing,Producing,4910195.0,alvheim,False,20.000000,0.000000,115.000000,7.071068,0.0,0.0,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2986.000000,0.000000,325.000000,0.000000,0.0,19.0,,,,,,,,"{'Aker BP ASA': 65.0, 'PGNiG Upstream Norway A..."
1800,vigdis,2023,1.04500,0.00000,0.00000,0.00000,1.04500,3.36423,43732,0.009673,0.000000,0.000000,0.000000,0.009673,0.050919,Producing,Producing,21516.0,snorre,True,19.000000,5.477226,277.090909,22.416309,0.0,0.0,41.0,0.0,7.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,0.0,107.0,0.0,0.0,0.0,321.0,92.0,0.0,158.0,99.0,352.0,0.0,609.0,2523.000000,0.000000,283.000000,0.000000,0.0,5921.0,7.827598,993246751.0,equinor energy as,0.250000,106.185733,0.024241,2.360600e+03,"{'Equinor Energy AS': 41.5, 'Petoro AS': 30.0,..."
1801,sleipner øst,2023,0.01610,0.08822,0.00528,0.00000,0.10958,0.01414,43478,0.000484,0.001789,0.000130,0.000000,0.002140,0.000250,Producing,Producing,41211.0,sleipner øst,True,38.000000,16.431677,83.600000,0.894427,0.0,0.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,152.0,0.0,31.0,0.0,0.0,24.0,40.0,18.0,0.0,69.0,2.0,194.0,221.0,44.0,2619.000000,321.026479,82.000000,0.000000,0.0,1423.0,453.372912,993246646.0,equinor energy as,75.722882,1729.250691,0.371457,4.873243e+04,"{'Equinor Energy AS': 59.6, 'Vår Energi ASA': ..."


In [761]:
# Ensure date columns are in datetime format
ownership_df["fldLicenseeFrom"] = pd.to_datetime(ownership_df["fldLicenseeFrom"])
merged_dict_df["date"] = pd.to_datetime(merged_dict_df["year"].astype(str) + "-01-01")

# Create a dictionary to hold ownership information
ownership_dict = defaultdict(list)

for idx, row in ownership_df.iterrows():
    ownership_dict[(row["field"], row["fldLicenseeFrom"])].append(
        (row["new_company_name"], row["company_share"])
    )

# Create a new dataframe with the aggregated ownership information
aggregated_ownership = []

for (field, date), owners in ownership_dict.items():
    aggregated_ownership.append(
        {
            "field": field,
            "date": date,
            "ownership_new_name": {owner: share for owner, share in owners},
        }
    )

aggregated_ownership_df = pd.DataFrame(aggregated_ownership)

# Merge the aggregated ownership information with the main dataframe
merged_dict_df = pd.merge_asof(
    merged_dict_df.sort_values("date"),
    aggregated_ownership_df.sort_values("date"),
    by="field",
    left_on="date",
    right_on="date",
    direction="nearest",
)

# Drop the extra 'date' column used for merging
merged_dict_df.drop(columns=["date"], inplace=True)

# Display the merged dataframe
merged_dict_df

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes,well_final_vertical_depth_mean,well_final_vertical_depth_std,well_water_depth_mean,well_water_depth_std,investments_mill_nok,future_investments_mill_nok,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3,ownership_original,ownership_new_name
0,statfjord nord,1997,3.93531,0.17288,0.09230,0.00000,4.20051,0.00000,43679,0.053898,0.000405,0.000334,0.000000,0.054005,0.000000,Producing,Producing,21084.0,statfjord,True,27.5,15.0,241.75,65.030121,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,6.0,9.0,0.0,4.0,0.0,13.0,0.0,17.0,2888.8,0.000000,285.000000,0.000000,255.0,2946.0,,,,,,,,"{'Den norske stats oljeselskap a.s': 50.0, 'Mo...","{'Equinor ASA': 1.875, 'Mobil Development Norw..."
1,veslefrikk,1997,3.47468,0.13919,0.08596,0.00000,3.69981,0.00000,43618,0.052851,0.008952,0.005461,0.000000,0.053418,0.000000,Producing,Shut down,21212.0,veslefrikk,True,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,111.0,0.0,0.0,69.0,6.0,36.0,111.0,0.0,3317.0,0.000000,175.000000,0.000000,229.0,0.0,156.204920,993246905.0,equinor energy as,64.319881,973.466100,88.599389,2.782760e+06,"{'Den norske stats oljeselskap a.s': 55.0, 'To...","{'Equinor ASA': 55.0, 'TotalEnergies EP Norge'..."
2,frøy,1997,1.39602,0.28878,0.00000,0.01732,1.70211,0.00000,43597,0.026260,0.005287,0.000000,0.000942,0.030860,0.000000,Producing,Shut down,3810636.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,6.0,0.0,6.0,12.0,0.0,3352.2,70.481913,120.000000,0.000000,0.0,0.0,,,,,,,,"{'Den norske stats oljeselskap a.s': 53.96, 'T...","{'Equinor ASA': 53.96, 'TotalEnergies EP Norge..."
3,hod,1997,0.46766,0.09469,0.02645,0.00000,0.58879,0.00000,43485,0.003632,0.001044,0.000238,0.000000,0.004729,0.000000,Producing,Producing,21052.0,valhall,True,30.0,0.0,72.00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,6.0,30.0,36.0,0.0,2917.5,159.099026,72.000000,0.000000,3.0,119.0,,,,,,,,"{'Enterprise Oil Norwegian AS': 25.0, 'Elf Pet...","{'Shell': 25.0, 'TotalEnergies EP Norge': 25.0..."
4,albuskjell,1997,0.04615,0.19171,0.02005,0.00000,0.25789,0.00000,43437,0.000487,0.001819,0.000225,0.000000,0.002297,0.000000,Producing,Shut down,20900.0,albuskjell,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2601.5,88.388348,1286.500000,34.648232,0.0,0.0,,,,,,,,"{'Phillips Petroleum Company Norway': 36.96, '...","{'ConocoPhillips': 36.96, 'TotalEnergies EP No..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1798,visund sør,2023,0.05782,0.11078,0.02729,0.00000,0.19589,0.00596,20461008,0.008405,0.019391,0.003919,0.000000,0.028926,0.001467,Producing,Producing,40726.0,gullfaks,False,25.0,0.0,292.00,0.000000,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,19.0,3.0,0.0,0.0,11.0,14.0,0.0,25.0,2882.0,0.000000,291.000000,0.000000,0.0,600.0,,,,,,,,"{'Equinor Energy AS': 53.2, 'Petoro AS': 30.0,...","{'Equinor ASA': 53.2, 'Petoro AS': 30.0, 'Cono..."
1799,skuld,2023,0.02175,0.00000,0.00000,0.00000,0.02175,0.10527,21350124,0.002882,0.000000,0.000000,0.000000,0.002882,0.013971,Producing,Producing,21836.0,norne,True,20.0,0.0,353.00,8.660254,0.0,0.0,5.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,20.0,15.0,33.0,0.0,68.0,2941.0,0.000000,358.000000,0.000000,0.0,310.0,0.647334,893246592.0,equinor energy as,0.000000,7.764015,,,"{'Equinor Energy AS': 63.95455, 'Petoro AS': 2...","{'Equinor ASA': 63.95455, 'Petoro AS': 24.5454..."
1800,byrding,2023,0.01183,0.00000,0.00000,0.00000,0.01183,0.00000,28975067,0.000421,0.000000,0.000000,0.000000,0.000421,0.000000,Producing,Producing,3426781.0,troll,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,3120.5,6.363961,361.000000,0.000000,0.0,16.0,,,,,,,,"{'Equinor Energy AS': 70.0, 'Neptune Energy No...","{'Equinor ASA': 70.0, 'Neptune Energy Norge AS..."
1801,hyme,2023,0.30909,0.05169,0.03130,0.00000,0.39208,0.29335,20474183,0.018244,0.003477,0.002052,0.000000,0.023324,0.022483,Producing,Producing,2987047.0,njord,False,25.0,0.0,256.00,0.000000,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3910.0,14.583095,125.000000,0.000000,0.0,23.0,,,,,,,,"{'Equinor Energy AS': 42.5, 'Wintershall Dea N...","{'Equinor ASA': 42.5, 'Wintershall Dea Norge A..."


### Field reserves


In [762]:
# Rename fldName to field, fldDateOffResEstDisplay to dt year showing year

field_reserves_original_df = field_reserves_df.copy()
field_reserves_df = field_reserves_df.rename(
    columns={"fldName": "field", "fldDateOffResEstDisplay": "dt_year"}
)

# Making fields lower case
field_reserves_df["field"] = field_reserves_df["field"].str.lower()

# Rename fldRecoverableOil, fldRecoverableGas, fldRecoverableNGL, fldRecoverableCondensate, fldRecoverableOE to original_recoverable_oil, original_recoverable_gas, original_recoverable_ngl, original_recoverable_condensate, original_recoverable_oe

field_reserves_df = field_reserves_df.rename(
    columns={
        "fldRecoverableOil": "original_recoverable_oil",
        "fldRecoverableGas": "original_recoverable_gas",
        "fldRecoverableNGL": "original_recoverable_ngl",
        "fldRecoverableCondensate": "original_recoverable_condensate",
        "fldRecoverableOE": "original_recoverable_oe",
    }
)

field_reserves_df.head()

Unnamed: 0,field,original_recoverable_oil,original_recoverable_gas,original_recoverable_ngl,original_recoverable_condensate,original_recoverable_oe,fldRemainingOil,fldRemainingGas,fldRemainingNGL,fldRemainingCondensate,fldRemainingOE,dt_year,fldNpdidField,DatesyncNPD
0,albuskjell,7.35,15.53,0.99,0.0,24.76,0.0,0.0,0.0,0.0,0.0,31.12.2023,43437,07.06.2024
1,alve,2.62,11.99,1.84,0.0,18.11,0.33,3.0,0.41,0.0,4.11,31.12.2023,4444332,07.06.2024
2,alve nord,1.55,4.15,0.59,0.0,6.82,1.55,4.15,0.59,0.0,6.82,31.12.2023,42002483,07.06.2024
3,alvheim,62.78,17.19,0.0,0.0,79.97,11.5,8.85,0.0,0.0,20.35,31.12.2023,2845712,07.06.2024
4,atla,0.28,1.48,0.0,0.0,1.76,0.0,0.0,0.0,0.0,0.0,31.12.2023,21106284,07.06.2024


In [763]:
# Convert dy_year to show only year

field_reserves_df["dt_year"] = pd.to_datetime(field_reserves_df["dt_year"]).dt.year

# Rename fldRemainingOil	fldRemainingGas	fldRemainingNGL	fldRemainingCondensate to remaining_recoverable_oil	remaining_recoverable_gas	remaining_recoverable_ngl	remaining_recoverable_condensate

field_reserves_df = field_reserves_df.rename(
    columns={
        "fldRemainingOil": "current_remaining_recoverable_oil",
        "fldRemainingGas": "current_remaining_recoverable_gas",
        "fldRemainingNGL": "current_remaining_recoverable_ngl",
        "fldRemainingCondensate": "current_remaining_recoverable_condensate",
        "fldRemainingOE": "current_remaining_recoverable_oe",
    }
)
field_reserves_df.head()

  cache_array = _maybe_cache(arg, format, cache, convert_listlike)


Unnamed: 0,field,original_recoverable_oil,original_recoverable_gas,original_recoverable_ngl,original_recoverable_condensate,original_recoverable_oe,current_remaining_recoverable_oil,current_remaining_recoverable_gas,current_remaining_recoverable_ngl,current_remaining_recoverable_condensate,current_remaining_recoverable_oe,dt_year,fldNpdidField,DatesyncNPD
0,albuskjell,7.35,15.53,0.99,0.0,24.76,0.0,0.0,0.0,0.0,0.0,2023,43437,07.06.2024
1,alve,2.62,11.99,1.84,0.0,18.11,0.33,3.0,0.41,0.0,4.11,2023,4444332,07.06.2024
2,alve nord,1.55,4.15,0.59,0.0,6.82,1.55,4.15,0.59,0.0,6.82,2023,42002483,07.06.2024
3,alvheim,62.78,17.19,0.0,0.0,79.97,11.5,8.85,0.0,0.0,20.35,2023,2845712,07.06.2024
4,atla,0.28,1.48,0.0,0.0,1.76,0.0,0.0,0.0,0.0,0.0,2023,21106284,07.06.2024


In [764]:
# rename fldRemainingOE to remaining_recoverable_oe

# Columns containing 'current' and 'original'
current_columns = [col for col in field_reserves_df.columns if "current" in col]
original_columns = [col for col in field_reserves_df.columns if "original" in col]


field_original_current_reserves_df = field_reserves_df[
    ["field"] + current_columns + original_columns
].copy()
field_original_current_reserves_df.head()

Unnamed: 0,field,current_remaining_recoverable_oil,current_remaining_recoverable_gas,current_remaining_recoverable_ngl,current_remaining_recoverable_condensate,current_remaining_recoverable_oe,original_recoverable_oil,original_recoverable_gas,original_recoverable_ngl,original_recoverable_condensate,original_recoverable_oe
0,albuskjell,0.0,0.0,0.0,0.0,0.0,7.35,15.53,0.99,0.0,24.76
1,alve,0.33,3.0,0.41,0.0,4.11,2.62,11.99,1.84,0.0,18.11
2,alve nord,1.55,4.15,0.59,0.0,6.82,1.55,4.15,0.59,0.0,6.82
3,alvheim,11.5,8.85,0.0,0.0,20.35,62.78,17.19,0.0,0.0,79.97
4,atla,0.0,0.0,0.0,0.0,0.0,0.28,1.48,0.0,0.0,1.76


In [771]:
# Merging field_original_reserves_df with fields_prod_emissions_1997_2023_df

final_merged_df = merged_dict_df.merge(
    field_original_current_reserves_df, on=["field"], how="left"
)
final_merged_df.head()

Unnamed: 0,field,year,net_oil_prod_yearly_mill_sm3,net_gas_prod_yearly_bill_sm3,net_ngl_prod_yearly_mill_sm3,net_condensate_prod_yearly_mill_sm3,net_oil_eq_prod_yearly_mill_sm3,produced_water_yearly_mill_sm3,field_id,net_oil_prod_monthly_sm3_volatility,net_gas_prod_monthly_sm3_volatility,net_ngl_prod_monthly_sm3_volatility,net_condensate_prod_monthly_sm3_volatility,net_oil_eq_prod_monthly_sm3_volatility,produced_water_in_field_volatility,status,current_status,field_owner,processing_field,field_in_emissions,facilities_lifetime_mean,facilities_lifetime_std,facilities_water_depth_mean,facilities_water_depth_std,subsea_facilites_shut_down,surface_facilites_shut_down,subsea_facilites_in_service,surface_facilites_in_service,facility_kind_multi well template,facility_kind_single well template,facility_kind_offshore wind turbine,facility_kind_subsea structure,facility_kind_fpso,facility_kind_jacket 8 legs,facility_kind_condeep monoshaft,facility_kind_loading system,facility_kind_jacket 4 legs,facility_kind_jacket tripod,facility_kind_fsu,facility_kind_semisub steel,facility_kind_condeep 4 shafts,facility_kind_landfall,facility_kind_tlp concrete,facility_kind_jack-up 3 legs,facility_kind_jacket 6 legs,facility_kind_tlp steel,facility_kind_semisub concrete,facility_kind_mopustor,facility_kind_spar,well_status_closed,well_status_drilling,well_status_injecting,well_status_junked,well_status_online/operational,well_status_p&a,well_status_plugged,well_status_producing,well_status_suspended,well_purpose_injection,well_purpose_observation,well_purpose_production,well_subsea_no,well_subsea_yes,well_final_vertical_depth_mean,well_final_vertical_depth_std,well_water_depth_mean,well_water_depth_std,investments_mill_nok,future_investments_mill_nok,yearly_co2_emissions_1000_tonnes,org_number,operator,yearly_ch4_emissions_tons,yearly_nox_emissions_tons,yearly_oil_spill_emissions_tons,yearly_water_emissions_m3,ownership_original,ownership_new_name,current_remaining_recoverable_oil,current_remaining_recoverable_gas,current_remaining_recoverable_ngl,current_remaining_recoverable_condensate,current_remaining_recoverable_oe,original_recoverable_oil,original_recoverable_gas,original_recoverable_ngl,original_recoverable_condensate,original_recoverable_oe
0,statfjord nord,1997,3.93531,0.17288,0.0923,0.0,4.20051,0.0,43679,0.053898,0.000405,0.000334,0.0,0.054005,0.0,Producing,Producing,21084.0,statfjord,True,27.5,15.0,241.75,65.030121,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,6.0,9.0,0.0,4.0,0.0,13.0,0.0,17.0,2888.8,0.0,285.0,0.0,255.0,2946.0,,,,,,,,"{'Den norske stats oljeselskap a.s': 50.0, 'Mo...","{'Equinor ASA': 1.875, 'Mobil Development Norw...",3.72,0.23,0.05,0.0,4.05,44.12,2.37,1.12,0.0,48.62
1,veslefrikk,1997,3.47468,0.13919,0.08596,0.0,3.69981,0.0,43618,0.052851,0.008952,0.005461,0.0,0.053418,0.0,Producing,Shut down,21212.0,veslefrikk,True,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,111.0,0.0,0.0,69.0,6.0,36.0,111.0,0.0,3317.0,0.0,175.0,0.0,229.0,0.0,156.20492,993246905.0,equinor energy as,64.319881,973.4661,88.599389,2782760.0,"{'Den norske stats oljeselskap a.s': 55.0, 'To...","{'Equinor ASA': 55.0, 'TotalEnergies EP Norge'...",0.0,0.0,0.0,0.0,0.0,55.34,4.19,1.81,0.0,62.97
2,frøy,1997,1.39602,0.28878,0.0,0.01732,1.70211,0.0,43597,0.02626,0.005287,0.0,0.000942,0.03086,0.0,Producing,Shut down,3810636.0,frigg,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,6.0,0.0,6.0,12.0,0.0,3352.2,70.481913,120.0,0.0,0.0,0.0,,,,,,,,"{'Den norske stats oljeselskap a.s': 53.96, 'T...","{'Equinor ASA': 53.96, 'TotalEnergies EP Norge...",0.0,0.0,0.0,0.0,0.0,5.55,1.61,0.0,0.11,7.27
3,hod,1997,0.46766,0.09469,0.02645,0.0,0.58879,0.0,43485,0.003632,0.001044,0.000238,0.0,0.004729,0.0,Producing,Producing,21052.0,valhall,True,30.0,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,6.0,30.0,36.0,0.0,2917.5,159.099026,72.0,0.0,3.0,119.0,,,,,,,,"{'Enterprise Oil Norwegian AS': 25.0, 'Elf Pet...","{'Shell': 25.0, 'TotalEnergies EP Norge': 25.0...",4.75,0.82,0.12,0.0,5.8,15.52,2.64,0.62,0.0,19.34
4,albuskjell,1997,0.04615,0.19171,0.02005,0.0,0.25789,0.0,43437,0.000487,0.001819,0.000225,0.0,0.002297,0.0,Producing,Shut down,20900.0,albuskjell,False,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2601.5,88.388348,1286.5,34.648232,0.0,0.0,,,,,,,,"{'Phillips Petroleum Company Norway': 36.96, '...","{'ConocoPhillips': 36.96, 'TotalEnergies EP No...",0.0,0.0,0.0,0.0,0.0,7.35,15.53,0.99,0.0,24.76


## Output


Exporting the final merged data to the `/data/output/emissions_and_production/` folder.


In [772]:
# Checking if file ../../data/output/emissions_and_production/cleaned/fields_prod_emissions_1997_2023.csv exists

final_output = "../../data/output/emissions_and_production/cleaned/fields_prod_emissions_1997_2023.csv"

if not os.path.exists(final_output):
    final_merged_df.to_csv(final_output, index=False)
    print("Saved file")
else:
    print("File already exists")

Saved file


In [767]:
# Inserting watermark of environment and package versions used

%load_ext watermark

%watermark -a "Per Christian Wessel" -d -u -v -m -p pandas,numpy,scipy,matplotlib

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Author: Per Christian Wessel

Last updated: 2024-06-07

Python implementation: CPython
Python version       : 3.9.13
IPython version      : 8.12.0

pandas    : 1.4.3
numpy     : 1.23.5
scipy     : 1.10.0
matplotlib: 3.7.1

Compiler    : Clang 13.0.1 
OS          : Darwin
Release     : 23.4.0
Machine     : x86_64
Processor   : i386
CPU cores   : 8
Architecture: 64bit



Last updated: 2024-06-02

Python implementation: CPython
Python version : 3.9.13
IPython version : 8.12.0

pandas : 1.4.3
numpy : 1.23.5
scipy : 1.10.0
matplotlib: 3.7.1

Compiler : Clang 13.0.1
OS : Darwin
Release : 23.4.0
Machine : x86_64
Processor : i386
CPU cores : 8
Architecture: 64bit
