### VOLVE Dataset Preliminary Study - production data




In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os


import utils

In [8]:
# Read the raw Volve production export. The file is semicolon-delimited and
# writes decimals with a comma, hence the explicit sep/decimal arguments.
rate_cols = [
    'BORE_OIL_VOL',
    'BORE_GAS_VOL',
    'BORE_WAT_VOL',
    'BORE_WI_VOL',
]
csvfile = os.path.join(os.getcwd(), 'data', 'Volve_production_data.csv')
df = pd.read_csv(csvfile, sep=';', decimal=',')

# Force the per-record volume columns to float so later arithmetic never runs
# on integer or object columns.
df = df.astype({name: float for name in rate_cols})

# utils.convert_dates is a project helper that is not shown in this notebook;
# the df.info() listing below reports DATEPRD as datetime64[ns] afterwards.
df['DATEPRD'] = df['DATEPRD'].apply(utils.convert_dates)

# Print column names, non-null counts and dtypes as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15634 entries, 0 to 15633
Data columns (total 28 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   DATEPRD                   15634 non-null  datetime64[ns]
 1   month                     15634 non-null  int64         
 2   year                      15634 non-null  int64         
 3   month-year                15634 non-null  object        
 4   WELL_BORE_CODE            15634 non-null  object        
 5   NPD_WELL_BORE_CODE        15634 non-null  int64         
 6   NPD_WELL_BORE_NAME        15634 non-null  object        
 7   NPD_FIELD_CODE            15634 non-null  int64         
 8   NPD_FIELD_NAME            15634 non-null  object        
 9   NPD_FACILITY_CODE         15634 non-null  int64         
 10  NPD_FACILITY_NAME         15634 non-null  object        
 11  ON_STREAM_HRS             15349 non-null  float64       
 12  AVG_DOWNHOLE_PRESS

In [9]:
# Add a per-well record counter and per-well running totals of the volume
# columns, derive the liquid (oil + water) columns, then drop the columns that
# are not needed downstream.
cum_cols = ['BORE_OIL_CUM','BORE_GAS_CUM','BORE_WAT_CUM','BORE_WI_CUM']

# A running total is only meaningful in chronological order, so accumulate
# over a date-sorted view instead of trusting the CSV row order. The sort is
# stable and the results keep the original index labels, so df.insert() aligns
# them back onto df: df keeps its row order, and a file that is already in
# date order produces exactly the same values as before.
by_well = df.sort_values('DATEPRD', kind='mergesort').groupby('NPD_WELL_BORE_NAME')

# N is the 0-based position of each record within its well.
df.insert(1, 'N', by_well.cumcount())

# cumsum skips NaN while accumulating but leaves NaN at those rows.
cumdf = by_well[rate_cols].cumsum()
for col_orig, col_end in zip(rate_cols, cum_cols):
    # Every insert goes to position 1, so the cumulative columns end up in
    # reverse order of cum_cols, directly after DATEPRD.
    df.insert(1, col_end, cumdf[col_orig])

# Liquid = oil + water, both as a running total and per record.
df['BORE_LIQ_CUM'] = df['BORE_OIL_CUM'] + df['BORE_WAT_CUM']
df['BORE_LIQ_VOL'] = df['BORE_OIL_VOL'] + df['BORE_WAT_VOL']

# Calendar helper columns and numeric/duplicate identifiers are not used by
# the cells that follow.
df = df.drop(columns=['month', 'year', 'month-year', 'WELL_BORE_CODE', 'NPD_WELL_BORE_CODE', 'NPD_FIELD_CODE', 'NPD_FACILITY_CODE', 'FLOW_KIND'])

In [10]:
# Split the frame into a nested mapping:
#     output[well_type][well_name] -> DataFrame holding that well's rows
output = {}

# A single groupby pass hands over each well's rows once (keys in sorted
# order), instead of re-filtering the whole frame with a boolean mask per well.
for well, well_df in df.groupby('NPD_WELL_BORE_NAME'):
    # NOTE(review): np.unique returns sorted values, so a well that carries
    # more than one WELL_TYPE is filed under the alphabetically first one.
    # Confirm this is intended.
    well_type = np.unique(well_df['WELL_TYPE'])[0]
    # The two grouping columns are constant per entry, so they are dropped
    # from the stored frame.
    output.setdefault(well_type, {})[well] = well_df.drop(columns=['NPD_WELL_BORE_NAME', 'WELL_TYPE'])
    

In [11]:
# Persist the nested {well_type: {well_name: DataFrame}} mapping next to the
# source CSV. Pickle files should only ever be loaded back from trusted
# locations.
outfile = os.path.join(os.getcwd(), 'data', 'volve_compiled.pkl')
with open(outfile, mode='wb') as sink:
    pickle.dump(output, sink)

In [12]:
# Number of distinct well types that became top-level keys of the mapping.
len(output)

2

In [13]:
# Number of wells filed under the 'WI' well type (presumably the water
# injectors; confirm against the dataset documentation).
len(output['WI'])

1